diff --git a/.dir-locals.el b/.dir-locals.el deleted file mode 100644 index 2a2af98371b45..0000000000000 --- a/.dir-locals.el +++ /dev/null @@ -1,23 +0,0 @@ -;;; Licensed to the Apache Software Foundation (ASF) under one -;;; or more contributor license agreements. See the NOTICE file -;;; distributed with this work for additional information -;;; regarding copyright ownership. The ASF licenses this file -;;; to you under the Apache License, Version 2.0 (the -;;; "License"); you may not use this file except in compliance -;;; with the License. You may obtain a copy of the License at -;;; -;;; http://www.apache.org/licenses/LICENSE-2.0 -;;; -;;; Unless required by applicable law or agreed to in writing, -;;; software distributed under the License is distributed on an -;;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -;;; KIND, either express or implied. See the License for the -;;; specific language governing permissions and limitations -;;; under the License. - -((cmake-mode . ((indent-tabs-mode . nil))) - (powershell-mode . ((indent-tabs-mode . nil))) - (sh-mode . ((indent-tabs-mode . nil) - (sh-indentation . 2) - (sh-basic-offset . 2))) - (vala-mode . ((indent-tabs-mode . nil)))) diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000000..999f94bae009b --- /dev/null +++ b/.editorconfig @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This is an EditorConfig file: https://editorconfig.org/ + +# This is the top-most config for this project +root = true + +# General settings + +[*] +insert_final_newline = true +trim_trailing_whitespace = true +charset = utf-8 +spelling_language = en + +# Language-specific settings, in approximate alphabetical order + +[*.{c,cc,cpp,h,hh,hpp}] +indent_size = 2 +indent_style = space + +[*.cmake] +indent_size = 2 +indent_style = space + +[CMakeLists.txt] +indent_size = 2 +indent_style = space + +[*.cs] +indent_size = 4 +indent_style = space + +[*.{fbs,proto,thrift}] +indent_size = 2 +indent_style = space + +[*.go] +indent_size = 8 +indent_style = tab +tab_width = 8 + +[*.{js,ts}] +indent_size = 4 +indent_style = space + +[*.{py,pyx,pxd,pxi}] +indent_size = 4 +indent_style = space + +[*.r] +indent_size = 2 +indent_style = space + +[*.rb] +indent_size = 2 +indent_style = space + +[*.rst] +indent_size = 3 +indent_style = space + +[*.sh] +indent_size = 2 +indent_style = space + +[*.vala] +indent_size = 4 +indent_style = space + +[*.{yml,yaml}] +indent_size = 2 +indent_style = space diff --git a/.github/.dir-locals.el b/.github/.dir-locals.el deleted file mode 100644 index a880e4a6bb697..0000000000000 --- a/.github/.dir-locals.el +++ /dev/null @@ -1,19 +0,0 @@ -;;; Licensed to the Apache Software Foundation (ASF) under one -;;; or more contributor license agreements. See the NOTICE file -;;; distributed with this work for additional information -;;; regarding copyright ownership. 
The ASF licenses this file -;;; to you under the Apache License, Version 2.0 (the -;;; "License"); you may not use this file except in compliance -;;; with the License. You may obtain a copy of the License at -;;; -;;; http://www.apache.org/licenses/LICENSE-2.0 -;;; -;;; Unless required by applicable law or agreed to in writing, -;;; software distributed under the License is distributed on an -;;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -;;; KIND, either express or implied. See the License for the -;;; specific language governing permissions and limitations -;;; under the License. - -((js-mode . ((indent-tabs-mode . nil) - (js-indent-level . 2)))) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index beb126eaf9496..82d7da0a8d3ef 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -45,8 +45,8 @@ issues][3] for the Apache Arrow project. Comment on the issue and/or contact [dev@arrow.apache.org](https://lists.apache.org/list.html?dev@arrow.apache.org) with your questions and ideas. -If you’d like to report a bug but don’t have time to fix it, you can still post -it on JIRA, or email the mailing list +If you’d like to report a bug but don’t have time to fix it, you can still create +a GitHub issue, or email the mailing list [dev@arrow.apache.org](https://lists.apache.org/list.html?dev@arrow.apache.org) To contribute a patch: @@ -57,8 +57,8 @@ harder to merge in a large change with a lot of disjoint features. GitHub](https://github.com/apache/arrow/issues). 3. Submit the patch as a GitHub pull request against the main branch. For a tutorial, see the GitHub guides on [forking a repo](https://help.github.com/en/articles/fork-a-repo) -and [sending a pull request](https://help.github.com/en/articles/creating-a-pull-request-from-a-fork). 
So that your pull request syncs with the JIRA issue, prefix your pull request -name with the JIRA issue id (ex: [ARROW-767: [C++] Filesystem abstraction](https://github.com/apache/arrow/pull/4225)) +and [sending a pull request](https://help.github.com/en/articles/creating-a-pull-request-from-a-fork). Prefix your pull request +name with the GitHub issue id (ex: [GH-767: [C++] Filesystem abstraction](https://github.com/apache/arrow/pull/4225)) 4. Make sure that your code passes the unit tests. You can find instructions how to run the unit tests for each Arrow component in its respective README file. diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index ddcdb5ebfd9d9..3839d3e2fc889 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -19,10 +19,6 @@ or MINOR: [${COMPONENT}] ${SUMMARY} -In the case of PARQUET issues on JIRA the title also supports: - - PARQUET-${JIRA_ISSUE_ID}: [${COMPONENT}] ${SUMMARY} - --> ### Rationale for this change diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index e4d3cae96a1e5..52eae68c4f498 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -124,7 +124,7 @@ jobs: shell: bash run: | gem install test-unit - pip install "cython>=0.29.31" setuptools pytest jira setuptools-scm + pip install "cython>=0.29.31" setuptools pytest requests setuptools-scm - name: Run Release Test env: ARROW_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml index 4e37432afaad5..96bf3993f8242 100644 --- a/.github/workflows/dev_pr.yml +++ b/.github/workflows/dev_pr.yml @@ -49,7 +49,7 @@ jobs: ref: main persist-credentials: false - - name: Comment JIRA link + - name: Add Issue link if: | (github.event.action == 'opened' || github.event.action == 'edited') diff --git a/.github/workflows/dev_pr/helpers.js b/.github/workflows/dev_pr/helpers.js index e4da5050d5dd5..222d2213771cf 100644 --- 
a/.github/workflows/dev_pr/helpers.js +++ b/.github/workflows/dev_pr/helpers.js @@ -20,14 +20,12 @@ const https = require('https'); /** * Given the title of a PullRequest return the Issue * - * @param {String} title + * @param {String} title * @returns {Issue} or null if no issue detected. * * @typedef {Object} Issue - * @property {string} kind - The kind of issue: minor, jira or github - * @property {string} id - The id of the issue: - * PARQUET-XXXX for jira - * The numeric issue id for github + * @property {string} kind - The kind of issue: minor or github + * @property {string} id - The numeric issue id of the issue */ function detectIssue(title) { if (!title) { @@ -36,10 +34,6 @@ function detectIssue(title) { if (title.startsWith("MINOR: ")) { return {"kind": "minor"}; } - const matched_jira = /^(WIP:?\s*)?((PARQUET)-\d+)/.exec(title); - if (matched_jira) { - return {"kind": "jira", "id": matched_jira[2]}; - } const matched_gh = /^(WIP:?\s*)?GH-(\d+)/.exec(title); if (matched_gh) { return {"kind": "github", "id": matched_gh[2]}; @@ -47,27 +41,6 @@ function detectIssue(title) { return null; } -/** - * Retrieves information about a JIRA issue. - * @param {String} jiraID - * @returns {Object} the information about a JIRA issue. - */ -async function getJiraInfo(jiraID) { - const jiraURL = `https://issues.apache.org/jira/rest/api/2/issue/${jiraID}`; - - return new Promise((resolve) => { - https.get(jiraURL, res => { - let data = ''; - - res.on('data', chunk => { data += chunk }) - - res.on('end', () => { - resolve(JSON.parse(data)); - }) - }) - }); -} - /** * Retrieves information about a GitHub issue. 
* @param {String} issueID @@ -89,6 +62,5 @@ async function getJiraInfo(jiraID) { module.exports = { detectIssue, - getJiraInfo, getGitHubInfo }; diff --git a/.github/workflows/dev_pr/issue_check.js b/.github/workflows/dev_pr/issue_check.js index fb5d986dff2f7..176349cdd0512 100644 --- a/.github/workflows/dev_pr/issue_check.js +++ b/.github/workflows/dev_pr/issue_check.js @@ -17,91 +17,6 @@ const helpers = require("./helpers.js"); -/** - * Performs checks on the JIRA Issue: - * - The issue is started in JIRA. - * - The issue contains components. - * - * @param {Object} github - * @param {Object} context - * @param {String} pullRequestNumber - * @param {String} jiraID - */ -async function verifyJIRAIssue(github, context, pullRequestNumber, jiraID) { - const ticketInfo = await helpers.getJiraInfo(jiraID); - if(!ticketInfo["fields"]["components"].length) { - await commentMissingComponents(github, context, pullRequestNumber); - } - - if(ticketInfo["fields"]["status"]["id"] == 1) { - // "status": {"name":"Open","id":"1" - // "description":"The issue is open and ready for the assignee to start work on it.", - await commentNotStartedTicket(github, context, pullRequestNumber); - } -} - -/** - * Adds a comment to add components on the JIRA ticket. 
- * - * @param {Object} github - * @param {Object} context - * @param {String} pullRequestNumber - */ -async function commentMissingComponents(github, context, pullRequestNumber) { - const {data: comments} = await github.rest.issues.listComments({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: pullRequestNumber, - per_page: 100 - }); - - var found = false; - for(var i=0; i { const pullRequestNumber = context.payload.number; const title = context.payload.pull_request.title; const issue = helpers.detectIssue(title) - if (issue){ - if (issue.kind == "jira") { - await verifyJIRAIssue(github, context, pullRequestNumber, issue.id); - } else if(issue.kind == "github") { - await verifyGitHubIssue(github, context, pullRequestNumber, issue.id); - } + if (issue && issue.kind === "github") { + await verifyGitHubIssue(github, context, pullRequestNumber, issue.id); } }; diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml index fed4b77295bd5..7ef92f0be9b87 100644 --- a/.github/workflows/dev_pr/labeler.yml +++ b/.github/workflows/dev_pr/labeler.yml @@ -35,11 +35,6 @@ - any-glob-to-any-file: - go/**/* -"Component: Java": -- changed-files: - - any-glob-to-any-file: - - java/**/* - "Component: JavaScript": - changed-files: - any-glob-to-any-file: @@ -82,7 +77,6 @@ - any-glob-to-any-file: - c_glib/gandiva-glib/**/* - cpp/src/gandiva/**/* - - java/gandiva/**/* - python/pyarrow/gandiva.* - ruby/red-gandiva/**/* diff --git a/.github/workflows/dev_pr/link.js b/.github/workflows/dev_pr/link.js index a70dbc604c377..3f50a5b9b9d4d 100644 --- a/.github/workflows/dev_pr/link.js +++ b/.github/workflows/dev_pr/link.js @@ -47,30 +47,6 @@ async function haveComment(github, context, pullRequestNumber, message) { return false; } -/** - * Adds a comment on the Pull Request linking the JIRA issue. 
- * - * @param {Object} github - * @param {Object} context - * @param {String} pullRequestNumber - * @param {String} jiraID - */ -async function commentJIRAURL(github, context, pullRequestNumber, jiraID) { - const issueInfo = await helpers.getJiraInfo(jiraID); - const jiraURL = `https://issues.apache.org/jira/browse/${jiraID}`; - if (await haveComment(github, context, pullRequestNumber, jiraURL)) { - return; - } - if (issueInfo){ - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: pullRequestNumber, - body: jiraURL - }); - } -} - /** * Adds a comment on the Pull Request linking the GitHub issue. * @@ -101,11 +77,7 @@ module.exports = async ({github, context}) => { const pullRequestNumber = context.payload.number; const title = context.payload.pull_request.title; const issue = helpers.detectIssue(title); - if (issue){ - if (issue.kind == "jira") { - await commentJIRAURL(github, context, pullRequestNumber, issue.id); - } else if (issue.kind == "github") { - await commentGitHubURL(github, context, pullRequestNumber, issue.id); - } + if (issue && issue.kind === "github") { + await commentGitHubURL(github, context, pullRequestNumber, issue.id); } }; diff --git a/.github/workflows/dev_pr/title_check.md b/.github/workflows/dev_pr/title_check.md index c810d7477947c..8de10a2962e92 100644 --- a/.github/workflows/dev_pr/title_check.md +++ b/.github/workflows/dev_pr/title_check.md @@ -31,10 +31,6 @@ or MINOR: [${COMPONENT}] ${SUMMARY} -In the case of PARQUET issues on JIRA the title also supports: - - PARQUET-${JIRA_ISSUE_ID}: [${COMPONENT}] ${SUMMARY} - See also: * [Other pull requests](https://github.com/apache/arrow/pulls/) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index da0e2004f1f3f..aef81df074888 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -91,6 +91,11 @@ jobs: with: repository: apache/arrow-go path: go + - name: Checkout 
Arrow Java + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + repository: apache/arrow-java + path: java - name: Free up disk space run: | ci/scripts/util_free_space.sh @@ -115,6 +120,7 @@ jobs: archery docker run \ -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ -e ARCHERY_INTEGRATION_WITH_GO=1 \ + -e ARCHERY_INTEGRATION_WITH_JAVA=1 \ -e ARCHERY_INTEGRATION_WITH_NANOARROW=1 \ -e ARCHERY_INTEGRATION_WITH_RUST=1 \ conda-integration diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml deleted file mode 100644 index f6b3fa748e6d2..0000000000000 --- a/.github/workflows/java.yml +++ /dev/null @@ -1,173 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -name: Java - -on: - push: - branches: - - '**' - - '!dependabot/**' - tags: - - '**' - paths: - - '.dockerignore' - - '.github/workflows/java.yml' - - 'ci/docker/*java*' - - 'ci/scripts/java*.sh' - - 'ci/scripts/util_*.sh' - - 'docker-compose.yml' - - 'format/Flight.proto' - - 'java/**' - pull_request: - paths: - - '.dockerignore' - - '.github/workflows/java.yml' - - 'ci/docker/*java*' - - 'ci/scripts/java*.sh' - - 'ci/scripts/util_*.sh' - - 'docker-compose.yml' - - 'format/Flight.proto' - - 'java/**' - -concurrency: - group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} - cancel-in-progress: true - -permissions: - contents: read - -env: - ARCHERY_DEBUG: 1 - DOCKER_VOLUME_PREFIX: ".docker/" - -jobs: - ubuntu: - name: AMD64 Ubuntu 22.04 Java JDK ${{ matrix.jdk }} Maven ${{ matrix.maven }} - runs-on: ubuntu-latest - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 30 - strategy: - fail-fast: false - matrix: - jdk: [11, 17, 21, 22] - maven: [3.9.6] - image: [java] - env: - JDK: ${{ matrix.jdk }} - MAVEN: ${{ matrix.maven }} - steps: - - name: Checkout Arrow - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 - with: - fetch-depth: 0 - submodules: recursive - - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 - with: - path: .docker - key: maven-${{ hashFiles('java/**') }} - restore-keys: maven- - - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 - with: - python-version: 3.12 - - name: Setup Archery - run: pip install -e dev/archery[docker] - - name: Execute Docker Build - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} - run: | - archery docker run \ - -e CI=true \ - -e "DEVELOCITY_ACCESS_KEY=$DEVELOCITY_ACCESS_KEY" \ - ${{ matrix.image }} - - name: 
Docker Push - if: >- - success() && - github.event_name == 'push' && - github.repository == 'apache/arrow' && - github.ref_name == 'main' - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - continue-on-error: true - run: archery docker push ${{ matrix.image }} - - macos: - name: AMD64 macOS 13 Java JDK ${{ matrix.jdk }} - runs-on: macos-13 - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 30 - strategy: - fail-fast: false - matrix: - jdk: [11] - steps: - - name: Set up Java - uses: actions/setup-java@v4 - with: - distribution: 'zulu' - java-version: ${{ matrix.jdk }} - - name: Checkout Arrow - uses: actions/checkout@v4 - with: - fetch-depth: 0 - submodules: recursive - - name: Build - shell: bash - env: - DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} - run: ci/scripts/java_build.sh $(pwd) $(pwd)/build - - name: Test - shell: bash - env: - DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} - run: ci/scripts/java_test.sh $(pwd) $(pwd)/build - - windows: - name: AMD64 Windows Server 2022 Java JDK ${{ matrix.jdk }} - runs-on: windows-latest - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 30 - strategy: - fail-fast: false - matrix: - jdk: [11] - steps: - - name: Set up Java - uses: actions/setup-java@v4 - with: - java-version: ${{ matrix.jdk }} - distribution: 'temurin' - - name: Checkout Arrow - uses: actions/checkout@v4 - with: - fetch-depth: 0 - submodules: recursive - - name: Build - shell: bash - env: - DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} - run: ci/scripts/java_build.sh $(pwd) $(pwd)/build - - name: Test - shell: bash - env: - DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} - run: ci/scripts/java_test.sh $(pwd) $(pwd)/build diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml deleted file mode 100644 index 5682f8e84167e..0000000000000 --- a/.github/workflows/java_jni.yml +++ 
/dev/null @@ -1,147 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Java JNI - -on: - push: - branches: - - '**' - - '!dependabot/**' - tags: - - '**' - paths: - - '.dockerignore' - - '.github/workflows/java_jni.yml' - - 'ci/docker/**' - - 'ci/scripts/cpp_build.sh' - - 'ci/scripts/java_*' - - 'cpp/**' - - 'docker-compose.yml' - - 'java/**' - pull_request: - paths: - - '.dockerignore' - - '.github/workflows/java_jni.yml' - - 'ci/docker/**' - - 'ci/scripts/cpp_build.sh' - - 'ci/scripts/java_*' - - 'cpp/**' - - 'docker-compose.yml' - - 'java/**' - -concurrency: - group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} - cancel-in-progress: true - -permissions: - contents: read - -env: - ARCHERY_DEBUG: 1 - DOCKER_VOLUME_PREFIX: ".docker/" - -jobs: - docker: - name: AMD64 manylinux2014 Java JNI - runs-on: ubuntu-latest - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 240 - steps: - - name: Checkout Arrow - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 - with: - fetch-depth: 0 - submodules: recursive - - name: Free up disk space - run: | - ci/scripts/util_free_space.sh - - name: Cache Docker Volumes - uses: 
actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 - with: - path: .docker - key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }} - restore-keys: java-jni-manylinux-2014- - - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 - with: - python-version: 3.12 - - name: Setup Archery - run: pip install -e dev/archery[docker] - - name: Execute Docker Build - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - run: | - source ci/scripts/util_enable_core_dumps.sh - archery docker run java-jni-manylinux-2014 - - name: Docker Push - if: >- - success() && - github.event_name == 'push' && - github.repository == 'apache/arrow' && - github.ref_name == 'main' - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - continue-on-error: true - run: archery docker push java-jni-manylinux-2014 - - docker_integration_python: - name: AMD64 Conda Java C Data Interface Integration - runs-on: ubuntu-latest - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 90 - steps: - - name: Checkout Arrow - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 - with: - fetch-depth: 0 - submodules: recursive - - name: Cache Docker Volumes - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 - with: - path: .docker - key: maven-${{ hashFiles('java/**') }} - restore-keys: maven- - - name: Setup Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 - with: - python-version: 3.12 - - name: Setup Archery - run: pip install -e dev/archery[docker] - - name: Execute Docker Build - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} - run: | - archery docker run \ - -e CI=true \ - -e 
"DEVELOCITY_ACCESS_KEY=$DEVELOCITY_ACCESS_KEY" \ - conda-python-java-integration - - name: Docker Push - if: >- - success() && - github.event_name == 'push' && - github.repository == 'apache/arrow' && - github.ref_name == 'main' - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - continue-on-error: true - run: archery docker push conda-python-java-integration diff --git a/.github/workflows/java_nightly.yml b/.github/workflows/java_nightly.yml deleted file mode 100644 index 436cc324ddc45..0000000000000 --- a/.github/workflows/java_nightly.yml +++ /dev/null @@ -1,139 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Upload Java Nightly builds - -on: - workflow_dispatch: - inputs: - prefix: - description: Job prefix to use. - required: false - default: '' - keep: - description: Number of versions to keep. 
- required: false - default: 14 - schedule: - - cron: '0 14 * * *' - -permissions: - contents: read - -jobs: - upload: - if: github.repository == 'apache/arrow' - env: - PREFIX: ${{ github.event.inputs.prefix || ''}} - CROSSBOW_GITHUB_TOKEN: ${{ github.token }} - runs-on: ubuntu-latest - steps: - - name: Checkout Arrow - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 - with: - fetch-depth: 1 - path: arrow - repository: apache/arrow - ref: main - submodules: recursive - - name: Checkout Crossbow - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 - with: - fetch-depth: 0 - path: crossbow - repository: ursacomputing/crossbow - ref: main - - name: Set up Python - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 - with: - cache: 'pip' - python-version: 3.12 - - name: Install Archery - shell: bash - run: pip install -e arrow/dev/archery[all] - - run: mkdir -p binaries - - name: Download Artifacts - run: | - if [ -z $PREFIX ]; then - PREFIX=nightly-packaging-$(date +%Y-%m-%d)-0 - fi - echo $PREFIX - archery crossbow download-artifacts -f java-jars -t binaries $PREFIX - - name: Sync from Remote - uses: ./arrow/.github/actions/sync-nightlies - with: - switches: -avzh --update --delete --progress - local_path: repo - remote_path: ${{ secrets.NIGHTLIES_RSYNC_PATH }}/arrow/java - remote_host: ${{ secrets.NIGHTLIES_RSYNC_HOST }} - remote_port: ${{ secrets.NIGHTLIES_RSYNC_PORT }} - remote_user: ${{ secrets.NIGHTLIES_RSYNC_USER }} - remote_key: ${{ secrets.NIGHTLIES_RSYNC_KEY }} - remote_host_key: ${{ secrets.NIGHTLIES_RSYNC_HOST_KEY }} - - shell: bash - name: Show local repo sync from remote - run: | - for i in `ls -t repo/org/apache/arrow`; do - echo "- $i: $(find repo/org/apache/arrow/$i -mindepth 1 -maxdepth 1 -type d \ - | wc -l \ - | xargs) versions available" - done - - shell: bash - name: Build Repository - run: | - DATE=$(date +%Y-%m-%d) - if [ -z $PREFIX ]; then - 
PREFIX=nightly-packaging-${DATE}-0 - fi - PATTERN_TO_GET_LIB_AND_VERSION='([a-z].+)-([0-9]+.[0-9]+.[0-9]+-SNAPSHOT)' - mkdir -p repo/org/apache/arrow/ - for LIBRARY in $(ls binaries/$PREFIX/java-jars | grep -E '.jar|.json|.pom|.xml' | grep SNAPSHOT); do - [[ $LIBRARY =~ $PATTERN_TO_GET_LIB_AND_VERSION ]] - mkdir -p repo/org/apache/arrow/${BASH_REMATCH[1]}/${BASH_REMATCH[2]} - mkdir -p repo/org/apache/arrow/${BASH_REMATCH[1]}/${DATE} - # Copy twice to maintain a latest snapshot and some earlier versions - cp binaries/$PREFIX/java-jars/$LIBRARY repo/org/apache/arrow/${BASH_REMATCH[1]}/${BASH_REMATCH[2]} - touch repo/org/apache/arrow/${BASH_REMATCH[1]}/${BASH_REMATCH[2]} - cp binaries/$PREFIX/java-jars/$LIBRARY repo/org/apache/arrow/${BASH_REMATCH[1]}/${DATE} - echo "Artifacts $LIBRARY configured" - done - - name: Prune Repository - shell: bash - env: - KEEP: ${{ github.event.inputs.keep || 14 }} - run: | - for i in `ls -t repo/org/apache/arrow`; do - find repo/org/apache/arrow/$i -mindepth 1 -maxdepth 1 -type d -print0 \ - | xargs -0 ls -t -d \ - | tail -n +$((KEEP + 1)) \ - | xargs rm -rf - done - - name: Show repo contents - run: tree repo - - name: Sync to Remote - if: ${{ github.repository == 'apache/arrow' }} - uses: ./arrow/.github/actions/sync-nightlies - with: - upload: true - switches: -avzh --update --delete --progress - local_path: repo - remote_path: ${{ secrets.NIGHTLIES_RSYNC_PATH }}/arrow/java - remote_host: ${{ secrets.NIGHTLIES_RSYNC_HOST }} - remote_port: ${{ secrets.NIGHTLIES_RSYNC_PORT }} - remote_user: ${{ secrets.NIGHTLIES_RSYNC_USER }} - remote_key: ${{ secrets.NIGHTLIES_RSYNC_KEY }} - remote_host_key: ${{ secrets.NIGHTLIES_RSYNC_HOST_KEY }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6bde1cb2964e0..27823cae5fa28 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -182,4 +182,5 @@ repos: ( ?^ci/scripts/c_glib_build\.sh$| ?^ci/scripts/c_glib_test\.sh$| + ?^c_glib/test/run-test\.sh$| ) diff --git 
a/CONTRIBUTING.md b/CONTRIBUTING.md index 03ce556add2bd..dc7d7a2244a60 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -29,8 +29,7 @@ To be assigned to an issue, add a comment "take" to that issue. Before you create a new bug entry, we recommend you first search among existing Arrow issues in -[Jira](https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20status%20%3D%20Open) -or [GitHub](https://github.com/apache/arrow/issues). +[GitHub](https://github.com/apache/arrow/issues). We conventionally prefix the issue title with the component name in brackets, such as "[C++][Python] Ensure no validity bitmap in diff --git a/c_glib/test/run-test.sh b/c_glib/test/run-test.sh index c7bc6edca5f0d..8b1868942073c 100755 --- a/c_glib/test/run-test.sh +++ b/c_glib/test/run-test.sh @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. -test_dir="$(cd $(dirname $0); pwd)" +test_dir="$(cd "$(dirname "$0")" && pwd)" build_dir="$(cd .; pwd)" modules=( @@ -47,7 +47,7 @@ if [ "${BUILD}" != "no" ]; then fi for module in "${modules[@]}"; do - MODULE_TYPELIB_DIR_VAR_NAME="$(echo ${module} | tr a-z- A-Z_)_TYPELIB_DIR" + MODULE_TYPELIB_DIR_VAR_NAME="$(echo "${module}" | tr a-z- A-Z_)_TYPELIB_DIR" module_typelib_dir=$(eval "echo \${${MODULE_TYPELIB_DIR_VAR_NAME}}") if [ -z "${module_typelib_dir}" ]; then module_typelib_dir="${build_dir}/${module}" @@ -74,4 +74,4 @@ case "${DEBUGGER}" in DEBUGGER_ARGS+=(--) ;; esac -${DEBUGGER} "${DEBUGGER_ARGS[@]}" "${RUBY}" ${test_dir}/run-test.rb "$@" +${DEBUGGER} "${DEBUGGER_ARGS[@]}" "${RUBY}" "${test_dir}"/run-test.rb "$@" diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index 084117f38778a..b1237fc9958c1 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -34,7 +34,6 @@ IF "%ARROW_DEBUG_MEMORY_POOL%"=="" ( set CMAKE_BUILD_PARALLEL_LEVEL=%NUMBER_OF_PROCESSORS% set CTEST_PARALLEL_LEVEL=%NUMBER_OF_PROCESSORS% - call activate arrow @rem The "main" C++ 
build script for Windows CI @@ -113,12 +112,12 @@ ctest --output-on-failure || exit /B popd +pushd python + @rem @rem Build and install pyarrow @rem -pushd python - set PYARROW_CMAKE_GENERATOR=%GENERATOR% set PYARROW_CXXFLAGS=%ARROW_CXXFLAGS% set PYARROW_PARALLEL=2 @@ -137,6 +136,12 @@ set ARROW_HOME=%CONDA_PREFIX%\Library @rem ARROW-3075; pkgconfig is broken for Parquet for now set PARQUET_HOME=%CONDA_PREFIX%\Library +pip install --no-deps --no-build-isolation -vv --editable . + +@rem +@rem Run pyarrow tests +@rem + @rem Download IANA Timezone Database to a non-standard location to @rem test the configurability of the timezone database path curl https://data.iana.org/time-zones/releases/tzdata2024b.tar.gz --output tzdata.tar.gz || exit /B @@ -150,12 +155,9 @@ rmdir /s /q %USERPROFILE%\Downloads\tzdata @rem (only needed for testing purposes) set PYARROW_TZDATA_PATH=%USERPROFILE%\Downloads\test\tzdata -python setup.py develop -q || exit /B - +set AWS_EC2_METADATA_DISABLED=true set PYTHONDEVMODE=1 -py.test -r sxX --durations=15 --pyargs pyarrow.tests || exit /B +python -m pytest -r sxX --durations=15 pyarrow/tests || exit /B -@rem -@rem Wheels are built and tested separately (see ARROW-5142). 
-@rem +popd diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat index f9463e5074225..912b130acff45 100644 --- a/ci/appveyor-cpp-setup.bat +++ b/ci/appveyor-cpp-setup.bat @@ -17,7 +17,13 @@ @echo on -set "PATH=C:\Miniconda38-x64;C:\Miniconda38-x64\Scripts;C:\Miniconda38-x64\Library\bin;%PATH%" +@rem +@rem The miniconda install on AppVeyor is very outdated, use Mambaforge instead +@rem + +appveyor DownloadFile https://github.com/conda-forge/miniforge/releases/download/24.9.2-0/Mambaforge-Windows-x86_64.exe || exit /B +start /wait "" Mambaforge-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /D=C:\Mambaforge +set "PATH=C:\Mambaforge\scripts;C:\Mambaforge\condabin;%PATH%" @rem @rem Avoid picking up AppVeyor-installed OpenSSL (linker errors with gRPC) @@ -33,26 +39,15 @@ rd /s /q C:\OpenSSL-v30-Win32 rd /s /q C:\OpenSSL-v30-Win64 @rem -@rem Configure miniconda +@rem Configure conda @rem conda config --set auto_update_conda false -conda config --set show_channel_urls True +conda config --set show_channel_urls true +conda config --set always_yes true @rem Help with SSL timeouts to S3 conda config --set remote_connect_timeout_secs 12 -@rem Workaround for ARROW-13636 -conda config --append disallowed_packages pypy3 -conda info -a - -@rem -@rem Install Python to the base environment -@rem -conda install -q -y -c conda-forge python=%PYTHON% || exit /B -@rem Can't use conda-libmamba-solver 2.0.0 -conda config --set solver classic - -@rem Update for newer CA certificates -conda update -q -y -c conda-forge --all || exit /B +conda info -a || exit /B @rem @rem Create conda environment @@ -66,11 +61,8 @@ if "%ARROW_BUILD_GANDIVA%" == "ON" ( ) @rem Install pre-built "toolchain" packages for faster builds set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.txt -@rem Force conda to use conda-forge -conda config --add channels conda-forge -conda config --remove channels defaults @rem Arrow conda environment -conda create -n arrow -y -c 
conda-forge ^ +conda create -n arrow ^ --file=ci\conda_env_python.txt ^ %CONDA_PACKAGES% ^ "ccache" ^ @@ -97,7 +89,6 @@ if "%ARROW_S3%" == "ON" ( appveyor DownloadFile https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z -FileName C:\Windows\Minio.exe || exit /B ) - @rem @rem Download IANA Timezone Database for unit tests @rem diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index f0084894e19dc..6d4be52baec05 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -48,7 +48,7 @@ ENV PIPX_BASE_PYTHON=/opt/conda/bin/python3 COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts RUN /arrow/ci/scripts/install_gcs_testbench.sh default -# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to +# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to # be on the path for the tests to run. 
ENV PATH=/opt/conda/envs/arrow/bin:$PATH @@ -68,6 +68,7 @@ ENV ARROW_ACERO=ON \ ARROW_GANDIVA=ON \ ARROW_GCS=ON \ ARROW_HOME=$CONDA_PREFIX \ + ARROW_JEMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/conda-python-emscripten.dockerfile b/ci/docker/conda-python-emscripten.dockerfile index 8ad705c920ba8..47ff550cd59ca 100644 --- a/ci/docker/conda-python-emscripten.dockerfile +++ b/ci/docker/conda-python-emscripten.dockerfile @@ -27,14 +27,14 @@ ARG required_python_min="(3,12)" # fail if python version < 3.12 RUN echo "check PYTHON>=${required_python_min}" && python -c "import sys;sys.exit(0 if sys.version_info>=${required_python_min} else 1)" -# install selenium and pyodide-build and recent python +# install selenium and recent pyodide-build and recent python # needs to be a login shell so ~/.profile is read SHELL ["/bin/bash", "--login", "-c", "-o", "pipefail"] RUN python -m pip install --no-cache-dir selenium==${selenium_version} && \ - python -m pip install --no-cache-dir --upgrade pyodide-build==${pyodide_version} - + python -m pip install --no-cache-dir --upgrade pyodide-build>=${pyodide_version} + # install pyodide dist directory to /pyodide RUN pyodide_dist_url="https://github.com/pyodide/pyodide/releases/download/${pyodide_version}/pyodide-${pyodide_version}.tar.bz2" && \ wget -q "${pyodide_dist_url}" -O- | tar -xj -C / diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile index d7a6f9df2c2ee..f486d07ff8894 100644 --- a/ci/docker/debian-12-cpp.dockerfile +++ b/ci/docker/debian-12-cpp.dockerfile @@ -108,6 +108,11 @@ RUN /arrow/ci/scripts/install_azurite.sh COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin +# Prioritize system packages and local installation. 
+# +# The following dependencies will be downloaded due to missing/invalid packages +# provided by the distribution: +# - opentelemetry-cpp-dev is not packaged ENV ARROW_ACERO=ON \ ARROW_AZURE=ON \ ARROW_BUILD_TESTS=ON \ @@ -119,6 +124,7 @@ ENV ARROW_ACERO=ON \ ARROW_GANDIVA=ON \ ARROW_GCS=ON \ ARROW_HOME=/usr/local \ + ARROW_JEMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ @@ -134,6 +140,7 @@ ENV ARROW_ACERO=ON \ AWSSDK_SOURCE=BUNDLED \ Azure_SOURCE=BUNDLED \ google_cloud_cpp_storage_SOURCE=BUNDLED \ + opentelemetry_cpp_SOURCE=BUNDLED \ ORC_SOURCE=BUNDLED \ PATH=/usr/lib/ccache/:$PATH \ PYTHON=python3 \ diff --git a/ci/docker/fedora-39-cpp.dockerfile b/ci/docker/fedora-39-cpp.dockerfile index 2ac5afe7b91f6..6c5edd444e253 100644 --- a/ci/docker/fedora-39-cpp.dockerfile +++ b/ci/docker/fedora-39-cpp.dockerfile @@ -87,6 +87,7 @@ ENV ARROW_ACERO=ON \ ARROW_GANDIVA=ON \ ARROW_GCS=ON \ ARROW_HOME=/usr/local \ + ARROW_JEMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ @@ -103,6 +104,7 @@ ENV ARROW_ACERO=ON \ CC=gcc \ CXX=g++ \ google_cloud_cpp_storage_SOURCE=BUNDLED \ + opentelemetry_cpp_SOURCE=BUNDLED \ PARQUET_BUILD_EXAMPLES=ON \ PARQUET_BUILD_EXECUTABLES=ON \ PATH=/usr/lib/ccache/:$PATH \ diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile index c6fa3cc0dce97..0b5645285b6e1 100644 --- a/ci/docker/python-wheel-manylinux.dockerfile +++ b/ci/docker/python-wheel-manylinux.dockerfile @@ -107,6 +107,7 @@ RUN --mount=type=secret,id=github_repository_owner \ --x-feature=flight \ --x-feature=gcs \ --x-feature=json \ + --x-feature=orc \ --x-feature=parquet \ --x-feature=s3 && \ rm -rf ~/.config/NuGet/ diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index d78c7a99cf4d6..8dc778d544a6d 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -135,7 +135,8 @@ RUN /arrow/ci/scripts/install_ceph.sh COPY 
ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin -# Prioritize system packages and local installation +# Prioritize system packages and local installation. +# # The following dependencies will be downloaded due to missing/invalid packages # provided by the distribution: # - Abseil is not packaged @@ -143,8 +144,10 @@ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin # - flatbuffer is not packaged # - libgtest-dev only provide sources # - libprotobuf-dev only provide sources +# - opentelemetry-cpp-dev is not packaged +# # ARROW-17051: this build uses static Protobuf, so we must also use -# static Arrow to run Flight/Flight SQL tests +# static Arrow to run Flight/Flight SQL tests. ENV absl_SOURCE=BUNDLED \ ARROW_ACERO=ON \ ARROW_AZURE=OFF \ @@ -158,6 +161,7 @@ ENV absl_SOURCE=BUNDLED \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_JEMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ @@ -179,6 +183,7 @@ ENV absl_SOURCE=BUNDLED \ google_cloud_cpp_storage_SOURCE=BUNDLED \ gRPC_SOURCE=BUNDLED \ GTest_SOURCE=BUNDLED \ + opentelemetry_cpp_SOURCE=BUNDLED \ ORC_SOURCE=BUNDLED \ PARQUET_BUILD_EXAMPLES=ON \ PARQUET_BUILD_EXECUTABLES=ON \ diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index bf5a282e7b8ca..2e4d658bf9549 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -184,11 +184,13 @@ RUN /arrow/ci/scripts/install_azurite.sh COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin -# Prioritize system packages and local installation +# Prioritize system packages and local installation. 
+# # The following dependencies will be downloaded due to missing/invalid packages # provided by the distribution: # - Abseil is old # - libc-ares-dev does not install CMake config files +# - opentelemetry-cpp-dev is not packaged ENV absl_SOURCE=BUNDLED \ ARROW_ACERO=ON \ ARROW_AZURE=ON \ @@ -203,6 +205,7 @@ ENV absl_SOURCE=BUNDLED \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_JEMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ @@ -222,6 +225,7 @@ ENV absl_SOURCE=BUNDLED \ AWSSDK_SOURCE=BUNDLED \ Azure_SOURCE=BUNDLED \ google_cloud_cpp_storage_SOURCE=BUNDLED \ + opentelemetry_cpp_SOURCE=BUNDLED \ ORC_SOURCE=BUNDLED \ PARQUET_BUILD_EXAMPLES=ON \ PARQUET_BUILD_EXECUTABLES=ON \ diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index f662edc5365bc..53113bccfe4fa 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -172,7 +172,11 @@ RUN /arrow/ci/scripts/install_azurite.sh COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin -# Prioritize system packages and local installation +# Prioritize system packages and local installation. 
+# +# The following dependencies will be downloaded due to missing/invalid packages +# provided by the distribution: +# - opentelemetry-cpp-dev is not packaged ENV ARROW_ACERO=ON \ ARROW_AZURE=ON \ ARROW_BUILD_STATIC=ON \ @@ -186,6 +190,7 @@ ENV ARROW_ACERO=ON \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_JEMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ @@ -205,6 +210,7 @@ ENV ARROW_ACERO=ON \ AWSSDK_SOURCE=BUNDLED \ Azure_SOURCE=BUNDLED \ google_cloud_cpp_storage_SOURCE=BUNDLED \ + opentelemetry_cpp_SOURCE=BUNDLED \ ORC_SOURCE=BUNDLED \ PARQUET_BUILD_EXAMPLES=ON \ PARQUET_BUILD_EXECUTABLES=ON \ diff --git a/ci/docker/ubuntu-24.04-verify-rc.dockerfile b/ci/docker/ubuntu-24.04-verify-rc.dockerfile new file mode 100644 index 0000000000000..42d71afcb0999 --- /dev/null +++ b/ci/docker/ubuntu-24.04-verify-rc.dockerfile @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG arch=amd64 +FROM ${arch}/ubuntu:24.04 + +ENV DEBIAN_FRONTEND=noninteractive +COPY dev/release/setup-ubuntu.sh / +RUN /setup-ubuntu.sh && \ + rm /setup-ubuntu.sh && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index de3f70934f70a..c1e7adf6a05e0 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -171,10 +171,10 @@ else -DARROW_GCS=${ARROW_GCS:-OFF} \ -DARROW_HDFS=${ARROW_HDFS:-ON} \ -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \ - -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \ + -DARROW_JEMALLOC=${ARROW_JEMALLOC:-OFF} \ -DARROW_JSON=${ARROW_JSON:-ON} \ -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \ - -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \ + -DARROW_MIMALLOC=${ARROW_MIMALLOC:-ON} \ -DARROW_ORC=${ARROW_ORC:-OFF} \ -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ @@ -225,6 +225,7 @@ else -DgRPC_SOURCE=${gRPC_SOURCE:-} \ -DGTest_SOURCE=${GTest_SOURCE:-} \ -Dlz4_SOURCE=${lz4_SOURCE:-} \ + -Dopentelemetry-cpp_SOURCE=${opentelemetry_cpp_SOURCE:-} \ -DORC_SOURCE=${ORC_SOURCE:-} \ -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \ -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \ diff --git a/ci/scripts/install_substrait_consumer.sh b/ci/scripts/install_substrait_consumer.sh index 8d3333a18f8de..2e6d299f68bf2 100755 --- a/ci/scripts/install_substrait_consumer.sh +++ b/ci/scripts/install_substrait_consumer.sh @@ -24,10 +24,8 @@ echo "Install Substrait Consumer Test Suite"; git clone https://github.com/substrait-io/consumer-testing.git cd consumer-testing # avoid installing pyarrow -grep -v 'pyarrow\|arrow-nightlies' requirements.txt | while read line -do - pip install $line -done +grep -v 'pyarrow\|arrow-nightlies' requirements.txt > requirements-no-arrow.txt +pip install -r requirements-no-arrow.txt pip install -r requirements-build.txt # setup substrait-java diff --git 
a/ci/scripts/integration_arrow.sh b/ci/scripts/integration_arrow.sh index 8d0a343ebb443..275ef431c7d8f 100755 --- a/ci/scripts/integration_arrow.sh +++ b/ci/scripts/integration_arrow.sh @@ -26,10 +26,9 @@ gold_dir=$arrow_dir/testing/data/arrow-ipc-stream/integration : ${ARROW_INTEGRATION_CPP:=ON} : ${ARROW_INTEGRATION_CSHARP:=ON} -: ${ARROW_INTEGRATION_JAVA:=ON} : ${ARROW_INTEGRATION_JS:=ON} -: ${ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS:=cpp,csharp,java,js} +: ${ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS:=cpp,csharp,js} export ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS . ${arrow_dir}/ci/scripts/util_log.sh @@ -43,7 +42,7 @@ github_actions_group_begin "Integration: Prepare: Dependencies" if [ "${ARROW_INTEGRATION_CSHARP}" == "ON" ]; then pip install pythonnet fi -if [ "${ARROW_INTEGRATION_JAVA}" == "ON" ]; then +if [ "${ARCHERY_INTEGRATION_WITH_JAVA}" -gt "0" ]; then pip install jpype1 fi github_actions_group_end @@ -66,7 +65,6 @@ time archery integration \ --run-flight \ --with-cpp=$([ "$ARROW_INTEGRATION_CPP" == "ON" ] && echo "1" || echo "0") \ --with-csharp=$([ "$ARROW_INTEGRATION_CSHARP" == "ON" ] && echo "1" || echo "0") \ - --with-java=$([ "$ARROW_INTEGRATION_JAVA" == "ON" ] && echo "1" || echo "0") \ --with-js=$([ "$ARROW_INTEGRATION_JS" == "ON" ] && echo "1" || echo "0") \ --gold-dirs=$gold_dir/0.14.1 \ --gold-dirs=$gold_dir/0.17.1 \ diff --git a/ci/scripts/integration_arrow_build.sh b/ci/scripts/integration_arrow_build.sh index 4dfcf8768c71f..1554865ecce3b 100755 --- a/ci/scripts/integration_arrow_build.sh +++ b/ci/scripts/integration_arrow_build.sh @@ -24,7 +24,6 @@ build_dir=${2} : ${ARROW_INTEGRATION_CPP:=ON} : ${ARROW_INTEGRATION_CSHARP:=ON} -: ${ARROW_INTEGRATION_JAVA:=ON} : ${ARROW_INTEGRATION_JS:=ON} . 
${arrow_dir}/ci/scripts/util_log.sh @@ -56,12 +55,12 @@ fi github_actions_group_end github_actions_group_begin "Integration: Build: Java" -if [ "${ARROW_INTEGRATION_JAVA}" == "ON" ]; then +if [ "${ARCHERY_INTEGRATION_WITH_JAVA}" -gt "0" ]; then export ARROW_JAVA_CDATA="ON" export JAVA_JNI_CMAKE_ARGS="-DARROW_JAVA_JNI_ENABLE_DEFAULT=OFF -DARROW_JAVA_JNI_ENABLE_C=ON" - ${arrow_dir}/ci/scripts/java_jni_build.sh ${arrow_dir} ${ARROW_HOME} ${build_dir} /tmp/dist/java - ${arrow_dir}/ci/scripts/java_build.sh ${arrow_dir} ${build_dir} /tmp/dist/java + ${arrow_dir}/java/ci/scripts/java_jni_build.sh "${arrow_dir}/java" "${ARROW_HOME}" "${build_dir}/java/" /tmp/dist/java + ${arrow_dir}/java/ci/scripts/java_build.sh "${arrow_dir}/java" "${build_dir}/java" /tmp/dist/java fi github_actions_group_end diff --git a/ci/scripts/integration_spark.sh b/ci/scripts/integration_spark.sh index 424ac5994653a..f7ef87a8b8f29 100755 --- a/ci/scripts/integration_spark.sh +++ b/ci/scripts/integration_spark.sh @@ -21,9 +21,6 @@ set -eu source_dir=${1} spark_dir=${2} -# Test Spark with latest PyArrow only, don't build with latest Arrow Java -test_pyarrow_only=${3:-false} - # Spark branch to checkout spark_version=${SPARK_VERSION:-master} @@ -35,45 +32,23 @@ if [ "${SPARK_VERSION:1:2}" == "2." 
]; then export ARROW_PRE_0_15_IPC_FORMAT=1 fi -# Get Arrow Java version -pushd ${source_dir}/java - arrow_version=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | sed -n -e '/^\[.*\]/ !{ /^[0-9]/ { p; q } }'` -popd - export MAVEN_OPTS="-Xss256m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=warn" export MAVEN_OPTS="${MAVEN_OPTS} -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" pushd ${spark_dir} + echo "Building Spark ${SPARK_VERSION}" - if [ "${test_pyarrow_only}" == "true" ]; then - echo "Building Spark ${SPARK_VERSION} to test pyarrow only" - - # Build Spark only - build/mvn -B -DskipTests package - - else - - # Update Spark pom with the Arrow version just installed and build Spark, need package phase for pyspark - echo "Building Spark ${SPARK_VERSION} with Arrow ${arrow_version}" - build/mvn versions:set-property -Dproperty=arrow.version -DnewVersion=${arrow_version} - - # Build Spark with new Arrow Java - build/mvn -B -DskipTests package - - spark_scala_tests=( - "org.apache.spark.sql.execution.arrow" - "org.apache.spark.sql.execution.vectorized.ColumnarBatchSuite" - "org.apache.spark.sql.execution.vectorized.ArrowColumnVectorSuite") - - (echo "Testing Spark:"; IFS=$'\n'; echo "${spark_scala_tests[*]}") - - # TODO: should be able to only build spark-sql tests with adding "-pl sql/core" but not currently working - build/mvn -B -Dtest=none -DwildcardSuites=$(IFS=,; echo "${spark_scala_tests[*]}") test - fi + # Build Spark only + build/mvn -B -DskipTests package # Run pyarrow related Python tests only + # "pyspark.sql.tests.arrow.test_arrow_grouped_map" and + # "pyspark.sql.tests.arrow.test_arrow_cogrouped_map" currently fail. 
+ # See: https://github.com/apache/arrow/issues/44986 spark_python_tests=( - "pyspark.sql.tests.test_arrow") + "pyspark.sql.tests.arrow.test_arrow" + "pyspark.sql.tests.arrow.test_arrow_map" + "pyspark.sql.tests.arrow.test_arrow_python_udf") case "${SPARK_VERSION}" in v1.*|v2.*|v3.0.*|v3.1.*|v3.2.*|v3.3.*) diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 91925e7abe8b0..1eaecd6bea07d 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -144,7 +144,6 @@ cmake \ -DCMAKE_INSTALL_PREFIX=${build_dir}/install \ -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ - -DORC_SOURCE=BUNDLED \ -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION} \ -DVCPKG_MANIFEST_MODE=OFF \ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \ diff --git a/ci/scripts/python_wheel_manylinux_build.sh b/ci/scripts/python_wheel_manylinux_build.sh index 6365fcfacfc38..b9f4406a2d452 100755 --- a/ci/scripts/python_wheel_manylinux_build.sh +++ b/ci/scripts/python_wheel_manylinux_build.sh @@ -125,7 +125,6 @@ cmake \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_INSTALL_PREFIX=/tmp/arrow-dist \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ - -DORC_SOURCE=BUNDLED \ -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION} \ -DVCPKG_MANIFEST_MODE=OFF \ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \ diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 8886db0e11017..2fb7b2044f54a 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -78,6 +78,7 @@ "name": "features-minimal", "hidden": true, "cacheVariables": { + "ARROW_MIMALLOC": "OFF", "ARROW_WITH_RE2": "OFF", "ARROW_WITH_UTF8PROC": "OFF" } @@ -91,7 +92,8 @@ "ARROW_CSV": "ON", "ARROW_DATASET": "ON", "ARROW_FILESYSTEM": "ON", - "ARROW_JSON": "ON" + "ARROW_JSON": "ON", + "ARROW_MIMALLOC": "ON" } }, { diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 
a7bf9e59f8aa2..43e4e7603cfbf 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -362,29 +362,11 @@ takes precedence over ccache if a storage backend is configured" ON) define_option(ARROW_IPC "Build the Arrow IPC extensions" ON) - set(ARROW_JEMALLOC_DESCRIPTION "Build the Arrow jemalloc-based allocator") - if(WIN32 - OR CMAKE_SYSTEM_NAME STREQUAL "FreeBSD" - OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch|ARM|arm" - OR NOT ARROW_ENABLE_THREADING) - # jemalloc is not supported on Windows. - # - # jemalloc is the default malloc implementation on FreeBSD and can't - # be built with --disable-libdl on FreeBSD. Because lazy-lock feature - # is required on FreeBSD. Lazy-lock feature requires libdl. - # - # jemalloc may have a problem on ARM. - # See also: https://github.com/apache/arrow/issues/44342 - # - # jemalloc requires thread. - define_option(ARROW_JEMALLOC ${ARROW_JEMALLOC_DESCRIPTION} OFF) - else() - define_option(ARROW_JEMALLOC ${ARROW_JEMALLOC_DESCRIPTION} ON) - endif() + define_option(ARROW_JEMALLOC "Build the Arrow jemalloc-based allocator" OFF) define_option(ARROW_JSON "Build Arrow with JSON support (requires RapidJSON)" OFF) - define_option(ARROW_MIMALLOC "Build the Arrow mimalloc-based allocator" OFF) + define_option(ARROW_MIMALLOC "Build the Arrow mimalloc-based allocator" ON) define_option(ARROW_PARQUET "Build the Parquet libraries" diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index fd26dc7dd9c79..fdb28b540e2d2 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -163,21 +163,6 @@ if(WIN32) # insecure, like std::getenv add_definitions(-D_CRT_SECURE_NO_WARNINGS) - # Disable static assertion in Microsoft C++ standard library. - # - # """[...]\include\type_traits(1271): error C2338: - # You've instantiated std::aligned_storage with an extended - # alignment (in other words, Align > alignof(max_align_t)). 
- # Before VS 2017 15.8, the member type would non-conformingly have an - # alignment of only alignof(max_align_t). VS 2017 15.8 was fixed to handle - # this correctly, but the fix inherently changes layout and breaks binary - # compatibility (*only* for uses of aligned_storage with extended alignments). - # Please define either (1) _ENABLE_EXTENDED_ALIGNED_STORAGE to acknowledge - # that you understand this message and that you actually want a type with - # an extended alignment, or (2) _DISABLE_EXTENDED_ALIGNED_STORAGE to silence - # this message and get the old non-conformant behavior.""" - add_definitions(-D_ENABLE_EXTENDED_ALIGNED_STORAGE) - if(MSVC) # ARROW-1931 See https://github.com/google/googletest/issues/1318 # diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 35ad4089e7f91..f0df5a59948b7 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -738,9 +738,8 @@ if(DEFINED ENV{ARROW_ORC_URL}) set(ORC_SOURCE_URL "$ENV{ARROW_ORC_URL}") else() set_urls(ORC_SOURCE_URL - "https://www.apache.org/dyn/closer.cgi?action=download&filename=/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz" - "https://downloads.apache.org/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz" - "https://github.com/apache/orc/archive/rel/release-${ARROW_ORC_BUILD_VERSION}.tar.gz" + "https://www.apache.org/dyn/closer.lua/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz?action=download" + "https://dlcdn.apache.org/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz" ) endif() @@ -817,6 +816,7 @@ if(DEFINED ENV{ARROW_THRIFT_URL}) set(THRIFT_SOURCE_URL "$ENV{ARROW_THRIFT_URL}") else() set(THRIFT_SOURCE_URL + "https://www.apache.org/dyn/closer.lua/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz?action=download" 
"https://dlcdn.apache.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" ) endif() @@ -2061,10 +2061,14 @@ macro(build_substrait) # Missing dll-interface: list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "/wd4251") - elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL - "Clang") - # Protobuf generated files trigger some errors on CLANG TSAN builds - list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "-Wno-error=shorten-64-to-32") + else() + # GH-44954: silence [[deprecated]] declarations in protobuf-generated code + list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "-Wno-deprecated") + if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL + "Clang") + # Protobuf generated files trigger some errors on CLANG TSAN builds + list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "-Wno-error=shorten-64-to-32") + endif() endif() set(SUBSTRAIT_SOURCES) @@ -2116,6 +2120,7 @@ macro(build_substrait) add_library(substrait STATIC ${SUBSTRAIT_SOURCES}) set_target_properties(substrait PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_options(substrait PRIVATE "${SUBSTRAIT_SUPPRESSED_FLAGS}") target_include_directories(substrait PUBLIC ${SUBSTRAIT_INCLUDES}) target_link_libraries(substrait PUBLIC ${ARROW_PROTOBUF_LIBPROTOBUF}) add_dependencies(substrait substrait_gen) @@ -4966,7 +4971,6 @@ if(ARROW_WITH_OPENTELEMETRY) # cURL is required whether we build from source or use an existing installation # (OTel's cmake files do not call find_curl for you) find_curl() - set(opentelemetry-cpp_SOURCE "AUTO") resolve_dependency(opentelemetry-cpp) set(ARROW_OPENTELEMETRY_LIBS opentelemetry-cpp::trace diff --git a/cpp/examples/arrow/parquet_read_write.cc b/cpp/examples/arrow/parquet_read_write.cc index a07c10fda5af8..7a2fe6f070a56 100644 --- a/cpp/examples/arrow/parquet_read_write.cc +++ b/cpp/examples/arrow/parquet_read_write.cc @@ -26,7 +26,7 @@ arrow::Status ReadFullFile(std::string path_to_file) { // #include "arrow/io/api.h" - // 
#include "arrow/parquet/arrow/reader.h" + // #include "parquet/arrow/reader.h" arrow::MemoryPool* pool = arrow::default_memory_pool(); std::shared_ptr input; @@ -44,7 +44,7 @@ arrow::Status ReadFullFile(std::string path_to_file) { arrow::Status ReadInBatches(std::string path_to_file) { // #include "arrow/io/api.h" - // #include "arrow/parquet/arrow/reader.h" + // #include "parquet/arrow/reader.h" arrow::MemoryPool* pool = arrow::default_memory_pool(); diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 4e40056839ce2..6e2294371e7a6 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -166,8 +166,7 @@ if(WIN32) list(APPEND ARROW_SYSTEM_LINK_LIBS "ws2_32") endif() -if(NOT WIN32 AND NOT APPLE) - # Pass -lrt on Linux only +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") list(APPEND ARROW_SYSTEM_LINK_LIBS rt) endif() @@ -674,6 +673,7 @@ set(ARROW_TESTING_SRCS testing/fixed_width_test_util.cc testing/generator.cc testing/gtest_util.cc + testing/math.cc testing/process.cc testing/random.cc testing/util.cc) @@ -726,9 +726,6 @@ set(ARROW_COMPUTE_SRCS compute/function.cc compute/function_internal.cc compute/kernel.cc - compute/key_hash_internal.cc - compute/key_map_internal.cc - compute/light_array_internal.cc compute/ordering.cc compute/registry.cc compute/kernels/chunked_internal.cc @@ -747,20 +744,7 @@ set(ARROW_COMPUTE_SRCS compute/kernels/vector_selection.cc compute/kernels/vector_selection_filter_internal.cc compute/kernels/vector_selection_internal.cc - compute/kernels/vector_selection_take_internal.cc - compute/row/encode_internal.cc - compute/row/compare_internal.cc - compute/row/grouper.cc - compute/row/row_encoder_internal.cc - compute/row/row_internal.cc - compute/util.cc - compute/util_internal.cc) - -append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc) -append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc) -append_runtime_avx2_src(ARROW_COMPUTE_SRCS 
compute/row/compare_internal_avx2.cc) -append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc) -append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc) + compute/kernels/vector_selection_take_internal.cc) if(ARROW_COMPUTE) # Include the remaining kernels @@ -793,10 +777,25 @@ if(ARROW_COMPUTE) compute/kernels/vector_replace.cc compute/kernels/vector_run_end_encode.cc compute/kernels/vector_select_k.cc - compute/kernels/vector_sort.cc) + compute/kernels/vector_sort.cc + compute/key_hash_internal.cc + compute/key_map_internal.cc + compute/light_array_internal.cc + compute/row/encode_internal.cc + compute/row/compare_internal.cc + compute/row/grouper.cc + compute/row/row_encoder_internal.cc + compute/row/row_internal.cc + compute/util.cc + compute/util_internal.cc) append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic_avx2.cc) append_runtime_avx512_src(ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic_avx512.cc) + append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc) + append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc) + append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/compare_internal_avx2.cc) + append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc) + append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc) endif() arrow_add_object_library(ARROW_COMPUTE ${ARROW_COMPUTE_SRCS}) diff --git a/cpp/src/arrow/acero/ArrowAceroConfig.cmake.in b/cpp/src/arrow/acero/ArrowAceroConfig.cmake.in index 124cbcbf3d42e..66aa2b4078c7f 100644 --- a/cpp/src/arrow/acero/ArrowAceroConfig.cmake.in +++ b/cpp/src/arrow/acero/ArrowAceroConfig.cmake.in @@ -28,7 +28,6 @@ include(CMakeFindDependencyMacro) find_dependency(Arrow) -find_dependency(Parquet) include("${CMAKE_CURRENT_LIST_DIR}/ArrowAceroTargets.cmake") diff --git a/cpp/src/arrow/acero/hash_join_benchmark.cc b/cpp/src/arrow/acero/hash_join_benchmark.cc index 
e3e37e249e6a3..0a56194f2a3c8 100644 --- a/cpp/src/arrow/acero/hash_join_benchmark.cc +++ b/cpp/src/arrow/acero/hash_join_benchmark.cc @@ -20,6 +20,7 @@ #include "arrow/acero/hash_join.h" #include "arrow/acero/hash_join_node.h" #include "arrow/acero/options.h" +#include "arrow/acero/swiss_join_internal.h" #include "arrow/acero/test_util_internal.h" #include "arrow/acero/util.h" #include "arrow/api.h" @@ -365,6 +366,21 @@ static void BM_HashJoinBasic_ComplexResidualFilter(benchmark::State& st, HashJoinBasicBenchmarkImpl(st, settings); } + +static void BM_HashJoinBasic_HeavyBuildPayload(benchmark::State& st) { + BenchmarkSettings settings; + settings.build_payload_types = {boolean(), fixed_size_binary(64), utf8(), + boolean(), fixed_size_binary(64), utf8()}; + settings.probe_payload_types = {int32()}; + settings.null_percentage = 0.5; + settings.cardinality = 1.0 / 16.0; + settings.num_build_batches = static_cast(st.range(0)); + settings.num_probe_batches = settings.num_build_batches; + settings.var_length_min = 64; + settings.var_length_max = 128; + + HashJoinBasicBenchmarkImpl(st, settings); +} #endif std::vector hashtable_krows = benchmark::CreateRange(1, 4096, 8); @@ -622,6 +638,10 @@ BENCHMARK_CAPTURE(BM_HashJoinBasic_ComplexResidualFilter, "Full Outer", JoinType::FULL_OUTER) ->ArgNames(complex_residual_filter_argnames) ->ArgsProduct(complex_residual_filter_args); + +BENCHMARK(BM_HashJoinBasic_HeavyBuildPayload) + ->ArgNames({"HashTable krows"}) + ->ArgsProduct({benchmark::CreateRange(1, 512, 8)}); #else BENCHMARK_CAPTURE(BM_HashJoinBasic_KeyTypes, "{int32}", {int32()}) @@ -640,5 +660,106 @@ BENCHMARK(BM_HashJoinBasic_ProbeParallelism) #endif // ARROW_BUILD_DETAILED_BENCHMARKS +void RowArrayDecodeBenchmark(benchmark::State& st, const std::shared_ptr& schema, + int column_to_decode) { + auto batches = MakeRandomBatches(schema, 1, std::numeric_limits::max()); + const auto& batch = batches.batches[0]; + RowArray rows; + std::vector row_ids_encode(batch.length); + 
std::iota(row_ids_encode.begin(), row_ids_encode.end(), 0); + std::vector temp_column_arrays; + DCHECK_OK(rows.AppendBatchSelection( + default_memory_pool(), internal::CpuInfo::GetInstance()->hardware_flags(), batch, 0, + static_cast(batch.length), static_cast(batch.length), + row_ids_encode.data(), temp_column_arrays)); + std::vector row_ids_decode(batch.length); + // Create a random access pattern to simulate hash join. + std::default_random_engine gen(42); + std::uniform_int_distribution dist(0, + static_cast(batch.length - 1)); + std::transform(row_ids_decode.begin(), row_ids_decode.end(), row_ids_decode.begin(), + [&](uint32_t) { return dist(gen); }); + + for (auto _ : st) { + ResizableArrayData column; + // Allocate at least 8 rows for the convenience of SIMD decoding. + int log_num_rows_min = std::max(3, bit_util::Log2(batch.length)); + DCHECK_OK(column.Init(batch[column_to_decode].type(), default_memory_pool(), + log_num_rows_min)); + DCHECK_OK(rows.DecodeSelected(&column, column_to_decode, + static_cast(batch.length), row_ids_decode.data(), + default_memory_pool())); + } + st.SetItemsProcessed(st.iterations() * batch.length); +} + +static void BM_RowArray_Decode(benchmark::State& st, + const std::shared_ptr& type) { + SchemaBuilder schema_builder; + DCHECK_OK(schema_builder.AddField(field("", type))); + auto schema = *schema_builder.Finish(); + RowArrayDecodeBenchmark(st, schema, 0); +} + +BENCHMARK_CAPTURE(BM_RowArray_Decode, "boolean", boolean()); +BENCHMARK_CAPTURE(BM_RowArray_Decode, "int8", int8()); +BENCHMARK_CAPTURE(BM_RowArray_Decode, "int16", int16()); +BENCHMARK_CAPTURE(BM_RowArray_Decode, "int32", int32()); +BENCHMARK_CAPTURE(BM_RowArray_Decode, "int64", int64()); + +static void BM_RowArray_DecodeFixedSizeBinary(benchmark::State& st) { + int fixed_size = static_cast(st.range(0)); + SchemaBuilder schema_builder; + DCHECK_OK(schema_builder.AddField(field("", fixed_size_binary(fixed_size)))); + auto schema = *schema_builder.Finish(); + 
RowArrayDecodeBenchmark(st, schema, 0); +} + +BENCHMARK(BM_RowArray_DecodeFixedSizeBinary) + ->ArgNames({"fixed_size"}) + ->ArgsProduct({{3, 5, 6, 7, 9, 16, 42}}); + +static void BM_RowArray_DecodeBinary(benchmark::State& st) { + int max_length = static_cast(st.range(0)); + std::unordered_map metadata; + metadata["max_length"] = std::to_string(max_length); + SchemaBuilder schema_builder; + DCHECK_OK(schema_builder.AddField(field("", utf8(), key_value_metadata(metadata)))); + auto schema = *schema_builder.Finish(); + RowArrayDecodeBenchmark(st, schema, 0); +} + +BENCHMARK(BM_RowArray_DecodeBinary) + ->ArgNames({"max_length"}) + ->ArgsProduct({{32, 64, 128}}); + +static void BM_RowArray_DecodeOneOfColumns(benchmark::State& st, + std::vector> types) { + SchemaBuilder schema_builder; + for (const auto& type : types) { + DCHECK_OK(schema_builder.AddField(field("", type))); + } + auto schema = *schema_builder.Finish(); + int column_to_decode = static_cast(st.range(0)); + RowArrayDecodeBenchmark(st, schema, column_to_decode); +} + +const std::vector> fixed_length_row_column_types{ + boolean(), int32(), fixed_size_binary(64)}; +BENCHMARK_CAPTURE(BM_RowArray_DecodeOneOfColumns, + "fixed_length_row:{boolean,int32,fixed_size_binary(64)}", + fixed_length_row_column_types) + ->ArgNames({"column"}) + ->ArgsProduct( + {benchmark::CreateDenseRange(0, fixed_length_row_column_types.size() - 1, 1)}); + +const std::vector> var_length_row_column_types{ + boolean(), int32(), utf8(), utf8()}; +BENCHMARK_CAPTURE(BM_RowArray_DecodeOneOfColumns, + "var_length_row:{boolean,int32,utf8,utf8}", var_length_row_column_types) + ->ArgNames({"column"}) + ->ArgsProduct({benchmark::CreateDenseRange(0, var_length_row_column_types.size() - 1, + 1)}); + } // namespace acero } // namespace arrow diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc index 6c783110af571..53092c898eac9 100644 --- a/cpp/src/arrow/acero/swiss_join.cc +++ b/cpp/src/arrow/acero/swiss_join.cc @@ 
-57,150 +57,12 @@ int RowArrayAccessor::VarbinaryColumnId(const RowTableMetadata& row_metadata, return varbinary_column_id; } -int RowArrayAccessor::NumRowsToSkip(const RowTableImpl& rows, int column_id, int num_rows, - const uint32_t* row_ids, int num_tail_bytes_to_skip) { - uint32_t num_bytes_skipped = 0; - int num_rows_left = num_rows; - - bool is_fixed_length_column = - rows.metadata().column_metadatas[column_id].is_fixed_length; - - if (!is_fixed_length_column) { - // Varying length column - // - int varbinary_column_id = VarbinaryColumnId(rows.metadata(), column_id); - - while (num_rows_left > 0 && - num_bytes_skipped < static_cast(num_tail_bytes_to_skip)) { - // Find the pointer to the last requested row - // - uint32_t last_row_id = row_ids[num_rows_left - 1]; - const uint8_t* row_ptr = rows.data(2) + rows.offsets()[last_row_id]; - - // Find the length of the requested varying length field in that row - // - uint32_t field_offset_within_row, field_length; - if (varbinary_column_id == 0) { - rows.metadata().first_varbinary_offset_and_length( - row_ptr, &field_offset_within_row, &field_length); - } else { - rows.metadata().nth_varbinary_offset_and_length( - row_ptr, varbinary_column_id, &field_offset_within_row, &field_length); - } - - num_bytes_skipped += field_length; - --num_rows_left; - } - } else { - // Fixed length column - // - uint32_t field_length = rows.metadata().column_metadatas[column_id].fixed_length; - uint32_t num_bytes_skipped = 0; - while (num_rows_left > 0 && - num_bytes_skipped < static_cast(num_tail_bytes_to_skip)) { - num_bytes_skipped += field_length; - --num_rows_left; - } - } - - return num_rows - num_rows_left; -} - -template -void RowArrayAccessor::Visit(const RowTableImpl& rows, int column_id, int num_rows, - const uint32_t* row_ids, PROCESS_VALUE_FN process_value_fn) { - bool is_fixed_length_column = - rows.metadata().column_metadatas[column_id].is_fixed_length; - - // There are 4 cases, each requiring different steps: - // 1. 
Varying length column that is the first varying length column in a row - // 2. Varying length column that is not the first varying length column in a - // row - // 3. Fixed length column in a fixed length row - // 4. Fixed length column in a varying length row - - if (!is_fixed_length_column) { - int varbinary_column_id = VarbinaryColumnId(rows.metadata(), column_id); - const uint8_t* row_ptr_base = rows.data(2); - const RowTableImpl::offset_type* row_offsets = rows.offsets(); - uint32_t field_offset_within_row, field_length; - - if (varbinary_column_id == 0) { - // Case 1: This is the first varbinary column - // - for (int i = 0; i < num_rows; ++i) { - uint32_t row_id = row_ids[i]; - const uint8_t* row_ptr = row_ptr_base + row_offsets[row_id]; - rows.metadata().first_varbinary_offset_and_length( - row_ptr, &field_offset_within_row, &field_length); - process_value_fn(i, row_ptr + field_offset_within_row, field_length); - } - } else { - // Case 2: This is second or later varbinary column - // - for (int i = 0; i < num_rows; ++i) { - uint32_t row_id = row_ids[i]; - const uint8_t* row_ptr = row_ptr_base + row_offsets[row_id]; - rows.metadata().nth_varbinary_offset_and_length( - row_ptr, varbinary_column_id, &field_offset_within_row, &field_length); - process_value_fn(i, row_ptr + field_offset_within_row, field_length); - } - } - } - - if (is_fixed_length_column) { - uint32_t field_offset_within_row = rows.metadata().encoded_field_offset( - rows.metadata().pos_after_encoding(column_id)); - uint32_t field_length = rows.metadata().column_metadatas[column_id].fixed_length; - // Bit column is encoded as a single byte - // - if (field_length == 0) { - field_length = 1; - } - uint32_t row_length = rows.metadata().fixed_length; - - bool is_fixed_length_row = rows.metadata().is_fixed_length; - if (is_fixed_length_row) { - // Case 3: This is a fixed length column in a fixed length row - // - const uint8_t* row_ptr_base = rows.data(1) + field_offset_within_row; - for (int i = 0; 
i < num_rows; ++i) { - uint32_t row_id = row_ids[i]; - const uint8_t* row_ptr = row_ptr_base + row_length * row_id; - process_value_fn(i, row_ptr, field_length); - } - } else { - // Case 4: This is a fixed length column in a varying length row - // - const uint8_t* row_ptr_base = rows.data(2) + field_offset_within_row; - const RowTableImpl::offset_type* row_offsets = rows.offsets(); - for (int i = 0; i < num_rows; ++i) { - uint32_t row_id = row_ids[i]; - const uint8_t* row_ptr = row_ptr_base + row_offsets[row_id]; - process_value_fn(i, row_ptr, field_length); - } - } - } -} - -template -void RowArrayAccessor::VisitNulls(const RowTableImpl& rows, int column_id, int num_rows, - const uint32_t* row_ids, - PROCESS_VALUE_FN process_value_fn) { - const uint8_t* null_masks = rows.null_masks(); - uint32_t null_mask_num_bytes = rows.metadata().null_masks_bytes_per_row; - uint32_t pos_after_encoding = rows.metadata().pos_after_encoding(column_id); - for (int i = 0; i < num_rows; ++i) { - uint32_t row_id = row_ids[i]; - int64_t bit_id = row_id * null_mask_num_bytes * 8 + pos_after_encoding; - process_value_fn(i, bit_util::GetBit(null_masks, bit_id) ? 
0xff : 0); - } -} - -Status RowArray::InitIfNeeded(MemoryPool* pool, const RowTableMetadata& row_metadata) { +Status RowArray::InitIfNeeded(MemoryPool* pool, int64_t hardware_flags, + const RowTableMetadata& row_metadata) { if (is_initialized_) { return Status::OK(); } + hardware_flags_ = hardware_flags; encoder_.Init(row_metadata.column_metadatas, sizeof(uint64_t), sizeof(uint64_t)); RETURN_NOT_OK(rows_temp_.Init(pool, row_metadata)); RETURN_NOT_OK(rows_.Init(pool, row_metadata)); @@ -208,7 +70,8 @@ Status RowArray::InitIfNeeded(MemoryPool* pool, const RowTableMetadata& row_meta return Status::OK(); } -Status RowArray::InitIfNeeded(MemoryPool* pool, const ExecBatch& batch) { +Status RowArray::InitIfNeeded(MemoryPool* pool, int64_t hardware_flags, + const ExecBatch& batch) { if (is_initialized_) { return Status::OK(); } @@ -218,14 +81,15 @@ Status RowArray::InitIfNeeded(MemoryPool* pool, const ExecBatch& batch) { row_metadata.FromColumnMetadataVector(column_metadatas, sizeof(uint64_t), sizeof(uint64_t)); - return InitIfNeeded(pool, row_metadata); + return InitIfNeeded(pool, hardware_flags, row_metadata); } -Status RowArray::AppendBatchSelection(MemoryPool* pool, const ExecBatch& batch, - int begin_row_id, int end_row_id, int num_row_ids, +Status RowArray::AppendBatchSelection(MemoryPool* pool, int64_t hardware_flags, + const ExecBatch& batch, int begin_row_id, + int end_row_id, int num_row_ids, const uint16_t* row_ids, std::vector& temp_column_arrays) { - RETURN_NOT_OK(InitIfNeeded(pool, batch)); + RETURN_NOT_OK(InitIfNeeded(pool, hardware_flags, batch)); RETURN_NOT_OK(ColumnArraysFromExecBatch(batch, begin_row_id, end_row_id - begin_row_id, &temp_column_arrays)); encoder_.PrepareEncodeSelected( @@ -238,7 +102,7 @@ Status RowArray::AppendBatchSelection(MemoryPool* pool, const ExecBatch& batch, void RowArray::Compare(const ExecBatch& batch, int begin_row_id, int end_row_id, int num_selected, const uint16_t* batch_selection_maybe_null, const uint32_t* array_row_ids, 
uint32_t* out_num_not_equal, - uint16_t* out_not_equal_selection, int64_t hardware_flags, + uint16_t* out_not_equal_selection, arrow::util::TempVectorStack* temp_stack, std::vector& temp_column_arrays, uint8_t* out_match_bitvector_maybe_null) { @@ -247,7 +111,7 @@ void RowArray::Compare(const ExecBatch& batch, int begin_row_id, int end_row_id, ARROW_DCHECK(status.ok()); LightContext ctx; - ctx.hardware_flags = hardware_flags; + ctx.hardware_flags = hardware_flags_; ctx.stack = temp_stack; KeyCompare::CompareColumnsToRows( num_selected, batch_selection_maybe_null, array_row_ids, &ctx, out_num_not_equal, @@ -259,6 +123,25 @@ Status RowArray::DecodeSelected(ResizableArrayData* output, int column_id, int num_rows_to_append, const uint32_t* row_ids, MemoryPool* pool) const { int num_rows_before = output->num_rows(); +#ifdef ARROW_HAVE_RUNTIME_AVX2 + // Preprocess some rows if necessary to assure that AVX2 version sees 8-row aligned + // output address. + if ((hardware_flags_ & arrow::internal::CpuInfo::AVX2) && (num_rows_before % 8 != 0) && + (num_rows_to_append >= 8)) { + int num_rows_to_preprocess = 8 - num_rows_before % 8; + // The output must have allocated enough rows to store this few number of preprocessed + // rows without costly resizing the internal buffers. 
+ DCHECK_GE(output->num_rows_allocated(), num_rows_before + num_rows_to_preprocess); + RETURN_NOT_OK( + DecodeSelected(output, column_id, num_rows_to_preprocess, row_ids, pool)); + return DecodeSelected(output, column_id, num_rows_to_append - num_rows_to_preprocess, + row_ids + num_rows_to_preprocess, pool); + } + + bool use_avx2 = + (hardware_flags_ & arrow::internal::CpuInfo::AVX2) && (num_rows_before % 8 == 0); +#endif + RETURN_NOT_OK(output->ResizeFixedLengthBuffers(num_rows_before + num_rows_to_append)); // Both input (KeyRowArray) and output (ResizableArrayData) have buffers with @@ -267,98 +150,59 @@ Status RowArray::DecodeSelected(ResizableArrayData* output, int column_id, // ARROW_ASSIGN_OR_RAISE(KeyColumnMetadata column_metadata, output->column_metadata()); + int num_rows_processed = 0; if (column_metadata.is_fixed_length) { uint32_t fixed_length = column_metadata.fixed_length; - switch (fixed_length) { - case 0: - RowArrayAccessor::Visit(rows_, column_id, num_rows_to_append, row_ids, - [&](int i, const uint8_t* ptr, uint32_t num_bytes) { - bit_util::SetBitTo(output->mutable_data(1), - num_rows_before + i, *ptr != 0); - }); - break; - case 1: - RowArrayAccessor::Visit(rows_, column_id, num_rows_to_append, row_ids, - [&](int i, const uint8_t* ptr, uint32_t num_bytes) { - output->mutable_data(1)[num_rows_before + i] = *ptr; - }); - break; - case 2: - RowArrayAccessor::Visit( - rows_, column_id, num_rows_to_append, row_ids, - [&](int i, const uint8_t* ptr, uint32_t num_bytes) { - reinterpret_cast(output->mutable_data(1))[num_rows_before + i] = - *reinterpret_cast(ptr); - }); - break; - case 4: - RowArrayAccessor::Visit( - rows_, column_id, num_rows_to_append, row_ids, - [&](int i, const uint8_t* ptr, uint32_t num_bytes) { - reinterpret_cast(output->mutable_data(1))[num_rows_before + i] = - *reinterpret_cast(ptr); - }); - break; - case 8: - RowArrayAccessor::Visit( - rows_, column_id, num_rows_to_append, row_ids, - [&](int i, const uint8_t* ptr, uint32_t 
num_bytes) { - reinterpret_cast(output->mutable_data(1))[num_rows_before + i] = - *reinterpret_cast(ptr); - }); - break; - default: - RowArrayAccessor::Visit( - rows_, column_id, num_rows_to_append, row_ids, - [&](int i, const uint8_t* ptr, uint32_t num_bytes) { - uint64_t* dst = reinterpret_cast( - output->mutable_data(1) + num_bytes * (num_rows_before + i)); - const uint64_t* src = reinterpret_cast(ptr); - for (uint32_t word_id = 0; - word_id < bit_util::CeilDiv(num_bytes, sizeof(uint64_t)); ++word_id) { - arrow::util::SafeStore(dst + word_id, - arrow::util::SafeLoad(src + word_id)); - } - }); - break; + + // Process fixed length columns + // +#ifdef ARROW_HAVE_RUNTIME_AVX2 + if (use_avx2) { + num_rows_processed = DecodeFixedLength_avx2( + output, num_rows_before, column_id, fixed_length, num_rows_to_append, row_ids); } +#endif + DecodeFixedLength(output, num_rows_before + num_rows_processed, column_id, + fixed_length, num_rows_to_append - num_rows_processed, + row_ids + num_rows_processed); } else { - uint32_t* offsets = - reinterpret_cast(output->mutable_data(1)) + num_rows_before; - uint32_t sum = num_rows_before == 0 ? 
0 : offsets[0]; - RowArrayAccessor::Visit( - rows_, column_id, num_rows_to_append, row_ids, - [&](int i, const uint8_t* ptr, uint32_t num_bytes) { offsets[i] = num_bytes; }); - for (int i = 0; i < num_rows_to_append; ++i) { - uint32_t length = offsets[i]; - offsets[i] = sum; - sum += length; - } - offsets[num_rows_to_append] = sum; + // Process offsets for varying length columns + // +#ifdef ARROW_HAVE_RUNTIME_AVX2 + if (use_avx2) { + num_rows_processed = DecodeOffsets_avx2(output, num_rows_before, column_id, + num_rows_to_append, row_ids); + } +#endif + DecodeOffsets(output, num_rows_before + num_rows_processed, column_id, + num_rows_to_append - num_rows_processed, row_ids + num_rows_processed); + RETURN_NOT_OK(output->ResizeVaryingLengthBuffer()); - RowArrayAccessor::Visit( - rows_, column_id, num_rows_to_append, row_ids, - [&](int i, const uint8_t* ptr, uint32_t num_bytes) { - uint64_t* dst = reinterpret_cast( - output->mutable_data(2) + - reinterpret_cast( - output->mutable_data(1))[num_rows_before + i]); - const uint64_t* src = reinterpret_cast(ptr); - for (uint32_t word_id = 0; - word_id < bit_util::CeilDiv(num_bytes, sizeof(uint64_t)); ++word_id) { - arrow::util::SafeStore(dst + word_id, - arrow::util::SafeLoad(src + word_id)); - } - }); + + // Process data for varying length columns + // +#ifdef ARROW_HAVE_RUNTIME_AVX2 + if (use_avx2) { + num_rows_processed = DecodeVarLength_avx2(output, num_rows_before, column_id, + num_rows_to_append, row_ids); + } +#endif + DecodeVarLength(output, num_rows_before + num_rows_processed, column_id, + num_rows_to_append - num_rows_processed, + row_ids + num_rows_processed); } // Process nulls // - RowArrayAccessor::VisitNulls( - rows_, column_id, num_rows_to_append, row_ids, [&](int i, uint8_t value) { - bit_util::SetBitTo(output->mutable_data(0), num_rows_before + i, value == 0); - }); +#ifdef ARROW_HAVE_RUNTIME_AVX2 + if (use_avx2) { + num_rows_processed = + DecodeNulls_avx2(output, num_rows_before, column_id, 
num_rows_to_append, row_ids); + } +#endif + DecodeNulls(output, num_rows_before + num_rows_processed, column_id, + num_rows_to_append - num_rows_processed, row_ids + num_rows_processed); return Status::OK(); } @@ -437,16 +281,125 @@ void RowArray::DebugPrintToFile(const char* filename, bool print_sorted) const { } } +void RowArray::DecodeFixedLength(ResizableArrayData* output, int output_start_row, + int column_id, uint32_t fixed_length, + int num_rows_to_append, const uint32_t* row_ids) const { + switch (fixed_length) { + case 0: + RowArrayAccessor::Visit(rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* ptr, uint32_t num_bytes) { + bit_util::SetBitTo(output->mutable_data(1), + output_start_row + i, *ptr != 0); + }); + break; + case 1: + RowArrayAccessor::Visit(rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* ptr, uint32_t num_bytes) { + output->mutable_data(1)[output_start_row + i] = *ptr; + }); + break; + case 2: + RowArrayAccessor::Visit( + rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* ptr, uint32_t num_bytes) { + output->mutable_data_as(1)[output_start_row + i] = + *reinterpret_cast(ptr); + }); + break; + case 4: + RowArrayAccessor::Visit( + rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* ptr, uint32_t num_bytes) { + output->mutable_data_as(1)[output_start_row + i] = + *reinterpret_cast(ptr); + }); + break; + case 8: + RowArrayAccessor::Visit( + rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* ptr, uint32_t num_bytes) { + output->mutable_data_as(1)[output_start_row + i] = + *reinterpret_cast(ptr); + }); + break; + default: + RowArrayAccessor::Visit( + rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* ptr, uint32_t num_bytes) { + uint64_t* dst = reinterpret_cast( + output->mutable_data(1) + num_bytes * (output_start_row + i)); + const uint64_t* src = reinterpret_cast(ptr); + // Note that both `output` and 
`ptr` have been allocated with enough padding + // to accommodate the memory overshoot. See the allocations for + // `ResizableArrayData` in `JoinResultMaterialize` and `JoinResidualFilter` + // for `output`, and `RowTableImpl::kPaddingForVectors` for `ptr`. + for (uint32_t word_id = 0; + word_id < bit_util::CeilDiv(num_bytes, sizeof(uint64_t)); ++word_id) { + arrow::util::SafeStore(dst + word_id, + arrow::util::SafeLoad(src + word_id)); + } + }); + break; + } +} + +void RowArray::DecodeOffsets(ResizableArrayData* output, int output_start_row, + int column_id, int num_rows_to_append, + const uint32_t* row_ids) const { + uint32_t* offsets = output->mutable_data_as(1) + output_start_row; + uint32_t sum = (output_start_row == 0) ? 0 : offsets[0]; + RowArrayAccessor::Visit( + rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* ptr, uint32_t num_bytes) { offsets[i] = num_bytes; }); + for (int i = 0; i < num_rows_to_append; ++i) { + uint32_t length = offsets[i]; + offsets[i] = sum; + sum += length; + } + offsets[num_rows_to_append] = sum; +} + +void RowArray::DecodeVarLength(ResizableArrayData* output, int output_start_row, + int column_id, int num_rows_to_append, + const uint32_t* row_ids) const { + RowArrayAccessor::Visit( + rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* ptr, uint32_t num_bytes) { + uint64_t* dst = reinterpret_cast( + output->mutable_data(2) + + output->mutable_data_as(1)[output_start_row + i]); + const uint64_t* src = reinterpret_cast(ptr); + // Note that both `output` and `ptr` have been allocated with enough padding to + // accommodate the memory overshoot. See the allocations for `ResizableArrayData` + // in `JoinResultMaterialize` and `JoinResidualFilter` for `output`, and + // `RowTableImpl::kPaddingForVectors` for `ptr`. 
+ for (uint32_t word_id = 0; + word_id < bit_util::CeilDiv(num_bytes, sizeof(uint64_t)); ++word_id) { + arrow::util::SafeStore(dst + word_id, + arrow::util::SafeLoad(src + word_id)); + } + }); +} + +void RowArray::DecodeNulls(ResizableArrayData* output, int output_start_row, + int column_id, int num_rows_to_append, + const uint32_t* row_ids) const { + RowArrayAccessor::VisitNulls( + rows_, column_id, num_rows_to_append, row_ids, [&](int i, uint8_t value) { + bit_util::SetBitTo(output->mutable_data(0), output_start_row + i, value == 0); + }); +} + Status RowArrayMerge::PrepareForMerge(RowArray* target, const std::vector& sources, std::vector* first_target_row_id, - MemoryPool* pool) { + MemoryPool* pool, int64_t hardware_flags) { ARROW_DCHECK(!sources.empty()); ARROW_DCHECK(sources[0]->is_initialized_); const RowTableMetadata& metadata = sources[0]->rows_.metadata(); ARROW_DCHECK(!target->is_initialized_); - RETURN_NOT_OK(target->InitIfNeeded(pool, metadata)); + RETURN_NOT_OK(target->InitIfNeeded(pool, hardware_flags, metadata)); // Sum the number of rows from all input sources and calculate their total // size. 
@@ -895,8 +848,8 @@ void SwissTableWithKeys::EqualCallback(int num_keys, const uint16_t* selection_m uint8_t* match_bitvector = match_bitvector_buf.mutable_data(); keys_.Compare(*in->batch, batch_start_to_use, batch_end_to_use, num_keys, - selection_to_use, group_ids_to_use, nullptr, nullptr, hardware_flags, - in->temp_stack, *in->temp_column_arrays, match_bitvector); + selection_to_use, group_ids_to_use, nullptr, nullptr, in->temp_stack, + *in->temp_column_arrays, match_bitvector); if (selection_maybe_null) { int num_keys_mismatch = 0; @@ -918,8 +871,7 @@ void SwissTableWithKeys::EqualCallback(int num_keys, const uint16_t* selection_m group_ids_to_use = group_ids; keys_.Compare(*in->batch, batch_start_to_use, batch_end_to_use, num_keys, selection_to_use, group_ids_to_use, out_num_keys_mismatch, - out_selection_mismatch, hardware_flags, in->temp_stack, - *in->temp_column_arrays); + out_selection_mismatch, in->temp_stack, *in->temp_column_arrays); } } @@ -944,16 +896,18 @@ Status SwissTableWithKeys::AppendCallback(int num_keys, const uint16_t* selectio batch_end_to_use = static_cast(in->batch->length); selection_to_use = selection_to_use_buf.mutable_data(); - return keys_.AppendBatchSelection(swiss_table_.pool(), *in->batch, batch_start_to_use, - batch_end_to_use, num_keys, selection_to_use, + return keys_.AppendBatchSelection(swiss_table_.pool(), swiss_table_.hardware_flags(), + *in->batch, batch_start_to_use, batch_end_to_use, + num_keys, selection_to_use, *in->temp_column_arrays); } else { batch_start_to_use = in->batch_start_row; batch_end_to_use = in->batch_end_row; selection_to_use = selection; - return keys_.AppendBatchSelection(swiss_table_.pool(), *in->batch, batch_start_to_use, - batch_end_to_use, num_keys, selection_to_use, + return keys_.AppendBatchSelection(swiss_table_.pool(), swiss_table_.hardware_flags(), + *in->batch, batch_start_to_use, batch_end_to_use, + num_keys, selection_to_use, *in->temp_column_arrays); } } @@ -1177,8 +1131,10 @@ Status 
SwissTableForJoinBuild::Init(SwissTableForJoin* target, int dop, int64_t for (int i = 0; i < num_prtns_; ++i) { PartitionState& prtn_state = prtn_states_[i]; RETURN_NOT_OK(prtn_state.keys.Init(hardware_flags_, pool_)); - RETURN_NOT_OK(prtn_state.keys.keys()->InitIfNeeded(pool, key_row_metadata)); - RETURN_NOT_OK(prtn_state.payloads.InitIfNeeded(pool, payload_row_metadata)); + RETURN_NOT_OK( + prtn_state.keys.keys()->InitIfNeeded(pool, hardware_flags, key_row_metadata)); + RETURN_NOT_OK( + prtn_state.payloads.InitIfNeeded(pool, hardware_flags, payload_row_metadata)); } target_->dop_ = dop_; @@ -1294,7 +1250,7 @@ Status SwissTableForJoinBuild::ProcessPartition(int64_t thread_id, if (!no_payload_) { ARROW_DCHECK(payload_batch_maybe_null); RETURN_NOT_OK(prtn_state.payloads.AppendBatchSelection( - pool_, *payload_batch_maybe_null, 0, + pool_, hardware_flags_, *payload_batch_maybe_null, 0, static_cast(payload_batch_maybe_null->length), num_rows_new, row_ids, locals.temp_column_arrays)); } @@ -1324,7 +1280,8 @@ Status SwissTableForJoinBuild::PreparePrtnMerge() { partition_keys[i] = prtn_states_[i].keys.keys(); } RETURN_NOT_OK(RowArrayMerge::PrepareForMerge(target_->map_.keys(), partition_keys, - &partition_keys_first_row_id_, pool_)); + &partition_keys_first_row_id_, pool_, + hardware_flags_)); // 2. 
SwissTable: // @@ -1346,8 +1303,8 @@ Status SwissTableForJoinBuild::PreparePrtnMerge() { partition_payloads[i] = &prtn_states_[i].payloads; } RETURN_NOT_OK(RowArrayMerge::PrepareForMerge(&target_->payloads_, partition_payloads, - &partition_payloads_first_row_id_, - pool_)); + &partition_payloads_first_row_id_, pool_, + hardware_flags_)); } // Check if we have duplicate keys @@ -1499,7 +1456,7 @@ void SwissTableForJoinBuild::FinishPrtnMerge(arrow::util::TempVectorStack* temp_ LightContext ctx; ctx.hardware_flags = hardware_flags_; ctx.stack = temp_stack; - std::ignore = target_->map_.keys()->rows_.has_any_nulls(&ctx); + target_->map_.keys()->EnsureHasAnyNullsComputed(ctx); } void JoinResultMaterialize::Init(MemoryPool* pool, @@ -1667,7 +1624,9 @@ Result> JoinResultMaterialize::FlushBuildColumn( const std::shared_ptr& data_type, const RowArray* row_array, int column_id, uint32_t* row_ids) { ResizableArrayData output; - RETURN_NOT_OK(output.Init(data_type, pool_, bit_util::Log2(num_rows_))); + // Allocate at least 8 rows for the convenience of SIMD decoding. + int log_num_rows_min = std::max(3, bit_util::Log2(num_rows_)); + RETURN_NOT_OK(output.Init(data_type, pool_, log_num_rows_min)); for (size_t i = 0; i <= null_ranges_.size(); ++i) { int row_id_begin = @@ -2247,9 +2206,11 @@ Result JoinResidualFilter::MaterializeFilterInput( build_schemas_->map(HashJoinProjection::FILTER, HashJoinProjection::PAYLOAD); for (int i = 0; i < num_build_cols; ++i) { ResizableArrayData column_data; + // Allocate at least 8 rows for the convenience of SIMD decoding. 
+ int log_num_rows_min = std::max(3, bit_util::Log2(num_batch_rows)); RETURN_NOT_OK( column_data.Init(build_schemas_->data_type(HashJoinProjection::FILTER, i), - pool_, bit_util::Log2(num_batch_rows))); + pool_, log_num_rows_min)); if (auto idx = to_key.get(i); idx != SchemaProjectionMap::kMissingField) { RETURN_NOT_OK(build_keys_->DecodeSelected(&column_data, idx, num_batch_rows, key_ids_maybe_null, pool_)); diff --git a/cpp/src/arrow/acero/swiss_join_avx2.cc b/cpp/src/arrow/acero/swiss_join_avx2.cc index 1076073523448..20886cad539c3 100644 --- a/cpp/src/arrow/acero/swiss_join_avx2.cc +++ b/cpp/src/arrow/acero/swiss_join_avx2.cc @@ -32,7 +32,7 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu // Number of rows processed together in a single iteration of the loop (single // call to the provided processing lambda). // - constexpr int unroll = 8; + constexpr int kUnroll = 8; bool is_fixed_length_column = rows.metadata().column_metadatas[column_id].is_fixed_length; @@ -48,6 +48,8 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu int varbinary_column_id = VarbinaryColumnId(rows.metadata(), column_id); const uint8_t* row_ptr_base = rows.data(2); const RowTableImpl::offset_type* row_offsets = rows.offsets(); + auto row_offsets_i64 = + reinterpret_cast(row_offsets); static_assert( sizeof(RowTableImpl::offset_type) == sizeof(int64_t), "RowArrayAccessor::Visit_avx2 only supports 64-bit RowTableImpl::offset_type"); @@ -58,17 +60,17 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu __m256i field_offset_within_row = _mm256_set1_epi32(rows.metadata().fixed_length); __m256i varbinary_end_array_offset = _mm256_set1_epi64x(rows.metadata().varbinary_end_array_offset); - for (int i = 0; i < num_rows / unroll; ++i) { + for (int i = 0; i < num_rows / kUnroll; ++i) { // Load 8 32-bit row ids. 
__m256i row_id = _mm256_loadu_si256(reinterpret_cast(row_ids) + i); // Gather the lower/higher 4 64-bit row offsets based on the lower/higher 4 32-bit // row ids. __m256i row_offset_lo = - _mm256_i32gather_epi64(row_offsets, _mm256_castsi256_si128(row_id), + _mm256_i32gather_epi64(row_offsets_i64, _mm256_castsi256_si128(row_id), sizeof(RowTableImpl::offset_type)); __m256i row_offset_hi = - _mm256_i32gather_epi64(row_offsets, _mm256_extracti128_si256(row_id, 1), + _mm256_i32gather_epi64(row_offsets_i64, _mm256_extracti128_si256(row_id, 1), sizeof(RowTableImpl::offset_type)); // Gather the lower/higher 4 32-bit field lengths based on the lower/higher 4 // 64-bit row offsets. @@ -81,7 +83,7 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu // The final 8 32-bit field lengths, subtracting the field offset within row. __m256i field_length = _mm256_sub_epi32( _mm256_set_m128i(field_length_hi, field_length_lo), field_offset_within_row); - process_8_values_fn(i * unroll, row_ptr_base, + process_8_values_fn(i * kUnroll, row_ptr_base, _mm256_add_epi64(row_offset_lo, field_offset_within_row), _mm256_add_epi64(row_offset_hi, field_offset_within_row), field_length); @@ -94,19 +96,19 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu sizeof(uint32_t) * (varbinary_column_id - 1)); auto row_ptr_base_i64 = reinterpret_cast(row_ptr_base); - for (int i = 0; i < num_rows / unroll; ++i) { + for (int i = 0; i < num_rows / kUnroll; ++i) { // Load 8 32-bit row ids. __m256i row_id = _mm256_loadu_si256(reinterpret_cast(row_ids) + i); // Gather the lower/higher 4 64-bit row offsets based on the lower/higher 4 32-bit // row ids. 
__m256i row_offset_lo = - _mm256_i32gather_epi64(row_offsets, _mm256_castsi256_si128(row_id), + _mm256_i32gather_epi64(row_offsets_i64, _mm256_castsi256_si128(row_id), sizeof(RowTableImpl::offset_type)); // Gather the lower/higher 4 32-bit field lengths based on the lower/higher 4 // 64-bit row offsets. __m256i row_offset_hi = - _mm256_i32gather_epi64(row_offsets, _mm256_extracti128_si256(row_id, 1), + _mm256_i32gather_epi64(row_offsets_i64, _mm256_extracti128_si256(row_id, 1), sizeof(RowTableImpl::offset_type)); // Prepare the lower/higher 4 64-bit end array offsets based on the lower/higher 4 // 64-bit row offsets. @@ -127,8 +129,8 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu __m256i field_offset_within_row = _mm256_blend_epi32( field_offset_within_row_A, field_offset_within_row_B, 0xf0); - __m256i alignment_padding = - _mm256_andnot_si256(field_offset_within_row, _mm256_set1_epi8(0xff)); + __m256i alignment_padding = _mm256_andnot_si256( + field_offset_within_row, _mm256_set1_epi8(static_cast(0xff))); alignment_padding = _mm256_add_epi32(alignment_padding, _mm256_set1_epi32(1)); alignment_padding = _mm256_and_si256( alignment_padding, _mm256_set1_epi32(rows.metadata().string_alignment - 1)); @@ -147,7 +149,7 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu field_offset_within_row_B = _mm256_add_epi32(field_offset_within_row_B, alignment_padding); - process_8_values_fn(i * unroll, row_ptr_base, + process_8_values_fn(i * kUnroll, row_ptr_base, _mm256_add_epi64(row_offset_lo, field_offset_within_row_A), _mm256_add_epi64(row_offset_hi, field_offset_within_row_B), field_length); @@ -159,15 +161,21 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu __m256i field_offset_within_row = _mm256_set1_epi64x(rows.metadata().encoded_field_offset( rows.metadata().pos_after_encoding(column_id))); - __m256i field_length = - 
_mm256_set1_epi32(rows.metadata().column_metadatas[column_id].fixed_length); + uint32_t actual_field_length = + rows.metadata().column_metadatas[column_id].fixed_length; + // Bit column is encoded as a single byte + if (actual_field_length == 0) { + actual_field_length = 1; + } + __m256i field_length = _mm256_set1_epi32(actual_field_length); + __m256i row_length = _mm256_set1_epi64x(rows.metadata().fixed_length); bool is_fixed_length_row = rows.metadata().is_fixed_length; if (is_fixed_length_row) { // Case 3: This is a fixed length column in fixed length row // const uint8_t* row_ptr_base = rows.data(1); - for (int i = 0; i < num_rows / unroll; ++i) { + for (int i = 0; i < num_rows / kUnroll; ++i) { // Load 8 32-bit row ids. __m256i row_id = _mm256_loadu_si256(reinterpret_cast(row_ids) + i); @@ -177,15 +185,15 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu __m256i row_id_hi = _mm256_cvtepi32_epi64(_mm256_extracti128_si256(row_id, 1)); // Calculate the lower/higher 4 64-bit row offsets based on the lower/higher 4 // 64-bit row ids and the fixed field length. - __m256i row_offset_lo = _mm256_mul_epi32(row_id_lo, field_length); - __m256i row_offset_hi = _mm256_mul_epi32(row_id_hi, field_length); + __m256i row_offset_lo = _mm256_mul_epi32(row_id_lo, row_length); + __m256i row_offset_hi = _mm256_mul_epi32(row_id_hi, row_length); // Calculate the lower/higher 4 64-bit field offsets based on the lower/higher 4 // 64-bit row offsets and field offset within row. 
__m256i field_offset_lo = _mm256_add_epi64(row_offset_lo, field_offset_within_row); __m256i field_offset_hi = _mm256_add_epi64(row_offset_hi, field_offset_within_row); - process_8_values_fn(i * unroll, row_ptr_base, field_offset_lo, field_offset_hi, + process_8_values_fn(i * kUnroll, row_ptr_base, field_offset_lo, field_offset_hi, field_length); } } else { @@ -193,17 +201,19 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu // const uint8_t* row_ptr_base = rows.data(2); const RowTableImpl::offset_type* row_offsets = rows.offsets(); - for (int i = 0; i < num_rows / unroll; ++i) { + auto row_offsets_i64 = + reinterpret_cast(row_offsets); + for (int i = 0; i < num_rows / kUnroll; ++i) { // Load 8 32-bit row ids. __m256i row_id = _mm256_loadu_si256(reinterpret_cast(row_ids) + i); // Gather the lower/higher 4 64-bit row offsets based on the lower/higher 4 32-bit // row ids. __m256i row_offset_lo = - _mm256_i32gather_epi64(row_offsets, _mm256_castsi256_si128(row_id), + _mm256_i32gather_epi64(row_offsets_i64, _mm256_castsi256_si128(row_id), sizeof(RowTableImpl::offset_type)); __m256i row_offset_hi = - _mm256_i32gather_epi64(row_offsets, _mm256_extracti128_si256(row_id, 1), + _mm256_i32gather_epi64(row_offsets_i64, _mm256_extracti128_si256(row_id, 1), sizeof(RowTableImpl::offset_type)); // Calculate the lower/higher 4 64-bit field offsets based on the lower/higher 4 // 64-bit row offsets and field offset within row. 
@@ -211,13 +221,13 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu _mm256_add_epi64(row_offset_lo, field_offset_within_row); __m256i field_offset_hi = _mm256_add_epi64(row_offset_hi, field_offset_within_row); - process_8_values_fn(i * unroll, row_ptr_base, field_offset_lo, field_offset_hi, + process_8_values_fn(i * kUnroll, row_ptr_base, field_offset_lo, field_offset_hi, field_length); } } } - return num_rows - (num_rows % unroll); + return num_rows - (num_rows % kUnroll); } template @@ -227,31 +237,296 @@ int RowArrayAccessor::VisitNulls_avx2(const RowTableImpl& rows, int column_id, // Number of rows processed together in a single iteration of the loop (single // call to the provided processing lambda). // - constexpr int unroll = 8; + constexpr int kUnroll = 8; const uint8_t* null_masks = rows.null_masks(); __m256i null_bits_per_row = _mm256_set1_epi32(8 * rows.metadata().null_masks_bytes_per_row); - for (int i = 0; i < num_rows / unroll; ++i) { + __m256i pos_after_encoding = + _mm256_set1_epi32(rows.metadata().pos_after_encoding(column_id)); + for (int i = 0; i < num_rows / kUnroll; ++i) { __m256i row_id = _mm256_loadu_si256(reinterpret_cast(row_ids) + i); __m256i bit_id = _mm256_mullo_epi32(row_id, null_bits_per_row); - bit_id = _mm256_add_epi32(bit_id, _mm256_set1_epi32(column_id)); + bit_id = _mm256_add_epi32(bit_id, pos_after_encoding); __m256i bytes = _mm256_i32gather_epi32(reinterpret_cast(null_masks), _mm256_srli_epi32(bit_id, 3), 1); __m256i bit_in_word = _mm256_sllv_epi32( _mm256_set1_epi32(1), _mm256_and_si256(bit_id, _mm256_set1_epi32(7))); + // `result` will contain one 32-bit word per tested null bit, either 0xffffffff if the + // null bit was set or 0 if it was unset. 
__m256i result = _mm256_cmpeq_epi32(_mm256_and_si256(bytes, bit_in_word), bit_in_word); - uint64_t null_bytes = static_cast( + // NB: Be careful about sign-extension when casting the return value of + // _mm256_movemask_epi8 (signed 32-bit) to unsigned 64-bit, which will pollute the + // higher bits of the following OR. + uint32_t null_bytes_lo = static_cast( _mm256_movemask_epi8(_mm256_cvtepi32_epi64(_mm256_castsi256_si128(result)))); - null_bytes |= static_cast(_mm256_movemask_epi8( - _mm256_cvtepi32_epi64(_mm256_extracti128_si256(result, 1)))) - << 32; + uint64_t null_bytes_hi = + _mm256_movemask_epi8(_mm256_cvtepi32_epi64(_mm256_extracti128_si256(result, 1))); + uint64_t null_bytes = null_bytes_lo | (null_bytes_hi << 32); - process_8_values_fn(i * unroll, null_bytes); + process_8_values_fn(i * kUnroll, null_bytes); } - return num_rows - (num_rows % unroll); + return num_rows - (num_rows % kUnroll); +} + +namespace { + +inline void Decode8FixedLength0_avx2(uint8_t* output, const uint8_t* row_ptr_base, + __m256i offset_lo, __m256i offset_hi) { + // Gather the lower/higher 4 32-bit (only lower 1 bit interesting) values based on the + // lower/higher 4 64-bit row offsets. + __m128i row_lo = + _mm256_i64gather_epi32(reinterpret_cast(row_ptr_base), offset_lo, 1); + __m128i row_hi = + _mm256_i64gather_epi32(reinterpret_cast(row_ptr_base), offset_hi, 1); + // Extend to 64-bit. + __m256i row_lo_64 = _mm256_cvtepi32_epi64(row_lo); + __m256i row_hi_64 = _mm256_cvtepi32_epi64(row_hi); + // Keep the first 8 bits in each 64-bit value, as the other bits belong to other + // columns. + row_lo_64 = _mm256_and_si256(row_lo_64, _mm256_set1_epi64x(0xFF)); + row_hi_64 = _mm256_and_si256(row_hi_64, _mm256_set1_epi64x(0xFF)); + // If a 64-bit value is zero, then we get 64 set bits. 
+ __m256i is_zero_lo_64 = _mm256_cmpeq_epi64(row_lo_64, _mm256_setzero_si256()); + __m256i is_zero_hi_64 = _mm256_cmpeq_epi64(row_hi_64, _mm256_setzero_si256()); + // 64 set bits per value to 8 set bits (one byte) per value. + int is_zero_lo_8 = _mm256_movemask_epi8(is_zero_lo_64); + int is_zero_hi_8 = _mm256_movemask_epi8(is_zero_hi_64); + // 8 set bits to 1 set bit. + uint8_t is_zero = static_cast( + _mm_movemask_epi8(_mm_set_epi32(0, 0, is_zero_hi_8, is_zero_lo_8))); + *output = static_cast(~is_zero); +} + +inline void Decode8FixedLength1_avx2(uint8_t* output, const uint8_t* row_ptr_base, + __m256i offset_lo, __m256i offset_hi) { + // Gather the lower/higher 4 32-bit (only lower 8 bits interesting) values based on the + // lower/higher 4 64-bit row offsets. + __m128i row_lo = + _mm256_i64gather_epi32(reinterpret_cast(row_ptr_base), offset_lo, 1); + __m128i row_hi = + _mm256_i64gather_epi32(reinterpret_cast(row_ptr_base), offset_hi, 1); + __m256i row = _mm256_set_m128i(row_hi, row_lo); + // Shuffle the lower 8 bits of each 32-bit values to the lower 32 bits of each 128-bit + // lane. + constexpr uint64_t kByteSequence_0_4_8_12 = 0x0c080400ULL; + const __m256i shuffle_const = + _mm256_setr_epi64x(kByteSequence_0_4_8_12, -1, kByteSequence_0_4_8_12, -1); + row = _mm256_shuffle_epi8(row, shuffle_const); + // Get the lower 32-bits (4 8-bit values) from each 128-bit lane. + // NB: Be careful about sign-extension when casting the return value of + // _mm256_extract_epi32 (signed 32-bit) to unsigned 64-bit, which will pollute the + // higher bits of the following OR. 
+ uint32_t compact_row_lo = static_cast(_mm256_extract_epi32(row, 0)); + uint64_t compact_row_hi = static_cast(_mm256_extract_epi32(row, 4)) << 32; + *reinterpret_cast(output) = compact_row_lo | compact_row_hi; +} + +inline void Decode8FixedLength2_avx2(uint16_t* output, const uint8_t* row_ptr_base, + __m256i offset_lo, __m256i offset_hi) { + // Gather the lower/higher 4 32-bit (only lower 16 bits interesting) values based on the + // lower/higher 4 64-bit row offsets. + __m128i row_lo = + _mm256_i64gather_epi32(reinterpret_cast(row_ptr_base), offset_lo, 1); + __m128i row_hi = + _mm256_i64gather_epi32(reinterpret_cast(row_ptr_base), offset_hi, 1); + __m256i row = _mm256_set_m128i(row_hi, row_lo); + // Shuffle the lower 16 bits of each 32-bit values to the lower 64 bits of each 128-bit + // lane. + constexpr uint64_t kByteSequence_0_1_4_5_8_9_12_13 = 0x0d0c090805040100ULL; + const __m256i shuffle_const = _mm256_setr_epi64x(kByteSequence_0_1_4_5_8_9_12_13, -1, + kByteSequence_0_1_4_5_8_9_12_13, -1); + row = _mm256_shuffle_epi8(row, shuffle_const); + // Swap the second and the third 64-bit lane, so that all 16-bit values end up in the + // lower half of `row`. + // (0xd8 = 0b 11 01 10 00) + row = _mm256_permute4x64_epi64(row, 0xd8); + _mm_storeu_si128(reinterpret_cast<__m128i*>(output), _mm256_castsi256_si128(row)); +} + +inline void Decode8FixedLength4_avx2(uint32_t* output, const uint8_t* row_ptr_base, + __m256i offset_lo, __m256i offset_hi) { + // Gather the lower/higher 4 32-bit values based on the lower/higher 4 64-bit row + // offsets. 
+ __m128i row_lo = + _mm256_i64gather_epi32(reinterpret_cast(row_ptr_base), offset_lo, 1); + __m128i row_hi = + _mm256_i64gather_epi32(reinterpret_cast(row_ptr_base), offset_hi, 1); + __m256i row = _mm256_set_m128i(row_hi, row_lo); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(output), row); +} + +inline void Decode8FixedLength8_avx2(uint64_t* output, const uint8_t* row_ptr_base, + __m256i offset_lo, __m256i offset_hi) { + auto row_ptr_base_i64 = + reinterpret_cast(row_ptr_base); + // Gather the lower/higher 4 64-bit values based on the lower/higher 4 64-bit row + // offsets. + __m256i row_lo = _mm256_i64gather_epi64(row_ptr_base_i64, offset_lo, 1); + __m256i row_hi = _mm256_i64gather_epi64(row_ptr_base_i64, offset_hi, 1); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(output), row_lo); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(output + 4), row_hi); +} + +inline void Decode1_avx2(uint8_t* output, const uint8_t* row_ptr, uint32_t num_bytes) { + // Copy 32 bytes at a time. + // Note that both `output` and `row_ptr` have been allocated with enough padding to + // accommodate the memory overshoot. See the allocations for `ResizableArrayData` in + // `JoinResultMaterialize` and `JoinResidualFilter` for `output`, and + // `RowTableImpl::kPaddingForVectors` for `row_ptr`. + __m256i* output_i256 = reinterpret_cast<__m256i*>(output); + const __m256i* row_ptr_i256 = reinterpret_cast(row_ptr); + for (int istripe = 0; istripe < bit_util::CeilDiv(num_bytes, 32); ++istripe) { + _mm256_storeu_si256(output_i256 + istripe, + _mm256_loadu_si256(row_ptr_i256 + istripe)); + } +} + +inline uint32_t Decode8Offset_avx2(uint32_t* output, uint32_t current_length, + __m256i num_bytes) { + uint32_t num_bytes_last = static_cast(_mm256_extract_epi32(num_bytes, 7)); + // Init every offset with the current length. + __m256i offsets = _mm256_set1_epi32(current_length); + // We keep left-shifting the length and accumulate the offset by adding the length. 
+ __m256i length = + _mm256_permutevar8x32_epi32(num_bytes, _mm256_setr_epi32(7, 0, 1, 2, 3, 4, 5, 6)); + length = _mm256_insert_epi32(length, 0, 0); + // `length` is now a sequence of 32-bit words such as: + // - length[0] = 0 + // - length[1] = num_bytes[0] + // ... + // - length[7] = num_bytes[6] + // (note that num_bytes[7] is kept in `num_bytes_last`) + for (int i = 0; i < 7; ++i) { + offsets = _mm256_add_epi32(offsets, length); + length = + _mm256_permutevar8x32_epi32(length, _mm256_setr_epi32(7, 0, 1, 2, 3, 4, 5, 6)); + length = _mm256_insert_epi32(length, 0, 0); + } + _mm256_storeu_si256(reinterpret_cast<__m256i*>(output), offsets); + return _mm256_extract_epi32(offsets, 7) + num_bytes_last; +} + +inline void Decode8Null_avx2(uint8_t* output, uint64_t null_bytes) { + uint8_t null_bits = + static_cast(_mm256_movemask_epi8(_mm256_set1_epi64x(null_bytes))); + *output = ~null_bits; +} + +} // namespace + +int RowArray::DecodeFixedLength_avx2(ResizableArrayData* output, int output_start_row, + int column_id, uint32_t fixed_length, + int num_rows_to_append, + const uint32_t* row_ids) const { + DCHECK_EQ(output_start_row % 8, 0); + + int num_rows_processed = 0; + switch (fixed_length) { + case 0: + num_rows_processed = RowArrayAccessor::Visit_avx2( + rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* row_ptr_base, __m256i offset_lo, __m256i offset_hi, + __m256i num_bytes) { + DCHECK_EQ(i % 8, 0); + Decode8FixedLength0_avx2(output->mutable_data(1) + (output_start_row + i) / 8, + row_ptr_base, offset_lo, offset_hi); + }); + break; + case 1: + num_rows_processed = RowArrayAccessor::Visit_avx2( + rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* row_ptr_base, __m256i offset_lo, __m256i offset_hi, + __m256i num_bytes) { + Decode8FixedLength1_avx2(output->mutable_data(1) + output_start_row + i, + row_ptr_base, offset_lo, offset_hi); + }); + break; + case 2: + num_rows_processed = RowArrayAccessor::Visit_avx2( + rows_, 
column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* row_ptr_base, __m256i offset_lo, __m256i offset_hi, + __m256i num_bytes) { + Decode8FixedLength2_avx2( + output->mutable_data_as(1) + output_start_row + i, row_ptr_base, + offset_lo, offset_hi); + }); + break; + case 4: + num_rows_processed = RowArrayAccessor::Visit_avx2( + rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* row_ptr_base, __m256i offset_lo, __m256i offset_hi, + __m256i num_bytes) { + Decode8FixedLength4_avx2( + output->mutable_data_as(1) + output_start_row + i, row_ptr_base, + offset_lo, offset_hi); + }); + break; + case 8: + num_rows_processed = RowArrayAccessor::Visit_avx2( + rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* row_ptr_base, __m256i offset_lo, __m256i offset_hi, + __m256i num_bytes) { + Decode8FixedLength8_avx2( + output->mutable_data_as(1) + output_start_row + i, row_ptr_base, + offset_lo, offset_hi); + }); + break; + default: + RowArrayAccessor::Visit( + rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* row_ptr, uint32_t num_bytes) { + Decode1_avx2(output->mutable_data(1) + num_bytes * (output_start_row + i), + row_ptr, num_bytes); + }); + num_rows_processed = num_rows_to_append; + break; + } + + return num_rows_processed; +} + +int RowArray::DecodeOffsets_avx2(ResizableArrayData* output, int output_start_row, + int column_id, int num_rows_to_append, + const uint32_t* row_ids) const { + uint32_t* offsets = output->mutable_data_as(1) + output_start_row; + uint32_t current_length = (output_start_row == 0) ? 
0 : offsets[0]; + int num_rows_processed = RowArrayAccessor::Visit_avx2( + rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* row_ptr_base, __m256i offset_lo, __m256i offset_hi, + __m256i num_bytes) { + current_length = Decode8Offset_avx2(offsets + i, current_length, num_bytes); + }); + offsets[num_rows_processed] = current_length; + return num_rows_processed; +} + +int RowArray::DecodeVarLength_avx2(ResizableArrayData* output, int output_start_row, + int column_id, int num_rows_to_append, + const uint32_t* row_ids) const { + RowArrayAccessor::Visit( + rows_, column_id, num_rows_to_append, row_ids, + [&](int i, const uint8_t* row_ptr, uint32_t num_bytes) { + uint8_t* dst = output->mutable_data(2) + + output->mutable_data_as(1)[output_start_row + i]; + Decode1_avx2(dst, row_ptr, num_bytes); + }); + return num_rows_to_append; +} + +int RowArray::DecodeNulls_avx2(ResizableArrayData* output, int output_start_row, + int column_id, int num_rows_to_append, + const uint32_t* row_ids) const { + DCHECK_EQ(output_start_row % 8, 0); + + return RowArrayAccessor::VisitNulls_avx2( + rows_, column_id, num_rows_to_append, row_ids, [&](int i, uint64_t null_bytes) { + DCHECK_EQ(i % 8, 0); + Decode8Null_avx2(output->mutable_data(0) + (output_start_row + i) / 8, + null_bytes); + }); } } // namespace acero diff --git a/cpp/src/arrow/acero/swiss_join_internal.h b/cpp/src/arrow/acero/swiss_join_internal.h index 4d749c1c529ae..f2f3ac5b1bf93 100644 --- a/cpp/src/arrow/acero/swiss_join_internal.h +++ b/cpp/src/arrow/acero/swiss_join_internal.h @@ -32,6 +32,7 @@ namespace arrow { using compute::ExecBatchBuilder; using compute::KeyColumnArray; using compute::KeyColumnMetadata; +using compute::LightContext; using compute::ResizableArrayData; using compute::RowTableEncoder; using compute::RowTableImpl; @@ -47,16 +48,6 @@ class RowArrayAccessor { // static int VarbinaryColumnId(const RowTableMetadata& row_metadata, int column_id); - // Calculate how many rows to skip from 
the tail of the - // sequence of selected rows, such that the total size of skipped rows is at - // least equal to the size specified by the caller. Skipping of the tail rows - // is used to allow for faster processing by the caller of remaining rows - // without checking buffer bounds (useful with SIMD or fixed size memory loads - // and stores). - // - static int NumRowsToSkip(const RowTableImpl& rows, int column_id, int num_rows, - const uint32_t* row_ids, int num_tail_bytes_to_skip); - // The supplied lambda will be called for each row in the given list of rows. // The arguments given to it will be: // - index of a row (within the set of selected rows), @@ -68,7 +59,80 @@ class RowArrayAccessor { // template static void Visit(const RowTableImpl& rows, int column_id, int num_rows, - const uint32_t* row_ids, PROCESS_VALUE_FN process_value_fn); + const uint32_t* row_ids, PROCESS_VALUE_FN process_value_fn) { + bool is_fixed_length_column = + rows.metadata().column_metadatas[column_id].is_fixed_length; + + // There are 4 cases, each requiring different steps: + // 1. Varying length column that is the first varying length column in a row + // 2. Varying length column that is not the first varying length column in a + // row + // 3. Fixed length column in a fixed length row + // 4. 
Fixed length column in a varying length row + + if (!is_fixed_length_column) { + int varbinary_column_id = VarbinaryColumnId(rows.metadata(), column_id); + const uint8_t* row_ptr_base = rows.data(2); + const RowTableImpl::offset_type* row_offsets = rows.offsets(); + uint32_t field_offset_within_row, field_length; + + if (varbinary_column_id == 0) { + // Case 1: This is the first varbinary column + // + for (int i = 0; i < num_rows; ++i) { + uint32_t row_id = row_ids[i]; + const uint8_t* row_ptr = row_ptr_base + row_offsets[row_id]; + rows.metadata().first_varbinary_offset_and_length( + row_ptr, &field_offset_within_row, &field_length); + process_value_fn(i, row_ptr + field_offset_within_row, field_length); + } + } else { + // Case 2: This is second or later varbinary column + // + for (int i = 0; i < num_rows; ++i) { + uint32_t row_id = row_ids[i]; + const uint8_t* row_ptr = row_ptr_base + row_offsets[row_id]; + rows.metadata().nth_varbinary_offset_and_length( + row_ptr, varbinary_column_id, &field_offset_within_row, &field_length); + process_value_fn(i, row_ptr + field_offset_within_row, field_length); + } + } + } + + if (is_fixed_length_column) { + uint32_t field_offset_within_row = rows.metadata().encoded_field_offset( + rows.metadata().pos_after_encoding(column_id)); + uint32_t field_length = rows.metadata().column_metadatas[column_id].fixed_length; + // Bit column is encoded as a single byte + // + if (field_length == 0) { + field_length = 1; + } + uint32_t row_length = rows.metadata().fixed_length; + + bool is_fixed_length_row = rows.metadata().is_fixed_length; + if (is_fixed_length_row) { + // Case 3: This is a fixed length column in a fixed length row + // + const uint8_t* row_ptr_base = rows.data(1) + field_offset_within_row; + for (int i = 0; i < num_rows; ++i) { + uint32_t row_id = row_ids[i]; + const uint8_t* row_ptr = row_ptr_base + row_length * row_id; + process_value_fn(i, row_ptr, field_length); + } + } else { + // Case 4: This is a fixed length 
column in a varying length row + // + const uint8_t* row_ptr_base = rows.data(2) + field_offset_within_row; + const RowTableImpl::offset_type* row_offsets = rows.offsets(); + for (int i = 0; i < num_rows; ++i) { + uint32_t row_id = row_ids[i]; + const uint8_t* row_ptr = row_ptr_base + row_offsets[row_id]; + process_value_fn(i, row_ptr, field_length); + } + } + } + } // The supplied lambda will be called for each row in the given list of rows. // The arguments given to it will be: @@ -77,9 +141,17 @@ class RowArrayAccessor { // template static void VisitNulls(const RowTableImpl& rows, int column_id, int num_rows, - const uint32_t* row_ids, PROCESS_VALUE_FN process_value_fn); + const uint32_t* row_ids, PROCESS_VALUE_FN process_value_fn) { + const uint8_t* null_masks = rows.null_masks(); + uint32_t null_mask_num_bytes = rows.metadata().null_masks_bytes_per_row; + uint32_t pos_after_encoding = rows.metadata().pos_after_encoding(column_id); + for (int i = 0; i < num_rows; ++i) { + uint32_t row_id = row_ids[i]; + int64_t bit_id = row_id * null_mask_num_bytes * 8 + pos_after_encoding; + process_value_fn(i, bit_util::GetBit(null_masks, bit_id) ? 0xff : 0); + } + } - private: #if defined(ARROW_HAVE_RUNTIME_AVX2) // This is equivalent to Visit method, but processing 8 rows at a time in a // loop. @@ -108,13 +180,15 @@ class RowArrayAccessor { // can be called by multiple threads concurrently. 
// struct RowArray { - RowArray() : is_initialized_(false) {} + RowArray() : is_initialized_(false), hardware_flags_(0) {} - Status InitIfNeeded(MemoryPool* pool, const ExecBatch& batch); - Status InitIfNeeded(MemoryPool* pool, const RowTableMetadata& row_metadata); + Status InitIfNeeded(MemoryPool* pool, int64_t hardware_flags, const ExecBatch& batch); + Status InitIfNeeded(MemoryPool* pool, int64_t hardware_flags, + const RowTableMetadata& row_metadata); - Status AppendBatchSelection(MemoryPool* pool, const ExecBatch& batch, int begin_row_id, - int end_row_id, int num_row_ids, const uint16_t* row_ids, + Status AppendBatchSelection(MemoryPool* pool, int64_t hardware_flags, + const ExecBatch& batch, int begin_row_id, int end_row_id, + int num_row_ids, const uint16_t* row_ids, std::vector& temp_column_arrays); // This can only be called for a minibatch. @@ -122,12 +196,10 @@ struct RowArray { void Compare(const ExecBatch& batch, int begin_row_id, int end_row_id, int num_selected, const uint16_t* batch_selection_maybe_null, const uint32_t* array_row_ids, uint32_t* out_num_not_equal, uint16_t* out_not_equal_selection, - int64_t hardware_flags, arrow::util::TempVectorStack* temp_stack, + arrow::util::TempVectorStack* temp_stack, std::vector& temp_column_arrays, uint8_t* out_match_bitvector_maybe_null = NULLPTR); - // TODO: add AVX2 version - // Status DecodeSelected(ResizableArrayData* target, int column_id, int num_rows_to_append, const uint32_t* row_ids, MemoryPool* pool) const; @@ -135,10 +207,43 @@ struct RowArray { int64_t num_rows() const { return is_initialized_ ? 
rows_.length() : 0; } + void EnsureHasAnyNullsComputed(const LightContext& ctx) { + std::ignore = rows_.has_any_nulls(&ctx); + } + + private: bool is_initialized_; + + int64_t hardware_flags_; RowTableEncoder encoder_; RowTableImpl rows_; RowTableImpl rows_temp_; + + private: + void DecodeFixedLength(ResizableArrayData* output, int output_start_row, int column_id, + uint32_t fixed_length, int num_rows_to_append, + const uint32_t* row_ids) const; + void DecodeOffsets(ResizableArrayData* output, int output_start_row, int column_id, + int num_rows_to_append, const uint32_t* row_ids) const; + void DecodeVarLength(ResizableArrayData* output, int output_start_row, int column_id, + int num_rows_to_append, const uint32_t* row_ids) const; + void DecodeNulls(ResizableArrayData* output, int output_start_row, int column_id, + int num_rows_to_append, const uint32_t* row_ids) const; + +#if defined(ARROW_HAVE_RUNTIME_AVX2) + int DecodeFixedLength_avx2(ResizableArrayData* output, int output_start_row, + int column_id, uint32_t fixed_length, int num_rows_to_append, + const uint32_t* row_ids) const; + int DecodeOffsets_avx2(ResizableArrayData* output, int output_start_row, int column_id, + int num_rows_to_append, const uint32_t* row_ids) const; + int DecodeVarLength_avx2(ResizableArrayData* output, int output_start_row, + int column_id, int num_rows_to_append, + const uint32_t* row_ids) const; + int DecodeNulls_avx2(ResizableArrayData* output, int output_start_row, int column_id, + int num_rows_to_append, const uint32_t* row_ids) const; +#endif + + friend class RowArrayMerge; }; // Implements concatenating multiple row arrays into a single one, using @@ -161,7 +266,7 @@ class RowArrayMerge { // static Status PrepareForMerge(RowArray* target, const std::vector& sources, std::vector* first_target_row_id, - MemoryPool* pool); + MemoryPool* pool, int64_t hardware_flags); // Copy rows from source array to target array. // Both arrays must have the same row metadata. 
diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc index d16b6cfd2e97d..51cca497485ce 100644 --- a/cpp/src/arrow/adapters/orc/adapter.cc +++ b/cpp/src/arrow/adapters/orc/adapter.cc @@ -145,7 +145,10 @@ class OrcStripeReader : public RecordBatchReader { Status ReadNext(std::shared_ptr* out) override { std::unique_ptr batch; - ORC_CATCH_NOT_OK(batch = row_reader_->createRowBatch(batch_size_)); + std::unique_ptr builder; + + ORC_BEGIN_CATCH_NOT_OK + batch = row_reader_->createRowBatch(batch_size_); const liborc::Type& type = row_reader_->getSelectedType(); if (!row_reader_->next(*batch)) { @@ -153,10 +156,8 @@ class OrcStripeReader : public RecordBatchReader { return Status::OK(); } - std::unique_ptr builder; ARROW_ASSIGN_OR_RAISE(builder, RecordBatchBuilder::Make(schema_, pool_, batch->numElements)); - // The top-level type must be a struct to read into an arrow table const auto& struct_batch = checked_cast(*batch); @@ -164,9 +165,9 @@ class OrcStripeReader : public RecordBatchReader { RETURN_NOT_OK(AppendBatch(type.getSubtype(i), struct_batch.fields[i], 0, batch->numElements, builder->GetField(i))); } + ORC_END_CATCH_NOT_OK - ARROW_ASSIGN_OR_RAISE(*out, builder->Flush()); - return Status::OK(); + return builder->Flush().Value(out); } private: @@ -470,15 +471,13 @@ class ORCFileReader::Impl { int64_t nrows) { std::unique_ptr row_reader; std::unique_ptr batch; + std::unique_ptr builder; ORC_BEGIN_CATCH_NOT_OK row_reader = reader_->createRowReader(opts); batch = row_reader->createRowBatch(std::min(nrows, kReadRowsBatch)); - ORC_END_CATCH_NOT_OK - std::unique_ptr builder; ARROW_ASSIGN_OR_RAISE(builder, RecordBatchBuilder::Make(schema, pool_, nrows)); - // The top-level type must be a struct to read into an arrow table const auto& struct_batch = checked_cast(*batch); @@ -489,6 +488,7 @@ class ORCFileReader::Impl { batch->numElements, builder->GetField(i))); } } + ORC_END_CATCH_NOT_OK return builder->Flush(); } diff --git 
a/cpp/src/arrow/array/array_list_test.cc b/cpp/src/arrow/array/array_list_test.cc index 3d18d5f967b72..226f5fc4649af 100644 --- a/cpp/src/arrow/array/array_list_test.cc +++ b/cpp/src/arrow/array/array_list_test.cc @@ -1186,7 +1186,8 @@ TEST_F(TestMapArray, BuildingStringToInt) { std::vector offsets = {0, 2, 2, 3, 3}; auto expected_keys = ArrayFromJSON(utf8(), R"(["joe", "mark", "cap"])"); auto expected_values = ArrayFromJSON(int32(), "[0, null, 8]"); - ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap, internal::BytesToBits({1, 0, 1, 1})); + ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap, + internal::BytesToBits(std::vector({1, 0, 1, 1}))); MapArray expected(type, 4, Buffer::Wrap(offsets), expected_keys, expected_values, expected_null_bitmap, 1); diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index aa2a2d4e9af0b..ca811dac041fe 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -87,16 +87,18 @@ add_arrow_test(internals_test function_test.cc exec_test.cc kernel_test.cc - light_array_test.cc - registry_test.cc - key_hash_test.cc - row/compare_test.cc - row/grouper_test.cc - row/row_encoder_internal_test.cc - row/row_test.cc - util_internal_test.cc) + registry_test.cc) add_arrow_compute_test(expression_test SOURCES expression_test.cc) +add_arrow_compute_test(row_test + SOURCES + key_hash_test.cc + light_array_test.cc + row/compare_test.cc + row/grouper_test.cc + row/row_encoder_internal_test.cc + row/row_test.cc + util_internal_test.cc) add_arrow_benchmark(function_benchmark PREFIX "arrow-compute") diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 7c3bc46650e9f..61a16f5f5eb9b 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -732,19 +732,26 @@ void RegisterScalarOptions(FunctionRegistry* registry) { SCALAR_ARITHMETIC_UNARY(AbsoluteValue, "abs", "abs_checked") SCALAR_ARITHMETIC_UNARY(Acos, "acos", 
"acos_checked") +SCALAR_ARITHMETIC_UNARY(Acosh, "acosh", "acosh_checked") SCALAR_ARITHMETIC_UNARY(Asin, "asin", "asin_checked") +SCALAR_ARITHMETIC_UNARY(Atanh, "atanh", "atanh_checked") SCALAR_ARITHMETIC_UNARY(Cos, "cos", "cos_checked") SCALAR_ARITHMETIC_UNARY(Ln, "ln", "ln_checked") SCALAR_ARITHMETIC_UNARY(Log10, "log10", "log10_checked") SCALAR_ARITHMETIC_UNARY(Log1p, "log1p", "log1p_checked") SCALAR_ARITHMETIC_UNARY(Log2, "log2", "log2_checked") -SCALAR_ARITHMETIC_UNARY(Sqrt, "sqrt", "sqrt_checked") SCALAR_ARITHMETIC_UNARY(Negate, "negate", "negate_checked") SCALAR_ARITHMETIC_UNARY(Sin, "sin", "sin_checked") +SCALAR_ARITHMETIC_UNARY(Sqrt, "sqrt", "sqrt_checked") SCALAR_ARITHMETIC_UNARY(Tan, "tan", "tan_checked") +SCALAR_EAGER_UNARY(Asinh, "asinh") SCALAR_EAGER_UNARY(Atan, "atan") +SCALAR_EAGER_UNARY(Cosh, "cosh") SCALAR_EAGER_UNARY(Exp, "exp") +SCALAR_EAGER_UNARY(Expm1, "expm1") SCALAR_EAGER_UNARY(Sign, "sign") +SCALAR_EAGER_UNARY(Sinh, "sinh") +SCALAR_EAGER_UNARY(Tanh, "tanh") Result Round(const Datum& arg, RoundOptions options, ExecContext* ctx) { return CallFunction("round", {arg}, &options, ctx); diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 947474e5962d0..0e5a388b1074f 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -684,6 +684,18 @@ Result Power(const Datum& left, const Datum& right, ARROW_EXPORT Result Exp(const Datum& arg, ExecContext* ctx = NULLPTR); +/// \brief More accurately calculate `exp(arg) - 1` for values close to zero. +/// If the exponent value is null the result will be null. +/// +/// This function is more accurate than calculating `exp(value) - 1` directly for values +/// close to zero. 
+/// +/// \param[in] arg the exponent +/// \param[in] ctx the function execution context, optional +/// \return the element-wise Euler's number raised to the power of exponent minus 1 +ARROW_EXPORT +Result Expm1(const Datum& arg, ExecContext* ctx = NULLPTR); + /// \brief Left shift the left array by the right array. Array values must be the /// same length. If either operand is null, the result will be null. /// @@ -772,6 +784,52 @@ Result Atan(const Datum& arg, ExecContext* ctx = NULLPTR); ARROW_EXPORT Result Atan2(const Datum& y, const Datum& x, ExecContext* ctx = NULLPTR); +/// \brief Compute the hyperbolic sine of the array values. +/// \param[in] arg The values to compute the hyperbolic sine for. +/// \param[in] ctx the function execution context, optional +/// \return the elementwise hyperbolic sine of the values +ARROW_EXPORT +Result Sinh(const Datum& arg, ExecContext* ctx = NULLPTR); + +/// \brief Compute the hyperbolic cosine of the array values. +/// \param[in] arg The values to compute the hyperbolic cosine for. +/// \param[in] ctx the function execution context, optional +/// \return the elementwise hyperbolic cosine of the values +ARROW_EXPORT +Result Cosh(const Datum& arg, ExecContext* ctx = NULLPTR); + +/// \brief Compute the hyperbolic tangent of the array values. +/// \param[in] arg The values to compute the hyperbolic tangent for. +/// \param[in] ctx the function execution context, optional +/// \return the elementwise hyperbolic tangent of the values +ARROW_EXPORT +Result Tanh(const Datum& arg, ExecContext* ctx = NULLPTR); + +/// \brief Compute the inverse hyperbolic sine of the array values. +/// \param[in] arg The values to compute the inverse hyperbolic sine for. +/// \param[in] ctx the function execution context, optional +/// \return the elementwise inverse hyperbolic sine of the values +ARROW_EXPORT +Result Asinh(const Datum& arg, ExecContext* ctx = NULLPTR); + +/// \brief Compute the inverse hyperbolic cosine of the array values. 
+/// \param[in] arg The values to compute the inverse hyperbolic cosine for. +/// \param[in] options arithmetic options (enable/disable overflow checking), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise inverse hyperbolic cosine of the values +ARROW_EXPORT +Result Acosh(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + +/// \brief Compute the inverse hyperbolic tangent of the array values. +/// \param[in] arg The values to compute the inverse hyperbolic tangent for. +/// \param[in] options arithmetic options (enable/disable overflow checking), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise inverse hyperbolic tangent of the values +ARROW_EXPORT +Result Atanh(const Datum& arg, ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + /// \brief Get the natural log of a value. /// /// If argument is null the result will be null. 
diff --git a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h index d59320d270e4f..f045e323b3d0b 100644 --- a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h +++ b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h @@ -532,6 +532,14 @@ struct Exp { } }; +struct Expm1 { + template + static T Call(KernelContext*, Arg exp, Status*) { + static_assert(std::is_same::value); + return std::expm1(exp); + } +}; + struct Power { ARROW_NOINLINE static uint64_t IntegerPower(uint64_t base, uint64_t exp) { diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc index eb243de4a765e..c13dae573a3d9 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc @@ -178,6 +178,14 @@ struct SinChecked { } }; +struct Sinh { + template + static enable_if_floating_value Call(KernelContext*, Arg0 val, Status*) { + static_assert(std::is_same::value, ""); + return std::sinh(val); + } +}; + struct Cos { template static enable_if_floating_value Call(KernelContext*, Arg0 val, Status*) { @@ -198,6 +206,14 @@ struct CosChecked { } }; +struct Cosh { + template + static enable_if_floating_value Call(KernelContext*, Arg0 val, Status*) { + static_assert(std::is_same::value, ""); + return std::cosh(val); + } +}; + struct Tan { template static enable_if_floating_value Call(KernelContext*, Arg0 val, Status*) { @@ -219,6 +235,14 @@ struct TanChecked { } }; +struct Tanh { + template + static enable_if_floating_value Call(KernelContext*, Arg0 val, Status*) { + static_assert(std::is_same::value, ""); + return std::tanh(val); + } +}; + struct Asin { template static enable_if_floating_value Call(KernelContext*, Arg0 val, Status*) { @@ -242,6 +266,14 @@ struct AsinChecked { } }; +struct Asinh { + template + static enable_if_floating_value Call(KernelContext*, Arg0 val, Status*) { + 
static_assert(std::is_same::value, ""); + return std::asinh(val); + } +}; + struct Acos { template static enable_if_floating_value Call(KernelContext*, Arg0 val, Status*) { @@ -265,6 +297,29 @@ struct AcosChecked { } }; +struct Acosh { + template + static enable_if_floating_value Call(KernelContext*, Arg0 val, Status*) { + static_assert(std::is_same::value, ""); + if (ARROW_PREDICT_FALSE(val < 1.0)) { + return std::numeric_limits::quiet_NaN(); + } + return std::acosh(val); + } +}; + +struct AcoshChecked { + template + static enable_if_floating_value Call(KernelContext*, Arg0 val, Status* st) { + static_assert(std::is_same::value, ""); + if (ARROW_PREDICT_FALSE(val < 1.0)) { + *st = Status::Invalid("domain error"); + return val; + } + return std::acosh(val); + } +}; + struct Atan { template static enable_if_floating_value Call(KernelContext*, Arg0 val, Status*) { @@ -273,6 +328,35 @@ struct Atan { } }; +struct Atanh { + template + static enable_if_floating_value Call(KernelContext*, Arg0 val, Status*) { + static_assert(std::is_same::value, ""); + if (ARROW_PREDICT_FALSE((val < -1.0 || val > 1.0))) { + // N.B. This predicate does *not* match the predicate in AtanhChecked. In + // GH-44630 it was decided that the checked version should error when asked + // for +/- 1 as an input and the unchecked version should return +/- oo + return std::numeric_limits::quiet_NaN(); + } + return std::atanh(val); + } +}; + +struct AtanhChecked { + template + static enable_if_floating_value Call(KernelContext*, Arg0 val, Status* st) { + static_assert(std::is_same::value, ""); + if (ARROW_PREDICT_FALSE((val <= -1.0 || val >= 1.0))) { + // N.B. This predicate does *not* match the predicate in Atanh. 
In GH-44630 it was + // decided that the checked version should error when asked for +/- 1 as an input + // and the unchecked version should return +/- oo + *st = Status::Invalid("domain error"); + return val; + } + return std::atanh(val); + } +}; + struct Atan2 { template static enable_if_floating_value Call(KernelContext*, Arg0 y, Arg1 x, Status*) { @@ -1087,6 +1171,12 @@ const FunctionDoc exp_doc{ ("If exponent is null the result will be null."), {"exponent"}}; +const FunctionDoc expm1_doc{ + "Compute Euler's number raised to the power of specified exponent, " + "then decrement 1, element-wise", + ("If exponent is null the result will be null."), + {"exponent"}}; + const FunctionDoc pow_checked_doc{ "Raise arguments to power element-wise", ("An error is returned when integer to negative integer power is encountered,\n" @@ -1172,6 +1262,8 @@ const FunctionDoc sin_checked_doc{"Compute the sine", "to return NaN instead, see \"sin\"."), {"x"}}; +const FunctionDoc sinh_doc{"Compute the hyperbolic sine", (""), {"x"}}; + const FunctionDoc cos_doc{"Compute the cosine", ("NaN is returned for invalid input values;\n" "to raise an error instead, see \"cos_checked\"."), @@ -1182,6 +1274,8 @@ const FunctionDoc cos_checked_doc{"Compute the cosine", "to return NaN instead, see \"cos\"."), {"x"}}; +const FunctionDoc cosh_doc{"Compute the hyperbolic cosine", (""), {"x"}}; + const FunctionDoc tan_doc{"Compute the tangent", ("NaN is returned for invalid input values;\n" "to raise an error instead, see \"tan_checked\"."), @@ -1192,6 +1286,8 @@ const FunctionDoc tan_checked_doc{"Compute the tangent", "to return NaN instead, see \"tan\"."), {"x"}}; +const FunctionDoc tanh_doc{"Compute the hyperbolic tangent", (""), {"x"}}; + const FunctionDoc asin_doc{"Compute the inverse sine", ("NaN is returned for invalid input values;\n" "to raise an error instead, see \"asin_checked\"."), @@ -1202,6 +1298,8 @@ const FunctionDoc asin_checked_doc{"Compute the inverse sine", "to return NaN instead, 
see \"asin\"."), {"x"}}; +const FunctionDoc asinh_doc{"Compute the inverse hyperbolic sine", (""), {"x"}}; + const FunctionDoc acos_doc{"Compute the inverse cosine", ("NaN is returned for invalid input values;\n" "to raise an error instead, see \"acos_checked\"."), @@ -1212,6 +1310,16 @@ const FunctionDoc acos_checked_doc{"Compute the inverse cosine", "to return NaN instead, see \"acos\"."), {"x"}}; +const FunctionDoc acosh_doc{"Compute the inverse hyperbolic cosine", + ("NaN is returned for input values < 1.0;\n" + "to raise an error instead, see \"acosh_checked\"."), + {"x"}}; + +const FunctionDoc acosh_checked_doc{"Compute the inverse hyperbolic cosine", + ("Input values < 1.0 raise an error;\n" + "to return NaN instead, see \"acosh\"."), + {"x"}}; + const FunctionDoc atan_doc{"Compute the inverse tangent of x", ("The return value is in the range [-pi/2, pi/2];\n" "for a full return range [-pi, pi], see \"atan2\"."), @@ -1221,6 +1329,17 @@ const FunctionDoc atan2_doc{"Compute the inverse tangent of y/x", ("The return value is in the range [-pi, pi]."), {"y", "x"}}; +const FunctionDoc atanh_doc{"Compute the inverse hyperbolic tangent", + ("NaN is returned for input values x with |x| > 1.\n" + "At x = +/- 1, returns +/- infinity.\n" + "To raise an error instead, see \"atanh_checked\"."), + {"x"}}; + +const FunctionDoc atanh_checked_doc{"Compute the inverse hyperbolic tangent", + ("Input values x with |x| >= 1.0 raise an error\n" + "to return NaN instead, see \"atanh\"."), + {"x"}}; + const FunctionDoc ln_doc{ "Compute natural logarithm", ("Non-positive values return -inf or NaN. 
Null values return null.\n" @@ -1614,6 +1733,10 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) { auto exp = MakeUnaryArithmeticFunctionFloatingPoint("exp", exp_doc); DCHECK_OK(registry->AddFunction(std::move(exp))); + // ---------------------------------------------------------------------- + auto expm1 = MakeUnaryArithmeticFunctionFloatingPoint("expm1", expm1_doc); + DCHECK_OK(registry->AddFunction(std::move(expm1))); + // ---------------------------------------------------------------------- auto sqrt = MakeUnaryArithmeticFunctionFloatingPoint("sqrt", sqrt_doc); DCHECK_OK(registry->AddFunction(std::move(sqrt))); @@ -1681,6 +1804,9 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) { "sin_checked", sin_checked_doc); DCHECK_OK(registry->AddFunction(std::move(sin_checked))); + auto sinh = MakeUnaryArithmeticFunctionFloatingPoint("sinh", sinh_doc); + DCHECK_OK(registry->AddFunction(std::move(sinh))); + auto cos = MakeUnaryArithmeticFunctionFloatingPoint("cos", cos_doc); DCHECK_OK(registry->AddFunction(std::move(cos))); @@ -1688,6 +1814,9 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) { "cos_checked", cos_checked_doc); DCHECK_OK(registry->AddFunction(std::move(cos_checked))); + auto cosh = MakeUnaryArithmeticFunctionFloatingPoint("cosh", cosh_doc); + DCHECK_OK(registry->AddFunction(std::move(cosh))); + auto tan = MakeUnaryArithmeticFunctionFloatingPoint("tan", tan_doc); DCHECK_OK(registry->AddFunction(std::move(tan))); @@ -1695,6 +1824,9 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) { "tan_checked", tan_checked_doc); DCHECK_OK(registry->AddFunction(std::move(tan_checked))); + auto tanh = MakeUnaryArithmeticFunctionFloatingPoint("tanh", tanh_doc); + DCHECK_OK(registry->AddFunction(std::move(tanh))); + auto asin = MakeUnaryArithmeticFunctionFloatingPoint("asin", asin_doc); DCHECK_OK(registry->AddFunction(std::move(asin))); @@ -1702,6 +1834,9 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) { 
"asin_checked", asin_checked_doc); DCHECK_OK(registry->AddFunction(std::move(asin_checked))); + auto asinh = MakeUnaryArithmeticFunctionFloatingPoint("asinh", asinh_doc); + DCHECK_OK(registry->AddFunction(std::move(asinh))); + auto acos = MakeUnaryArithmeticFunctionFloatingPoint("acos", acos_doc); DCHECK_OK(registry->AddFunction(std::move(acos))); @@ -1709,12 +1844,26 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) { "acos_checked", acos_checked_doc); DCHECK_OK(registry->AddFunction(std::move(acos_checked))); + auto acosh = MakeUnaryArithmeticFunctionFloatingPoint("acosh", acosh_doc); + DCHECK_OK(registry->AddFunction(std::move(acosh))); + + auto acosh_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull( + "acosh_checked", acosh_checked_doc); + DCHECK_OK(registry->AddFunction(std::move(acosh_checked))); + auto atan = MakeUnaryArithmeticFunctionFloatingPoint("atan", atan_doc); DCHECK_OK(registry->AddFunction(std::move(atan))); auto atan2 = MakeArithmeticFunctionFloatingPoint("atan2", atan2_doc); DCHECK_OK(registry->AddFunction(std::move(atan2))); + auto atanh = MakeUnaryArithmeticFunctionFloatingPoint("atanh", atanh_doc); + DCHECK_OK(registry->AddFunction(std::move(atanh))); + + auto atanh_checked = MakeUnaryArithmeticFunctionFloatingPointNotNull( + "atanh_checked", atanh_checked_doc); + DCHECK_OK(registry->AddFunction(std::move(atanh_checked))); + // ---------------------------------------------------------------------- // Logarithms auto ln = MakeUnaryArithmeticFunctionFloatingPoint("ln", ln_doc); diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc index 37a1bcbc02d73..9a1a569081d9a 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. 
+// Required for Windows to define M_LN* constants +#define _USE_MATH_DEFINES + #include #include #include @@ -43,6 +46,9 @@ namespace arrow { namespace compute { namespace { +// 2.718281828459045090795598298427648842334747314453125 +constexpr double kEuler64 = 0x1.5bf0a8b145769p+1; + using IntegralTypes = testing::Types; @@ -1183,8 +1189,8 @@ TEST(TestUnaryArithmetic, DispatchBest) { } // Float types (with _checked variant) - for (std::string name : - {"ln", "log2", "log10", "log1p", "sin", "cos", "tan", "asin", "acos"}) { + for (std::string name : {"ln", "log2", "log10", "log1p", "sin", "cos", "tan", "asin", + "acos", "acosh", "atanh"}) { for (std::string suffix : {"", "_checked"}) { name += suffix; for (const auto& ty : {float32(), float64()}) { @@ -1195,7 +1201,7 @@ TEST(TestUnaryArithmetic, DispatchBest) { } // Float types - for (std::string name : {"atan", "sign", "exp"}) { + for (std::string name : {"sinh", "cosh", "tanh", "asinh", "atan", "sign", "exp"}) { for (const auto& ty : {float32(), float64()}) { CheckDispatchBest(name, {ty}, {ty}); CheckDispatchBest(name, {dictionary(int8(), ty)}, {ty}); @@ -1203,8 +1209,8 @@ TEST(TestUnaryArithmetic, DispatchBest) { } // Integer -> Float64 (with _checked variant) - for (std::string name : - {"ln", "log2", "log10", "log1p", "sin", "cos", "tan", "asin", "acos"}) { + for (std::string name : {"ln", "log2", "log10", "log1p", "sin", "cos", "tan", "asin", + "acos", "acosh", "atanh"}) { for (std::string suffix : {"", "_checked"}) { name += suffix; for (const auto& ty : @@ -1216,7 +1222,7 @@ TEST(TestUnaryArithmetic, DispatchBest) { } // Integer -> Float64 - for (std::string name : {"atan"}) { + for (std::string name : {"sinh", "cosh", "tanh", "asinh", "atan"}) { for (const auto& ty : {int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), uint64()}) { CheckDispatchBest(name, {ty}, {float64()}); @@ -1226,15 +1232,16 @@ TEST(TestUnaryArithmetic, DispatchBest) { } TEST(TestUnaryArithmetic, Null) { - for (std::string 
name : {"abs", "acos", "asin", "cos", "ln", "log10", "log1p", "log2", - "negate", "sin", "tan"}) { + for (std::string name : {"abs", "acos", "acosh", "asin", "atanh", "cos", "ln", "log10", + "log1p", "log2", "negate", "sin", "tan"}) { for (std::string suffix : {"", "_checked"}) { name += suffix; AssertNullToNull(name); } } - for (std::string name : {"atan", "bit_wise_not", "sign"}) { + for (std::string name : + {"sinh", "cosh", "tanh", "asinh", "atan", "bit_wise_not", "sign"}) { AssertNullToNull(name); } } @@ -1485,8 +1492,7 @@ TYPED_TEST(TestUnaryArithmeticUnsigned, Exp) { this->AssertUnaryOp( exp, "[null, 1, 10]", ArrayFromJSON(float64(), "[null, 2.718281828459045, 22026.465794806718]")); - this->AssertUnaryOp(exp, this->MakeScalar(1), - arrow::MakeScalar(2.718281828459045F)); + this->AssertUnaryOp(exp, this->MakeScalar(1), arrow::MakeScalar(kEuler64)); } TYPED_TEST(TestUnaryArithmeticSigned, Exp) { @@ -1502,8 +1508,7 @@ TYPED_TEST(TestUnaryArithmeticSigned, Exp) { ArrayFromJSON(float64(), "[0.000045399929762484854, 0.36787944117144233, " "null, 2.718281828459045, 22026.465794806718]")); - this->AssertUnaryOp(exp, this->MakeScalar(1), - arrow::MakeScalar(2.718281828459045F)); + this->AssertUnaryOp(exp, this->MakeScalar(1), arrow::MakeScalar(kEuler64)); } TYPED_TEST(TestUnaryArithmeticFloating, Exp) { @@ -1563,6 +1568,101 @@ TEST_F(TestUnaryArithmeticDecimal, Exp) { } } +TYPED_TEST(TestUnaryArithmeticUnsigned, Expm1) { + auto expm1 = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) { + return Expm1(arg, ctx); + }; + // Empty arrays + this->AssertUnaryOp(expm1, "[]", ArrayFromJSON(float64(), "[]")); + // Array with nulls + this->AssertUnaryOp(expm1, "[null]", ArrayFromJSON(float64(), "[null]")); + this->AssertUnaryOp(expm1, this->MakeNullScalar(), arrow::MakeNullScalar(float64())); + this->AssertUnaryOp( + expm1, "[null, 0, 1, 10]", + ArrayFromJSON(float64(), "[null, 0.0, 1.718281828459045, 22025.465794806718]")); + this->AssertUnaryOp(expm1, 
this->MakeScalar(1), arrow::MakeScalar(kEuler64 - 1.0)); +} + +TYPED_TEST(TestUnaryArithmeticSigned, Expm1) { + auto expm1 = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) { + return Expm1(arg, ctx); + }; + // Empty arrays + this->AssertUnaryOp(expm1, "[]", ArrayFromJSON(float64(), "[]")); + // Array with nulls + this->AssertUnaryOp(expm1, "[null]", ArrayFromJSON(float64(), "[null]")); + this->AssertUnaryOp(expm1, this->MakeNullScalar(), arrow::MakeNullScalar(float64())); + this->AssertUnaryOp(expm1, "[-10, -1, 0, null, 1, 10]", + ArrayFromJSON(float64(), + "[-0.9999546000702375, -0.6321205588285577, 0.0, " + "null, 1.718281828459045, 22025.465794806718]")); + this->AssertUnaryOp(expm1, this->MakeScalar(1), arrow::MakeScalar(kEuler64 - 1.0)); +} + +TYPED_TEST(TestUnaryArithmeticFloating, Expm1) { + using CType = typename TestFixture::CType; + + auto min = std::numeric_limits::lowest(); + auto max = std::numeric_limits::max(); + + auto expm1 = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) { + return Expm1(arg, ctx); + }; + // Empty arrays + this->AssertUnaryOp(expm1, "[]", "[]"); + // Array with nulls + this->AssertUnaryOp(expm1, "[null]", "[null]"); + this->AssertUnaryOp(expm1, this->MakeNullScalar(), this->MakeNullScalar()); + this->AssertUnaryOp(expm1, "[-1.0, 0.0, 0.1, 0.00000001, null, 10.0]", + "[-0.6321205588285577, 0.0, " + "0.10517091807564763, 0.000000010000000050000001, " + "null, 22025.465794806718]"); + // Ordinary arrays (positive, negative, fractional, and zero inputs) + this->AssertUnaryOp(expm1, "[-10.0, 0.0, 0.1, 0.00000001, 0.5, 1.0]", + "[-0.9999546000702375, 0.0, " + "0.10517091807564763, 0.000000010000000050000001, " + "0.6487212707001282, 1.718281828459045]"); + this->AssertUnaryOp(expm1, 1.3F, 2.6692964926535487F); + this->AssertUnaryOp(expm1, this->MakeScalar(1.3F), + this->MakeScalar(2.6692964926535487F)); + // Arrays with infinites + this->AssertUnaryOp(expm1, "[-Inf, Inf]", "[-1, Inf]"); + // Arrays with NaNs + 
this->SetNansEqual(true); + this->AssertUnaryOp(expm1, "[NaN]", "[NaN]"); + // Min/max + this->AssertUnaryOp(expm1, min, -1.0); + this->AssertUnaryOp(expm1, max, std::numeric_limits::infinity()); +} + +TEST_F(TestUnaryArithmeticDecimal, Expm1) { + auto max128 = Decimal128::GetMaxValue(38); + auto max256 = Decimal256::GetMaxValue(76); + const auto func = "expm1"; + for (const auto& ty : PositiveScaleTypes()) { + CheckScalar(func, {ArrayFromJSON(ty, R"([])")}, ArrayFromJSON(float64(), "[]")); + CheckScalar( + func, {ArrayFromJSON(ty, R"(["-1.00", "0.00", "0.10", "0.01", "10.00", null])")}, + ArrayFromJSON(float64(), + "[-0.6321205588285577, 0.0, " + "0.10517091807564763, 0.010050167084168058, " + "22025.465794806718, null]")); + } + CheckScalar(func, {std::make_shared(max128, decimal128(38, 0))}, + ScalarFromJSON(float64(), "Inf")); + CheckScalar(func, {std::make_shared(-max128, decimal128(38, 0))}, + ScalarFromJSON(float64(), "-1.0")); + CheckScalar(func, {std::make_shared(max256, decimal256(76, 0))}, + ScalarFromJSON(float64(), "Inf")); + CheckScalar(func, {std::make_shared(-max256, decimal256(76, 0))}, + ScalarFromJSON(float64(), "-1.0")); + for (const auto& ty : NegativeScaleTypes()) { + CheckScalar(func, {ArrayFromJSON(ty, R"([])")}, ArrayFromJSON(float64(), "[]")); + CheckScalar(func, {DecimalArrayFromJSON(ty, R"(["12E2", "0", "-42E2", null])")}, + ArrayFromJSON(float64(), "[Inf, 0.0, -1.0, null]")); + } +} + TEST_F(TestUnaryArithmeticDecimal, Log) { std::vector unchecked = {"ln", "log2", "log10", "log1p"}; std::vector checked = {"ln_checked", "log2_checked", "log10_checked", @@ -2401,6 +2501,18 @@ TYPED_TEST(TestUnaryArithmeticFloating, TrigSin) { this->AssertUnaryOpRaises(Sin, "[Inf, -Inf]", "domain error"); } +TYPED_TEST(TestUnaryArithmeticFloating, TrigSinh) { + this->SetNansEqual(true); + auto sinh = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) { + return Sinh(arg, ctx); + }; + + this->AssertUnaryOp(sinh, "[Inf, -Inf]", "[Inf, -Inf]"); + 
this->AssertUnaryOp(sinh, "[]", "[]"); + this->AssertUnaryOp(sinh, "[null, NaN]", "[null, NaN]"); + this->AssertUnaryOp(sinh, MakeArray(0, M_LN2, M_LN10), "[0, 0.75, 4.95]"); +} + TYPED_TEST(TestUnaryArithmeticFloating, TrigCos) { this->SetNansEqual(true); this->AssertUnaryOp(Cos, "[Inf, -Inf]", "[NaN, NaN]"); @@ -2413,6 +2525,18 @@ TYPED_TEST(TestUnaryArithmeticFloating, TrigCos) { this->AssertUnaryOpRaises(Cos, "[Inf, -Inf]", "domain error"); } +TYPED_TEST(TestUnaryArithmeticFloating, TrigCosh) { + this->SetNansEqual(true); + auto cosh = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) { + return Cosh(arg, ctx); + }; + + this->AssertUnaryOp(cosh, "[Inf, -Inf]", "[Inf, Inf]"); + this->AssertUnaryOp(cosh, "[]", "[]"); + this->AssertUnaryOp(cosh, "[null, NaN]", "[null, NaN]"); + this->AssertUnaryOp(cosh, MakeArray(0, M_LN2, M_LN10), "[1, 1.25, 5.05]"); +} + TYPED_TEST(TestUnaryArithmeticFloating, TrigTan) { this->SetNansEqual(true); this->AssertUnaryOp(Tan, "[Inf, -Inf]", "[NaN, NaN]"); @@ -2427,6 +2551,18 @@ TYPED_TEST(TestUnaryArithmeticFloating, TrigTan) { this->AssertUnaryOpRaises(Tan, "[Inf, -Inf]", "domain error"); } +TYPED_TEST(TestUnaryArithmeticFloating, TrigTanh) { + this->SetNansEqual(true); + auto tanh = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) { + return Tanh(arg, ctx); + }; + + this->AssertUnaryOp(tanh, "[Inf, -Inf]", "[1, -1]"); + this->AssertUnaryOp(tanh, "[]", "[]"); + this->AssertUnaryOp(tanh, "[null, NaN]", "[null, NaN]"); + this->AssertUnaryOp(tanh, MakeArray(0, M_LN2), "[0, 0.6]"); +} + TYPED_TEST(TestUnaryArithmeticFloating, TrigAsin) { this->SetNansEqual(true); this->AssertUnaryOp(Asin, "[Inf, -Inf, -2, 2]", "[NaN, NaN, NaN, NaN]"); @@ -2439,6 +2575,18 @@ TYPED_TEST(TestUnaryArithmeticFloating, TrigAsin) { this->AssertUnaryOpRaises(Asin, "[Inf, -Inf, -2, 2]", "domain error"); } +TYPED_TEST(TestUnaryArithmeticFloating, TrigAsinh) { + this->SetNansEqual(true); + auto asinh = [](const Datum& arg, ArithmeticOptions, 
ExecContext* ctx) { + return Asinh(arg, ctx); + }; + + this->AssertUnaryOp(asinh, "[Inf, -Inf]", "[Inf, -Inf]"); + this->AssertUnaryOp(asinh, "[]", "[]"); + this->AssertUnaryOp(asinh, "[null, NaN]", "[null, NaN]"); + this->AssertUnaryOp(asinh, "[0, 0.75, 4.95]", MakeArray(0, M_LN2, M_LN10)); +} + TYPED_TEST(TestUnaryArithmeticFloating, TrigAcos) { this->SetNansEqual(true); this->AssertUnaryOp(Asin, "[Inf, -Inf, -2, 2]", "[NaN, NaN, NaN, NaN]"); @@ -2451,6 +2599,18 @@ TYPED_TEST(TestUnaryArithmeticFloating, TrigAcos) { this->AssertUnaryOpRaises(Acos, "[Inf, -Inf, -2, 2]", "domain error"); } +TYPED_TEST(TestUnaryArithmeticFloating, TrigAcosh) { + this->SetNansEqual(true); + this->AssertUnaryOp(Acosh, "[0, -1, -Inf]", "[NaN, NaN, NaN]"); + for (auto check_overflow : {false, true}) { + this->SetOverflowCheck(check_overflow); + this->AssertUnaryOp(Acosh, "[]", "[]"); + this->AssertUnaryOp(Acosh, "[null, NaN]", "[null, NaN]"); + this->AssertUnaryOp(Acosh, "[1, 1.25, 5.05]", MakeArray(0, M_LN2, M_LN10)); + } + this->AssertUnaryOpRaises(Acosh, "[0, -1, -Inf]", "domain error"); +} + TYPED_TEST(TestUnaryArithmeticFloating, TrigAtan) { this->SetNansEqual(true); auto atan = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) { @@ -2476,6 +2636,19 @@ TYPED_TEST(TestBinaryArithmeticFloating, TrigAtan2) { -M_PI_2, 0, M_PI)); } +TYPED_TEST(TestUnaryArithmeticFloating, TrigAtanh) { + this->SetNansEqual(true); + this->AssertUnaryOp(Atanh, "[-Inf, Inf, -2, 2]", "[NaN, NaN, NaN, NaN]"); + this->AssertUnaryOp(Atanh, "[-1, 1]", "[-Inf, Inf]"); + for (auto check_overflow : {false, true}) { + this->SetOverflowCheck(check_overflow); + this->AssertUnaryOp(Atanh, "[]", "[]"); + this->AssertUnaryOp(Atanh, "[null, NaN]", "[null, NaN]"); + this->AssertUnaryOp(Atanh, "[0, 0.6]", MakeArray(0, M_LN2)); + } + this->AssertUnaryOpRaises(Atanh, "[-Inf, Inf, -1, 1, -2, 2]", "domain error"); +} + TYPED_TEST(TestUnaryArithmeticIntegral, Trig) { // Integer arguments promoted to double, sanity check 
here auto atan = [](const Datum& arg, ArithmeticOptions, ExecContext* ctx) { diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc index 1fe26b316362d..b000efd1e028b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc @@ -479,6 +479,43 @@ struct DecimalConversions { static Decimal256 ConvertOutput(Decimal256&& val) { return val; } }; +template +struct DecimalConversions { + static Decimal32 ConvertInput(InDecimal&& val) { return Decimal32(val.low_bits()); } + static Decimal32 ConvertOutput(Decimal32&& val) { return val; } +}; + +template <> +struct DecimalConversions { + // Convert then scale + static Decimal64 ConvertInput(Decimal32&& val) { return Decimal64(val); } + static Decimal64 ConvertOutput(Decimal64&& val) { return val; } +}; + +template <> +struct DecimalConversions { + static Decimal64 ConvertInput(Decimal64&& val) { return val; } + static Decimal64 ConvertOutput(Decimal64&& val) { return val; } +}; + +template <> +struct DecimalConversions { + // Scale then truncate + static Decimal128 ConvertInput(Decimal128&& val) { return val; } + static Decimal64 ConvertOutput(Decimal128&& val) { + return Decimal64(static_cast(val.low_bits())); + } +}; + +template <> +struct DecimalConversions { + // Scale then truncate + static Decimal256 ConvertInput(Decimal256&& val) { return val; } + static Decimal64 ConvertOutput(Decimal256&& val) { + return Decimal64(static_cast(val.low_bits())); + } +}; + template <> struct DecimalConversions { // Scale then truncate @@ -495,6 +532,20 @@ struct DecimalConversions { static Decimal128 ConvertOutput(Decimal128&& val) { return val; } }; +template <> +struct DecimalConversions { + // convert then scale + static Decimal128 ConvertInput(Decimal64&& val) { return Decimal128(val.value()); } + static Decimal128 ConvertOutput(Decimal128&& val) { return val; } +}; + +template <> +struct 
DecimalConversions { + // convert then scale + static Decimal128 ConvertInput(Decimal32&& val) { return Decimal128(val.value()); } + static Decimal128 ConvertOutput(Decimal128&& val) { return val; } +}; + struct UnsafeUpscaleDecimal { template OutValue Call(KernelContext*, Arg0Value val, Status*) const { @@ -659,6 +710,18 @@ struct DecimalCastFunctor { } }; +template +struct CastFunctor< + Decimal32Type, I, + enable_if_t::value || is_binary_view_like_type::value>> + : public DecimalCastFunctor {}; + +template +struct CastFunctor< + Decimal64Type, I, + enable_if_t::value || is_binary_view_like_type::value>> + : public DecimalCastFunctor {}; + template struct CastFunctor< Decimal128Type, I, @@ -744,6 +807,10 @@ std::shared_ptr GetCastToInteger(std::string name) { // From decimal to integer DCHECK_OK(func->AddKernel(Type::DECIMAL, {InputType(Type::DECIMAL)}, out_ty, CastFunctor::Exec)); + DCHECK_OK(func->AddKernel(Type::DECIMAL32, {InputType(Type::DECIMAL32)}, out_ty, + CastFunctor::Exec)); + DCHECK_OK(func->AddKernel(Type::DECIMAL64, {InputType(Type::DECIMAL64)}, out_ty, + CastFunctor::Exec)); DCHECK_OK(func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, out_ty, CastFunctor::Exec)); return func; @@ -772,6 +839,10 @@ std::shared_ptr GetCastToFloating(std::string name) { AddCommonNumberCasts(out_ty, func.get()); // From decimal to floating point + DCHECK_OK(func->AddKernel(Type::DECIMAL32, {InputType(Type::DECIMAL32)}, out_ty, + CastFunctor::Exec)); + DCHECK_OK(func->AddKernel(Type::DECIMAL64, {InputType(Type::DECIMAL64)}, out_ty, + CastFunctor::Exec)); DCHECK_OK(func->AddKernel(Type::DECIMAL, {InputType(Type::DECIMAL)}, out_ty, CastFunctor::Exec)); DCHECK_OK(func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, out_ty, @@ -780,6 +851,94 @@ std::shared_ptr GetCastToFloating(std::string name) { return func; } +std::shared_ptr GetCastToDecimal32() { + OutputType sig_out_ty(ResolveOutputFromOptions); + + auto func = 
std::make_shared("cast_decimal32", Type::DECIMAL32); + AddCommonCasts(Type::DECIMAL32, sig_out_ty, func.get()); + + // Cast from floating point + DCHECK_OK(func->AddKernel(Type::FLOAT, {float32()}, sig_out_ty, + CastFunctor::Exec)); + DCHECK_OK(func->AddKernel(Type::DOUBLE, {float64()}, sig_out_ty, + CastFunctor::Exec)); + + // Cast from integer + for (const std::shared_ptr& in_ty : IntTypes()) { + auto exec = GenerateInteger(in_ty->id()); + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, sig_out_ty, std::move(exec))); + } + + // Cast from other strings + for (const std::shared_ptr& in_ty : BaseBinaryTypes()) { + auto exec = GenerateVarBinaryBase(in_ty->id()); + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, sig_out_ty, std::move(exec))); + } + for (const std::shared_ptr& in_ty : BinaryViewTypes()) { + auto exec = GenerateVarBinaryViewBase(in_ty->id()); + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, sig_out_ty, std::move(exec))); + } + + // Cast from other decimal + auto exec = CastFunctor::Exec; + DCHECK_OK( + func->AddKernel(Type::DECIMAL32, {InputType(Type::DECIMAL32)}, sig_out_ty, exec)); + exec = CastFunctor::Exec; + DCHECK_OK( + func->AddKernel(Type::DECIMAL64, {InputType(Type::DECIMAL64)}, sig_out_ty, exec)); + exec = CastFunctor::Exec; + DCHECK_OK( + func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL128)}, sig_out_ty, exec)); + exec = CastFunctor::Exec; + DCHECK_OK( + func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, sig_out_ty, exec)); + return func; +} + +std::shared_ptr GetCastToDecimal64() { + OutputType sig_out_ty(ResolveOutputFromOptions); + + auto func = std::make_shared("cast_decimal64", Type::DECIMAL64); + AddCommonCasts(Type::DECIMAL64, sig_out_ty, func.get()); + + // Cast from floating point + DCHECK_OK(func->AddKernel(Type::FLOAT, {float32()}, sig_out_ty, + CastFunctor::Exec)); + DCHECK_OK(func->AddKernel(Type::DOUBLE, {float64()}, sig_out_ty, + CastFunctor::Exec)); + + // Cast from integer + for (const 
std::shared_ptr& in_ty : IntTypes()) { + auto exec = GenerateInteger(in_ty->id()); + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, sig_out_ty, std::move(exec))); + } + + // Cast from other strings + for (const std::shared_ptr& in_ty : BaseBinaryTypes()) { + auto exec = GenerateVarBinaryBase(in_ty->id()); + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, sig_out_ty, std::move(exec))); + } + for (const std::shared_ptr& in_ty : BinaryViewTypes()) { + auto exec = GenerateVarBinaryViewBase(in_ty->id()); + DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, sig_out_ty, std::move(exec))); + } + + // Cast from other decimal + auto exec = CastFunctor::Exec; + DCHECK_OK( + func->AddKernel(Type::DECIMAL32, {InputType(Type::DECIMAL32)}, sig_out_ty, exec)); + exec = CastFunctor::Exec; + DCHECK_OK( + func->AddKernel(Type::DECIMAL64, {InputType(Type::DECIMAL64)}, sig_out_ty, exec)); + exec = CastFunctor::Exec; + DCHECK_OK( + func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL128)}, sig_out_ty, exec)); + exec = CastFunctor::Exec; + DCHECK_OK( + func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL256)}, sig_out_ty, exec)); + return func; +} + std::shared_ptr GetCastToDecimal128() { OutputType sig_out_ty(ResolveOutputFromOptions); @@ -809,8 +968,14 @@ std::shared_ptr GetCastToDecimal128() { } // Cast from other decimal - auto exec = CastFunctor::Exec; + auto exec = CastFunctor::Exec; // We resolve the output type of this kernel from the CastOptions + DCHECK_OK( + func->AddKernel(Type::DECIMAL32, {InputType(Type::DECIMAL32)}, sig_out_ty, exec)); + exec = CastFunctor::Exec; + DCHECK_OK( + func->AddKernel(Type::DECIMAL64, {InputType(Type::DECIMAL64)}, sig_out_ty, exec)); + exec = CastFunctor::Exec; DCHECK_OK( func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL128)}, sig_out_ty, exec)); exec = CastFunctor::Exec; @@ -848,7 +1013,13 @@ std::shared_ptr GetCastToDecimal256() { } // Cast from other decimal - auto exec = CastFunctor::Exec; + auto exec = CastFunctor::Exec; + 
DCHECK_OK( + func->AddKernel(Type::DECIMAL32, {InputType(Type::DECIMAL32)}, sig_out_ty, exec)); + exec = CastFunctor::Exec; + DCHECK_OK( + func->AddKernel(Type::DECIMAL64, {InputType(Type::DECIMAL64)}, sig_out_ty, exec)); + exec = CastFunctor::Exec; DCHECK_OK( func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL128)}, sig_out_ty, exec)); exec = CastFunctor::Exec; @@ -950,6 +1121,8 @@ std::vector> GetNumericCasts() { auto cast_double = GetCastToFloating("cast_double"); functions.push_back(cast_double); + functions.push_back(GetCastToDecimal32()); + functions.push_back(GetCastToDecimal64()); functions.push_back(GetCastToDecimal128()); functions.push_back(GetCastToDecimal256()); diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index 4edf00225d317..7186612d25a76 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -683,7 +683,8 @@ void AddNumberToStringCasts(CastFunction* func) { template void AddDecimalToStringCasts(CastFunction* func) { auto out_ty = TypeTraits::type_singleton(); - for (const auto& in_tid : std::vector{Type::DECIMAL128, Type::DECIMAL256}) { + for (const auto& in_tid : std::vector{Type::DECIMAL32, Type::DECIMAL64, + Type::DECIMAL128, Type::DECIMAL256}) { DCHECK_OK( func->AddKernel(in_tid, {in_tid}, out_ty, GenerateDecimal(in_tid), diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 33a01425508e0..80d5b3c46cae1 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -447,6 +447,159 @@ TEST(Cast, IntToFloating) { CastOptions::Safe(float64())); } +TEST(Cast, Decimal32ToInt) { + auto options = CastOptions::Safe(int32()); + + for (bool allow_int_overflow : {false, true}) { + for (bool allow_decimal_truncate : {false, true}) { + options.allow_int_overflow = allow_int_overflow; + 
options.allow_decimal_truncate = allow_decimal_truncate; + + auto no_overflow_no_truncation = ArrayFromJSON(decimal32(9, 5), R"([ + "02.00000", + "-11.00000", + "22.00000", + "-121.00000", + null])"); + CheckCast(no_overflow_no_truncation, + ArrayFromJSON(int32(), "[2, -11, 22, -121, null]"), options); + } + } + + for (bool allow_int_overflow : {false, true}) { + options.allow_int_overflow = allow_int_overflow; + auto truncation_but_no_overflow = ArrayFromJSON(decimal32(9, 5), R"([ + "02.10000", + "-11.00450", + "22.00045", + "-121.12100", + null])"); + + options.allow_decimal_truncate = true; + CheckCast(truncation_but_no_overflow, + ArrayFromJSON(int32(), "[2, -11, 22, -121, null]"), options); + + options.allow_decimal_truncate = false; + CheckCastFails(truncation_but_no_overflow, options); + } + + for (bool allow_int_overflow : {false, true}) { + for (bool allow_decimal_truncate : {false, true}) { + options.allow_int_overflow = allow_int_overflow; + options.allow_decimal_truncate = allow_decimal_truncate; + + auto overflow_and_truncation = ArrayFromJSON(decimal32(9, 5), R"([ + "1234.00453", + "9999.00344", + null])"); + + if (options.allow_decimal_truncate) { + CheckCast(overflow_and_truncation, ArrayFromJSON(int32(), "[1234, 9999, null]"), + options); + } else { + CheckCastFails(overflow_and_truncation, options); + } + } + } + + Decimal32Builder builder(decimal32(9, -3)); + for (auto d : {Decimal32("12345000."), Decimal32("-12000000.")}) { + ASSERT_OK_AND_ASSIGN(d, d.Rescale(0, -3)); + ASSERT_OK(builder.Append(d)); + } + ASSERT_OK_AND_ASSIGN(auto negative_scale, builder.Finish()); + options.allow_int_overflow = true; + options.allow_decimal_truncate = true; + CheckCast(negative_scale, ArrayFromJSON(int32(), "[12345000, -12000000]"), options); +} + +TEST(Cast, Decimal64ToInt) { + auto options = CastOptions::Safe(int64()); + + for (bool allow_int_overflow : {false, true}) { + for (bool allow_decimal_truncate : {false, true}) { + options.allow_int_overflow = 
allow_int_overflow; + options.allow_decimal_truncate = allow_decimal_truncate; + + auto no_overflow_no_truncation = ArrayFromJSON(decimal64(18, 10), R"([ + "02.0000000000", + "-11.0000000000", + "22.0000000000", + "-121.0000000000", + null])"); + CheckCast(no_overflow_no_truncation, + ArrayFromJSON(int64(), "[2, -11, 22, -121, null]"), options); + } + } + + for (bool allow_int_overflow : {false, true}) { + options.allow_int_overflow = allow_int_overflow; + auto truncation_but_no_overflow = ArrayFromJSON(decimal64(18, 10), R"([ + "02.1000000000", + "-11.0000004500", + "22.0000004500", + "-121.1210000000", + null])"); + + options.allow_decimal_truncate = true; + CheckCast(truncation_but_no_overflow, + ArrayFromJSON(int32(), "[2, -11, 22, -121, null]"), options); + + options.allow_decimal_truncate = false; + CheckCastFails(truncation_but_no_overflow, options); + } + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + auto overflow_no_truncation = ArrayFromJSON(decimal64(18, 5), R"([ + "1234567890123.00000", + "9999999999999.00000", + null])"); + + options.allow_int_overflow = true; + CheckCast(overflow_no_truncation, + ArrayFromJSON(int64(), "[1234567890123, 9999999999999, null]"), options); + + options.to_type = int32(); + options.allow_int_overflow = false; + CheckCastFails(overflow_no_truncation, options); + } + + for (bool allow_int_overflow : {false, true}) { + for (bool allow_decimal_truncate : {false, true}) { + options.allow_int_overflow = allow_int_overflow; + options.allow_decimal_truncate = allow_decimal_truncate; + options.to_type = int32(); + + auto overflow_and_truncation = ArrayFromJSON(decimal64(18, 5), R"([ + "1234567890123.45345", + "9999999999999.00344", + null])"); + + if (options.allow_int_overflow && options.allow_decimal_truncate) { + CheckCast(overflow_and_truncation, + ArrayFromJSON(int32(), + // 1234567890123 % 2**32, 9999999999999 % 2**32 + "[1912276171, 1316134911, null]"), + 
options); + } else { + CheckCastFails(overflow_and_truncation, options); + } + } + } + + Decimal64Builder builder(decimal64(18, -4)); + for (auto d : {Decimal64("1234567890000."), Decimal64("-120000.")}) { + ASSERT_OK_AND_ASSIGN(d, d.Rescale(0, -4)); + ASSERT_OK(builder.Append(d)); + } + ASSERT_OK_AND_ASSIGN(auto negative_scale, builder.Finish()); + options.allow_int_overflow = true; + options.allow_decimal_truncate = true; + CheckCast(negative_scale, ArrayFromJSON(int64(), "[1234567890000, -120000]"), options); +} + TEST(Cast, Decimal128ToInt) { auto options = CastOptions::Safe(int64()); @@ -629,11 +782,14 @@ TEST(Cast, Decimal256ToInt) { } TEST(Cast, IntegerToDecimal) { - for (auto decimal_type : {decimal128(22, 2), decimal256(22, 2)}) { + for (auto decimal_type : + {decimal32(9, 2), decimal64(18, 2), decimal128(22, 2), decimal256(22, 2)}) { for (auto integer_type : kIntegerTypes) { - CheckCast( - ArrayFromJSON(integer_type, "[0, 7, null, 100, 99]"), - ArrayFromJSON(decimal_type, R"(["0.00", "7.00", null, "100.00", "99.00"])")); + if (decimal_type->bit_width() > integer_type->bit_width()) { + CheckCast( + ArrayFromJSON(integer_type, "[0, 7, null, 100, 99]"), + ArrayFromJSON(decimal_type, R"(["0.00", "7.00", null, "100.00", "99.00"])")); + } } } @@ -652,6 +808,12 @@ TEST(Cast, IntegerToDecimal) { { CastOptions options; + options.to_type = decimal32(9, 3); + CheckCastFails(ArrayFromJSON(int32(), "[0]"), options); + + options.to_type = decimal64(18, 3); + CheckCastFails(ArrayFromJSON(int64(), "[0]"), options); + options.to_type = decimal128(5, 3); CheckCastFails(ArrayFromJSON(int8(), "[0]"), options); @@ -660,6 +822,166 @@ TEST(Cast, IntegerToDecimal) { } } +TEST(Cast, Decimal32ToDecimal32) { + CastOptions options; + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + auto no_truncation = ArrayFromJSON(decimal32(9, 5), R"([ + "02.00000", + "30.00000", + "22.00000", + "-121.00000", + null])"); + auto 
expected = ArrayFromJSON(decimal32(9, 0), R"([ + "02.", + "30.", + "22.", + "-121.", + null])"); + + CheckCast(no_truncation, expected, options); + CheckCast(expected, no_truncation, options); + } + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + // Same scale, different precision + auto d_5_2 = ArrayFromJSON(decimal32(5, 2), R"([ + "12.34", + "0.56"])"); + auto d_4_2 = ArrayFromJSON(decimal32(4, 2), R"([ + "12.34", + "0.56"])"); + + CheckCast(d_5_2, d_4_2, options); + CheckCast(d_4_2, d_5_2, options); + } + + auto d_9_5 = ArrayFromJSON(decimal32(9, 5), R"([ + "-02.12345", + "30.12345", + null])"); + + auto d_6_0 = ArrayFromJSON(decimal32(6, 0), R"([ + "-02.", + "30.", + null])"); + + auto d_9_5_roundtripped = ArrayFromJSON(decimal32(9, 5), R"([ + "-02.00000", + "30.00000", + null])"); + + // Rescale which leads to truncation + options.allow_decimal_truncate = true; + CheckCast(d_9_5, d_6_0, options); + CheckCast(d_6_0, d_9_5_roundtripped, options); + + options.allow_decimal_truncate = false; + options.to_type = d_6_0->type(); + CheckCastFails(d_9_5, options); + CheckCast(d_6_0, d_9_5_roundtripped, options); + + // Precision loss without rescale leads to truncation + auto d_4_2 = ArrayFromJSON(decimal32(4, 2), R"(["12.34"])"); + for (auto expected : { + ArrayFromJSON(decimal32(3, 2), R"(["12.34"])"), + ArrayFromJSON(decimal32(4, 3), R"(["12.340"])"), + ArrayFromJSON(decimal32(2, 1), R"(["12.3"])"), + }) { + options.allow_decimal_truncate = true; + ASSERT_OK_AND_ASSIGN(auto invalid, Cast(d_4_2, expected->type(), options)); + ASSERT_RAISES(Invalid, invalid.make_array()->ValidateFull()); + + options.allow_decimal_truncate = false; + options.to_type = expected->type(); + CheckCastFails(d_4_2, options); + } +} + +TEST(Cast, Decimal64ToDecimal64) { + CastOptions options; + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + auto 
no_truncation = ArrayFromJSON(decimal64(18, 10), R"([ + "02.0000000000", + "30.0000000000", + "22.0000000000", + "-121.0000000000", + null])"); + auto expected = ArrayFromJSON(decimal64(9, 0), R"([ + "02.", + "30.", + "22.", + "-121.", + null])"); + + CheckCast(no_truncation, expected, options); + CheckCast(expected, no_truncation, options); + } + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + // Same scale, different precision + auto d_5_2 = ArrayFromJSON(decimal64(5, 2), R"([ + "12.34", + "0.56"])"); + auto d_4_2 = ArrayFromJSON(decimal64(4, 2), R"([ + "12.34", + "0.56"])"); + + CheckCast(d_5_2, d_4_2, options); + CheckCast(d_4_2, d_5_2, options); + } + + auto d_18_10 = ArrayFromJSON(decimal64(18, 10), R"([ + "-02.1234567890", + "30.1234567890", + null])"); + + auto d_12_0 = ArrayFromJSON(decimal64(12, 0), R"([ + "-02.", + "30.", + null])"); + + auto d_18_10_roundtripped = ArrayFromJSON(decimal64(18, 10), R"([ + "-02.0000000000", + "30.0000000000", + null])"); + + // Rescale which leads to truncation + options.allow_decimal_truncate = true; + CheckCast(d_18_10, d_12_0, options); + CheckCast(d_12_0, d_18_10_roundtripped, options); + + options.allow_decimal_truncate = false; + options.to_type = d_12_0->type(); + CheckCastFails(d_18_10, options); + CheckCast(d_12_0, d_18_10_roundtripped, options); + + // Precision loss without rescale leads to truncation + auto d_4_2 = ArrayFromJSON(decimal64(4, 2), R"(["12.34"])"); + for (auto expected : { + ArrayFromJSON(decimal64(3, 2), R"(["12.34"])"), + ArrayFromJSON(decimal64(4, 3), R"(["12.340"])"), + ArrayFromJSON(decimal64(2, 1), R"(["12.3"])"), + }) { + options.allow_decimal_truncate = true; + ASSERT_OK_AND_ASSIGN(auto invalid, Cast(d_4_2, expected->type(), options)); + ASSERT_RAISES(Invalid, invalid.make_array()->ValidateFull()); + + options.allow_decimal_truncate = false; + options.to_type = expected->type(); + CheckCastFails(d_4_2, options); + } +} + 
TEST(Cast, Decimal128ToDecimal128) { CastOptions options; @@ -820,19 +1142,19 @@ TEST(Cast, Decimal256ToDecimal256) { } } -TEST(Cast, Decimal128ToDecimal256) { +TEST(Cast, Decimal32ToDecimal64) { CastOptions options; for (bool allow_decimal_truncate : {false, true}) { options.allow_decimal_truncate = allow_decimal_truncate; - auto no_truncation = ArrayFromJSON(decimal128(38, 10), R"([ - "02.0000000000", - "30.0000000000", - "22.0000000000", - "-121.0000000000", + auto no_truncation = ArrayFromJSON(decimal32(9, 5), R"([ + "02.00000", + "30.00000", + "22.00000", + "-121.00000", null])"); - auto expected = ArrayFromJSON(decimal256(48, 0), R"([ + auto expected = ArrayFromJSON(decimal64(16, 0), R"([ "02.", "30.", "22.", @@ -846,47 +1168,731 @@ TEST(Cast, Decimal128ToDecimal256) { options.allow_decimal_truncate = allow_decimal_truncate; // Same scale, different precision - auto d_5_2 = ArrayFromJSON(decimal128(5, 2), R"([ + auto d_5_2 = ArrayFromJSON(decimal32(5, 2), R"([ "12.34", "0.56"])"); - auto d_4_2 = ArrayFromJSON(decimal256(4, 2), R"([ + auto d_4_2 = ArrayFromJSON(decimal64(4, 2), R"([ "12.34", "0.56"])"); - auto d_40_2 = ArrayFromJSON(decimal256(40, 2), R"([ + auto d_16_2 = ArrayFromJSON(decimal64(16, 2), R"([ "12.34", "0.56"])"); CheckCast(d_5_2, d_4_2, options); - CheckCast(d_5_2, d_40_2, options); + CheckCast(d_5_2, d_16_2, options); } - auto d128_38_10 = ArrayFromJSON(decimal128(38, 10), R"([ - "-02.1234567890", - "30.1234567890", + auto d32_7_5 = ArrayFromJSON(decimal32(7, 5), R"([ + "-02.12345", + "30.12345", null])"); - auto d128_28_0 = ArrayFromJSON(decimal128(28, 0), R"([ + auto d32_9_0 = ArrayFromJSON(decimal32(9, 0), R"([ "-02.", "30.", null])"); - auto d256_28_0 = ArrayFromJSON(decimal256(28, 0), R"([ + auto d64_14_0 = ArrayFromJSON(decimal64(14, 0), R"([ "-02.", "30.", null])"); - auto d256_38_10_roundtripped = ArrayFromJSON(decimal256(38, 10), R"([ + auto d64_18_10_roundtripped = ArrayFromJSON(decimal64(18, 10), R"([ "-02.0000000000", 
"30.0000000000", null])"); // Rescale which leads to truncation options.allow_decimal_truncate = true; - CheckCast(d128_38_10, d256_28_0, options); - CheckCast(d128_28_0, d256_38_10_roundtripped, options); + CheckCast(d32_7_5, d64_14_0, options); + CheckCast(d32_9_0, d64_18_10_roundtripped, options); options.allow_decimal_truncate = false; - options.to_type = d256_28_0->type(); + options.to_type = d64_14_0->type(); + CheckCastFails(d32_7_5, options); + CheckCast(d32_9_0, d64_18_10_roundtripped, options); + + // Precision loss without rescale leads to truncation + auto d32_4_2 = ArrayFromJSON(decimal32(4, 2), R"(["12.34"])"); + for (auto expected : { + ArrayFromJSON(decimal64(3, 2), R"(["12.34"])"), + ArrayFromJSON(decimal64(4, 3), R"(["12.340"])"), + ArrayFromJSON(decimal64(2, 1), R"(["12.3"])"), + }) { + options.allow_decimal_truncate = true; + ASSERT_OK_AND_ASSIGN(auto invalid, Cast(d32_4_2, expected->type(), options)); + ASSERT_RAISES(Invalid, invalid.make_array()->ValidateFull()); + + options.allow_decimal_truncate = false; + options.to_type = expected->type(); + CheckCastFails(d32_4_2, options); + } +} + +TEST(Cast, Decimal32ToDecimal128) { + CastOptions options; + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + auto no_truncation = ArrayFromJSON(decimal32(9, 5), R"([ + "02.00000", + "30.00000", + "22.00000", + "-121.00000", + null])"); + auto expected = ArrayFromJSON(decimal128(16, 0), R"([ + "02.", + "30.", + "22.", + "-121.", + null])"); + + CheckCast(no_truncation, expected, options); + } + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + // Same scale, different precision + auto d_5_2 = ArrayFromJSON(decimal32(5, 2), R"([ + "12.34", + "0.56"])"); + auto d_4_2 = ArrayFromJSON(decimal128(4, 2), R"([ + "12.34", + "0.56"])"); + auto d_16_2 = ArrayFromJSON(decimal128(16, 2), R"([ + "12.34", + "0.56"])"); + + CheckCast(d_5_2, 
d_4_2, options); + CheckCast(d_5_2, d_16_2, options); + } + + auto d32_7_5 = ArrayFromJSON(decimal32(7, 5), R"([ + "-02.12345", + "30.12345", + null])"); + + auto d32_9_0 = ArrayFromJSON(decimal32(9, 0), R"([ + "-02.", + "30.", + null])"); + + auto d128_14_0 = ArrayFromJSON(decimal128(14, 0), R"([ + "-02.", + "30.", + null])"); + + auto d128_38_10_roundtripped = ArrayFromJSON(decimal128(38, 10), R"([ + "-02.0000000000", + "30.0000000000", + null])"); + + // Rescale which leads to truncation + options.allow_decimal_truncate = true; + CheckCast(d32_7_5, d128_14_0, options); + CheckCast(d32_9_0, d128_38_10_roundtripped, options); + + options.allow_decimal_truncate = false; + options.to_type = d128_14_0->type(); + CheckCastFails(d32_7_5, options); + CheckCast(d32_9_0, d128_38_10_roundtripped, options); + + // Precision loss without rescale leads to truncation + auto d32_4_2 = ArrayFromJSON(decimal32(4, 2), R"(["12.34"])"); + for (auto expected : { + ArrayFromJSON(decimal128(3, 2), R"(["12.34"])"), + ArrayFromJSON(decimal128(4, 3), R"(["12.340"])"), + ArrayFromJSON(decimal128(2, 1), R"(["12.3"])"), + }) { + options.allow_decimal_truncate = true; + ASSERT_OK_AND_ASSIGN(auto invalid, Cast(d32_4_2, expected->type(), options)); + ASSERT_RAISES(Invalid, invalid.make_array()->ValidateFull()); + + options.allow_decimal_truncate = false; + options.to_type = expected->type(); + CheckCastFails(d32_4_2, options); + } +} + +TEST(Cast, Decimal32ToDecimal256) { + CastOptions options; + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + auto no_truncation = ArrayFromJSON(decimal32(9, 5), R"([ + "02.00000", + "30.00000", + "22.00000", + "-121.00000", + null])"); + auto expected = ArrayFromJSON(decimal256(16, 0), R"([ + "02.", + "30.", + "22.", + "-121.", + null])"); + + CheckCast(no_truncation, expected, options); + } + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = 
allow_decimal_truncate; + + // Same scale, different precision + auto d_5_2 = ArrayFromJSON(decimal32(5, 2), R"([ + "12.34", + "0.56"])"); + auto d_4_2 = ArrayFromJSON(decimal256(4, 2), R"([ + "12.34", + "0.56"])"); + auto d_16_2 = ArrayFromJSON(decimal256(16, 2), R"([ + "12.34", + "0.56"])"); + + CheckCast(d_5_2, d_4_2, options); + CheckCast(d_5_2, d_16_2, options); + } + + auto d32_7_5 = ArrayFromJSON(decimal32(7, 5), R"([ + "-02.12345", + "30.12345", + null])"); + + auto d32_9_0 = ArrayFromJSON(decimal32(9, 0), R"([ + "-02.", + "30.", + null])"); + + auto d256_14_0 = ArrayFromJSON(decimal256(14, 0), R"([ + "-02.", + "30.", + null])"); + + auto d256_76_10_roundtripped = ArrayFromJSON(decimal256(76, 10), R"([ + "-02.0000000000", + "30.0000000000", + null])"); + + // Rescale which leads to truncation + options.allow_decimal_truncate = true; + CheckCast(d32_7_5, d256_14_0, options); + CheckCast(d32_9_0, d256_76_10_roundtripped, options); + + options.allow_decimal_truncate = false; + options.to_type = d256_14_0->type(); + CheckCastFails(d32_7_5, options); + CheckCast(d32_9_0, d256_76_10_roundtripped, options); + + // Precision loss without rescale leads to truncation + auto d32_4_2 = ArrayFromJSON(decimal32(4, 2), R"(["12.34"])"); + for (auto expected : { + ArrayFromJSON(decimal256(3, 2), R"(["12.34"])"), + ArrayFromJSON(decimal256(4, 3), R"(["12.340"])"), + ArrayFromJSON(decimal256(2, 1), R"(["12.3"])"), + }) { + options.allow_decimal_truncate = true; + ASSERT_OK_AND_ASSIGN(auto invalid, Cast(d32_4_2, expected->type(), options)); + ASSERT_RAISES(Invalid, invalid.make_array()->ValidateFull()); + + options.allow_decimal_truncate = false; + options.to_type = expected->type(); + CheckCastFails(d32_4_2, options); + } +} + +TEST(Cast, Decimal64ToDecimal32) { + CastOptions options; + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + auto no_truncation = ArrayFromJSON(decimal64(18, 5), R"([ + "02.00000", + 
"30.00000", + "22.00000", + "-121.00000", + null])"); + auto expected = ArrayFromJSON(decimal32(9, 0), R"([ + "02.", + "30.", + "22.", + "-121.", + null])"); + + CheckCast(no_truncation, expected, options); + } + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + // Same scale, different precision + auto d_12_2 = ArrayFromJSON(decimal64(12, 2), R"([ + "12.34", + "0.56"])"); + auto d_4_2 = ArrayFromJSON(decimal32(4, 2), R"([ + "12.34", + "0.56"])"); + + CheckCast(d_12_2, d_4_2, options); + } + + auto d64_15_10 = ArrayFromJSON(decimal64(15, 5), R"([ + "-02.12345", + "30.12345", + null])"); + + auto d64_12_0 = ArrayFromJSON(decimal64(12, 0), R"([ + "-02.", + "30.", + null])"); + + auto d32_6_0 = ArrayFromJSON(decimal32(6, 0), R"([ + "-02.", + "30.", + null])"); + + auto d32_9_5_roundtripped = ArrayFromJSON(decimal32(9, 5), R"([ + "-02.00000", + "30.00000", + null])"); + + // Rescale which leads to truncation + options.allow_decimal_truncate = true; + CheckCast(d64_15_10, d32_6_0, options); + CheckCast(d64_12_0, d32_9_5_roundtripped, options); + + options.allow_decimal_truncate = false; + options.to_type = d32_6_0->type(); + CheckCastFails(d64_15_10, options); + CheckCast(d64_12_0, d32_9_5_roundtripped, options); + + // Precision loss without rescale leads to truncation + auto d64_4_2 = ArrayFromJSON(decimal64(4, 2), R"(["12.34"])"); + for (auto expected : { + ArrayFromJSON(decimal32(3, 2), R"(["12.34"])"), + ArrayFromJSON(decimal32(4, 3), R"(["12.340"])"), + ArrayFromJSON(decimal32(2, 1), R"(["12.3"])"), + }) { + options.allow_decimal_truncate = true; + ASSERT_OK_AND_ASSIGN(auto invalid, Cast(d64_4_2, expected->type(), options)); + ASSERT_RAISES(Invalid, invalid.make_array()->ValidateFull()); + + options.allow_decimal_truncate = false; + options.to_type = expected->type(); + CheckCastFails(d64_4_2, options); + } +} + +TEST(Cast, Decimal64ToDecimal128) { + CastOptions options; + + for (bool 
allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + auto no_truncation = ArrayFromJSON(decimal64(18, 10), R"([ + "02.0000000000", + "30.0000000000", + "22.0000000000", + "-121.0000000000", + null])"); + auto expected = ArrayFromJSON(decimal128(28, 0), R"([ + "02.", + "30.", + "22.", + "-121.", + null])"); + + CheckCast(no_truncation, expected, options); + } + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + // Same scale, different precision + auto d_5_2 = ArrayFromJSON(decimal64(5, 2), R"([ + "12.34", + "0.56"])"); + auto d_4_2 = ArrayFromJSON(decimal128(4, 2), R"([ + "12.34", + "0.56"])"); + auto d_16_2 = ArrayFromJSON(decimal128(16, 2), R"([ + "12.34", + "0.56"])"); + + CheckCast(d_5_2, d_4_2, options); + CheckCast(d_5_2, d_16_2, options); + } + + auto d64_16_10 = ArrayFromJSON(decimal64(16, 10), R"([ + "-02.1234567890", + "30.1234567890", + null])"); + + auto d64_18_0 = ArrayFromJSON(decimal64(18, 0), R"([ + "-02.", + "30.", + null])"); + + auto d128_14_0 = ArrayFromJSON(decimal128(14, 0), R"([ + "-02.", + "30.", + null])"); + + auto d128_38_10_roundtripped = ArrayFromJSON(decimal128(38, 10), R"([ + "-02.0000000000", + "30.0000000000", + null])"); + + // Rescale which leads to truncation + options.allow_decimal_truncate = true; + CheckCast(d64_16_10, d128_14_0, options); + CheckCast(d64_18_0, d128_38_10_roundtripped, options); + + options.allow_decimal_truncate = false; + options.to_type = d128_14_0->type(); + CheckCastFails(d64_16_10, options); + CheckCast(d64_18_0, d128_38_10_roundtripped, options); + + // Precision loss without rescale leads to truncation + auto d64_4_2 = ArrayFromJSON(decimal64(4, 2), R"(["12.34"])"); + for (auto expected : { + ArrayFromJSON(decimal128(3, 2), R"(["12.34"])"), + ArrayFromJSON(decimal128(4, 3), R"(["12.340"])"), + ArrayFromJSON(decimal128(2, 1), R"(["12.3"])"), + }) { + options.allow_decimal_truncate = 
true; + ASSERT_OK_AND_ASSIGN(auto invalid, Cast(d64_4_2, expected->type(), options)); + ASSERT_RAISES(Invalid, invalid.make_array()->ValidateFull()); + + options.allow_decimal_truncate = false; + options.to_type = expected->type(); + CheckCastFails(d64_4_2, options); + } +} + +TEST(Cast, Decimal64ToDecimal256) { + CastOptions options; + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + auto no_truncation = ArrayFromJSON(decimal64(18, 10), R"([ + "02.0000000000", + "30.0000000000", + "22.0000000000", + "-121.0000000000", + null])"); + auto expected = ArrayFromJSON(decimal256(16, 0), R"([ + "02.", + "30.", + "22.", + "-121.", + null])"); + + CheckCast(no_truncation, expected, options); + } + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + // Same scale, different precision + auto d_5_2 = ArrayFromJSON(decimal64(5, 2), R"([ + "12.34", + "0.56"])"); + auto d_4_2 = ArrayFromJSON(decimal256(4, 2), R"([ + "12.34", + "0.56"])"); + auto d_16_2 = ArrayFromJSON(decimal256(16, 2), R"([ + "12.34", + "0.56"])"); + + CheckCast(d_5_2, d_4_2, options); + CheckCast(d_5_2, d_16_2, options); + } + + auto d64_16_10 = ArrayFromJSON(decimal64(16, 10), R"([ + "-02.1234567890", + "30.1234567890", + null])"); + + auto d64_18_0 = ArrayFromJSON(decimal64(18, 0), R"([ + "-02.", + "30.", + null])"); + + auto d256_14_0 = ArrayFromJSON(decimal256(14, 0), R"([ + "-02.", + "30.", + null])"); + + auto d256_76_10_roundtripped = ArrayFromJSON(decimal256(76, 10), R"([ + "-02.0000000000", + "30.0000000000", + null])"); + + // Rescale which leads to truncation + options.allow_decimal_truncate = true; + CheckCast(d64_16_10, d256_14_0, options); + CheckCast(d64_18_0, d256_76_10_roundtripped, options); + + options.allow_decimal_truncate = false; + options.to_type = d256_14_0->type(); + CheckCastFails(d64_16_10, options); + CheckCast(d64_18_0, d256_76_10_roundtripped, 
options); + + // Precision loss without rescale leads to truncation + auto d64_4_2 = ArrayFromJSON(decimal64(4, 2), R"(["12.34"])"); + for (auto expected : { + ArrayFromJSON(decimal256(3, 2), R"(["12.34"])"), + ArrayFromJSON(decimal256(4, 3), R"(["12.340"])"), + ArrayFromJSON(decimal256(2, 1), R"(["12.3"])"), + }) { + options.allow_decimal_truncate = true; + ASSERT_OK_AND_ASSIGN(auto invalid, Cast(d64_4_2, expected->type(), options)); + ASSERT_RAISES(Invalid, invalid.make_array()->ValidateFull()); + + options.allow_decimal_truncate = false; + options.to_type = expected->type(); + CheckCastFails(d64_4_2, options); + } +} + +TEST(Cast, Decimal128ToDecimal32) { + CastOptions options; + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + auto no_truncation = ArrayFromJSON(decimal128(26, 5), R"([ + "02.00000", + "30.00000", + "22.00000", + "-121.00000", + null])"); + auto expected = ArrayFromJSON(decimal32(9, 0), R"([ + "02.", + "30.", + "22.", + "-121.", + null])"); + + CheckCast(no_truncation, expected, options); + } + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + // Same scale, different precision + auto d_28_2 = ArrayFromJSON(decimal128(28, 2), R"([ + "12.34", + "0.56"])"); + auto d_4_2 = ArrayFromJSON(decimal32(4, 2), R"([ + "12.34", + "0.56"])"); + + CheckCast(d_28_2, d_4_2, options); + } + + auto d128_28_5 = ArrayFromJSON(decimal128(28, 5), R"([ + "-02.12345", + "30.12345", + null])"); + + auto d128_22_0 = ArrayFromJSON(decimal128(22, 0), R"([ + "-02.", + "30.", + null])"); + + auto d32_7_0 = ArrayFromJSON(decimal32(7, 0), R"([ + "-02.", + "30.", + null])"); + + auto d32_9_5_roundtripped = ArrayFromJSON(decimal32(9, 5), R"([ + "-02.00000", + "30.00000", + null])"); + + // Rescale which leads to truncation + options.allow_decimal_truncate = true; + CheckCast(d128_28_5, d32_7_0, options); + CheckCast(d128_22_0, 
d32_9_5_roundtripped, options); + + options.allow_decimal_truncate = false; + options.to_type = d32_7_0->type(); + CheckCastFails(d128_28_5, options); + CheckCast(d128_22_0, d32_9_5_roundtripped, options); + + // Precision loss without rescale leads to truncation + auto d128_4_2 = ArrayFromJSON(decimal128(4, 2), R"(["12.34"])"); + for (auto expected : { + ArrayFromJSON(decimal32(3, 2), R"(["12.34"])"), + ArrayFromJSON(decimal32(4, 3), R"(["12.340"])"), + ArrayFromJSON(decimal32(2, 1), R"(["12.3"])"), + }) { + options.allow_decimal_truncate = true; + ASSERT_OK_AND_ASSIGN(auto invalid, Cast(d128_4_2, expected->type(), options)); + ASSERT_RAISES(Invalid, invalid.make_array()->ValidateFull()); + + options.allow_decimal_truncate = false; + options.to_type = expected->type(); + CheckCastFails(d128_4_2, options); + } +} + +TEST(Cast, Decimal128ToDecimal64) { + CastOptions options; + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + auto no_truncation = ArrayFromJSON(decimal128(26, 10), R"([ + "02.0000000000", + "30.0000000000", + "22.0000000000", + "-121.0000000000", + null])"); + auto expected = ArrayFromJSON(decimal64(15, 0), R"([ + "02.", + "30.", + "22.", + "-121.", + null])"); + + CheckCast(no_truncation, expected, options); + } + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + // Same scale, different precision + auto d_28_2 = ArrayFromJSON(decimal128(28, 2), R"([ + "12.34", + "0.56"])"); + auto d_4_2 = ArrayFromJSON(decimal64(4, 2), R"([ + "12.34", + "0.56"])"); + + CheckCast(d_28_2, d_4_2, options); + } + + auto d128_28_10 = ArrayFromJSON(decimal128(28, 10), R"([ + "-02.1234567890", + "30.1234567890", + null])"); + + auto d128_22_0 = ArrayFromJSON(decimal128(22, 0), R"([ + "-02.", + "30.", + null])"); + + auto d64_12_0 = ArrayFromJSON(decimal64(12, 0), R"([ + "-02.", + "30.", + null])"); + + auto d64_18_10_roundtripped = 
ArrayFromJSON(decimal64(18, 10), R"([ + "-02.0000000000", + "30.0000000000", + null])"); + + // Rescale which leads to truncation + options.allow_decimal_truncate = true; + CheckCast(d128_28_10, d64_12_0, options); + CheckCast(d128_22_0, d64_18_10_roundtripped, options); + + options.allow_decimal_truncate = false; + options.to_type = d64_12_0->type(); + CheckCastFails(d128_28_10, options); + CheckCast(d128_22_0, d64_18_10_roundtripped, options); + + // Precision loss without rescale leads to truncation + auto d128_4_2 = ArrayFromJSON(decimal128(4, 2), R"(["12.34"])"); + for (auto expected : { + ArrayFromJSON(decimal64(3, 2), R"(["12.34"])"), + ArrayFromJSON(decimal64(4, 3), R"(["12.340"])"), + ArrayFromJSON(decimal64(2, 1), R"(["12.3"])"), + }) { + options.allow_decimal_truncate = true; + ASSERT_OK_AND_ASSIGN(auto invalid, Cast(d128_4_2, expected->type(), options)); + ASSERT_RAISES(Invalid, invalid.make_array()->ValidateFull()); + + options.allow_decimal_truncate = false; + options.to_type = expected->type(); + CheckCastFails(d128_4_2, options); + } +} + +TEST(Cast, Decimal128ToDecimal256) { + CastOptions options; + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + auto no_truncation = ArrayFromJSON(decimal128(38, 10), R"([ + "02.0000000000", + "30.0000000000", + "22.0000000000", + "-121.0000000000", + null])"); + auto expected = ArrayFromJSON(decimal256(48, 0), R"([ + "02.", + "30.", + "22.", + "-121.", + null])"); + + CheckCast(no_truncation, expected, options); + } + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + // Same scale, different precision + auto d_5_2 = ArrayFromJSON(decimal128(5, 2), R"([ + "12.34", + "0.56"])"); + auto d_4_2 = ArrayFromJSON(decimal256(4, 2), R"([ + "12.34", + "0.56"])"); + auto d_40_2 = ArrayFromJSON(decimal256(40, 2), R"([ + "12.34", + "0.56"])"); + + CheckCast(d_5_2, d_4_2, options); + 
CheckCast(d_5_2, d_40_2, options); + } + + auto d128_38_10 = ArrayFromJSON(decimal128(38, 10), R"([ + "-02.1234567890", + "30.1234567890", + null])"); + + auto d128_28_0 = ArrayFromJSON(decimal128(28, 0), R"([ + "-02.", + "30.", + null])"); + + auto d256_28_0 = ArrayFromJSON(decimal256(28, 0), R"([ + "-02.", + "30.", + null])"); + + auto d256_38_10_roundtripped = ArrayFromJSON(decimal256(38, 10), R"([ + "-02.0000000000", + "30.0000000000", + null])"); + + // Rescale which leads to truncation + options.allow_decimal_truncate = true; + CheckCast(d128_38_10, d256_28_0, options); + CheckCast(d128_28_0, d256_38_10_roundtripped, options); + + options.allow_decimal_truncate = false; + options.to_type = d256_28_0->type(); CheckCastFails(d128_38_10, options); CheckCast(d128_28_0, d256_38_10_roundtripped, options); @@ -907,6 +1913,172 @@ TEST(Cast, Decimal128ToDecimal256) { } } +TEST(Cast, Decimal256ToDecimal32) { + CastOptions options; + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + auto no_truncation = ArrayFromJSON(decimal256(42, 5), R"([ + "02.00000", + "30.00000", + "22.00000", + "-121.00000", + null])"); + auto expected = ArrayFromJSON(decimal32(9, 0), R"([ + "02.", + "30.", + "22.", + "-121.", + null])"); + + CheckCast(no_truncation, expected, options); + } + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + // Same scale, different precision + auto d_28_2 = ArrayFromJSON(decimal256(42, 2), R"([ + "12.34", + "0.56"])"); + auto d_4_2 = ArrayFromJSON(decimal32(4, 2), R"([ + "12.34", + "0.56"])"); + + CheckCast(d_28_2, d_4_2, options); + } + + auto d256_52_5 = ArrayFromJSON(decimal256(52, 5), R"([ + "-02.12345", + "30.12345", + null])"); + + auto d256_42_0 = ArrayFromJSON(decimal256(42, 0), R"([ + "-02.", + "30.", + null])"); + + auto d32_7_0 = ArrayFromJSON(decimal32(7, 0), R"([ + "-02.", + "30.", + null])"); + + auto 
d32_9_5_roundtripped = ArrayFromJSON(decimal32(9, 5), R"([ + "-02.00000", + "30.00000", + null])"); + + // Rescale which leads to truncation + options.allow_decimal_truncate = true; + CheckCast(d256_52_5, d32_7_0, options); + CheckCast(d256_42_0, d32_9_5_roundtripped, options); + + options.allow_decimal_truncate = false; + options.to_type = d32_7_0->type(); + CheckCastFails(d256_52_5, options); + CheckCast(d256_42_0, d32_9_5_roundtripped, options); + + // Precision loss without rescale leads to truncation + auto d256_4_2 = ArrayFromJSON(decimal256(4, 2), R"(["12.34"])"); + for (auto expected : { + ArrayFromJSON(decimal32(3, 2), R"(["12.34"])"), + ArrayFromJSON(decimal32(4, 3), R"(["12.340"])"), + ArrayFromJSON(decimal32(2, 1), R"(["12.3"])"), + }) { + options.allow_decimal_truncate = true; + ASSERT_OK_AND_ASSIGN(auto invalid, Cast(d256_4_2, expected->type(), options)); + ASSERT_RAISES(Invalid, invalid.make_array()->ValidateFull()); + + options.allow_decimal_truncate = false; + options.to_type = expected->type(); + CheckCastFails(d256_4_2, options); + } +} + +TEST(Cast, Decimal256ToDecimal64) { + CastOptions options; + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + auto no_truncation = ArrayFromJSON(decimal256(42, 10), R"([ + "02.0000000000", + "30.0000000000", + "22.0000000000", + "-121.0000000000", + null])"); + auto expected = ArrayFromJSON(decimal64(15, 0), R"([ + "02.", + "30.", + "22.", + "-121.", + null])"); + + CheckCast(no_truncation, expected, options); + } + + for (bool allow_decimal_truncate : {false, true}) { + options.allow_decimal_truncate = allow_decimal_truncate; + + // Same scale, different precision + auto d_42_2 = ArrayFromJSON(decimal256(42, 2), R"([ + "12.34", + "0.56"])"); + auto d_4_2 = ArrayFromJSON(decimal64(4, 2), R"([ + "12.34", + "0.56"])"); + + CheckCast(d_42_2, d_4_2, options); + } + + auto d256_52_10 = ArrayFromJSON(decimal256(52, 10), R"([ + "-02.1234567890", + 
"30.1234567890", + null])"); + + auto d256_42_0 = ArrayFromJSON(decimal256(42, 0), R"([ + "-02.", + "30.", + null])"); + + auto d64_12_0 = ArrayFromJSON(decimal64(12, 0), R"([ + "-02.", + "30.", + null])"); + + auto d64_18_10_roundtripped = ArrayFromJSON(decimal64(18, 10), R"([ + "-02.0000000000", + "30.0000000000", + null])"); + + // Rescale which leads to truncation + options.allow_decimal_truncate = true; + CheckCast(d256_52_10, d64_12_0, options); + CheckCast(d256_42_0, d64_18_10_roundtripped, options); + + options.allow_decimal_truncate = false; + options.to_type = d64_12_0->type(); + CheckCastFails(d256_52_10, options); + CheckCast(d256_42_0, d64_18_10_roundtripped, options); + + // Precision loss without rescale leads to truncation + auto d256_4_2 = ArrayFromJSON(decimal256(4, 2), R"(["12.34"])"); + for (auto expected : { + ArrayFromJSON(decimal64(3, 2), R"(["12.34"])"), + ArrayFromJSON(decimal64(4, 3), R"(["12.340"])"), + ArrayFromJSON(decimal64(2, 1), R"(["12.3"])"), + }) { + options.allow_decimal_truncate = true; + ASSERT_OK_AND_ASSIGN(auto invalid, Cast(d256_4_2, expected->type(), options)); + ASSERT_RAISES(Invalid, invalid.make_array()->ValidateFull()); + + options.allow_decimal_truncate = false; + options.to_type = expected->type(); + CheckCastFails(d256_4_2, options); + } +} + TEST(Cast, Decimal256ToDecimal128) { CastOptions options; @@ -992,7 +2164,8 @@ TEST(Cast, Decimal256ToDecimal128) { TEST(Cast, FloatingToDecimal) { for (auto float_type : {float32(), float64()}) { - for (auto decimal_type : {decimal128(5, 2), decimal256(5, 2)}) { + for (auto decimal_type : + {decimal32(5, 2), decimal64(5, 2), decimal128(5, 2), decimal256(5, 2)}) { CheckCast( ArrayFromJSON(float_type, "[0.0, null, 123.45, 123.456, 999.994]"), ArrayFromJSON(decimal_type, R"(["0.00", null, "123.45", "123.46", "999.99"])")); @@ -1036,7 +2209,8 @@ TEST(Cast, FloatingToDecimal) { TEST(Cast, DecimalToFloating) { for (auto float_type : {float32(), float64()}) { - for (auto decimal_type 
: {decimal128(5, 2), decimal256(5, 2)}) { + for (auto decimal_type : + {decimal32(5, 2), decimal64(5, 2), decimal128(5, 2), decimal256(5, 2)}) { CheckCast(ArrayFromJSON(decimal_type, R"(["0.00", null, "123.45", "999.99"])"), ArrayFromJSON(float_type, "[0.0, null, 123.45, 999.99]")); } @@ -1048,7 +2222,8 @@ TEST(Cast, DecimalToFloating) { TEST(Cast, DecimalToString) { for (auto string_type : {utf8(), utf8_view(), large_utf8()}) { - for (auto decimal_type : {decimal128(5, 2), decimal256(5, 2)}) { + for (auto decimal_type : + {decimal32(5, 2), decimal64(5, 2), decimal128(5, 2), decimal256(5, 2)}) { CheckCast(ArrayFromJSON(decimal_type, R"(["0.00", null, "123.45", "999.99"])"), ArrayFromJSON(string_type, R"(["0.00", null, "123.45", "999.99"])")); } @@ -1960,7 +3135,8 @@ TEST(Cast, StringToFloating) { TEST(Cast, StringToDecimal) { for (auto string_type : {utf8(), large_utf8()}) { - for (auto decimal_type : {decimal128(5, 2), decimal256(5, 2)}) { + for (auto decimal_type : + {decimal32(5, 2), decimal64(5, 2), decimal128(5, 2), decimal256(5, 2)}) { auto strings = ArrayFromJSON(string_type, R"(["0.01", null, "127.32", "200.43", "0.54"])"); auto decimals = diff --git a/cpp/src/arrow/compute/light_array_internal.h b/cpp/src/arrow/compute/light_array_internal.h index 5adb06e540009..60f1a6a21e264 100644 --- a/cpp/src/arrow/compute/light_array_internal.h +++ b/cpp/src/arrow/compute/light_array_internal.h @@ -319,6 +319,9 @@ class ARROW_EXPORT ResizableArrayData { /// \brief The current length (in rows) of the array int num_rows() const { return num_rows_; } + /// \brief The current allocated length (in rows) of the array + int num_rows_allocated() const { return num_rows_allocated_; } + /// \brief A non-owning view into this array KeyColumnArray column_array() const; @@ -347,6 +350,11 @@ class ARROW_EXPORT ResizableArrayData { /// length binary data uint8_t* mutable_data(int i) { return buffers_[i]->mutable_data(); } + template + T* mutable_data_as(int i) { + return 
reinterpret_cast(mutable_data(i)); + } + private: static constexpr int64_t kNumPaddingBytes = 64; int log_num_rows_min_; diff --git a/cpp/src/arrow/csv/writer.cc b/cpp/src/arrow/csv/writer.cc index 4b5252076af53..5513007aff627 100644 --- a/cpp/src/arrow/csv/writer.cc +++ b/cpp/src/arrow/csv/writer.cc @@ -22,7 +22,6 @@ #include "arrow/ipc/writer.h" #include "arrow/record_batch.h" #include "arrow/result.h" -#include "arrow/result_internal.h" #include "arrow/stl_allocator.h" #include "arrow/util/iterator.h" #include "arrow/util/logging.h" @@ -129,15 +128,15 @@ class ColumnPopulator { // threading overhead would not be justified. ctx.set_use_threads(false); if (data.type() && is_large_binary_like(data.type()->id())) { - ASSIGN_OR_RAISE(array_, compute::Cast(data, /*to_type=*/large_utf8(), - compute::CastOptions(), &ctx)); + ARROW_ASSIGN_OR_RAISE(array_, compute::Cast(data, /*to_type=*/large_utf8(), + compute::CastOptions(), &ctx)); } else { auto casted = compute::Cast(data, /*to_type=*/utf8(), compute::CastOptions(), &ctx); if (casted.ok()) { array_ = std::move(casted).ValueOrDie(); } else if (casted.status().IsCapacityError()) { - ASSIGN_OR_RAISE(array_, compute::Cast(data, /*to_type=*/large_utf8(), - compute::CastOptions(), &ctx)); + ARROW_ASSIGN_OR_RAISE(array_, compute::Cast(data, /*to_type=*/large_utf8(), + compute::CastOptions(), &ctx)); } else { return casted.status(); } @@ -501,8 +500,8 @@ class CSVWriterImpl : public ipc::RecordBatchWriter { return Status::Invalid("Null string cannot contain quotes."); } - ASSIGN_OR_RAISE(std::shared_ptr null_string, - arrow::AllocateBuffer(options.null_string.length())); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr null_string, + arrow::AllocateBuffer(options.null_string.length())); memcpy(null_string->mutable_data(), options.null_string.data(), options.null_string.length()); @@ -511,7 +510,7 @@ class CSVWriterImpl : public ipc::RecordBatchWriter { for (int col = 0; col < schema->num_fields(); col++) { const std::string& end_chars 
= col < schema->num_fields() - 1 ? delimiter : options.eol; - ASSIGN_OR_RAISE( + ARROW_ASSIGN_OR_RAISE( populators[col], MakePopulator(*schema->field(col), end_chars, options.delimiter, null_string, options.quoting_style, options.io_context.pool())); @@ -528,7 +527,7 @@ class CSVWriterImpl : public ipc::RecordBatchWriter { Status WriteRecordBatch(const RecordBatch& batch) override { RecordBatchIterator iterator = RecordBatchSliceIterator(batch, options_.batch_size); for (auto maybe_slice : iterator) { - ASSIGN_OR_RAISE(std::shared_ptr slice, maybe_slice); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr slice, maybe_slice); RETURN_NOT_OK(TranslateMinimalBatch(*slice)); RETURN_NOT_OK(sink_->Write(data_buffer_)); stats_.num_record_batches++; @@ -570,10 +569,11 @@ class CSVWriterImpl : public ipc::RecordBatchWriter { Status PrepareForContentsWrite() { // Only called once, as part of initialization if (data_buffer_ == nullptr) { - ASSIGN_OR_RAISE(data_buffer_, - AllocateResizableBuffer( - options_.batch_size * schema_->num_fields() * kColumnSizeGuess, - options_.io_context.pool())); + ARROW_ASSIGN_OR_RAISE( + data_buffer_, + AllocateResizableBuffer( + options_.batch_size * schema_->num_fields() * kColumnSizeGuess, + options_.io_context.pool())); } return Status::OK(); } @@ -665,24 +665,24 @@ class CSVWriterImpl : public ipc::RecordBatchWriter { Status WriteCSV(const Table& table, const WriteOptions& options, arrow::io::OutputStream* output) { - ASSIGN_OR_RAISE(auto writer, MakeCSVWriter(output, table.schema(), options)); + ARROW_ASSIGN_OR_RAISE(auto writer, MakeCSVWriter(output, table.schema(), options)); RETURN_NOT_OK(writer->WriteTable(table)); return writer->Close(); } Status WriteCSV(const RecordBatch& batch, const WriteOptions& options, arrow::io::OutputStream* output) { - ASSIGN_OR_RAISE(auto writer, MakeCSVWriter(output, batch.schema(), options)); + ARROW_ASSIGN_OR_RAISE(auto writer, MakeCSVWriter(output, batch.schema(), options)); 
RETURN_NOT_OK(writer->WriteRecordBatch(batch)); return writer->Close(); } Status WriteCSV(const std::shared_ptr& reader, const WriteOptions& options, arrow::io::OutputStream* output) { - ASSIGN_OR_RAISE(auto writer, MakeCSVWriter(output, reader->schema(), options)); + ARROW_ASSIGN_OR_RAISE(auto writer, MakeCSVWriter(output, reader->schema(), options)); std::shared_ptr batch; while (true) { - ASSIGN_OR_RAISE(batch, reader->Next()); + ARROW_ASSIGN_OR_RAISE(batch, reader->Next()); if (batch == nullptr) break; RETURN_NOT_OK(writer->WriteRecordBatch(*batch)); } diff --git a/cpp/src/arrow/csv/writer_test.cc b/cpp/src/arrow/csv/writer_test.cc index 703179da94093..4fccf4ddbbb48 100644 --- a/cpp/src/arrow/csv/writer_test.cc +++ b/cpp/src/arrow/csv/writer_test.cc @@ -27,7 +27,7 @@ #include "arrow/io/memory.h" #include "arrow/ipc/writer.h" #include "arrow/record_batch.h" -#include "arrow/result_internal.h" +#include "arrow/result.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" #include "arrow/type.h" @@ -287,19 +287,19 @@ class TestWriteCSV : public ::testing::TestWithParam { template Result ToCsvString(const Data& data, const WriteOptions& options) { std::shared_ptr out; - ASSIGN_OR_RAISE(out, io::BufferOutputStream::Create()); + ARROW_ASSIGN_OR_RAISE(out, io::BufferOutputStream::Create()); RETURN_NOT_OK(WriteCSV(data, options, out.get())); - ASSIGN_OR_RAISE(std::shared_ptr buffer, out->Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr buffer, out->Finish()); return std::string(reinterpret_cast(buffer->data()), buffer->size()); } Result ToCsvStringUsingWriter(const Table& data, const WriteOptions& options) { std::shared_ptr out; - ASSIGN_OR_RAISE(out, io::BufferOutputStream::Create()); + ARROW_ASSIGN_OR_RAISE(out, io::BufferOutputStream::Create()); // Write row-by-row - ASSIGN_OR_RAISE(auto writer, MakeCSVWriter(out, data.schema(), options)); + ARROW_ASSIGN_OR_RAISE(auto writer, MakeCSVWriter(out, data.schema(), options)); TableBatchReader 
reader(data); reader.set_chunksize(1); std::shared_ptr batch; @@ -310,7 +310,7 @@ class TestWriteCSV : public ::testing::TestWithParam { } RETURN_NOT_OK(writer->Close()); EXPECT_EQ(data.num_rows(), writer->stats().num_record_batches); - ASSIGN_OR_RAISE(std::shared_ptr buffer, out->Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr buffer, out->Finish()); return std::string(reinterpret_cast(buffer->data()), buffer->size()); } }; diff --git a/cpp/src/arrow/dataset/ArrowDatasetConfig.cmake.in b/cpp/src/arrow/dataset/ArrowDatasetConfig.cmake.in index 66b0302cbca80..4573ac3718557 100644 --- a/cpp/src/arrow/dataset/ArrowDatasetConfig.cmake.in +++ b/cpp/src/arrow/dataset/ArrowDatasetConfig.cmake.in @@ -26,10 +26,12 @@ @PACKAGE_INIT@ +set(ARROW_DATASET_REQUIRED_DEPENDENCIES "@ARROW_DATASET_REQUIRED_DEPENDENCIES@") + include(CMakeFindDependencyMacro) -find_dependency(Arrow) -find_dependency(ArrowAcero) -find_dependency(Parquet) +foreach(dependency ${ARROW_DATASET_REQUIRED_DEPENDENCIES}) + find_dependency(${dependency}) +endforeach() include("${CMAKE_CURRENT_LIST_DIR}/ArrowDatasetTargets.cmake") diff --git a/cpp/src/arrow/dataset/CMakeLists.txt b/cpp/src/arrow/dataset/CMakeLists.txt index e48bcfaf65bcb..bdb89ee8914f8 100644 --- a/cpp/src/arrow/dataset/CMakeLists.txt +++ b/cpp/src/arrow/dataset/CMakeLists.txt @@ -32,8 +32,10 @@ set(ARROW_DATASET_SRCS scan_node.cc) set(ARROW_DATASET_PKG_CONFIG_REQUIRES "arrow-acero") +set(ARROW_DATASET_REQUIRED_DEPENDENCIES Arrow ArrowAcero) if(ARROW_PARQUET) string(APPEND ARROW_DATASET_PKG_CONFIG_REQUIRES " parquet") + list(APPEND ARROW_DATASET_REQUIRED_DEPENDENCIES Parquet) endif() set(ARROW_DATASET_STATIC_LINK_LIBS) diff --git a/cpp/src/arrow/engine/substrait/expression_internal.h b/cpp/src/arrow/engine/substrait/expression_internal.h index 2ce2ee76af20b..a9f8949c23249 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.h +++ b/cpp/src/arrow/engine/substrait/expression_internal.h @@ -27,8 +27,12 @@ #include 
"arrow/engine/substrait/type_fwd.h" #include "arrow/engine/substrait/visibility.h" #include "arrow/result.h" +#include "arrow/util/macros.h" +// GH-44954: silence [[deprecated]] declarations in protobuf-generated code +ARROW_SUPPRESS_DEPRECATION_WARNING #include "substrait/algebra.pb.h" // IWYU pragma: export +ARROW_UNSUPPRESS_DEPRECATION_WARNING namespace arrow { namespace engine { diff --git a/cpp/src/arrow/engine/substrait/extended_expression_internal.h b/cpp/src/arrow/engine/substrait/extended_expression_internal.h index 81bc4b8745186..45f89c8610b5a 100644 --- a/cpp/src/arrow/engine/substrait/extended_expression_internal.h +++ b/cpp/src/arrow/engine/substrait/extended_expression_internal.h @@ -28,8 +28,12 @@ #include "arrow/engine/substrait/visibility.h" #include "arrow/result.h" #include "arrow/status.h" +#include "arrow/util/macros.h" +// GH-44954: silence [[deprecated]] declarations in protobuf-generated code +ARROW_SUPPRESS_DEPRECATION_WARNING #include "substrait/extended_expression.pb.h" // IWYU pragma: export +ARROW_UNSUPPRESS_DEPRECATION_WARNING namespace arrow { namespace engine { diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index cefe53d2847ca..ac25eba684bca 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -1071,7 +1071,8 @@ struct DefaultExtensionIdRegistry : ExtensionIdRegistryImpl { // Mappings either without a _checked variant or substrait has no overflow option for (const auto& function_name : - {"exp", "sign", "cos", "sin", "tan", "acos", "asin", "atan", "atan2"}) { + {"exp", "sign", "cos", "cosh", "sin", "sinh", "tan", "tanh", "acos", "acosh", + "asin", "asinh", "atan", "atanh", "atan2"}) { DCHECK_OK( AddSubstraitCallToArrow({kSubstraitArithmeticFunctionsUri, function_name}, DecodeOptionlessUncheckedArithmetic(function_name))); @@ -1207,7 +1208,13 @@ struct DefaultExtensionIdRegistry : ExtensionIdRegistryImpl { 
{kSubstraitArithmeticFunctionsUri, "acos"}, {kSubstraitArithmeticFunctionsUri, "asin"}, {kSubstraitArithmeticFunctionsUri, "atan"}, - {kSubstraitArithmeticFunctionsUri, "atan2"}}) { + {kSubstraitArithmeticFunctionsUri, "atan2"}, + {kSubstraitArithmeticFunctionsUri, "cosh"}, + {kSubstraitArithmeticFunctionsUri, "sinh"}, + {kSubstraitArithmeticFunctionsUri, "tanh"}, + {kSubstraitArithmeticFunctionsUri, "acosh"}, + {kSubstraitArithmeticFunctionsUri, "asinh"}, + {kSubstraitArithmeticFunctionsUri, "atanh"}}) { Id fn_id{fn_pair.first, fn_pair.second}; DCHECK_OK(AddArrowToSubstraitCall(std::string(fn_pair.second), EncodeBasic(fn_id))); } diff --git a/cpp/src/arrow/engine/substrait/plan_internal.h b/cpp/src/arrow/engine/substrait/plan_internal.h index 737e65b7e2e74..f2e7ded5f01e9 100644 --- a/cpp/src/arrow/engine/substrait/plan_internal.h +++ b/cpp/src/arrow/engine/substrait/plan_internal.h @@ -27,8 +27,12 @@ #include "arrow/engine/substrait/visibility.h" #include "arrow/result.h" #include "arrow/status.h" +#include "arrow/util/macros.h" +// GH-44954: silence [[deprecated]] declarations in protobuf-generated code +ARROW_SUPPRESS_DEPRECATION_WARNING #include "substrait/plan.pb.h" // IWYU pragma: export +ARROW_UNSUPPRESS_DEPRECATION_WARNING namespace arrow { namespace engine { diff --git a/cpp/src/arrow/engine/substrait/relation_internal.h b/cpp/src/arrow/engine/substrait/relation_internal.h index a436f1770d78b..2a96d0024e693 100644 --- a/cpp/src/arrow/engine/substrait/relation_internal.h +++ b/cpp/src/arrow/engine/substrait/relation_internal.h @@ -28,8 +28,12 @@ #include "arrow/engine/substrait/type_fwd.h" #include "arrow/engine/substrait/visibility.h" #include "arrow/result.h" +#include "arrow/util/macros.h" +// GH-44954: silence [[deprecated]] declarations in protobuf-generated code +ARROW_SUPPRESS_DEPRECATION_WARNING #include "substrait/algebra.pb.h" // IWYU pragma: export +ARROW_UNSUPPRESS_DEPRECATION_WARNING namespace arrow { namespace engine { diff --git 
a/cpp/src/arrow/engine/substrait/test_plan_builder.cc b/cpp/src/arrow/engine/substrait/test_plan_builder.cc index f38f7ece9a751..724c58277e725 100644 --- a/cpp/src/arrow/engine/substrait/test_plan_builder.cc +++ b/cpp/src/arrow/engine/substrait/test_plan_builder.cc @@ -31,8 +31,12 @@ #include "arrow/status.h" #include "arrow/table.h" #include "arrow/type_fwd.h" +#include "arrow/util/macros.h" -#include "substrait/algebra.pb.h" +// GH-44954: silence [[deprecated]] declarations in protobuf-generated code +ARROW_SUPPRESS_DEPRECATION_WARNING +#include "substrait/algebra.pb.h" // IWYU pragma: export +ARROW_UNSUPPRESS_DEPRECATION_WARNING namespace arrow { namespace engine { diff --git a/cpp/src/arrow/engine/substrait/util_internal.h b/cpp/src/arrow/engine/substrait/util_internal.h index 627ad1126df6e..d812bbf7b85ff 100644 --- a/cpp/src/arrow/engine/substrait/util_internal.h +++ b/cpp/src/arrow/engine/substrait/util_internal.h @@ -24,10 +24,14 @@ #include "arrow/engine/substrait/visibility.h" #include "arrow/result.h" #include "arrow/util/hashing.h" +#include "arrow/util/macros.h" #include "arrow/util/unreachable.h" +// GH-44954: silence [[deprecated]] declarations in protobuf-generated code +ARROW_SUPPRESS_DEPRECATION_WARNING #include "substrait/algebra.pb.h" // IWYU pragma: export #include "substrait/plan.pb.h" // IWYU pragma: export +ARROW_UNSUPPRESS_DEPRECATION_WARNING namespace arrow { namespace engine { diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 78f4ad1edd9a9..4638bb12c783c 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -106,6 +106,18 @@ Status AzureOptions::ExtractFromUriQuery(const Uri& uri) { std::string tenant_id; std::string client_id; std::string client_secret; + + // These query parameters are the union of the following docs: + // https://learn.microsoft.com/en-us/rest/api/storageservices/create-account-sas#specify-the-account-sas-parameters + // 
https://learn.microsoft.com/en-us/rest/api/storageservices/create-service-sas#construct-a-service-sas + // (excluding parameters for table storage only) + // https://learn.microsoft.com/en-us/rest/api/storageservices/create-user-delegation-sas#construct-a-user-delegation-sas + static const std::set sas_token_query_parameters = { + "sv", "ss", "sr", "st", "se", "sp", "si", "sip", "spr", + "skoid", "sktid", "srt", "skt", "ske", "skv", "sks", "saoid", "suoid", + "scid", "sdd", "ses", "sig", "rscc", "rscd", "rsce", "rscl", "rsct", + }; + ARROW_ASSIGN_OR_RAISE(const auto options_items, uri.query_items()); for (const auto& kv : options_items) { if (kv.first == "blob_storage_authority") { @@ -147,6 +159,9 @@ Status AzureOptions::ExtractFromUriQuery(const Uri& uri) { } else if (kv.first == "background_writes") { ARROW_ASSIGN_OR_RAISE(background_writes, ::arrow::internal::ParseBoolean(kv.second)); + } else if (sas_token_query_parameters.find(kv.first) != + sas_token_query_parameters.end()) { + credential_kind = CredentialKind::kSASToken; } else { return Status::Invalid( "Unexpected query parameter in Azure Blob File System URI: '", kv.first, "'"); @@ -180,6 +195,13 @@ Status AzureOptions::ExtractFromUriQuery(const Uri& uri) { case CredentialKind::kEnvironment: RETURN_NOT_OK(ConfigureEnvironmentCredential()); break; + case CredentialKind::kSASToken: + // Reconstructing the SAS token without the other URI query parameters is awkward + // because some parts are URI escaped and some parts are not. Instead we just + // pass through the entire query string and Azure ignores the extra query + // parameters. + RETURN_NOT_OK(ConfigureSASCredential("?" + uri.query_string())); + break; default: // Default credential break; @@ -225,7 +247,6 @@ Result AzureOptions::FromUri(const std::string& uri_string, } bool AzureOptions::Equals(const AzureOptions& other) const { - // TODO(GH-38598): update here when more auth methods are added. 
const bool equals = blob_storage_authority == other.blob_storage_authority && dfs_storage_authority == other.dfs_storage_authority && blob_storage_scheme == other.blob_storage_scheme && @@ -243,6 +264,8 @@ bool AzureOptions::Equals(const AzureOptions& other) const { case CredentialKind::kStorageSharedKey: return storage_shared_key_credential_->AccountName == other.storage_shared_key_credential_->AccountName; + case CredentialKind::kSASToken: + return sas_token_ == other.sas_token_; case CredentialKind::kClientSecret: case CredentialKind::kCLI: case CredentialKind::kManagedIdentity: @@ -311,6 +334,15 @@ Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_ke return Status::OK(); } +Status AzureOptions::ConfigureSASCredential(const std::string& sas_token) { + credential_kind_ = CredentialKind::kSASToken; + if (account_name.empty()) { + return Status::Invalid("AzureOptions doesn't contain a valid account name"); + } + sas_token_ = sas_token; + return Status::OK(); +} + Status AzureOptions::ConfigureClientSecretCredential(const std::string& tenant_id, const std::string& client_id, const std::string& client_secret) { @@ -372,6 +404,9 @@ Result> AzureOptions::MakeBlobServiceC case CredentialKind::kStorageSharedKey: return std::make_unique(AccountBlobUrl(account_name), storage_shared_key_credential_); + case CredentialKind::kSASToken: + return std::make_unique(AccountBlobUrl(account_name) + + sas_token_); } return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); } @@ -404,29 +439,13 @@ AzureOptions::MakeDataLakeServiceClient() const { case CredentialKind::kStorageSharedKey: return std::make_unique( AccountDfsUrl(account_name), storage_shared_key_credential_); + case CredentialKind::kSASToken: + return std::make_unique( + AccountBlobUrl(account_name) + sas_token_); } return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); } -Result AzureOptions::GenerateSASToken( - Storage::Sas::BlobSasBuilder* 
builder, Blobs::BlobServiceClient* client) const { - using SasProtocol = Storage::Sas::SasProtocol; - builder->Protocol = - blob_storage_scheme == "http" ? SasProtocol::HttpsAndHttp : SasProtocol::HttpsOnly; - if (storage_shared_key_credential_) { - return builder->GenerateSasToken(*storage_shared_key_credential_); - } else { - // GH-39344: This part isn't tested. This may not work. - try { - auto delegation_key_response = client->GetUserDelegationKey(builder->ExpiresOn); - return builder->GenerateSasToken(delegation_key_response.Value, account_name); - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, "GetUserDelegationKey failed for '", - client->GetUrl(), "'."); - } - } -} - namespace { // An AzureFileSystem represents an Azure storage account. An AzureLocation describes a @@ -3161,19 +3180,7 @@ class AzureFileSystem::Impl { if (src == dest) { return Status::OK(); } - std::string sas_token; - { - Storage::Sas::BlobSasBuilder builder; - std::chrono::seconds available_period(60); - builder.ExpiresOn = std::chrono::system_clock::now() + available_period; - builder.BlobContainerName = src.container; - builder.BlobName = src.path; - builder.Resource = Storage::Sas::BlobSasResource::Blob; - builder.SetPermissions(Storage::Sas::BlobSasPermissions::Read); - ARROW_ASSIGN_OR_RAISE( - sas_token, options_.GenerateSASToken(&builder, blob_service_client_.get())); - } - auto src_url = GetBlobClient(src.container, src.path).GetUrl() + sas_token; + auto src_url = GetBlobClient(src.container, src.path).GetUrl(); auto dest_blob_client = GetBlobClient(dest.container, dest.path); if (!dest.path.empty()) { auto dest_parent = dest.parent(); @@ -3186,9 +3193,21 @@ class AzureFileSystem::Impl { } } try { - dest_blob_client.CopyFromUri(src_url); + // We use StartCopyFromUri instead of CopyFromUri because it supports blobs larger + // than 256 MiB and it doesn't require generating a SAS token to authenticate + // reading a source blob in the same 
storage account. + auto copy_operation = dest_blob_client.StartCopyFromUri(src_url); + // For large blobs, the copy operation may be slow so we need to poll until it + // completes. We use a polling interval of 1 second. + copy_operation.PollUntilDone(std::chrono::milliseconds(1000)); } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, "Failed to copy a blob. (", src_url, " -> ", + // StartCopyFromUri failed or a GetProperties call inside PollUntilDone failed. + return ExceptionToStatus( + exception, "Failed to start blob copy or poll status of ongoing copy. (", + src_url, " -> ", dest_blob_client.GetUrl(), ")"); + } catch (const Azure::Core::RequestFailedException& exception) { + // A GetProperties call inside PollUntilDone returned a failed CopyStatus. + return ExceptionToStatus(exception, "Failed to copy blob. (", src_url, " -> ", dest_blob_client.GetUrl(), ")"); } return Status::OK(); diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index c5e5091256959..ee0956afdd7a9 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -37,10 +37,6 @@ namespace Azure::Storage::Blobs { class BlobServiceClient; } -namespace Azure::Storage::Sas { -struct BlobSasBuilder; -} - namespace Azure::Storage::Files::DataLake { class DataLakeFileSystemClient; class DataLakeServiceClient; @@ -120,6 +116,7 @@ struct ARROW_EXPORT AzureOptions { kDefault, kAnonymous, kStorageSharedKey, + kSASToken, kClientSecret, kManagedIdentity, kCLI, @@ -129,6 +126,7 @@ struct ARROW_EXPORT AzureOptions { std::shared_ptr storage_shared_key_credential_; + std::string sas_token_; mutable std::shared_ptr token_credential_; public: @@ -180,6 +178,9 @@ struct ARROW_EXPORT AzureOptions { /// AzureOptions::ConfigureClientSecretCredential() is called. /// * client_secret: You must specify "tenant_id" and "client_id" /// too. AzureOptions::ConfigureClientSecretCredential() is called. 
+ /// * A SAS token is made up of several query parameters. Appending a SAS + /// token to the URI configures SAS token auth by calling + /// AzureOptions::ConfigureSASCredential(). /// /// [1]: /// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri @@ -189,6 +190,7 @@ struct ARROW_EXPORT AzureOptions { Status ConfigureDefaultCredential(); Status ConfigureAnonymousCredential(); Status ConfigureAccountKeyCredential(const std::string& account_key); + Status ConfigureSASCredential(const std::string& sas_token); Status ConfigureClientSecretCredential(const std::string& tenant_id, const std::string& client_id, const std::string& client_secret); @@ -207,10 +209,6 @@ struct ARROW_EXPORT AzureOptions { Result> MakeDataLakeServiceClient() const; - - Result GenerateSASToken( - Azure::Storage::Sas::BlobSasBuilder* builder, - Azure::Storage::Blobs::BlobServiceClient* client) const; }; /// \brief FileSystem implementation backed by Azure Blob Storage (ABS) [1] and diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index a04977bdee076..7c1d450051901 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -387,6 +387,30 @@ class TestGeneric : public ::testing::Test, public GenericFileSystemTest { // builddir/main/../../threads.c:580:10 #2 0x7fa914b1cd1e in xmlGetGlobalState // builddir/main/../../threads.c:666:31 bool have_false_positive_memory_leak_with_generator() const override { return true; } + // This false positive leak is similar to the one pinpointed in the + // have_false_positive_memory_leak_with_generator() comments above, + // though the stack trace is different. It happens when a block list + // is committed from a background thread. 
+ // + // clang-format off + // Direct leak of 968 byte(s) in 1 object(s) allocated from: + // #0 calloc + // #1 (/lib/x86_64-linux-gnu/libxml2.so.2+0xe25a4) + // #2 __xmlDefaultBufferSize + // #3 xmlBufferCreate + // #4 Azure::Storage::_internal::XmlWriter::XmlWriter() + // #5 Azure::Storage::Blobs::_detail::BlockBlobClient::CommitBlockList + // #6 Azure::Storage::Blobs::BlockBlobClient::CommitBlockList + // #7 arrow::fs::(anonymous namespace)::CommitBlockList + // #8 arrow::fs::(anonymous namespace)::ObjectAppendStream::FlushAsync()::'lambda' + // clang-format on + // + // TODO perhaps remove this skip once we can rely on + // https://github.com/Azure/azure-sdk-for-cpp/pull/5767 + // + // Also note that ClickHouse has a workaround for a similar issue: + // https://github.com/ClickHouse/ClickHouse/pull/45796 + bool have_false_positive_memory_leak_with_async_close() const override { return true; } BaseAzureEnv* env_; std::shared_ptr azure_fs_; @@ -690,6 +714,36 @@ class TestAzureOptions : public ::testing::Test { ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kEnvironment); } + void TestFromUriCredentialSASToken() { + const std::string sas_token = + "?se=2024-12-12T18:57:47Z&sig=pAs7qEBdI6sjUhqX1nrhNAKsTY%2B1SqLxPK%" + "2BbAxLiopw%3D&sp=racwdxylti&spr=https,http&sr=c&sv=2024-08-04"; + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri( + "abfs://file_system@account.dfs.core.windows.net/" + sas_token, nullptr)); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kSASToken); + ASSERT_EQ(options.sas_token_, sas_token); + } + + void TestFromUriCredentialSASTokenWithOtherParameters() { + const std::string uri_query_string = + "?enable_tls=false&se=2024-12-12T18:57:47Z&sig=pAs7qEBdI6sjUhqX1nrhNAKsTY%" + "2B1SqLxPK%" + "2BbAxLiopw%3D&sp=racwdxylti&spr=https,http&sr=c&sv=2024-08-04"; + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri( + "abfs://account@127.0.0.1:10000/container/dir/blob" + uri_query_string, + 
nullptr)); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kSASToken); + ASSERT_EQ(options.sas_token_, uri_query_string); + ASSERT_EQ(options.blob_storage_authority, "127.0.0.1:10000"); + ASSERT_EQ(options.dfs_storage_authority, "127.0.0.1:10000"); + ASSERT_EQ(options.blob_storage_scheme, "http"); + ASSERT_EQ(options.dfs_storage_scheme, "http"); + } + void TestFromUriCredentialInvalid() { ASSERT_RAISES(Invalid, AzureOptions::FromUri( "abfs://file_system@account.dfs.core.windows.net/dir/file?" @@ -777,6 +831,10 @@ TEST_F(TestAzureOptions, FromUriCredentialWorkloadIdentity) { TEST_F(TestAzureOptions, FromUriCredentialEnvironment) { TestFromUriCredentialEnvironment(); } +TEST_F(TestAzureOptions, FromUriCredentialSASToken) { TestFromUriCredentialSASToken(); } +TEST_F(TestAzureOptions, FromUriCredentialSASTokenWithOtherParameters) { + TestFromUriCredentialSASTokenWithOtherParameters(); +} TEST_F(TestAzureOptions, FromUriCredentialInvalid) { TestFromUriCredentialInvalid(); } TEST_F(TestAzureOptions, FromUriBlobStorageAuthority) { TestFromUriBlobStorageAuthority(); @@ -912,6 +970,20 @@ class TestAzureFileSystem : public ::testing::Test { .Value; } + Result GetContainerSASToken( + const std::string& container_name, + Azure::Storage::StorageSharedKeyCredential storage_shared_key_credential) { + std::string sas_token; + Azure::Storage::Sas::BlobSasBuilder builder; + std::chrono::seconds available_period(60); + builder.ExpiresOn = std::chrono::system_clock::now() + available_period; + builder.BlobContainerName = container_name; + builder.Resource = Azure::Storage::Sas::BlobSasResource::BlobContainer; + builder.SetPermissions(Azure::Storage::Sas::BlobContainerSasPermissions::All); + builder.Protocol = Azure::Storage::Sas::SasProtocol::HttpsAndHttp; + return builder.GenerateSasToken(storage_shared_key_credential); + } + void UploadLines(const std::vector& lines, const std::string& path, int total_size) { ASSERT_OK_AND_ASSIGN(auto output, 
fs()->OpenOutputStream(path, {})); @@ -1536,29 +1608,7 @@ class TestAzureFileSystem : public ::testing::Test { void TestOpenOutputStreamCloseAsync() { #if defined(ADDRESS_SANITIZER) || defined(ARROW_VALGRIND) - // This false positive leak is similar to the one pinpointed in the - // have_false_positive_memory_leak_with_generator() comments above, - // though the stack trace is different. It happens when a block list - // is committed from a background thread. - // - // clang-format off - // Direct leak of 968 byte(s) in 1 object(s) allocated from: - // #0 calloc - // #1 (/lib/x86_64-linux-gnu/libxml2.so.2+0xe25a4) - // #2 __xmlDefaultBufferSize - // #3 xmlBufferCreate - // #4 Azure::Storage::_internal::XmlWriter::XmlWriter() - // #5 Azure::Storage::Blobs::_detail::BlockBlobClient::CommitBlockList - // #6 Azure::Storage::Blobs::BlockBlobClient::CommitBlockList - // #7 arrow::fs::(anonymous namespace)::CommitBlockList - // #8 arrow::fs::(anonymous namespace)::ObjectAppendStream::FlushAsync()::'lambda' - // clang-format on - // - // TODO perhaps remove this skip once we can rely on - // https://github.com/Azure/azure-sdk-for-cpp/pull/5767 - // - // Also note that ClickHouse has a workaround for a similar issue: - // https://github.com/ClickHouse/ClickHouse/pull/45796 + // See comment about have_false_positive_memory_leak_with_generator above. 
if (options_.background_writes) { GTEST_SKIP() << "False positive memory leak in libxml2 with CloseAsync"; } @@ -1617,6 +1667,31 @@ class TestAzureFileSystem : public ::testing::Test { AssertObjectContents(fs.get(), path, payload); } + void TestSASCredential() { + auto data = SetUpPreexistingData(); + + ASSERT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + ASSERT_OK_AND_ASSIGN(auto options, MakeOptions(env)); + ASSERT_OK_AND_ASSIGN( + auto sas_token, + GetContainerSASToken(data.container_name, + Azure::Storage::StorageSharedKeyCredential( + env->account_name(), env->account_key()))); + // AzureOptions::FromUri will not cut off extra query parameters that it consumes, so + // make sure these don't cause problems. + ARROW_EXPECT_OK(options.ConfigureSASCredential( + "?blob_storage_authority=dummy_value0&" + sas_token.substr(1) + + "&credential_kind=dummy-value1")); + EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); + + AssertFileInfo(fs.get(), data.ObjectPath(), FileType::File); + + // Test CopyFile because the most obvious implementation requires generating a SAS + // token at runtime which doesn't work when the original auth is SAS token. 
+ ASSERT_OK(fs->CopyFile(data.ObjectPath(), data.ObjectPath() + "_copy")); + AssertFileInfo(fs.get(), data.ObjectPath() + "_copy", FileType::File); + } + private: using StringMatcher = ::testing::PolymorphicMatcher<::testing::internal::HasSubstrMatcher>; @@ -2328,6 +2403,10 @@ TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateContainerFromPath) { TYPED_TEST(TestAzureFileSystemOnAllScenarios, MovePath) { this->TestMovePath(); } +TYPED_TEST(TestAzureFileSystemOnAllScenarios, SASCredential) { + this->TestSASCredential(); +} + // Tests using Azurite (the local Azure emulator) TEST_F(TestAzuriteFileSystem, CheckIfHierarchicalNamespaceIsEnabledRuntimeError) { @@ -2634,6 +2713,17 @@ TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationNonexistent) { EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } +TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationDifferentContainer) { + auto data = SetUpPreexistingData(); + auto data2 = SetUpPreexistingData(); + const auto destination_path = data2.ContainerPath("copy-destination"); + ASSERT_OK(fs()->CopyFile(data.ObjectPath(), destination_path)); + ASSERT_OK_AND_ASSIGN(auto info, fs()->GetFileInfo(destination_path)); + ASSERT_OK_AND_ASSIGN(auto stream, fs()->OpenInputStream(info)); + ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); + EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); +} + TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationSame) { auto data = SetUpPreexistingData(); ASSERT_OK(fs()->CopyFile(data.ObjectPath(), data.ObjectPath())); diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index b5765010ec7e9..37619df90fc34 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -630,9 +630,12 @@ Status CopyFiles(const std::vector& sources, destinations.size(), " paths."); } - auto copy_one_file = [&](int i) { - if (sources[i].filesystem->Equals(destinations[i].filesystem)) { - return 
sources[i].filesystem->CopyFile(sources[i].path, destinations[i].path); + auto copy_one_file = [&](size_t i, + const FileLocator& source_file_locator) -> Result> { + if (source_file_locator.filesystem->Equals(destinations[i].filesystem)) { + RETURN_NOT_OK(source_file_locator.filesystem->CopyFile(source_file_locator.path, + destinations[i].path)); + return Future<>::MakeFinished(); } ARROW_ASSIGN_OR_RAISE(auto source, @@ -642,12 +645,31 @@ Status CopyFiles(const std::vector& sources, ARROW_ASSIGN_OR_RAISE(auto destination, destinations[i].filesystem->OpenOutputStream( destinations[i].path, metadata)); RETURN_NOT_OK(internal::CopyStream(source, destination, chunk_size, io_context)); - return destination->Close(); + // Using the blocking Close() here can cause reduced performance and deadlocks because + // FileSystem implementations that implement background_writes need to queue and wait + // for other IO thread(s). There is a risk that most or all the threads in the IO + // thread pool are blocking on a call Close(), leaving no IO threads left to actually + // fulfil the background writes. + return destination->CloseAsync(); }; - return ::arrow::internal::OptionalParallelFor( - use_threads, static_cast(sources.size()), std::move(copy_one_file), - io_context.executor()); + // Spawn copy_one_file less urgently than default, so that background_writes are done + // with higher priority. Otherwise copy_one_file will keep buffering more data in memory + // without giving the background_writes any chance to upload the data and drop it from + // memory. Therefore, without this large copies would cause OOMs. + TaskHints hints{10}; + auto future = ::arrow::internal::OptionalParallelForAsync( + use_threads, sources, std::move(copy_one_file), io_context.executor(), hints); + + // Wait for all the copy_one_file instances to complete. + ARROW_ASSIGN_OR_RAISE(auto copy_close_async_future, future.result()); + + // Wait for all the futures returned by copy_one_file to complete. 
When the destination + // filesystem uses background_writes this is when most of the upload happens. + for (const auto& result : copy_close_async_future) { + result.Wait(); + } + return Status::OK(); } Status CopyFiles(const std::shared_ptr& source_fs, diff --git a/cpp/src/arrow/filesystem/test_util.cc b/cpp/src/arrow/filesystem/test_util.cc index a6c897636000e..efe7cff4958ab 100644 --- a/cpp/src/arrow/filesystem/test_util.cc +++ b/cpp/src/arrow/filesystem/test_util.cc @@ -578,6 +578,67 @@ void GenericFileSystemTest::TestCopyFile(FileSystem* fs) { AssertAllFiles(fs, {"AB/abc", "EF/ghi", "def"}); } +void GenericFileSystemTest::TestCopyFiles(FileSystem* fs) { +#if defined(ADDRESS_SANITIZER) || defined(ARROW_VALGRIND) + if (have_false_positive_memory_leak_with_async_close()) { + GTEST_SKIP() << "Filesystem have false positive memory leak with generator"; + } +#endif + auto io_thread_pool = + static_cast(fs->io_context().executor()); + auto original_threads = io_thread_pool->GetCapacity(); + // Needs to be smaller than the number of files we test with to catch GH-15233 + ASSERT_OK(io_thread_pool->SetCapacity(2)); + // Ensure the thread pool capacity is set back to the original value after the test + auto reset_thread_pool = [io_thread_pool, original_threads](void*) { + ASSERT_OK(io_thread_pool->SetCapacity(original_threads)); + }; + std::unique_ptr reset_thread_pool_guard( + nullptr, reset_thread_pool); + + auto mock_fs = std::make_shared( + std::chrono::system_clock::now()); + std::vector dirs0{"0", "0/AB", "0/AB/CD"}; + std::map files0{ + {"0/123", "123 data"}, {"0/AB/abc", "abc data"}, {"0/AB/CD/def", "def data"}}; + + std::vector dirs0and1{"0", "0/AB", "0/AB/CD", "1", "1/AB", "1/AB/CD"}; + std::map files0and1{ + {"0/123", "123 data"}, {"0/AB/abc", "abc data"}, {"0/AB/CD/def", "def data"}, + {"1/123", "123 data"}, {"1/AB/abc", "abc data"}, {"1/AB/CD/def", "def data"}}; + + ASSERT_OK(mock_fs->CreateDir("0/AB/CD")); + for (const auto& kv : files0) { + 
CreateFile(mock_fs.get(), kv.first, kv.second); + } + + auto selector0 = arrow::fs::FileSelector{}; + selector0.base_dir = "0"; + selector0.recursive = true; + + ASSERT_OK(CopyFiles(mock_fs, selector0, fs->shared_from_this(), "0")); + AssertAllDirs(fs, dirs0); + for (const auto& kv : files0) { + AssertFileContents(fs, kv.first, kv.second); + } + + ASSERT_OK(CopyFiles(fs->shared_from_this(), selector0, fs->shared_from_this(), "1")); + AssertAllDirs(fs, dirs0and1); + for (const auto& kv : files0and1) { + AssertFileContents(fs, kv.first, kv.second); + } + + auto selector1 = arrow::fs::FileSelector{}; + selector1.base_dir = "1"; + selector1.recursive = true; + + ASSERT_OK(CopyFiles(fs->shared_from_this(), selector1, mock_fs, "1")); + AssertAllDirs(mock_fs.get(), dirs0and1); + for (const auto& kv : files0and1) { + AssertFileContents(mock_fs.get(), kv.first, kv.second); + } +} + void GenericFileSystemTest::TestGetFileInfo(FileSystem* fs) { ASSERT_OK(fs->CreateDir("AB/CD/EF")); CreateFile(fs, "AB/CD/ghi", "some data"); @@ -1212,6 +1273,7 @@ GENERIC_FS_TEST_DEFINE(TestDeleteFiles) GENERIC_FS_TEST_DEFINE(TestMoveFile) GENERIC_FS_TEST_DEFINE(TestMoveDir) GENERIC_FS_TEST_DEFINE(TestCopyFile) +GENERIC_FS_TEST_DEFINE(TestCopyFiles) GENERIC_FS_TEST_DEFINE(TestGetFileInfo) GENERIC_FS_TEST_DEFINE(TestGetFileInfoVector) GENERIC_FS_TEST_DEFINE(TestGetFileInfoSelector) diff --git a/cpp/src/arrow/filesystem/test_util.h b/cpp/src/arrow/filesystem/test_util.h index 04000c14e9c2a..3a643b7e9f08b 100644 --- a/cpp/src/arrow/filesystem/test_util.h +++ b/cpp/src/arrow/filesystem/test_util.h @@ -140,6 +140,7 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest { void TestMoveFile(); void TestMoveDir(); void TestCopyFile(); + void TestCopyFiles(); void TestGetFileInfo(); void TestGetFileInfoVector(); void TestGetFileInfoSelector(); @@ -189,6 +190,8 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest { virtual bool have_file_metadata() const { return false; } // - Whether the filesystem has a 
false positive memory leak with generator virtual bool have_false_positive_memory_leak_with_generator() const { return false; } + // - Whether the filesystem has a false positive memory leak in async close + virtual bool have_false_positive_memory_leak_with_async_close() const { return false; } void TestEmpty(FileSystem* fs); void TestNormalizePath(FileSystem* fs); @@ -201,6 +204,7 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest { void TestMoveFile(FileSystem* fs); void TestMoveDir(FileSystem* fs); void TestCopyFile(FileSystem* fs); + void TestCopyFiles(FileSystem* fs); void TestGetFileInfo(FileSystem* fs); void TestGetFileInfoVector(FileSystem* fs); void TestGetFileInfoSelector(FileSystem* fs); @@ -233,6 +237,7 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest { GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, MoveFile) \ GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, MoveDir) \ GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, CopyFile) \ + GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, CopyFiles) \ GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetFileInfo) \ GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetFileInfoVector) \ GENERIC_FS_TEST_FUNCTION(TEST_MACRO, TEST_CLASS, GetFileInfoSelector) \ diff --git a/cpp/src/arrow/flight/sql/client.cc b/cpp/src/arrow/flight/sql/client.cc index 536bc67fc586d..fe087cc947de5 100644 --- a/cpp/src/arrow/flight/sql/client.cc +++ b/cpp/src/arrow/flight/sql/client.cc @@ -31,6 +31,7 @@ #include "arrow/ipc/reader.h" #include "arrow/result.h" #include "arrow/util/logging.h" +#include "arrow/util/macros.h" namespace flight_sql_pb = arrow::flight::protocol::sql; @@ -829,6 +830,8 @@ Status FlightSqlClient::Rollback(const FlightCallOptions& options, return results->Drain(); } +// ActionCancelQuery{Request,Result} are deprecated +ARROW_SUPPRESS_DEPRECATION_WARNING ::arrow::Result FlightSqlClient::CancelQuery( const FlightCallOptions& options, const FlightInfo& info) { flight_sql_pb::ActionCancelQueryRequest cancel_query; 
@@ -855,6 +858,7 @@ ::arrow::Result FlightSqlClient::CancelQuery( } return Status::IOError("Server returned unknown result ", result.result()); } +ARROW_UNSUPPRESS_DEPRECATION_WARNING Status FlightSqlClient::Close() { return impl_->Close(); } diff --git a/cpp/src/arrow/flight/sql/protocol_internal.cc b/cpp/src/arrow/flight/sql/protocol_internal.cc index 0d5e3c4c60b8d..984e78222336e 100644 --- a/cpp/src/arrow/flight/sql/protocol_internal.cc +++ b/cpp/src/arrow/flight/sql/protocol_internal.cc @@ -14,6 +14,10 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations +#include "arrow/util/macros.h" + +// GH-44954: silence [[deprecated]] declarations in protobuf-generated code +ARROW_SUPPRESS_DEPRECATION_WARNING #include "arrow/flight/sql/protocol_internal.h" // NOTE(lidavidm): Normally this is forbidden, but on Windows to get @@ -21,3 +25,4 @@ // ensure our header gets included (and Protobuf will not insert the // include for you) #include "arrow/flight/sql/FlightSql.pb.cc" // NOLINT +ARROW_UNSUPPRESS_DEPRECATION_WARNING diff --git a/cpp/src/arrow/flight/sql/protocol_internal.h b/cpp/src/arrow/flight/sql/protocol_internal.h index ce50ad2f61b1e..09bfe32582a89 100644 --- a/cpp/src/arrow/flight/sql/protocol_internal.h +++ b/cpp/src/arrow/flight/sql/protocol_internal.h @@ -18,9 +18,12 @@ // This addresses platform-specific defines, e.g. on Windows #include "arrow/flight/platform.h" // IWYU pragma: keep +#include "arrow/util/macros.h" // This header holds the Flight SQL definitions. 
#include "arrow/flight/sql/visibility.h" +ARROW_SUPPRESS_DEPRECATION_WARNING #include "arrow/flight/sql/FlightSql.pb.h" // IWYU pragma: export +ARROW_UNSUPPRESS_DEPRECATION_WARNING diff --git a/cpp/src/arrow/flight/sql/server.cc b/cpp/src/arrow/flight/sql/server.cc index 5f6154a576b02..f68d884c6211b 100644 --- a/cpp/src/arrow/flight/sql/server.cc +++ b/cpp/src/arrow/flight/sql/server.cc @@ -31,6 +31,7 @@ #include "arrow/flight/sql/sql_info_internal.h" #include "arrow/type.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/macros.h" #define PROPERTY_TO_OPTIONAL(COMMAND, PROPERTY) \ COMMAND.has_##PROPERTY() ? std::make_optional(COMMAND.PROPERTY()) : std::nullopt @@ -337,6 +338,8 @@ arrow::Result ParseActionBeginTransactionRequest( return result; } +// ActionCancelQueryRequest is deprecated +ARROW_SUPPRESS_DEPRECATION_WARNING arrow::Result ParseActionCancelQueryRequest( const Action& action) { pb::sql::ActionCancelQueryRequest command; @@ -346,6 +349,7 @@ arrow::Result ParseActionCancelQueryRequest( ARROW_ASSIGN_OR_RAISE(result.info, FlightInfo::Deserialize(command.info())); return result; } +ARROW_UNSUPPRESS_DEPRECATION_WARNING arrow::Result ParseActionCreatePreparedStatementRequest(const Action& action) { @@ -468,6 +472,8 @@ arrow::Result PackActionResult(const FlightEndpoint& endpoint) { return endpoint.SerializeToBuffer(); } +// ActionCancelQueryResult is deprecated +ARROW_SUPPRESS_DEPRECATION_WARNING arrow::Result PackActionResult(CancelResult result) { pb::sql::ActionCancelQueryResult pb_result; switch (result) { @@ -487,6 +493,7 @@ arrow::Result PackActionResult(CancelResult result) { } return PackActionResult(pb_result); } +ARROW_UNSUPPRESS_DEPRECATION_WARNING arrow::Result PackActionResult(ActionCreatePreparedStatementResult result) { pb::sql::ActionCreatePreparedStatementResult pb_result; diff --git a/cpp/src/arrow/flight/transport/ucx/ucx.h b/cpp/src/arrow/flight/transport/ucx/ucx.h index dda2c83035c6d..7a12987c31e00 100644 --- 
a/cpp/src/arrow/flight/transport/ucx/ucx.h +++ b/cpp/src/arrow/flight/transport/ucx/ucx.h @@ -26,6 +26,8 @@ namespace flight { namespace transport { namespace ucx { +/// \deprecated Deprecated in 19.0.0. Flight UCX is deprecated. +ARROW_DEPRECATED("Deprecated in 19.0.0. Flight UCX is deprecated.") ARROW_FLIGHT_EXPORT void InitializeFlightUcx(); diff --git a/cpp/src/arrow/io/interfaces.cc b/cpp/src/arrow/io/interfaces.cc index 1d35549cc4345..f6be60509c45e 100644 --- a/cpp/src/arrow/io/interfaces.cc +++ b/cpp/src/arrow/io/interfaces.cc @@ -68,8 +68,8 @@ Status SetIOThreadPoolCapacity(int threads) { FileInterface::~FileInterface() = default; Future<> FileInterface::CloseAsync() { - return DeferNotOk( - default_io_context().executor()->Submit([this]() { return Close(); })); + return DeferNotOk(default_io_context().executor()->Submit( + [self = shared_from_this()]() { return self->Close(); })); } Status FileInterface::Abort() { return Close(); } diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc index 7a45f0906639a..31312f1ac6948 100644 --- a/cpp/src/arrow/ipc/json_simple_test.cc +++ b/cpp/src/arrow/ipc/json_simple_test.cc @@ -857,7 +857,8 @@ TEST(TestMap, StringToInteger) { ASSERT_OK_AND_ASSIGN(auto expected_keys, ArrayFromJSON(utf8(), R"(["joe", "mark", "cap"])")); ASSERT_OK_AND_ASSIGN(auto expected_values, ArrayFromJSON(int32(), "[0, null, 8]")); - ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap, BytesToBits({1, 0, 1, 1})); + ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap, + BytesToBits(std::vector({1, 0, 1, 1}))); auto expected = std::make_shared(type, 4, Buffer::Wrap(offsets), expected_keys, expected_values, expected_null_bitmap, 1); diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc index 88aa3f3f8a47a..8cb0f5625760f 100644 --- a/cpp/src/arrow/ipc/writer.cc +++ b/cpp/src/arrow/ipc/writer.cc @@ -41,7 +41,7 @@ #include "arrow/ipc/metadata_internal.h" #include "arrow/ipc/util.h" #include
"arrow/record_batch.h" -#include "arrow/result_internal.h" +#include "arrow/result.h" #include "arrow/sparse_tensor.h" #include "arrow/status.h" #include "arrow/table.h" @@ -840,8 +840,8 @@ Status WriteRecordBatch(const RecordBatch& batch, int64_t buffer_start_offset, Status WriteRecordBatchStream(const std::vector>& batches, const IpcWriteOptions& options, io::OutputStream* dst) { - ASSIGN_OR_RAISE(std::shared_ptr writer, - MakeStreamWriter(dst, batches[0]->schema(), options)); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr writer, + MakeStreamWriter(dst, batches[0]->schema(), options)); for (const auto& batch : batches) { DCHECK(batch->schema()->Equals(*batches[0]->schema())) << "Schemas unequal"; RETURN_NOT_OK(writer->WriteRecordBatch(*batch)); diff --git a/cpp/src/arrow/result_internal.h b/cpp/src/arrow/result_internal.h deleted file mode 100644 index 134902e1b75ad..0000000000000 --- a/cpp/src/arrow/result_internal.h +++ /dev/null @@ -1,22 +0,0 @@ -// -// Copyright 2017 Asylo authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#pragma once - -#include "arrow/result.h" - -#ifndef ASSIGN_OR_RAISE -# define ASSIGN_OR_RAISE(lhs, rhs) ARROW_ASSIGN_OR_RAISE(lhs, rhs) -#endif diff --git a/cpp/src/arrow/stl.h b/cpp/src/arrow/stl.h index b542ee5c34868..ae5462c661a8c 100644 --- a/cpp/src/arrow/stl.h +++ b/cpp/src/arrow/stl.h @@ -187,6 +187,35 @@ struct ConversionTraits> } }; +template +struct ConversionTraits> + : public CTypeTraits> { + static arrow::Status AppendRow(FixedSizeListBuilder& builder, + const std::array& values) { + auto vb = + ::arrow::internal::checked_cast::BuilderType*>( + builder.value_builder()); + ARROW_RETURN_NOT_OK(builder.Append()); + return vb->AppendValues(values.data(), N); + } + + static std::array GetEntry(const ::arrow::FixedSizeListArray& array, + size_t j) { + using ElementArrayType = typename TypeTraits< + typename stl::ConversionTraits::ArrowType>::ArrayType; + + const ElementArrayType& value_array = + ::arrow::internal::checked_cast(*array.values()); + + std::array arr; + for (size_t i = 0; i < N; i++) { + arr[i] = stl::ConversionTraits::GetEntry(value_array, + array.value_offset(j) + i); + } + return arr; + } +}; + template struct ConversionTraits> : public CTypeTraits())>::type> { diff --git a/cpp/src/arrow/stl_test.cc b/cpp/src/arrow/stl_test.cc index 48e6f8014c923..ce5adf0c0e268 100644 --- a/cpp/src/arrow/stl_test.cc +++ b/cpp/src/arrow/stl_test.cc @@ -245,6 +245,26 @@ TEST(TestTableFromTupleVector, ListType) { ASSERT_TRUE(expected_table->Equals(*table)); } +TEST(TestTableFromTupleVector, FixedSizeListType) { + using tuple_type = std::tuple>; + + auto expected_schema = std::make_shared( + FieldVector{field("column1", fixed_size_list(int64(), 4), false)}); + std::shared_ptr expected_array = + ArrayFromJSON(fixed_size_list(int64(), 4), "[[1, 1, 2, 34], [2, -4, 1, 1]]"); + std::shared_ptr expected_table = Table::Make(expected_schema, {expected_array}); + + std::vector rows{tuple_type(std::array{1, 1, 2, 34}), + tuple_type(std::array{2, -4, 1, 1})}; + 
std::vector names{"column1"}; + + std::shared_ptr
table; + ASSERT_OK(TableFromTupleRange(default_memory_pool(), rows, names, &table)); + ASSERT_OK(table->ValidateFull()); + + AssertTablesEqual(*expected_table, *table); +} + TEST(TestTableFromTupleVector, ReferenceTuple) { std::vector names{"column1", "column2", "column3", "column4", "column5", "column6", "column7", "column8", "column9", "column10"}; @@ -468,6 +488,26 @@ TEST(TestTupleVectorFromTable, ListType) { ASSERT_EQ(rows, expected_rows); } +TEST(TestTupleVectorFromTable, FixedSizeListType) { + using tuple_type = std::tuple>; + + compute::ExecContext ctx; + compute::CastOptions cast_options; + auto expected_schema = std::make_shared( + FieldVector{field("column1", fixed_size_list(int64(), 4), false)}); + std::shared_ptr expected_array = + ArrayFromJSON(fixed_size_list(int64(), 4), "[[1, 1, 2, 34], [2, -4, 1, 1]]"); + std::shared_ptr
table = Table::Make(expected_schema, {expected_array}); + ASSERT_OK(table->ValidateFull()); + + std::vector expected_rows{tuple_type(std::array{1, 1, 2, 34}), + tuple_type(std::array{2, -4, 1, 1})}; + + std::vector rows(2); + ASSERT_OK(TupleRangeFromTable(*table, cast_options, &ctx, &rows)); + ASSERT_EQ(rows, expected_rows); +} + TEST(TestTupleVectorFromTable, CastingNeeded) { using tuple_type = std::tuple>; diff --git a/cpp/src/arrow/testing/gtest_util_test.cc b/cpp/src/arrow/testing/gtest_util_test.cc index 9b4514197d776..daf071c2b36f1 100644 --- a/cpp/src/arrow/testing/gtest_util_test.cc +++ b/cpp/src/arrow/testing/gtest_util_test.cc @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +#include + +#include #include #include "arrow/array.h" @@ -23,6 +26,7 @@ #include "arrow/record_batch.h" #include "arrow/tensor.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/math.h" #include "arrow/testing/random.h" #include "arrow/type.h" #include "arrow/type_traits.h" @@ -171,4 +175,110 @@ TEST_F(TestTensorFromJSON, FromJSON) { EXPECT_TRUE(tensor_expected->Equals(*result)); } +template +void CheckWithinUlpSingle(Float x, Float y, int n_ulp) { + ARROW_SCOPED_TRACE("x = ", x, ", y = ", y, ", n_ulp = ", n_ulp); + ASSERT_TRUE(WithinUlp(x, y, n_ulp)); +} + +template +void CheckNotWithinUlpSingle(Float x, Float y, int n_ulp) { + ARROW_SCOPED_TRACE("x = ", x, ", y = ", y, ", n_ulp = ", n_ulp); + ASSERT_FALSE(WithinUlp(x, y, n_ulp)); +} + +template +void CheckWithinUlp(Float x, Float y, int n_ulp) { + CheckWithinUlpSingle(x, y, n_ulp); + CheckWithinUlpSingle(y, x, n_ulp); + CheckWithinUlpSingle(x, y, n_ulp + 1); + CheckWithinUlpSingle(y, x, n_ulp + 1); + CheckWithinUlpSingle(-x, -y, n_ulp); + CheckWithinUlpSingle(-y, -x, n_ulp); + + for (int exp : {1, -1, 10, -10}) { + Float x_scaled = std::ldexp(x, exp); + Float y_scaled = std::ldexp(y, exp); + CheckWithinUlpSingle(x_scaled, y_scaled, n_ulp); + 
CheckWithinUlpSingle(y_scaled, x_scaled, n_ulp); + } +} + +template +void CheckNotWithinUlp(Float x, Float y, int n_ulp) { + CheckNotWithinUlpSingle(x, y, n_ulp); + CheckNotWithinUlpSingle(y, x, n_ulp); + CheckNotWithinUlpSingle(-x, -y, n_ulp); + CheckNotWithinUlpSingle(-y, -x, n_ulp); + if (n_ulp > 1) { + CheckNotWithinUlpSingle(x, y, n_ulp - 1); + CheckNotWithinUlpSingle(y, x, n_ulp - 1); + CheckNotWithinUlpSingle(-x, -y, n_ulp - 1); + CheckNotWithinUlpSingle(-y, -x, n_ulp - 1); + } + + for (int exp : {1, -1, 10, -10}) { + Float x_scaled = std::ldexp(x, exp); + Float y_scaled = std::ldexp(y, exp); + CheckNotWithinUlpSingle(x_scaled, y_scaled, n_ulp); + CheckNotWithinUlpSingle(y_scaled, x_scaled, n_ulp); + } +} + +TEST(TestWithinUlp, Double) { + for (double f : {0.0, 1e-20, 1.0, 2345678.9}) { + CheckWithinUlp(f, f, 1); + CheckWithinUlp(f, f, 42); + } + CheckWithinUlp(-0.0, 0.0, 1); + CheckWithinUlp(1.0, 1.0000000000000002, 1); + CheckWithinUlp(1.0, 1.0000000000000007, 3); + CheckNotWithinUlp(1.0, 1.0000000000000007, 2); + CheckNotWithinUlp(1.0, 1.0000000000000007, 1); + // left and right have a different exponent but are still very close + CheckWithinUlp(1.0, 0.9999999999999999, 1); + CheckWithinUlp(1.0, 0.9999999999999988, 11); + CheckNotWithinUlp(1.0, 0.9999999999999988, 10); + + CheckWithinUlp(123.4567, 123.45670000000015, 11); + CheckNotWithinUlp(123.4567, 123.45670000000015, 10); + + CheckNotWithinUlp(HUGE_VAL, -HUGE_VAL, 10); + CheckNotWithinUlp(12.34, -HUGE_VAL, 10); + CheckNotWithinUlp(12.34, std::nan(""), 10); + CheckNotWithinUlp(12.34, -12.34, 10); + CheckNotWithinUlp(0.0, 1e-20, 10); +} + +TEST(TestWithinUlp, Float) { + for (float f : {0.0f, 1e-8f, 1.0f, 123.456f}) { + CheckWithinUlp(f, f, 1); + CheckWithinUlp(f, f, 42); + } + CheckWithinUlp(-0.0f, 0.0f, 1); + CheckWithinUlp(1.0f, 1.0000001f, 1); + CheckWithinUlp(1.0f, 1.0000013f, 11); + CheckNotWithinUlp(1.0f, 1.0000013f, 10); + // left and right have a different exponent but are still very close + 
CheckWithinUlp(1.0f, 0.99999994f, 1); + CheckWithinUlp(1.0f, 0.99999934f, 11); + CheckNotWithinUlp(1.0f, 0.99999934f, 10); + + CheckWithinUlp(123.456f, 123.456085f, 11); + CheckNotWithinUlp(123.456f, 123.456085f, 10); + + CheckNotWithinUlp(HUGE_VALF, -HUGE_VALF, 10); + CheckNotWithinUlp(12.34f, -HUGE_VALF, 10); + CheckNotWithinUlp(12.34f, std::nanf(""), 10); + CheckNotWithinUlp(12.34f, -12.34f, 10); +} + +TEST(AssertTestWithinUlp, Basics) { + AssertWithinUlp(123.4567, 123.45670000000015, 11); + AssertWithinUlp(123.456f, 123.456085f, 11); + EXPECT_FATAL_FAILURE(AssertWithinUlp(123.4567, 123.45670000000015, 10), + "not within 10 ulps"); + EXPECT_FATAL_FAILURE(AssertWithinUlp(123.456f, 123.456085f, 10), "not within 10 ulps"); +} + } // namespace arrow diff --git a/cpp/src/arrow/testing/math.cc b/cpp/src/arrow/testing/math.cc new file mode 100644 index 0000000000000..2cb2fcb2a9c15 --- /dev/null +++ b/cpp/src/arrow/testing/math.cc @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/testing/math.h" + +#include +#include + +#include + +#include "arrow/util/logging.h" + +namespace arrow { +namespace { + +template +bool WithinUlpOneWay(Float left, Float right, int n_ulps) { + // The delta between 1.0 and the FP value immediately before it. + // We're using this value because `frexp` returns a mantissa between 0.5 and 1.0. + static const Float kOneUlp = Float(1.0) - std::nextafter(Float(1.0), Float(0.0)); + + DCHECK_GE(n_ulps, 1); + + if (left == 0) { + return left == right; + } + if (left < 0) { + left = -left; + right = -right; + } + + int left_exp; + Float left_mant = std::frexp(left, &left_exp); + Float delta = static_cast(n_ulps) * kOneUlp; + Float lower_bound = std::ldexp(left_mant - delta, left_exp); + Float upper_bound = std::ldexp(left_mant + delta, left_exp); + return right >= lower_bound && right <= upper_bound; +} + +template +bool WithinUlpGeneric(Float left, Float right, int n_ulps) { + if (!std::isfinite(left) || !std::isfinite(right)) { + return left == right; + } + return (std::abs(left) <= std::abs(right)) ? 
WithinUlpOneWay(left, right, n_ulps) + : WithinUlpOneWay(right, left, n_ulps); +} + +template +void AssertWithinUlpGeneric(Float left, Float right, int n_ulps) { + if (!WithinUlpGeneric(left, right, n_ulps)) { + FAIL() << left << " and " << right << " are not within " << n_ulps << " ulps"; + } +} + +} // namespace + +bool WithinUlp(float left, float right, int n_ulps) { + return WithinUlpGeneric(left, right, n_ulps); +} + +bool WithinUlp(double left, double right, int n_ulps) { + return WithinUlpGeneric(left, right, n_ulps); +} + +void AssertWithinUlp(float left, float right, int n_ulps) { + AssertWithinUlpGeneric(left, right, n_ulps); +} + +void AssertWithinUlp(double left, double right, int n_ulps) { + AssertWithinUlpGeneric(left, right, n_ulps); +} + +} // namespace arrow diff --git a/java/gandiva/src/main/cpp/env_helper.h b/cpp/src/arrow/testing/math.h similarity index 67% rename from java/gandiva/src/main/cpp/env_helper.h rename to cpp/src/arrow/testing/math.h index 5ae13c8071019..6aa3eac85056a 100644 --- a/java/gandiva/src/main/cpp/env_helper.h +++ b/cpp/src/arrow/testing/math.h @@ -17,7 +17,18 @@ #pragma once -#include +#include "arrow/testing/visibility.h" -// class references -extern jclass configuration_builder_class_; +namespace arrow { + +ARROW_TESTING_EXPORT +bool WithinUlp(float left, float right, int n_ulps); +ARROW_TESTING_EXPORT +bool WithinUlp(double left, double right, int n_ulps); + +ARROW_TESTING_EXPORT +void AssertWithinUlp(float left, float right, int n_ulps); +ARROW_TESTING_EXPORT +void AssertWithinUlp(double left, double right, int n_ulps); + +} // namespace arrow diff --git a/cpp/src/arrow/testing/process.cc b/cpp/src/arrow/testing/process.cc index 133768ff015e6..57df0196c117f 100644 --- a/cpp/src/arrow/testing/process.cc +++ b/cpp/src/arrow/testing/process.cc @@ -85,9 +85,14 @@ # include # ifdef BOOST_PROCESS_USE_V2 -namespace asio = BOOST_PROCESS_V2_ASIO_NAMESPACE; namespace process = BOOST_PROCESS_V2_NAMESPACE; namespace filesystem = 
process::filesystem; +// For Boost < 1.87.0 +# ifdef BOOST_PROCESS_V2_ASIO_NAMESPACE +namespace asio = BOOST_PROCESS_V2_ASIO_NAMESPACE; +# else +namespace asio = process::net; +# endif # elif defined(BOOST_PROCESS_HAVE_V1) namespace process = boost::process::v1; namespace filesystem = boost::process::v1::filesystem; diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index 6da05bd8f1435..92009c8560c4e 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -540,6 +540,16 @@ struct CTypeTraits> : public TypeTraits { } }; +/// \addtogroup c-type-traits +template +struct CTypeTraits> : public TypeTraits { + using ArrowType = FixedSizeListType; + + static auto type_singleton() { + return fixed_size_list(CTypeTraits::type_singleton(), N); + } +}; + /// \addtogroup type-traits /// @{ template <> diff --git a/cpp/src/arrow/util/aligned_storage.h b/cpp/src/arrow/util/aligned_storage.h index 01e3ced2d1f61..588806507039c 100644 --- a/cpp/src/arrow/util/aligned_storage.h +++ b/cpp/src/arrow/util/aligned_storage.h @@ -119,26 +119,7 @@ class AlignedStorage { } private: -#if !defined(__clang__) && defined(__GNUC__) && defined(__i386__) - // Workaround for GCC bug on i386: - // alignof(int64 | float64) can give different results depending on the - // compilation context, leading to internal ABI mismatch manifesting - // in incorrect propagation of Result between - // compilation units. 
- // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88115) - static constexpr size_t alignment() { - if (std::is_integral_v && sizeof(T) == 8) { - return 4; - } else if (std::is_floating_point_v && sizeof(T) == 8) { - return 4; - } - return alignof(T); - } - - typename std::aligned_storage::type data_; -#else - typename std::aligned_storage::type data_; -#endif + alignas(T) std::byte data_[sizeof(T)]; }; } // namespace internal diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h index 9c1f2e479c712..b5404bb7bc6d5 100644 --- a/cpp/src/arrow/util/basic_decimal.h +++ b/cpp/src/arrow/util/basic_decimal.h @@ -739,6 +739,16 @@ class ARROW_EXPORT BasicDecimal256 : public GenericBasicDecimal(value.high_bits()), SignExtend(value.high_bits()), SignExtend(value.high_bits())})) {} + explicit BasicDecimal256(const BasicDecimal64& value) noexcept + : BasicDecimal256(bit_util::little_endian::ToNative( + {value.low_bits(), SignExtend(value.value()), SignExtend(value.value()), + SignExtend(value.value())})) {} + + explicit BasicDecimal256(const BasicDecimal32& value) noexcept + : BasicDecimal256(bit_util::little_endian::ToNative( + {value.low_bits(), SignExtend(value.value()), SignExtend(value.value()), + SignExtend(value.value())})) {} + /// \brief Negate the current value (in-place) BasicDecimal256& Negate(); diff --git a/cpp/src/arrow/util/bitmap_builders.cc b/cpp/src/arrow/util/bitmap_builders.cc index c5cf3d2bc72b5..000dda718d0da 100644 --- a/cpp/src/arrow/util/bitmap_builders.cc +++ b/cpp/src/arrow/util/bitmap_builders.cc @@ -33,7 +33,7 @@ namespace internal { namespace { -void FillBitsFromBytes(const std::vector& bytes, uint8_t* bits) { +void FillBitsFromBytes(util::span bytes, uint8_t* bits) { for (size_t i = 0; i < bytes.size(); ++i) { if (bytes[i] > 0) { bit_util::SetBit(bits, i); @@ -43,7 +43,7 @@ void FillBitsFromBytes(const std::vector& bytes, uint8_t* bits) { } // namespace -Result> BytesToBits(const std::vector& bytes, +Result> 
BytesToBits(util::span bytes, MemoryPool* pool) { int64_t bit_length = bit_util::BytesForBits(bytes.size()); diff --git a/cpp/src/arrow/util/bitmap_builders.h b/cpp/src/arrow/util/bitmap_builders.h index 5bd2ad4414083..4bf2edfdcbd69 100644 --- a/cpp/src/arrow/util/bitmap_builders.h +++ b/cpp/src/arrow/util/bitmap_builders.h @@ -23,6 +23,7 @@ #include "arrow/result.h" #include "arrow/type_fwd.h" +#include "arrow/util/span.h" #include "arrow/util/visibility.h" namespace arrow { @@ -36,7 +37,7 @@ Result> BitmapAllButOne(MemoryPool* pool, int64_t length /// \brief Convert vector of bytes to bitmap buffer ARROW_EXPORT -Result> BytesToBits(const std::vector&, +Result> BytesToBits(util::span bytes, MemoryPool* pool = default_memory_pool()); } // namespace internal diff --git a/cpp/src/arrow/util/hashing.h b/cpp/src/arrow/util/hashing.h index 4ead1a7283d81..52525a83aa2ea 100644 --- a/cpp/src/arrow/util/hashing.h +++ b/cpp/src/arrow/util/hashing.h @@ -843,6 +843,14 @@ class BinaryMemoTable : public MemoTable { } } + // Visit the stored value at a specific index in insertion order. + // The visitor function should have the signature `void(std::string_view)` + // or `void(const std::string_view&)`. 
+ template + void VisitValue(int32_t idx, VisitFunc&& visit) const { + visit(binary_builder_.GetView(idx)); + } + protected: struct Payload { int32_t memo_index; diff --git a/cpp/src/arrow/util/parallel.h b/cpp/src/arrow/util/parallel.h index 80f60fbdb3676..ae48a606e366f 100644 --- a/cpp/src/arrow/util/parallel.h +++ b/cpp/src/arrow/util/parallel.h @@ -48,12 +48,13 @@ Status ParallelFor(int num_tasks, FUNCTION&& func, template ::ValueType> -Future> ParallelForAsync( - std::vector inputs, FUNCTION&& func, - Executor* executor = internal::GetCpuThreadPool()) { +Future> ParallelForAsync(std::vector inputs, FUNCTION&& func, + Executor* executor = internal::GetCpuThreadPool(), + TaskHints hints = TaskHints{}) { std::vector> futures(inputs.size()); for (size_t i = 0; i < inputs.size(); ++i) { - ARROW_ASSIGN_OR_RAISE(futures[i], executor->Submit(func, i, std::move(inputs[i]))); + ARROW_ASSIGN_OR_RAISE(futures[i], + executor->Submit(hints, func, i, std::move(inputs[i]))); } return All(std::move(futures)) .Then([](const std::vector>& results) -> Result> { @@ -86,9 +87,10 @@ template ::ValueType> Future> OptionalParallelForAsync( bool use_threads, std::vector inputs, FUNCTION&& func, - Executor* executor = internal::GetCpuThreadPool()) { + Executor* executor = internal::GetCpuThreadPool(), TaskHints hints = TaskHints{}) { if (use_threads) { - return ParallelForAsync(std::move(inputs), std::forward(func), executor); + return ParallelForAsync(std::move(inputs), std::forward(func), executor, + hints); } else { std::vector result(inputs.size()); for (size_t i = 0; i < inputs.size(); ++i) { diff --git a/cpp/src/arrow/util/span.h b/cpp/src/arrow/util/span.h index 71cf9ed44890a..8a84d028b2ad8 100644 --- a/cpp/src/arrow/util/span.h +++ b/cpp/src/arrow/util/span.h @@ -28,28 +28,6 @@ namespace arrow::util { template class span; -// This trait is used to check if a type R can be used to construct a span. 
-// Specifically, it checks if std::data(R) and std::size(R) are valid expressions -// that may be passed to the span(T*, size_t) constructor. The reason this trait -// is needed rather than expressing this directly in the relevant span constructor -// is that this check requires instantiating span, which would violate the -// C++ standard if written directly in the constructor's enable_if clause -// because span is an incomplete type at that point. By defining this trait -// instead, we add an extra level of indirection that lets us delay the -// evaluation of the template until the first time the associated constructor -// is actually called, at which point span is a complete type. -// -// Note that most compilers do support the noncompliant construct, but nvcc -// does not. See https://github.com/apache/arrow/issues/40252 -template -struct ConstructibleFromDataAndSize : std::false_type {}; - -template -struct ConstructibleFromDataAndSize< - span, R, - std::void_t{std::data(std::declval()), - std::size(std::declval())})>> : std::true_type {}; - /// std::span polyfill. /// /// Does not support static extents. 
@@ -81,14 +59,12 @@ writing code which would break when it is replaced by std::span.)"); constexpr span(T* begin, T* end) : data_{begin}, size_{static_cast(end - begin)} {} - template < - typename R, - std::enable_if_t, R>::value, bool> = true, - typename DisableUnlessSimilarTypes = std::enable_if_t()))>>, - std::decay_t>>> + template ())), + typename RS = decltype(std::size(std::declval())), + typename E = std::enable_if_t && + std::is_constructible_v>> // NOLINTNEXTLINE runtime/explicit, non-const reference - constexpr span(R&& range) : span{std::data(range), std::size(range)} {} + constexpr span(R&& range) : data_{std::data(range)}, size_{std::size(range)} {} constexpr T* begin() const { return data_; } constexpr T* end() const { return data_ + size_; } diff --git a/cpp/src/arrow/util/string_builder.h b/cpp/src/arrow/util/string_builder.h index 7c05ccd51f7fd..448fb57d7a79a 100644 --- a/cpp/src/arrow/util/string_builder.h +++ b/cpp/src/arrow/util/string_builder.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "arrow/util/visibility.h" @@ -46,7 +47,12 @@ class ARROW_EXPORT StringStreamWrapper { template void StringBuilderRecursive(std::ostream& stream, Head&& head) { - stream << head; + if constexpr (std::is_floating_point_v>) { + // Avoid losing precision when printing floating point numbers + stream << std::to_string(head); + } else { + stream << head; + } } template diff --git a/cpp/src/arrow/util/thread_pool.cc b/cpp/src/arrow/util/thread_pool.cc index 8aa6d548893de..faef51307e5d2 100644 --- a/cpp/src/arrow/util/thread_pool.cc +++ b/cpp/src/arrow/util/thread_pool.cc @@ -52,10 +52,28 @@ struct Task { Executor::StopCallback stop_callback; }; +struct QueuedTask { + Task task; + int32_t priority; + uint64_t spawn_index; + + // Implement comparison so that std::priority_queue will pop the low priorities more + // urgently. 
+ bool operator<(const QueuedTask& other) const { + if (priority == other.priority) { + // Maintain execution order for tasks with the same priority. It's preferable to keep + // the execution order of tasks deterministic. + return spawn_index > other.spawn_index; + } + return priority > other.priority; + } +}; + } // namespace struct SerialExecutor::State { - std::deque task_queue; + std::priority_queue task_queue; + uint64_t spawned_tasks_count_ = 0; std::mutex mutex; std::condition_variable wait_for_tasks; std::thread::id current_thread; @@ -153,8 +171,9 @@ Status SerialExecutor::SpawnReal(TaskHints hints, FnOnce task, "Attempt to schedule a task on a serial executor that has already finished or " "been abandoned"); } - state->task_queue.push_back( - Task{std::move(task), std::move(stop_token), std::move(stop_callback)}); + state->task_queue.push(QueuedTask{std::move(task), std::move(stop_token), + std::move(stop_callback), hints.priority, + state_->spawned_tasks_count_++}); } state->wait_for_tasks.notify_one(); return Status::OK(); @@ -189,8 +208,9 @@ Status SerialExecutor::SpawnReal(TaskHints hints, FnOnce task, "been abandoned"); } - state_->task_queue.push_back( - Task{std::move(task), std::move(stop_token), std::move(stop_callback)}); + state_->task_queue.push(QueuedTask{std::move(task), std::move(stop_token), + std::move(stop_callback), hints.priority, + state_->spawned_tasks_count_++}); return Status::OK(); } @@ -245,8 +265,8 @@ void SerialExecutor::RunLoop() { // because sometimes we will pause even with work leftover when processing // an async generator while (!state_->paused && !state_->task_queue.empty()) { - Task task = std::move(state_->task_queue.front()); - state_->task_queue.pop_front(); + Task task = std::move(const_cast(state_->task_queue.top().task)); + state_->task_queue.pop(); lk.unlock(); if (!task.stop_token.IsStopRequested()) { std::move(task.callable)(); @@ -309,8 +329,8 @@ bool SerialExecutor::RunTasksOnAllExecutors() { if
(exe->state_->paused == false && exe->state_->task_queue.empty() == false) { SerialExecutor* old_exe = globalState->current_executor; globalState->current_executor = exe; - Task task = std::move(exe->state_->task_queue.front()); - exe->state_->task_queue.pop_front(); + Task task = std::move(const_cast(exe->state_->task_queue.top().task)); + exe->state_->task_queue.pop(); run_task = true; exe->state_->tasks_running += 1; if (!task.stop_token.IsStopRequested()) { @@ -344,8 +364,8 @@ void SerialExecutor::RunLoop() { // we can't run any more until something else drops off the queue if (state_->tasks_running <= state_->max_tasks_running) { while (!state_->paused && !state_->task_queue.empty()) { - Task task = std::move(state_->task_queue.front()); - state_->task_queue.pop_front(); + Task task = std::move(const_cast(state_->task_queue.top().task)); + state_->task_queue.pop(); auto last_executor = globalState->current_executor; globalState->current_executor = this; state_->tasks_running += 1; @@ -386,7 +406,8 @@ struct ThreadPool::State { std::list workers_; // Trashcan for finished threads std::vector finished_workers_; - std::deque pending_tasks_; + std::priority_queue pending_tasks_; + uint64_t spawned_tasks_count_ = 0; // Desired number of threads int desired_capacity_ = 0; @@ -449,8 +470,8 @@ static void WorkerLoop(std::shared_ptr state, DCHECK_GE(state->tasks_queued_or_running_, 0); { - Task task = std::move(state->pending_tasks_.front()); - state->pending_tasks_.pop_front(); + Task task = std::move(const_cast(state->pending_tasks_.top().task)); + state->pending_tasks_.pop(); StopToken* stop_token = &task.stop_token; lock.unlock(); if (!stop_token->IsStopRequested()) { @@ -592,7 +613,8 @@ Status ThreadPool::Shutdown(bool wait) { if (!state_->quick_shutdown_) { DCHECK_EQ(state_->pending_tasks_.size(), 0); } else { - state_->pending_tasks_.clear(); + std::priority_queue empty; + std::swap(state_->pending_tasks_, empty); } CollectFinishedWorkersUnlocked(); return 
Status::OK(); @@ -653,8 +675,10 @@ Status ThreadPool::SpawnReal(TaskHints hints, FnOnce task, StopToken sto // We can still spin up more workers so spin up a new worker LaunchWorkersUnlocked(/*threads=*/1); } - state_->pending_tasks_.push_back( - {std::move(task), std::move(stop_token), std::move(stop_callback)}); + state_->pending_tasks_.push( + QueuedTask{{std::move(task), std::move(stop_token), std::move(stop_callback)}, + hints.priority, + state_->spawned_tasks_count_++}); } state_->cv_.notify_one(); return Status::OK(); @@ -737,7 +761,8 @@ Status ThreadPool::Shutdown(bool wait) { } else { // clear any pending tasks so that we behave // the same as threadpool on fast shutdown - state_->task_queue.clear(); + std::priority_queue empty; + std::swap(state_->task_queue, empty); } return Status::OK(); } @@ -777,7 +802,8 @@ Result> ThreadPool::MakeEternal(int threads) { ThreadPool::~ThreadPool() { // clear threadpool, otherwise ~SerialExecutor will // run any tasks left (which isn't threadpool behaviour) - state_->task_queue.clear(); + std::priority_queue empty; + std::swap(state_->task_queue, empty); } #endif // ARROW_ENABLE_THREADING diff --git a/cpp/src/arrow/util/thread_pool_test.cc b/cpp/src/arrow/util/thread_pool_test.cc index 7cf8826e8a173..2c83146030243 100644 --- a/cpp/src/arrow/util/thread_pool_test.cc +++ b/cpp/src/arrow/util/thread_pool_test.cc @@ -21,6 +21,7 @@ #endif #include +#include #include #include #include @@ -578,6 +579,62 @@ TEST_F(TestThreadPool, Spawn) { SpawnAdds(pool.get(), 7, task_add); } +TEST_F(TestThreadPool, TasksRunInPriorityOrder) { + auto pool = this->MakeThreadPool(1); + constexpr int kNumTasks = 10; + auto recorded_times = std::vector(kNumTasks); + auto futures = std::vector>(kNumTasks); + std::mutex mutex; + + auto wait_task = [&mutex] { std::unique_lock lock(mutex); }; + { + std::unique_lock lock(mutex); + // Spawn wait_task to block the pool while we add the other tasks. 
This + // ensures all the tasks are queued before any of them start running, so that + // their running order is fully determined by their priority. + ASSERT_OK(pool->Spawn(wait_task)); + + for (int i = 0; i < kNumTasks; ++i) { + auto record_time = [&recorded_times, i]() { + recorded_times[i] = std::chrono::steady_clock::now(); + return i; + }; + // Spawn tasks in opposite order to urgency. + ASSERT_OK_AND_ASSIGN(futures[i], + pool->Submit(TaskHints{kNumTasks - i}, record_time)); + } + } + + ASSERT_OK(pool->Shutdown()); + + for (size_t i = 1; i < kNumTasks; ++i) { + ASSERT_GE(recorded_times[i - 1], recorded_times[i]); + ASSERT_LT(futures[i - 1].result().ValueOrDie(), futures[i].result().ValueOrDie()); + } +} + +TEST_F(TestThreadPool, TasksOfEqualPriorityRunInSpawnOrder) { + auto pool = this->MakeThreadPool(1); + constexpr int kNumTasks = 10; + auto recorded_times = std::vector(kNumTasks); + auto futures = std::vector>(kNumTasks); + + for (int i = 0; i < kNumTasks; ++i) { + auto record_time = [&recorded_times, i]() { + recorded_times[i] = std::chrono::steady_clock::now(); + return i; + }; + ASSERT_OK_AND_ASSIGN(futures[i], pool->Submit(record_time)); + } + + ASSERT_OK(pool->Shutdown()); + + for (size_t i = 1; i < kNumTasks; ++i) { + ASSERT_LE(recorded_times[i - 1], recorded_times[i]); + ASSERT_LT(futures[i - 1].result().ValueOrDie(), futures[i].result().ValueOrDie()); + } +} + TEST_F(TestThreadPool, StressSpawn) { auto pool = this->MakeThreadPool(30); SpawnAdds(pool.get(), 1000, task_add); diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 9c28b749e4319..0a9f92cebbbc4 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -181,6 +181,7 @@ set(PARQUET_SRCS printer.cc properties.cc schema.cc + size_statistics.cc statistics.cc stream_reader.cc stream_writer.cc @@ -373,6 +374,7 @@ add_parquet_test(internals-test metadata_test.cc page_index_test.cc public_api_test.cc + size_statistics_test.cc types_test.cc) 
set_source_files_properties(public_api_test.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index f8e639176aba3..856c032c3588a 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -3349,6 +3349,27 @@ TEST(TestArrowWrite, CheckChunkSize) { WriteTable(*table, ::arrow::default_memory_pool(), sink, chunk_size)); } +void CheckWritingNonNullableColumnWithNulls(std::shared_ptr<::arrow::Field> field, + std::string json_batch) { + ARROW_SCOPED_TRACE("field = ", field, ", json_batch = ", json_batch); + auto schema = ::arrow::schema({field}); + auto table = ::arrow::TableFromJSON(schema, {json_batch}); + auto sink = CreateOutputStream(); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::HasSubstr("is declared non-nullable but contains nulls"), + WriteTable(*table, ::arrow::default_memory_pool(), sink)); +} + +TEST(TestArrowWrite, InvalidSchema) { + // GH-41667: nulls in non-nullable column + CheckWritingNonNullableColumnWithNulls( + ::arrow::field("a", ::arrow::int32(), /*nullable=*/false), + R"([{"a": 456}, {"a": null}])"); + CheckWritingNonNullableColumnWithNulls( + ::arrow::field("a", ::arrow::utf8(), /*nullable=*/false), + R"([{"a": "foo"}, {"a": null}])"); +} + void DoNestedValidate(const std::shared_ptr<::arrow::DataType>& inner_type, const std::shared_ptr<::arrow::Field>& outer_field, const std::shared_ptr& buffer, diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index 0ee595508fec4..c19e2b9e48bb3 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -25,7 +25,7 @@ #include "arrow/extension_type.h" #include "arrow/io/memory.h" #include "arrow/ipc/api.h" -#include "arrow/result_internal.h" +#include "arrow/result.h" #include "arrow/type.h" #include "arrow/util/base64.h" #include "arrow/util/checked_cast.h" @@ -484,8 +484,8 @@ 
bool IsDictionaryReadSupported(const ArrowType& type) { ::arrow::Result> GetTypeForNode( int column_index, const schema::PrimitiveNode& primitive_node, SchemaTreeContext* ctx) { - ASSIGN_OR_RAISE(std::shared_ptr storage_type, - GetArrowType(primitive_node, ctx->properties)); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr storage_type, + GetArrowType(primitive_node, ctx->properties)); if (ctx->properties.read_dictionary(column_index) && IsDictionaryReadSupported(*storage_type)) { return ::arrow::dictionary(::arrow::int32(), storage_type); @@ -723,8 +723,8 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels, // yields list ?nullable const auto& primitive_node = static_cast(list_node); int column_index = ctx->schema->GetColumnIndex(primitive_node); - ASSIGN_OR_RAISE(std::shared_ptr type, - GetTypeForNode(column_index, primitive_node, ctx)); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr type, + GetTypeForNode(column_index, primitive_node, ctx)); auto item_field = ::arrow::field(list_node.name(), type, /*nullable=*/false, FieldIdMetadata(list_node.field_id())); RETURN_NOT_OK( @@ -799,8 +799,8 @@ Status NodeToSchemaField(const Node& node, LevelInfo current_levels, // repeated $TYPE $FIELD_NAME const auto& primitive_node = static_cast(node); int column_index = ctx->schema->GetColumnIndex(primitive_node); - ASSIGN_OR_RAISE(std::shared_ptr type, - GetTypeForNode(column_index, primitive_node, ctx)); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr type, + GetTypeForNode(column_index, primitive_node, ctx)); if (node.is_repeated()) { // One-level list encoding, e.g. 
// a: repeated int32; diff --git a/cpp/src/parquet/column_page.h b/cpp/src/parquet/column_page.h index b389ffd98e6c7..111265a842ee7 100644 --- a/cpp/src/parquet/column_page.h +++ b/cpp/src/parquet/column_page.h @@ -26,6 +26,7 @@ #include #include +#include "parquet/size_statistics.h" #include "parquet/statistics.h" #include "parquet/types.h" @@ -69,20 +70,22 @@ class DataPage : public Page { /// Currently it is only present from data pages created by ColumnWriter in order /// to collect page index. std::optional first_row_index() const { return first_row_index_; } + const SizeStatistics& size_statistics() const { return size_statistics_; } virtual ~DataPage() = default; protected: DataPage(PageType::type type, const std::shared_ptr& buffer, int32_t num_values, Encoding::type encoding, int64_t uncompressed_size, - EncodedStatistics statistics = EncodedStatistics(), - std::optional first_row_index = std::nullopt) + EncodedStatistics statistics, std::optional first_row_index, + SizeStatistics size_statistics) : Page(buffer, type), num_values_(num_values), encoding_(encoding), uncompressed_size_(uncompressed_size), statistics_(std::move(statistics)), - first_row_index_(std::move(first_row_index)) {} + first_row_index_(std::move(first_row_index)), + size_statistics_(std::move(size_statistics)) {} int32_t num_values_; Encoding::type encoding_; @@ -90,6 +93,7 @@ class DataPage : public Page { EncodedStatistics statistics_; /// Row ordinal within the row group to the first row in the data page. 
std::optional first_row_index_; + SizeStatistics size_statistics_; }; class DataPageV1 : public DataPage { @@ -98,9 +102,11 @@ class DataPageV1 : public DataPage { Encoding::type encoding, Encoding::type definition_level_encoding, Encoding::type repetition_level_encoding, int64_t uncompressed_size, EncodedStatistics statistics = EncodedStatistics(), - std::optional first_row_index = std::nullopt) + std::optional first_row_index = std::nullopt, + SizeStatistics size_statistics = SizeStatistics()) : DataPage(PageType::DATA_PAGE, buffer, num_values, encoding, uncompressed_size, - std::move(statistics), std::move(first_row_index)), + std::move(statistics), std::move(first_row_index), + std::move(size_statistics)), definition_level_encoding_(definition_level_encoding), repetition_level_encoding_(repetition_level_encoding) {} @@ -120,9 +126,11 @@ class DataPageV2 : public DataPage { int32_t definition_levels_byte_length, int32_t repetition_levels_byte_length, int64_t uncompressed_size, bool is_compressed = false, EncodedStatistics statistics = EncodedStatistics(), - std::optional first_row_index = std::nullopt) + std::optional first_row_index = std::nullopt, + SizeStatistics size_statistics = SizeStatistics()) : DataPage(PageType::DATA_PAGE_V2, buffer, num_values, encoding, uncompressed_size, - std::move(statistics), std::move(first_row_index)), + std::move(statistics), std::move(first_row_index), + std::move(size_statistics)), num_nulls_(num_nulls), num_rows_(num_rows), definition_levels_byte_length_(definition_levels_byte_length), diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 94c301f918544..12cbcf20affa4 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -55,6 +55,7 @@ #include "parquet/platform.h" #include "parquet/properties.h" #include "parquet/schema.h" +#include "parquet/size_statistics.h" #include "parquet/statistics.h" #include "parquet/thrift_internal.h" #include "parquet/types.h" @@ 
-437,7 +438,7 @@ class SerializedPageWriter : public PageWriter { /// Collect page index if (column_index_builder_ != nullptr) { - column_index_builder_->AddPage(page.statistics()); + column_index_builder_->AddPage(page.statistics(), page.size_statistics()); } if (offset_index_builder_ != nullptr) { const int64_t compressed_size = output_data_len + header_size; @@ -451,8 +452,9 @@ class SerializedPageWriter : public PageWriter { /// start_pos is a relative offset in the buffered mode. It should be /// adjusted via OffsetIndexBuilder::Finish() after BufferedPageWriter /// has flushed all data pages. - offset_index_builder_->AddPage(start_pos, static_cast(compressed_size), - *page.first_row_index()); + offset_index_builder_->AddPage( + start_pos, static_cast(compressed_size), *page.first_row_index(), + page.size_statistics().unencoded_byte_array_data_bytes); } total_uncompressed_size_ += uncompressed_size + header_size; @@ -774,11 +776,17 @@ class ColumnWriterImpl { // Serializes Dictionary Page if enabled virtual void WriteDictionaryPage() = 0; + // A convenience struct to combine the encoded statistics and size statistics + struct StatisticsPair { + EncodedStatistics encoded_stats; + SizeStatistics size_stats; + }; + // Plain-encoded statistics of the current page - virtual EncodedStatistics GetPageStatistics() = 0; + virtual StatisticsPair GetPageStatistics() = 0; // Plain-encoded statistics of the whole chunk - virtual EncodedStatistics GetChunkStatistics() = 0; + virtual StatisticsPair GetChunkStatistics() = 0; // Merges page statistics into chunk statistics, then resets the values virtual void ResetPageStatistics() = 0; @@ -981,8 +989,7 @@ void ColumnWriterImpl::BuildDataPageV1(int64_t definition_levels_rle_size, PARQUET_THROW_NOT_OK(uncompressed_data_->Resize(uncompressed_size, false)); ConcatenateBuffers(definition_levels_rle_size, repetition_levels_rle_size, values, uncompressed_data_->mutable_data()); - - EncodedStatistics page_stats = GetPageStatistics(); 
+ auto [page_stats, page_size_stats] = GetPageStatistics(); page_stats.ApplyStatSizeLimits(properties_->max_statistics_size(descr_->path())); page_stats.set_is_signed(SortOrder::SIGNED == descr_->sort_order()); ResetPageStatistics(); @@ -1006,13 +1013,15 @@ void ColumnWriterImpl::BuildDataPageV1(int64_t definition_levels_rle_size, compressed_data->CopySlice(0, compressed_data->size(), allocator_)); std::unique_ptr page_ptr = std::make_unique( compressed_data_copy, num_values, encoding_, Encoding::RLE, Encoding::RLE, - uncompressed_size, std::move(page_stats), first_row_index); + uncompressed_size, std::move(page_stats), first_row_index, + std::move(page_size_stats)); total_compressed_bytes_ += page_ptr->size() + sizeof(format::PageHeader); data_pages_.push_back(std::move(page_ptr)); } else { // Eagerly write pages DataPageV1 page(compressed_data, num_values, encoding_, Encoding::RLE, Encoding::RLE, - uncompressed_size, std::move(page_stats), first_row_index); + uncompressed_size, std::move(page_stats), first_row_index, + std::move(page_size_stats)); WriteDataPage(page); } } @@ -1039,7 +1048,7 @@ void ColumnWriterImpl::BuildDataPageV2(int64_t definition_levels_rle_size, ConcatenateBuffers(definition_levels_rle_size, repetition_levels_rle_size, compressed_values, combined->mutable_data()); - EncodedStatistics page_stats = GetPageStatistics(); + auto [page_stats, page_size_stats] = GetPageStatistics(); page_stats.ApplyStatSizeLimits(properties_->max_statistics_size(descr_->path())); page_stats.set_is_signed(SortOrder::SIGNED == descr_->sort_order()); ResetPageStatistics(); @@ -1062,14 +1071,15 @@ void ColumnWriterImpl::BuildDataPageV2(int64_t definition_levels_rle_size, combined->CopySlice(0, combined->size(), allocator_)); std::unique_ptr page_ptr = std::make_unique( combined, num_values, null_count, num_rows, encoding_, def_levels_byte_length, - rep_levels_byte_length, uncompressed_size, pager_->has_compressor(), page_stats, - first_row_index); + 
rep_levels_byte_length, uncompressed_size, pager_->has_compressor(), + std::move(page_stats), first_row_index, std::move(page_size_stats)); total_compressed_bytes_ += page_ptr->size() + sizeof(format::PageHeader); data_pages_.push_back(std::move(page_ptr)); } else { DataPageV2 page(combined, num_values, null_count, num_rows, encoding_, def_levels_byte_length, rep_levels_byte_length, uncompressed_size, - pager_->has_compressor(), page_stats, first_row_index); + pager_->has_compressor(), std::move(page_stats), first_row_index, + std::move(page_size_stats)); WriteDataPage(page); } } @@ -1083,7 +1093,7 @@ int64_t ColumnWriterImpl::Close() { FlushBufferedDataPages(); - EncodedStatistics chunk_statistics = GetChunkStatistics(); + auto [chunk_statistics, chunk_size_statistics] = GetChunkStatistics(); chunk_statistics.ApplyStatSizeLimits( properties_->max_statistics_size(descr_->path())); chunk_statistics.set_is_signed(SortOrder::SIGNED == descr_->sort_order()); @@ -1092,6 +1102,9 @@ int64_t ColumnWriterImpl::Close() { if (rows_written_ > 0 && chunk_statistics.is_set()) { metadata_->SetStatistics(chunk_statistics); } + if (rows_written_ > 0 && chunk_size_statistics.is_set()) { + metadata_->SetSizeStatistics(chunk_size_statistics); + } metadata_->SetKeyValueMetadata(key_value_metadata_); pager_->Close(has_dictionary_, fallback_); } @@ -1217,6 +1230,11 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< page_statistics_ = MakeStatistics(descr_, allocator_); chunk_statistics_ = MakeStatistics(descr_, allocator_); } + if (properties->size_statistics_level() == SizeStatisticsLevel::ColumnChunk || + properties->size_statistics_level() == SizeStatisticsLevel::PageAndColumnChunk) { + page_size_statistics_ = SizeStatistics::Make(descr_); + chunk_size_statistics_ = SizeStatistics::Make(descr_); + } pages_change_on_record_boundaries_ = properties->data_page_version() == ParquetDataPageVersion::V2 || properties->page_index_enabled(descr_->path()); @@ 
-1301,6 +1319,10 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< bool single_nullable_element = (level_info_.def_level == level_info_.repeated_ancestor_def_level + 1) && leaf_field_nullable; + if (!leaf_field_nullable && leaf_array.null_count() != 0) { + return Status::Invalid("Column '", descr_->name(), + "' is declared non-nullable but contains nulls"); + } bool maybe_parent_nulls = level_info_.HasNullableValues() && !single_nullable_element; if (maybe_parent_nulls) { ARROW_ASSIGN_OR_RAISE( @@ -1351,15 +1373,26 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< total_bytes_written_ += pager_->WriteDictionaryPage(page); } - EncodedStatistics GetPageStatistics() override { - EncodedStatistics result; - if (page_statistics_) result = page_statistics_->Encode(); + StatisticsPair GetPageStatistics() override { + StatisticsPair result; + if (page_statistics_) { + result.encoded_stats = page_statistics_->Encode(); + } + if (properties_->size_statistics_level() == SizeStatisticsLevel::PageAndColumnChunk) { + ARROW_DCHECK(page_size_statistics_ != nullptr); + result.size_stats = *page_size_statistics_; + } return result; } - EncodedStatistics GetChunkStatistics() override { - EncodedStatistics result; - if (chunk_statistics_) result = chunk_statistics_->Encode(); + StatisticsPair GetChunkStatistics() override { + StatisticsPair result; + if (chunk_statistics_) { + result.encoded_stats = chunk_statistics_->Encode(); + } + if (chunk_size_statistics_) { + result.size_stats = *chunk_size_statistics_; + } return result; } @@ -1368,6 +1401,10 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< chunk_statistics_->Merge(*page_statistics_); page_statistics_->Reset(); } + if (page_size_statistics_ != nullptr) { + chunk_size_statistics_->Merge(*page_size_statistics_); + page_size_statistics_->Reset(); + } } Type::type type() const override { return descr_->physical_type(); } @@ 
-1421,6 +1458,8 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< DictEncoder* current_dict_encoder_; std::shared_ptr page_statistics_; std::shared_ptr chunk_statistics_; + std::unique_ptr page_size_statistics_; + std::shared_ptr chunk_size_statistics_; bool pages_change_on_record_boundaries_; // If writing a sequence of ::arrow::DictionaryArray to the writer, we keep the @@ -1463,6 +1502,8 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< rows_written_ += num_values; num_buffered_rows_ += num_values; } + + UpdateLevelHistogram(num_values, def_levels, rep_levels); return values_to_write; } @@ -1554,6 +1595,47 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< rows_written_ += num_levels; num_buffered_rows_ += num_levels; } + + UpdateLevelHistogram(num_levels, def_levels, rep_levels); + } + + void UpdateLevelHistogram(int64_t num_levels, const int16_t* def_levels, + const int16_t* rep_levels) const { + if (page_size_statistics_ == nullptr) { + return; + } + + auto add_levels = [](std::vector& level_histogram, + ::arrow::util::span levels) { + for (int16_t level : levels) { + ARROW_DCHECK_LT(level, static_cast(level_histogram.size())); + ++level_histogram[level]; + } + }; + + if (descr_->max_definition_level() > 0) { + add_levels(page_size_statistics_->definition_level_histogram, + {def_levels, static_cast(num_levels)}); + } else { + page_size_statistics_->definition_level_histogram[0] += num_levels; + } + + if (descr_->max_repetition_level() > 0) { + add_levels(page_size_statistics_->repetition_level_histogram, + {rep_levels, static_cast(num_levels)}); + } else { + page_size_statistics_->repetition_level_histogram[0] += num_levels; + } + } + + // Update the unencoded data bytes for ByteArray only per the specification. 
+ void UpdateUnencodedDataBytes() const { + if constexpr (std::is_same_v) { + if (page_size_statistics_ != nullptr) { + page_size_statistics_->IncrementUnencodedByteArrayDataBytes( + current_encoder_->ReportUnencodedDataBytes()); + } + } } void CommitWriteAndCheckPageLimit(int64_t num_levels, int64_t num_values, @@ -1607,6 +1689,7 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< if (page_statistics_ != nullptr) { page_statistics_->Update(values, num_values, num_nulls); } + UpdateUnencodedDataBytes(); } /// \brief Write values with spaces and update page statistics accordingly. @@ -1635,6 +1718,7 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< page_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, num_spaced_values, num_values, num_nulls); } + UpdateUnencodedDataBytes(); } }; @@ -1735,6 +1819,8 @@ Status TypedColumnWriterImpl::WriteArrowDictionary( writeable_indices, MaybeReplaceValidity(writeable_indices, null_count, ctx->memory_pool)); dict_encoder->PutIndices(*writeable_indices); + // Update unencoded byte array data size to size statistics + UpdateUnencodedDataBytes(); CommitWriteAndCheckPageLimit(batch_size, batch_num_values, null_count, check_page); value_offset += batch_num_spaced_values; }; @@ -2215,6 +2301,7 @@ Status TypedColumnWriterImpl::WriteArrowDense( page_statistics_->IncrementNullCount(batch_size - non_null); page_statistics_->IncrementNumValues(non_null); } + UpdateUnencodedDataBytes(); CommitWriteAndCheckPageLimit(batch_size, batch_num_values, batch_size - non_null, check_page); CheckDictionarySizeLimit(); diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index d2b3aa0dff003..25446aefd6814 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -1001,8 +1001,8 @@ TEST(TestColumnWriter, RepeatedListsUpdateSpacedBug) { auto values_data = reinterpret_cast(values_buffer->data()); 
std::shared_ptr valid_bits; - ASSERT_OK_AND_ASSIGN(valid_bits, ::arrow::internal::BytesToBits( - {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1})); + std::vector bitmap_bytes = {1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1}; + ASSERT_OK_AND_ASSIGN(valid_bits, ::arrow::internal::BytesToBits(bitmap_bytes)); // valgrind will warn about out of bounds access into def_levels_data typed_writer->WriteBatchSpaced(14, def_levels.data(), rep_levels.data(), diff --git a/cpp/src/parquet/encoder.cc b/cpp/src/parquet/encoder.cc index 89d5d44c5219c..f41eb9a19123c 100644 --- a/cpp/src/parquet/encoder.cc +++ b/cpp/src/parquet/encoder.cc @@ -79,6 +79,15 @@ class EncoderImpl : virtual public Encoder { MemoryPool* memory_pool() const override { return pool_; } + int64_t ReportUnencodedDataBytes() override { + if (descr_->physical_type() != Type::BYTE_ARRAY) { + throw ParquetException("ReportUnencodedDataBytes is only supported for BYTE_ARRAY"); + } + int64_t bytes = unencoded_byte_array_data_bytes_; + unencoded_byte_array_data_bytes_ = 0; + return bytes; + } + protected: // For accessing type-specific metadata, like FIXED_LEN_BYTE_ARRAY const ColumnDescriptor* descr_; @@ -87,6 +96,8 @@ class EncoderImpl : virtual public Encoder { /// Type length from descr const int type_length_; + /// Number of unencoded bytes written to the encoder. Used for ByteArray type only. 
+ int64_t unencoded_byte_array_data_bytes_ = 0; }; // ---------------------------------------------------------------------- @@ -132,6 +143,7 @@ class PlainEncoder : public EncoderImpl, virtual public TypedEncoder { DCHECK(length == 0 || data != nullptr) << "Value ptr cannot be NULL"; sink_.UnsafeAppend(&length, sizeof(uint32_t)); sink_.UnsafeAppend(data, static_cast(length)); + unencoded_byte_array_data_bytes_ += length; } void Put(const ByteArray& val) { @@ -513,6 +525,18 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder { static_cast(values[i + position]); } }); + + // Track unencoded bytes based on dictionary value type + if constexpr (std::is_same_v) { + // For ByteArray, need to look up actual lengths from dictionary + for (size_t idx = + buffer_position - static_cast(data.length() - data.null_count()); + idx < buffer_position; ++idx) { + memo_table_.VisitValue(buffered_indices_[idx], [&](std::string_view value) { + unencoded_byte_array_data_bytes_ += value.length(); + }); + } + } } void PutIndices(const ::arrow::Array& data) override { @@ -656,6 +680,7 @@ inline void DictEncoderImpl::PutByteArray(const void* ptr, PARQUET_THROW_NOT_OK( memo_table_.GetOrInsert(ptr, length, on_found, on_not_found, &memo_index)); buffered_indices_.push_back(memo_index); + unencoded_byte_array_data_bytes_ += length; } template <> @@ -1268,6 +1293,7 @@ class DeltaLengthByteArrayEncoder : public EncoderImpl, } length_encoder_.Put({static_cast(view.length())}, 1); PARQUET_THROW_NOT_OK(sink_.Append(view.data(), view.length())); + unencoded_byte_array_data_bytes_ += view.size(); return Status::OK(); }, []() { return Status::OK(); })); @@ -1313,6 +1339,7 @@ void DeltaLengthByteArrayEncoder::Put(const T* src, int num_values) { for (int idx = 0; idx < num_values; idx++) { sink_.UnsafeAppend(src[idx].ptr, src[idx].len); } + unencoded_byte_array_data_bytes_ += total_increment_size; } void DeltaLengthByteArrayEncoder::PutSpaced(const T* src, int num_values, @@ 
-1444,6 +1471,8 @@ class DeltaByteArrayEncoder : public EncoderImpl, virtual public TypedEncoder
(); type_length_ = descr_->type_length(); + unencoded_byte_array_data_bytes_ = 0; allocator_ = default_memory_pool(); } @@ -197,6 +198,8 @@ class TestEncodingBase : public ::testing::Test { draws_[nvalues * j + i] = draws_[i]; } } + + InitUnencodedByteArrayDataBytes(); } virtual void CheckRoundtrip() = 0; @@ -222,6 +225,16 @@ class TestEncodingBase : public ::testing::Test { } } + void InitUnencodedByteArrayDataBytes() { + // Calculate expected unencoded bytes based on type + if constexpr (std::is_same_v) { + unencoded_byte_array_data_bytes_ = 0; + for (int i = 0; i < num_values_; i++) { + unencoded_byte_array_data_bytes_ += draws_[i].len; + } + } + } + protected: MemoryPool* allocator_; @@ -235,6 +248,7 @@ class TestEncodingBase : public ::testing::Test { std::shared_ptr encode_buffer_; std::shared_ptr descr_; + int64_t unencoded_byte_array_data_bytes_; // unencoded data size for dense values }; // Member variables are not visible to templated subclasses. Possibly figure @@ -261,6 +275,10 @@ class TestPlainEncoding : public TestEncodingBase { auto decoder = MakeTypedDecoder(Encoding::PLAIN, descr_.get()); encoder->Put(draws_, num_values_); encode_buffer_ = encoder->FlushValues(); + if constexpr (std::is_same_v) { + ASSERT_EQ(encoder->ReportUnencodedDataBytes(), + this->unencoded_byte_array_data_bytes_); + } decoder->SetData(num_values_, encode_buffer_->data(), static_cast(encode_buffer_->size())); @@ -346,6 +364,10 @@ class TestDictionaryEncoding : public TestEncodingBase { AllocateBuffer(default_memory_pool(), dict_traits->dict_encoded_size()); dict_traits->WriteDict(dict_buffer_->mutable_data()); std::shared_ptr indices = encoder->FlushValues(); + if constexpr (std::is_same_v) { + ASSERT_EQ(encoder->ReportUnencodedDataBytes(), + this->unencoded_byte_array_data_bytes_); + } auto base_spaced_encoder = MakeEncoder(Type::type_num, Encoding::PLAIN, true, descr_.get()); @@ -1992,6 +2014,10 @@ class TestDeltaLengthByteArrayEncoding : public TestEncodingBase { 
encoder->Put(draws_, num_values_); encode_buffer_ = encoder->FlushValues(); + if constexpr (std::is_same_v) { + ASSERT_EQ(encoder->ReportUnencodedDataBytes(), + this->unencoded_byte_array_data_bytes_); + } decoder->SetData(num_values_, encode_buffer_->data(), static_cast(encode_buffer_->size())); @@ -2296,6 +2322,8 @@ class TestDeltaByteArrayEncoding : public TestDeltaLengthByteArrayEncoding draws_[nvalues * j + i] = draws_[i]; } } + + TestEncodingBase::InitUnencodedByteArrayDataBytes(); } Encoding::type GetEncoding() override { return Encoding::DELTA_BYTE_ARRAY; } diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index 3cc42ae370217..1c9b2323de500 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -83,8 +83,6 @@ bool IsColumnChunkFullyDictionaryEncoded(const ColumnChunkMetaData& col) { } } // namespace -// PARQUET-978: Minimize footer reads by reading 64 KB from the end of the file -static constexpr int64_t kDefaultFooterReadSize = 64 * 1024; static constexpr uint32_t kFooterSize = 8; // For PARQUET-816 @@ -482,7 +480,8 @@ class SerializedFile : public ParquetFileReader::Contents { "Parquet file size is ", source_size_, " bytes, smaller than the minimum file footer (", kFooterSize, " bytes)"); } - return std::min(source_size_, kDefaultFooterReadSize); + + return std::min(static_cast(source_size_), properties_.footer_read_size()); } // Validate the magic bytes and get the length of the full footer. 
diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 8f577be45b96d..f47c61421936c 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -37,6 +37,7 @@ #include "parquet/exception.h" #include "parquet/schema.h" #include "parquet/schema_internal.h" +#include "parquet/size_statistics.h" #include "parquet/thrift_internal.h" namespace parquet { @@ -265,6 +266,11 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { LoadEnumSafe(&encoding_stats.encoding), encoding_stats.count}); } + if (column_metadata_->__isset.size_statistics) { + size_statistics_ = + std::make_shared(FromThrift(column_metadata_->size_statistics)); + size_statistics_->Validate(descr_); + } possible_stats_ = nullptr; InitKeyValueMetadata(); } @@ -308,6 +314,10 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { return is_stats_set() ? possible_stats_ : nullptr; } + inline std::shared_ptr size_statistics() const { + return size_statistics_; + } + inline Compression::type compression() const { return LoadEnumSafe(&column_metadata_->codec); } @@ -396,6 +406,7 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { const ReaderProperties properties_; const ApplicationVersion* writer_version_; std::shared_ptr key_value_metadata_; + std::shared_ptr size_statistics_; }; std::unique_ptr ColumnChunkMetaData::Make( @@ -439,6 +450,10 @@ std::shared_ptr ColumnChunkMetaData::statistics() const { bool ColumnChunkMetaData::is_stats_set() const { return impl_->is_stats_set(); } +std::shared_ptr ColumnChunkMetaData::size_statistics() const { + return impl_->size_statistics(); +} + std::optional ColumnChunkMetaData::bloom_filter_offset() const { return impl_->bloom_filter_offset(); } @@ -1543,6 +1558,10 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { column_chunk_->meta_data.__set_statistics(ToThrift(val)); } + void SetSizeStatistics(const SizeStatistics& size_stats) { + column_chunk_->meta_data.__set_size_statistics(ToThrift(size_stats)); + } + 
void Finish(int64_t num_values, int64_t dictionary_page_offset, int64_t index_page_offset, int64_t data_page_offset, int64_t compressed_size, int64_t uncompressed_size, bool has_dictionary, @@ -1752,6 +1771,10 @@ void ColumnChunkMetaDataBuilder::SetStatistics(const EncodedStatistics& result) impl_->SetStatistics(result); } +void ColumnChunkMetaDataBuilder::SetSizeStatistics(const SizeStatistics& size_stats) { + impl_->SetSizeStatistics(size_stats); +} + void ColumnChunkMetaDataBuilder::SetKeyValueMetadata( std::shared_ptr key_value_metadata) { impl_->SetKeyValueMetadata(std::move(key_value_metadata)); diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h index dc97d816daa74..9a3964f7d6574 100644 --- a/cpp/src/parquet/metadata.h +++ b/cpp/src/parquet/metadata.h @@ -28,23 +28,9 @@ #include "parquet/encryption/type_fwd.h" #include "parquet/platform.h" #include "parquet/properties.h" -#include "parquet/schema.h" -#include "parquet/types.h" namespace parquet { -class ColumnDescriptor; -class EncodedStatistics; -class FileCryptoMetaData; -class Statistics; -class SchemaDescriptor; - -namespace schema { - -class ColumnPath; - -} // namespace schema - using KeyValueMetadata = ::arrow::KeyValueMetadata; class PARQUET_EXPORT ApplicationVersion { @@ -156,6 +142,7 @@ class PARQUET_EXPORT ColumnChunkMetaData { std::shared_ptr path_in_schema() const; bool is_stats_set() const; std::shared_ptr statistics() const; + std::shared_ptr size_statistics() const; Compression::type compression() const; // Indicate if the ColumnChunk compression is supported by the current @@ -451,6 +438,7 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder { // column metadata void SetStatistics(const EncodedStatistics& stats); + void SetSizeStatistics(const SizeStatistics& size_stats); void SetKeyValueMetadata(std::shared_ptr key_value_metadata); diff --git a/cpp/src/parquet/page_index.cc b/cpp/src/parquet/page_index.cc index afda4c6064b36..8cc819f10cacd 100644 --- 
a/cpp/src/parquet/page_index.cc +++ b/cpp/src/parquet/page_index.cc @@ -159,6 +159,22 @@ class TypedColumnIndexImpl : public TypedColumnIndex { const std::vector& max_values() const override { return max_values_; } + bool has_definition_level_histograms() const override { + return column_index_.__isset.definition_level_histograms; + } + + bool has_repetition_level_histograms() const override { + return column_index_.__isset.repetition_level_histograms; + } + + const std::vector& definition_level_histograms() const override { + return column_index_.definition_level_histograms; + } + + const std::vector& repetition_level_histograms() const override { + return column_index_.repetition_level_histograms; + } + private: /// Wrapped thrift column index. const format::ColumnIndex column_index_; @@ -178,14 +194,22 @@ class OffsetIndexImpl : public OffsetIndex { page_location.compressed_page_size, page_location.first_row_index}); } + if (offset_index.__isset.unencoded_byte_array_data_bytes) { + unencoded_byte_array_data_bytes_ = offset_index.unencoded_byte_array_data_bytes; + } } const std::vector& page_locations() const override { return page_locations_; } + const std::vector& unencoded_byte_array_data_bytes() const override { + return unencoded_byte_array_data_bytes_; + } + private: std::vector page_locations_; + std::vector unencoded_byte_array_data_bytes_; }; class RowGroupPageIndexReaderImpl : public RowGroupPageIndexReader { @@ -460,7 +484,8 @@ class ColumnIndexBuilderImpl final : public ColumnIndexBuilder { column_index_.boundary_order = format::BoundaryOrder::UNORDERED; } - void AddPage(const EncodedStatistics& stats) override { + void AddPage(const EncodedStatistics& stats, + const SizeStatistics& size_stats) override { if (state_ == BuilderState::kFinished) { throw ParquetException("Cannot add page to finished ColumnIndexBuilder."); } else if (state_ == BuilderState::kDiscarded) { @@ -493,6 +518,17 @@ class ColumnIndexBuilderImpl final : public ColumnIndexBuilder { 
column_index_.__isset.null_counts = false; column_index_.null_counts.clear(); } + + if (size_stats.is_set()) { + const auto& page_def_level_hist = size_stats.definition_level_histogram; + const auto& page_ref_level_hist = size_stats.repetition_level_histogram; + column_index_.definition_level_histograms.insert( + column_index_.definition_level_histograms.end(), page_def_level_hist.cbegin(), + page_def_level_hist.cend()); + column_index_.repetition_level_histograms.insert( + column_index_.repetition_level_histograms.end(), page_ref_level_hist.cbegin(), + page_ref_level_hist.cend()); + } } void Finish() override { @@ -533,6 +569,29 @@ class ColumnIndexBuilderImpl final : public ColumnIndexBuilder { /// Decide the boundary order from decoded min/max values. auto boundary_order = DetermineBoundaryOrder(min_values, max_values); column_index_.__set_boundary_order(ToThrift(boundary_order)); + + // Finalize level histogram. + const int64_t num_pages = column_index_.null_pages.size(); + const int64_t def_level_hist_size = column_index_.definition_level_histograms.size(); + const int64_t rep_level_hist_size = column_index_.repetition_level_histograms.size(); + if (def_level_hist_size != 0 && + def_level_hist_size != (descr_->max_definition_level() + 1) * num_pages) { + std::stringstream ss; + ss << "Invalid definition level histogram size: " << def_level_hist_size + << ", expected: " << (descr_->max_definition_level() + 1) * num_pages; + throw ParquetException(ss.str()); + } + if (rep_level_hist_size != 0 && + rep_level_hist_size != (descr_->max_repetition_level() + 1) * num_pages) { + std::stringstream ss; + ss << "Invalid repetition level histogram size: " << rep_level_hist_size + << ", expected: " << (descr_->max_repetition_level() + 1) * num_pages; + throw ParquetException(ss.str()); + } + column_index_.__isset.definition_level_histograms = + !column_index_.definition_level_histograms.empty(); + column_index_.__isset.repetition_level_histograms = + 
!column_index_.repetition_level_histograms.empty(); } void WriteTo(::arrow::io::OutputStream* sink, Encryptor* encryptor) const override { @@ -604,8 +663,8 @@ class OffsetIndexBuilderImpl final : public OffsetIndexBuilder { public: OffsetIndexBuilderImpl() = default; - void AddPage(int64_t offset, int32_t compressed_page_size, - int64_t first_row_index) override { + void AddPage(int64_t offset, int32_t compressed_page_size, int64_t first_row_index, + std::optional unencoded_byte_array_length) override { if (state_ == BuilderState::kFinished) { throw ParquetException("Cannot add page to finished OffsetIndexBuilder."); } else if (state_ == BuilderState::kDiscarded) { @@ -620,6 +679,10 @@ class OffsetIndexBuilderImpl final : public OffsetIndexBuilder { page_location.__set_compressed_page_size(compressed_page_size); page_location.__set_first_row_index(first_row_index); offset_index_.page_locations.emplace_back(std::move(page_location)); + if (unencoded_byte_array_length.has_value()) { + offset_index_.unencoded_byte_array_data_bytes.emplace_back( + unencoded_byte_array_length.value()); + } } void Finish(int64_t final_position) override { @@ -636,6 +699,19 @@ class OffsetIndexBuilderImpl final : public OffsetIndexBuilder { page_location.__set_offset(page_location.offset + final_position); } } + + // Finalize unencoded_byte_array_data_bytes and make sure page sizes match. 
+ if (offset_index_.page_locations.size() == + offset_index_.unencoded_byte_array_data_bytes.size()) { + offset_index_.__isset.unencoded_byte_array_data_bytes = true; + } else if (!offset_index_.unencoded_byte_array_data_bytes.empty()) { + std::stringstream ss; + ss << "Invalid count of unencoded BYTE_ARRAY data bytes: " + << offset_index_.unencoded_byte_array_data_bytes.size() + << ", expected page count: " << offset_index_.page_locations.size(); + throw ParquetException(ss.str()); + } + state_ = BuilderState::kFinished; break; } @@ -813,6 +889,14 @@ class PageIndexBuilderImpl final : public PageIndexBuilder { } // namespace +void OffsetIndexBuilder::AddPage(const PageLocation& page_location, + const SizeStatistics& size_stats) { + this->AddPage( + page_location.offset, page_location.compressed_page_size, + page_location.first_row_index, + size_stats.is_set() ? size_stats.unencoded_byte_array_data_bytes : std::nullopt); +} + RowGroupIndexReadRange PageIndexReader::DeterminePageIndexRangesInRowGroup( const RowGroupMetaData& row_group_metadata, const std::vector& columns) { int64_t ci_start = std::numeric_limits::max(); diff --git a/cpp/src/parquet/page_index.h b/cpp/src/parquet/page_index.h index d45c59cab223f..3083159783ba7 100644 --- a/cpp/src/parquet/page_index.h +++ b/cpp/src/parquet/page_index.h @@ -19,6 +19,7 @@ #include "arrow/io/interfaces.h" #include "parquet/encryption/type_fwd.h" +#include "parquet/type_fwd.h" #include "parquet/types.h" #include @@ -26,9 +27,6 @@ namespace parquet { -class EncodedStatistics; -struct PageIndexLocation; - /// \brief ColumnIndex is a proxy around format::ColumnIndex. class PARQUET_EXPORT ColumnIndex { public: @@ -76,6 +74,18 @@ class PARQUET_EXPORT ColumnIndex { /// \brief A vector of page indices for non-null pages. virtual const std::vector& non_null_page_indices() const = 0; + + /// \brief Whether definition level histogram is available. 
+ virtual bool has_definition_level_histograms() const = 0; + + /// \brief Whether repetition level histogram is available. + virtual bool has_repetition_level_histograms() const = 0; + + /// \brief List of definition level histograms for each page concatenated together. + virtual const std::vector& definition_level_histograms() const = 0; + + /// \brief List of repetition level histograms for each page concatenated together. + virtual const std::vector& repetition_level_histograms() const = 0; }; /// \brief Typed implementation of ColumnIndex. @@ -129,6 +139,10 @@ class PARQUET_EXPORT OffsetIndex { /// \brief A vector of locations for each data page in this column. virtual const std::vector& page_locations() const = 0; + + /// \brief A vector of unencoded/uncompressed size of each page for BYTE_ARRAY types, + /// or empty for other types. + virtual const std::vector& unencoded_byte_array_data_bytes() const = 0; }; /// \brief Interface for reading the page index for a Parquet row group. @@ -266,7 +280,9 @@ class PARQUET_EXPORT ColumnIndexBuilder { /// not update statistics anymore. /// /// \param stats Page statistics in the encoded form. - virtual void AddPage(const EncodedStatistics& stats) = 0; + /// \param size_stats Size statistics of the page if available. + virtual void AddPage(const EncodedStatistics& stats, + const SizeStatistics& size_stats) = 0; /// \brief Complete the column index. /// @@ -299,15 +315,13 @@ class PARQUET_EXPORT OffsetIndexBuilder { virtual ~OffsetIndexBuilder() = default; - /// \brief Add page location of a data page. + /// \brief Add page location and size stats of a data page. virtual void AddPage(int64_t offset, int32_t compressed_page_size, - int64_t first_row_index) = 0; + int64_t first_row_index, + std::optional unencoded_byte_array_length = {}) = 0; - /// \brief Add page location of a data page. 
- void AddPage(const PageLocation& page_location) { - AddPage(page_location.offset, page_location.compressed_page_size, - page_location.first_row_index); - } + /// \brief Add page location and size stats of a data page. + void AddPage(const PageLocation& page_location, const SizeStatistics& size_stats); /// \brief Complete the offset index. /// diff --git a/cpp/src/parquet/page_index_benchmark.cc b/cpp/src/parquet/page_index_benchmark.cc index 5631034105056..e94fa0365d189 100644 --- a/cpp/src/parquet/page_index_benchmark.cc +++ b/cpp/src/parquet/page_index_benchmark.cc @@ -82,7 +82,7 @@ void BM_ReadColumnIndex(::benchmark::State& state) { GenerateBenchmarkData(values_per_page, /*seed=*/0, values.data(), &heap, kDataStringLength); stats->Update(values.data(), values_per_page, /*null_count=*/0); - builder->AddPage(stats->Encode()); + builder->AddPage(stats->Encode(), /*size_stats=*/{}); } builder->Finish(); diff --git a/cpp/src/parquet/page_index_test.cc b/cpp/src/parquet/page_index_test.cc index 4db49b4267415..916e28f8cea8e 100644 --- a/cpp/src/parquet/page_index_test.cc +++ b/cpp/src/parquet/page_index_test.cc @@ -419,15 +419,20 @@ TEST(PageIndex, DeterminePageIndexRangesInRowGroupWithMissingPageIndex) { -1); } -TEST(PageIndex, WriteOffsetIndex) { +void TestWriteOffsetIndex(bool write_size_stats) { /// Create offset index via the OffsetIndexBuilder interface. auto builder = OffsetIndexBuilder::Make(); const size_t num_pages = 5; const std::vector offsets = {100, 200, 300, 400, 500}; const std::vector page_sizes = {1024, 2048, 3072, 4096, 8192}; const std::vector first_row_indices = {0, 10000, 20000, 30000, 40000}; + const std::vector unencoded_byte_array_lengths = {1111, 2222, 0, 3333, 4444}; for (size_t i = 0; i < num_pages; ++i) { - builder->AddPage(offsets[i], page_sizes[i], first_row_indices[i]); + auto unencoded_byte_array_length = + write_size_stats ? 
std::make_optional(unencoded_byte_array_lengths[i]) + : std::nullopt; + builder->AddPage(offsets[i], page_sizes[i], first_row_indices[i], + unencoded_byte_array_length); } const int64_t final_position = 4096; builder->Finish(final_position); @@ -446,23 +451,73 @@ TEST(PageIndex, WriteOffsetIndex) { /// Verify the data of the offset index. for (const auto& offset_index : offset_indexes) { ASSERT_EQ(num_pages, offset_index->page_locations().size()); + if (write_size_stats) { + ASSERT_EQ(num_pages, offset_index->unencoded_byte_array_data_bytes().size()); + } else { + ASSERT_TRUE(offset_index->unencoded_byte_array_data_bytes().empty()); + } for (size_t i = 0; i < num_pages; ++i) { const auto& page_location = offset_index->page_locations().at(i); ASSERT_EQ(offsets[i] + final_position, page_location.offset); ASSERT_EQ(page_sizes[i], page_location.compressed_page_size); ASSERT_EQ(first_row_indices[i], page_location.first_row_index); + if (write_size_stats) { + ASSERT_EQ(unencoded_byte_array_lengths[i], + offset_index->unencoded_byte_array_data_bytes()[i]); + } } } } +TEST(PageIndex, WriteOffsetIndexWithoutSizeStats) { + TestWriteOffsetIndex(/*write_size_stats=*/false); +} + +TEST(PageIndex, WriteOffsetIndexWithSizeStats) { + TestWriteOffsetIndex(/*write_size_stats=*/true); +} + +struct PageLevelHistogram { + std::vector def_levels; + std::vector rep_levels; +}; + +std::unique_ptr ConstructFakeSizeStatistics( + const ColumnDescriptor* descr, const PageLevelHistogram& page_level_histogram) { + auto stats = SizeStatistics::Make(descr); + stats->definition_level_histogram = page_level_histogram.def_levels; + stats->repetition_level_histogram = page_level_histogram.rep_levels; + return stats; +} + +void VerifyPageLevelHistogram(size_t page_id, + const std::vector& expected_page_levels, + const std::vector& all_page_levels) { + const size_t max_level = expected_page_levels.size() - 1; + const size_t offset = page_id * (max_level + 1); + for (size_t level = 0; level <= 
max_level; ++level) { + ASSERT_EQ(expected_page_levels[level], all_page_levels[offset + level]); + } +} + void TestWriteTypedColumnIndex(schema::NodePtr node, const std::vector& page_stats, - BoundaryOrder::type boundary_order, bool has_null_counts) { - auto descr = std::make_unique(node, /*max_definition_level=*/1, 0); - + BoundaryOrder::type boundary_order, bool has_null_counts, + int16_t max_definition_level = 1, + int16_t max_repetition_level = 0, + const std::vector& page_levels = {}) { + const bool build_size_stats = !page_levels.empty(); + if (build_size_stats) { + ASSERT_EQ(page_levels.size(), page_stats.size()); + } + auto descr = std::make_unique(node, max_definition_level, + max_repetition_level); auto builder = ColumnIndexBuilder::Make(descr.get()); - for (const auto& stats : page_stats) { - builder->AddPage(stats); + for (size_t i = 0; i < page_stats.size(); ++i) { + auto size_stats = build_size_stats + ? ConstructFakeSizeStatistics(descr.get(), page_levels[i]) + : std::make_unique(); + builder->AddPage(page_stats[i], *size_stats); } ASSERT_NO_THROW(builder->Finish()); @@ -482,6 +537,13 @@ void TestWriteTypedColumnIndex(schema::NodePtr node, ASSERT_EQ(boundary_order, column_index->boundary_order()); ASSERT_EQ(has_null_counts, column_index->has_null_counts()); const size_t num_pages = column_index->null_pages().size(); + if (build_size_stats) { + ASSERT_EQ(num_pages * (max_repetition_level + 1), + column_index->repetition_level_histograms().size()); + ASSERT_EQ(num_pages * (max_definition_level + 1), + column_index->definition_level_histograms().size()); + } + for (size_t i = 0; i < num_pages; ++i) { ASSERT_EQ(page_stats[i].all_null_value, column_index->null_pages()[i]); ASSERT_EQ(page_stats[i].min(), column_index->encoded_min_values()[i]); @@ -489,6 +551,12 @@ void TestWriteTypedColumnIndex(schema::NodePtr node, if (has_null_counts) { ASSERT_EQ(page_stats[i].null_count, column_index->null_counts()[i]); } + if (build_size_stats) { + 
ASSERT_NO_FATAL_FAILURE(VerifyPageLevelHistogram( + i, page_levels[i].def_levels, column_index->definition_level_histograms())); + ASSERT_NO_FATAL_FAILURE(VerifyPageLevelHistogram( + i, page_levels[i].rep_levels, column_index->repetition_level_histograms())); + } } } } @@ -640,7 +708,7 @@ TEST(PageIndex, WriteColumnIndexWithCorruptedStats) { ColumnDescriptor descr(schema::Int32("c1"), /*max_definition_level=*/1, 0); auto builder = ColumnIndexBuilder::Make(&descr); for (const auto& stats : page_stats) { - builder->AddPage(stats); + builder->AddPage(stats, SizeStatistics()); } ASSERT_NO_THROW(builder->Finish()); ASSERT_EQ(nullptr, builder->Build()); @@ -651,6 +719,31 @@ TEST(PageIndex, WriteColumnIndexWithCorruptedStats) { EXPECT_EQ(0, buffer->size()); } +TEST(PageIndex, WriteInt64ColumnIndexWithSizeStats) { + auto encode = [=](int64_t value) { + return std::string(reinterpret_cast(&value), sizeof(int64_t)); + }; + + // Integer values in the descending order. + std::vector page_stats(3); + page_stats.at(0).set_null_count(4).set_min(encode(-1)).set_max(encode(-2)); + page_stats.at(1).set_null_count(0).set_min(encode(-2)).set_max(encode(-3)); + page_stats.at(2).set_null_count(4).set_min(encode(-3)).set_max(encode(-4)); + + // Page level histograms. 
+ std::vector page_levels; + page_levels.push_back( + PageLevelHistogram{/*def_levels=*/{2, 4, 6, 8}, /*rep_levels=*/{10, 5, 5}}); + page_levels.push_back( + PageLevelHistogram{/*def_levels=*/{1, 3, 5, 7}, /*rep_levels=*/{4, 8, 4}}); + page_levels.push_back( + PageLevelHistogram{/*def_levels=*/{0, 2, 4, 6}, /*rep_levels=*/{3, 4, 5}}); + + TestWriteTypedColumnIndex(schema::Int64("c1"), page_stats, BoundaryOrder::Descending, + /*has_null_counts=*/true, /*max_definition_level=*/3, + /*max_repetition_level=*/2, page_levels); +} + TEST(PageIndex, TestPageIndexBuilderWithZeroRowGroup) { schema::NodeVector fields = {schema::Int32("c1"), schema::ByteArray("c2")}; schema::NodePtr root = schema::GroupNode::Make("schema", Repetition::REPEATED, fields); @@ -689,14 +782,15 @@ class PageIndexBuilderTest : public ::testing::Test { for (int column = 0; column < num_columns; ++column) { if (static_cast(column) < page_stats[row_group].size()) { auto column_index_builder = builder->GetColumnIndexBuilder(column); - ASSERT_NO_THROW(column_index_builder->AddPage(page_stats[row_group][column])); + ASSERT_NO_THROW( + column_index_builder->AddPage(page_stats[row_group][column], {})); ASSERT_NO_THROW(column_index_builder->Finish()); } if (static_cast(column) < page_locations[row_group].size()) { auto offset_index_builder = builder->GetOffsetIndexBuilder(column); - ASSERT_NO_THROW( - offset_index_builder->AddPage(page_locations[row_group][column])); + ASSERT_NO_THROW(offset_index_builder->AddPage(page_locations[row_group][column], + /*size_stats=*/{})); ASSERT_NO_THROW(offset_index_builder->Finish(final_position)); } } diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h index 7f2e371df66d7..c942010396826 100644 --- a/cpp/src/parquet/properties.h +++ b/cpp/src/parquet/properties.h @@ -47,6 +47,16 @@ namespace parquet { /// DataPageV2 at all. enum class ParquetDataPageVersion { V1, V2 }; +/// Controls the level of size statistics that are written to the file. 
+enum class SizeStatisticsLevel : uint8_t { + // No size statistics are written. + None = 0, + // Only column chunk size statistics are written. + ColumnChunk, + // Both size statistics in the column chunk and page index are written. + PageAndColumnChunk +}; + /// Align the default buffer size to a small multiple of a page size. constexpr int64_t kDefaultBufferSize = 4096 * 4; @@ -56,6 +66,9 @@ constexpr int32_t kDefaultThriftStringSizeLimit = 100 * 1000 * 1000; // kDefaultStringSizeLimit. constexpr int32_t kDefaultThriftContainerSizeLimit = 1000 * 1000; +// PARQUET-978: Minimize footer reads by reading 64 KB from the end of the file +constexpr int64_t kDefaultFooterReadSize = 64 * 1024; + class PARQUET_EXPORT ReaderProperties { public: explicit ReaderProperties(MemoryPool* pool = ::arrow::default_memory_pool()) @@ -120,6 +133,12 @@ class PARQUET_EXPORT ReaderProperties { page_checksum_verification_ = check_crc; } + // Set the default read size to read the footer from a file. For high latency + // file systems and files with large metadata (>64KB) this can increase performance + // by reducing the number of round-trips to retrieve the entire file metadata. + void set_footer_read_size(size_t size) { footer_read_size_ = size; } + size_t footer_read_size() const { return footer_read_size_; } + private: MemoryPool* pool_; int64_t buffer_size_ = kDefaultBufferSize; @@ -129,6 +148,7 @@ class PARQUET_EXPORT ReaderProperties { bool page_checksum_verification_ = false; // Used with a RecordReader. 
bool read_dense_for_nullable_ = false; + size_t footer_read_size_ = kDefaultFooterReadSize; std::shared_ptr file_decryption_properties_; }; @@ -237,7 +257,8 @@ class PARQUET_EXPORT WriterProperties { data_page_version_(ParquetDataPageVersion::V1), created_by_(DEFAULT_CREATED_BY), store_decimal_as_integer_(false), - page_checksum_enabled_(false) {} + page_checksum_enabled_(false), + size_statistics_level_(SizeStatisticsLevel::None) {} explicit Builder(const WriterProperties& properties) : pool_(properties.memory_pool()), @@ -639,6 +660,16 @@ class PARQUET_EXPORT WriterProperties { return this->disable_write_page_index(path->ToDotString()); } + /// \brief Set the level to write size statistics for all columns. Default is None. + /// + /// \param level The level to write size statistics. Note that if page index is not + /// enabled, page level size statistics will not be written even if the level + /// is set to PageAndColumnChunk. + Builder* set_size_statistics_level(SizeStatisticsLevel level) { + size_statistics_level_ = level; + return this; + } + /// \brief Build the WriterProperties with the builder parameters. /// \return The WriterProperties defined by the builder. 
std::shared_ptr build() { @@ -665,9 +696,9 @@ class PARQUET_EXPORT WriterProperties { return std::shared_ptr(new WriterProperties( pool_, dictionary_pagesize_limit_, write_batch_size_, max_row_group_length_, pagesize_, version_, created_by_, page_checksum_enabled_, - std::move(file_encryption_properties_), default_column_properties_, - column_properties, data_page_version_, store_decimal_as_integer_, - std::move(sorting_columns_))); + size_statistics_level_, std::move(file_encryption_properties_), + default_column_properties_, column_properties, data_page_version_, + store_decimal_as_integer_, std::move(sorting_columns_))); } private: @@ -681,6 +712,7 @@ class PARQUET_EXPORT WriterProperties { std::string created_by_; bool store_decimal_as_integer_; bool page_checksum_enabled_; + SizeStatisticsLevel size_statistics_level_; std::shared_ptr file_encryption_properties_; @@ -719,6 +751,10 @@ class PARQUET_EXPORT WriterProperties { inline bool page_checksum_enabled() const { return page_checksum_enabled_; } + inline SizeStatisticsLevel size_statistics_level() const { + return size_statistics_level_; + } + inline Encoding::type dictionary_index_encoding() const { if (parquet_version_ == ParquetVersion::PARQUET_1_0) { return Encoding::PLAIN_DICTIONARY; @@ -812,6 +848,7 @@ class PARQUET_EXPORT WriterProperties { MemoryPool* pool, int64_t dictionary_pagesize_limit, int64_t write_batch_size, int64_t max_row_group_length, int64_t pagesize, ParquetVersion::type version, const std::string& created_by, bool page_write_checksum_enabled, + SizeStatisticsLevel size_statistics_level, std::shared_ptr file_encryption_properties, const ColumnProperties& default_column_properties, const std::unordered_map& column_properties, @@ -827,6 +864,7 @@ class PARQUET_EXPORT WriterProperties { parquet_created_by_(created_by), store_decimal_as_integer_(store_short_decimal_as_integer), page_checksum_enabled_(page_write_checksum_enabled), + size_statistics_level_(size_statistics_level), 
file_encryption_properties_(file_encryption_properties), sorting_columns_(std::move(sorting_columns)), default_column_properties_(default_column_properties), @@ -842,6 +880,7 @@ class PARQUET_EXPORT WriterProperties { std::string parquet_created_by_; bool store_decimal_as_integer_; bool page_checksum_enabled_; + SizeStatisticsLevel size_statistics_level_; std::shared_ptr file_encryption_properties_; diff --git a/cpp/src/parquet/properties_test.cc b/cpp/src/parquet/properties_test.cc index b2c574413abf7..35fc11565914e 100644 --- a/cpp/src/parquet/properties_test.cc +++ b/cpp/src/parquet/properties_test.cc @@ -35,6 +35,7 @@ TEST(TestReaderProperties, Basics) { ReaderProperties props; ASSERT_EQ(props.buffer_size(), kDefaultBufferSize); + ASSERT_EQ(props.footer_read_size(), kDefaultFooterReadSize); ASSERT_FALSE(props.is_buffered_stream_enabled()); ASSERT_FALSE(props.page_checksum_verification()); } diff --git a/cpp/src/parquet/size_statistics.cc b/cpp/src/parquet/size_statistics.cc new file mode 100644 index 0000000000000..a02cef7aba46f --- /dev/null +++ b/cpp/src/parquet/size_statistics.cc @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "parquet/size_statistics.h" + +#include + +#include "arrow/util/logging.h" +#include "parquet/exception.h" +#include "parquet/schema.h" + +namespace parquet { + +void SizeStatistics::Merge(const SizeStatistics& other) { + if (repetition_level_histogram.size() != other.repetition_level_histogram.size()) { + throw ParquetException("Repetition level histogram size mismatch"); + } + if (definition_level_histogram.size() != other.definition_level_histogram.size()) { + throw ParquetException("Definition level histogram size mismatch"); + } + if (unencoded_byte_array_data_bytes.has_value() != + other.unencoded_byte_array_data_bytes.has_value()) { + throw ParquetException("Unencoded byte array data bytes are not consistent"); + } + std::transform(repetition_level_histogram.begin(), repetition_level_histogram.end(), + other.repetition_level_histogram.begin(), + repetition_level_histogram.begin(), std::plus<>()); + std::transform(definition_level_histogram.begin(), definition_level_histogram.end(), + other.definition_level_histogram.begin(), + definition_level_histogram.begin(), std::plus<>()); + if (unencoded_byte_array_data_bytes.has_value()) { + unencoded_byte_array_data_bytes = unencoded_byte_array_data_bytes.value() + + other.unencoded_byte_array_data_bytes.value(); + } +} + +void SizeStatistics::IncrementUnencodedByteArrayDataBytes(int64_t value) { + ARROW_CHECK(unencoded_byte_array_data_bytes.has_value()); + unencoded_byte_array_data_bytes = unencoded_byte_array_data_bytes.value() + value; +} + +void SizeStatistics::Validate(const ColumnDescriptor* descr) const { + if (repetition_level_histogram.size() != + static_cast(descr->max_repetition_level() + 1)) { + throw ParquetException("Repetition level histogram size mismatch"); + } + if (definition_level_histogram.size() != + static_cast(descr->max_definition_level() + 1)) { + throw ParquetException("Definition level histogram size mismatch"); + } + if (unencoded_byte_array_data_bytes.has_value() && + 
descr->physical_type() != Type::BYTE_ARRAY) { + throw ParquetException("Unencoded byte array data bytes does not support " + + TypeToString(descr->physical_type())); + } + if (!unencoded_byte_array_data_bytes.has_value() && + descr->physical_type() == Type::BYTE_ARRAY) { + throw ParquetException("Missing unencoded byte array data bytes"); + } +} + +void SizeStatistics::Reset() { + repetition_level_histogram.assign(repetition_level_histogram.size(), 0); + definition_level_histogram.assign(definition_level_histogram.size(), 0); + if (unencoded_byte_array_data_bytes.has_value()) { + unencoded_byte_array_data_bytes = 0; + } +} + +std::unique_ptr SizeStatistics::Make(const ColumnDescriptor* descr) { + auto size_stats = std::make_unique(); + size_stats->repetition_level_histogram.resize(descr->max_repetition_level() + 1, 0); + size_stats->definition_level_histogram.resize(descr->max_definition_level() + 1, 0); + if (descr->physical_type() == Type::BYTE_ARRAY) { + size_stats->unencoded_byte_array_data_bytes = 0; + } + return size_stats; +} + +} // namespace parquet diff --git a/cpp/src/parquet/size_statistics.h b/cpp/src/parquet/size_statistics.h new file mode 100644 index 0000000000000..c25e70ee36d8a --- /dev/null +++ b/cpp/src/parquet/size_statistics.h @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "parquet/platform.h" +#include "parquet/type_fwd.h" + +namespace parquet { + +/// A structure for capturing metadata for estimating the unencoded, +/// uncompressed size of data written. This is useful for readers to estimate +/// how much memory is needed to reconstruct data in their memory model and for +/// fine-grained filter push down on nested structures (the histograms contained +/// in this structure can help determine the number of nulls at a particular +/// nesting level and maximum length of lists). +struct PARQUET_EXPORT SizeStatistics { + /// When present, there is expected to be one element corresponding to each + /// definition (i.e. size=max definition+1) where each element + /// represents the number of times the definition level was observed in the + /// data. + /// + /// This field may be omitted (a.k.a. zero-length vector) if max_definition_level + /// is 0 without loss of information. + std::vector definition_level_histogram; + + /// Same as definition_level_histogram except for repetition levels. + /// + /// This field may be omitted (a.k.a. zero-length vector) if max_repetition_level + /// is 0 without loss of information. + std::vector repetition_level_histogram; + + /// The number of physical bytes stored for BYTE_ARRAY data values assuming + /// no encoding. This is exclusive of the bytes needed to store the length of + /// each byte array. In other words, this field is equivalent to the `(size + /// of PLAIN-ENCODING the byte array values) - (4 bytes * number of values + /// written)`. To determine unencoded sizes of other types readers can use + /// schema information multiplied by the number of non-null and null values. + /// The number of null/non-null values can be inferred from the histograms + /// below. 
+ /// + /// For example, if a column chunk is dictionary-encoded with dictionary + /// ["a", "bc", "cde"], and a data page contains the indices [0, 0, 1, 2], + /// then this value for that data page should be 7 (1 + 1 + 2 + 3). + /// + /// This field should only be set for types that use BYTE_ARRAY as their + /// physical type. + std::optional unencoded_byte_array_data_bytes; + + /// \brief Check if the SizeStatistics is set. + bool is_set() const { + return !repetition_level_histogram.empty() || !definition_level_histogram.empty() || + unencoded_byte_array_data_bytes.has_value(); + } + + /// \brief Increment the unencoded byte array data bytes. + void IncrementUnencodedByteArrayDataBytes(int64_t value); + + /// \brief Merge two SizeStatistics. + /// \throws ParquetException if SizeStatistics to merge is not compatible. + void Merge(const SizeStatistics& other); + + /// \brief Validate the SizeStatistics + /// \throws ParquetException if the histograms don't have the right length, + /// or if unencoded_byte_array_data_bytes is present for a non-BYTE_ARRAY column. + void Validate(const ColumnDescriptor* descr) const; + + /// \brief Reset the SizeStatistics to be empty. + void Reset(); + + /// \brief Make an empty SizeStatistics object for specific type. + static std::unique_ptr Make(const ColumnDescriptor* descr); +}; + +} // namespace parquet diff --git a/cpp/src/parquet/size_statistics_test.cc b/cpp/src/parquet/size_statistics_test.cc new file mode 100644 index 0000000000000..cefd31dce285d --- /dev/null +++ b/cpp/src/parquet/size_statistics_test.cc @@ -0,0 +1,279 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include +#include + +#include "arrow/buffer.h" +#include "arrow/table.h" +#include "arrow/testing/builder.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/util/bit_util.h" +#include "arrow/util/span.h" +#include "parquet/arrow/reader.h" +#include "parquet/arrow/reader_internal.h" +#include "parquet/arrow/schema.h" +#include "parquet/arrow/writer.h" +#include "parquet/column_writer.h" +#include "parquet/file_writer.h" +#include "parquet/page_index.h" +#include "parquet/schema.h" +#include "parquet/size_statistics.h" +#include "parquet/test_util.h" +#include "parquet/thrift_internal.h" +#include "parquet/types.h" + +namespace parquet { + +TEST(SizeStatistics, ThriftSerDe) { + const std::vector kDefLevels = {128, 64, 32, 16}; + const std::vector kRepLevels = {100, 80, 60, 40, 20}; + constexpr int64_t kUnencodedByteArrayDataBytes = 1234; + + for (const auto& descr : + {std::make_unique(schema::Int32("a"), /*max_def_level=*/3, + /*max_rep_level=*/4), + std::make_unique(schema::ByteArray("a"), /*max_def_level=*/3, + /*max_rep_level=*/4)}) { + auto size_statistics = SizeStatistics::Make(descr.get()); + size_statistics->repetition_level_histogram = kRepLevels; + size_statistics->definition_level_histogram = kDefLevels; + if (descr->physical_type() == Type::BYTE_ARRAY) { + size_statistics->IncrementUnencodedByteArrayDataBytes(kUnencodedByteArrayDataBytes); + } + auto thrift_statistics = ToThrift(*size_statistics); + auto restored_statistics = FromThrift(thrift_statistics); + 
EXPECT_EQ(restored_statistics.definition_level_histogram, kDefLevels); + EXPECT_EQ(restored_statistics.repetition_level_histogram, kRepLevels); + if (descr->physical_type() == Type::BYTE_ARRAY) { + EXPECT_TRUE(restored_statistics.unencoded_byte_array_data_bytes.has_value()); + EXPECT_EQ(restored_statistics.unencoded_byte_array_data_bytes.value(), + kUnencodedByteArrayDataBytes); + } else { + EXPECT_FALSE(restored_statistics.unencoded_byte_array_data_bytes.has_value()); + } + } +} + +bool operator==(const SizeStatistics& lhs, const SizeStatistics& rhs) { + return lhs.repetition_level_histogram == rhs.repetition_level_histogram && + lhs.definition_level_histogram == rhs.definition_level_histogram && + lhs.unencoded_byte_array_data_bytes == rhs.unencoded_byte_array_data_bytes; +} + +struct PageSizeStatistics { + std::vector def_levels; + std::vector rep_levels; + std::vector byte_array_bytes; + bool operator==(const PageSizeStatistics& other) const { + return def_levels == other.def_levels && rep_levels == other.rep_levels && + byte_array_bytes == other.byte_array_bytes; + } +}; + +class SizeStatisticsRoundTripTest : public ::testing::Test { + public: + void WriteFile(SizeStatisticsLevel level, + const std::shared_ptr<::arrow::Table>& table) { + auto writer_properties = WriterProperties::Builder() + .max_row_group_length(2) /* every row group has 2 rows */ + ->data_pagesize(1) /* every page has 1 row */ + ->enable_write_page_index() + ->enable_statistics() + ->set_size_statistics_level(level) + ->build(); + + // Get schema from table. + auto schema = table->schema(); + std::shared_ptr parquet_schema; + auto arrow_writer_properties = default_arrow_writer_properties(); + ASSERT_OK_NO_THROW(arrow::ToParquetSchema(schema.get(), *writer_properties, + *arrow_writer_properties, &parquet_schema)); + auto schema_node = + std::static_pointer_cast(parquet_schema->schema_root()); + + // Write table to buffer. 
+ auto sink = CreateOutputStream(); + auto pool = ::arrow::default_memory_pool(); + auto writer = ParquetFileWriter::Open(sink, schema_node, writer_properties); + std::unique_ptr arrow_writer; + ASSERT_OK(arrow::FileWriter::Make(pool, std::move(writer), schema, + arrow_writer_properties, &arrow_writer)); + ASSERT_OK_NO_THROW(arrow_writer->WriteTable(*table)); + ASSERT_OK_NO_THROW(arrow_writer->Close()); + ASSERT_OK_AND_ASSIGN(buffer_, sink->Finish()); + } + + void ReadSizeStatistics() { + auto read_properties = default_arrow_reader_properties(); + auto reader = + ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer_)); + + // Read row group size statistics in order. + auto metadata = reader->metadata(); + for (int i = 0; i < metadata->num_row_groups(); ++i) { + auto row_group_metadata = metadata->RowGroup(i); + for (int j = 0; j < metadata->num_columns(); ++j) { + auto column_metadata = row_group_metadata->ColumnChunk(j); + auto size_stats = column_metadata->size_statistics(); + row_group_stats_.push_back(size_stats ? *size_stats : SizeStatistics{}); + } + } + + // Read page size statistics in order. 
+ auto page_index_reader = reader->GetPageIndexReader(); + ASSERT_NE(page_index_reader, nullptr); + + for (int i = 0; i < metadata->num_row_groups(); ++i) { + auto row_group_index_reader = page_index_reader->RowGroup(i); + ASSERT_NE(row_group_index_reader, nullptr); + + for (int j = 0; j < metadata->num_columns(); ++j) { + PageSizeStatistics page_stats; + + auto column_index = row_group_index_reader->GetColumnIndex(j); + if (column_index != nullptr) { + if (column_index->has_definition_level_histograms()) { + page_stats.def_levels = column_index->definition_level_histograms(); + } + if (column_index->has_repetition_level_histograms()) { + page_stats.rep_levels = column_index->repetition_level_histograms(); + } + } + + auto offset_index = row_group_index_reader->GetOffsetIndex(j); + if (offset_index != nullptr) { + page_stats.byte_array_bytes = offset_index->unencoded_byte_array_data_bytes(); + } + + page_stats_.emplace_back(std::move(page_stats)); + } + } + } + + void Reset() { + buffer_.reset(); + row_group_stats_.clear(); + page_stats_.clear(); + } + + protected: + std::shared_ptr buffer_; + std::vector row_group_stats_; + std::vector page_stats_; + inline static const SizeStatistics kEmptyRowGroupStats{}; + inline static const PageSizeStatistics kEmptyPageStats{}; +}; + +TEST_F(SizeStatisticsRoundTripTest, EnableSizeStats) { + auto schema = ::arrow::schema({ + ::arrow::field("a", ::arrow::list(::arrow::list(::arrow::int32()))), + ::arrow::field("b", ::arrow::list(::arrow::list(::arrow::utf8()))), + }); + // First two rows are in one row group, and the other two rows are in another row group. 
+ auto table = ::arrow::TableFromJSON(schema, {R"([ + [ [[1],[1,1],[1,1,1]], [["a"],["a","a"],["a","a","a"]] ], + [ [[0,1,null]], [["foo","bar",null]] ], + [ [], [] ], + [ [[],[null],null], [[],[null],null] ] + ])"}); + + for (auto size_stats_level : + {SizeStatisticsLevel::None, SizeStatisticsLevel::ColumnChunk, + SizeStatisticsLevel::PageAndColumnChunk}) { + WriteFile(size_stats_level, table); + ReadSizeStatistics(); + + if (size_stats_level == SizeStatisticsLevel::None) { + EXPECT_THAT(row_group_stats_, + ::testing::ElementsAre(kEmptyRowGroupStats, kEmptyRowGroupStats, + kEmptyRowGroupStats, kEmptyRowGroupStats)); + } else { + EXPECT_THAT(row_group_stats_, ::testing::ElementsAre( + SizeStatistics{/*def_levels=*/{0, 0, 0, 0, 1, 8}, + /*rep_levels=*/{2, 2, 5}, + /*byte_array_bytes=*/std::nullopt}, + SizeStatistics{/*def_levels=*/{0, 0, 0, 0, 1, 8}, + /*rep_levels=*/{2, 2, 5}, + /*byte_array_bytes=*/12}, + SizeStatistics{/*def_levels=*/{0, 1, 1, 1, 1, 0}, + /*rep_levels=*/{2, 2, 0}, + /*byte_array_bytes=*/std::nullopt}, + SizeStatistics{/*def_levels=*/{0, 1, 1, 1, 1, 0}, + /*rep_levels=*/{2, 2, 0}, + /*byte_array_bytes=*/0})); + } + + if (size_stats_level == SizeStatisticsLevel::PageAndColumnChunk) { + EXPECT_THAT( + page_stats_, + ::testing::ElementsAre( + PageSizeStatistics{/*def_levels=*/{0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 1, 2}, + /*rep_levels=*/{1, 2, 3, 1, 0, 2}, + /*byte_array_bytes=*/{}}, + PageSizeStatistics{/*def_levels=*/{0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 1, 2}, + /*rep_levels=*/{1, 2, 3, 1, 0, 2}, + /*byte_array_bytes=*/{6, 6}}, + PageSizeStatistics{/*def_levels=*/{0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0}, + /*rep_levels=*/{1, 0, 0, 1, 2, 0}, + /*byte_array_bytes=*/{}}, + PageSizeStatistics{/*def_levels=*/{0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0}, + /*rep_levels=*/{1, 0, 0, 1, 2, 0}, + /*byte_array_bytes=*/{0, 0}})); + } else { + EXPECT_THAT(page_stats_, ::testing::ElementsAre(kEmptyPageStats, kEmptyPageStats, + kEmptyPageStats, kEmptyPageStats)); + } + + Reset(); + } +} 
+ +TEST_F(SizeStatisticsRoundTripTest, WriteDictionaryArray) { + auto schema = ::arrow::schema( + {::arrow::field("a", ::arrow::dictionary(::arrow::int16(), ::arrow::utf8()))}); + WriteFile( + SizeStatisticsLevel::PageAndColumnChunk, + ::arrow::TableFromJSON(schema, {R"([["aa"],["aaa"],[null],["a"],["aaa"],["a"]])"})); + + ReadSizeStatistics(); + EXPECT_THAT(row_group_stats_, + ::testing::ElementsAre(SizeStatistics{/*def_levels=*/{0, 2}, + /*rep_levels=*/{2}, + /*byte_array_bytes=*/5}, + SizeStatistics{/*def_levels=*/{1, 1}, + /*rep_levels=*/{2}, + /*byte_array_bytes=*/1}, + SizeStatistics{/*def_levels=*/{0, 2}, + /*rep_levels=*/{2}, + /*byte_array_bytes=*/4})); + EXPECT_THAT(page_stats_, + ::testing::ElementsAre(PageSizeStatistics{/*def_levels=*/{0, 2}, + /*rep_levels=*/{2}, + /*byte_array_bytes=*/{5}}, + PageSizeStatistics{/*def_levels=*/{1, 1}, + /*rep_levels=*/{2}, + /*byte_array_bytes=*/{1}}, + PageSizeStatistics{/*def_levels=*/{0, 2}, + /*rep_levels=*/{2}, + /*byte_array_bytes=*/{4}})); +} + +} // namespace parquet diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index e7bfd434c81a8..744af743118e2 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -43,6 +43,7 @@ #include "parquet/exception.h" #include "parquet/platform.h" #include "parquet/properties.h" +#include "parquet/size_statistics.h" #include "parquet/statistics.h" #include "parquet/types.h" @@ -254,6 +255,14 @@ static inline SortingColumn FromThrift(format::SortingColumn thrift_sorting_colu return sorting_column; } +static inline SizeStatistics FromThrift(const format::SizeStatistics& size_stats) { + return SizeStatistics{ + size_stats.definition_level_histogram, size_stats.repetition_level_histogram, + size_stats.__isset.unencoded_byte_array_data_bytes + ? 
std::make_optional(size_stats.unencoded_byte_array_data_bytes) + : std::nullopt}; +} + // ---------------------------------------------------------------------- // Convert Thrift enums from Parquet enums @@ -383,6 +392,17 @@ static inline format::EncryptionAlgorithm ToThrift(EncryptionAlgorithm encryptio return encryption_algorithm; } +static inline format::SizeStatistics ToThrift(const SizeStatistics& size_stats) { + format::SizeStatistics size_statistics; + size_statistics.__set_definition_level_histogram(size_stats.definition_level_histogram); + size_statistics.__set_repetition_level_histogram(size_stats.repetition_level_histogram); + if (size_stats.unencoded_byte_array_data_bytes.has_value()) { + size_statistics.__set_unencoded_byte_array_data_bytes( + size_stats.unencoded_byte_array_data_bytes.value()); + } + return size_statistics; +} + // ---------------------------------------------------------------------- // Thrift struct serialization / deserialization utilities diff --git a/cpp/src/parquet/type_fwd.h b/cpp/src/parquet/type_fwd.h index da0d0f7bdee96..cda0dc5a77e1f 100644 --- a/cpp/src/parquet/type_fwd.h +++ b/cpp/src/parquet/type_fwd.h @@ -68,7 +68,10 @@ struct ParquetVersion { }; }; +struct PageIndexLocation; + class FileMetaData; +class FileCryptoMetaData; class RowGroupMetaData; class ColumnDescriptor; @@ -82,10 +85,22 @@ class WriterPropertiesBuilder; class ArrowWriterProperties; class ArrowWriterPropertiesBuilder; +class EncodedStatistics; +class Statistics; +struct SizeStatistics; + +class ColumnIndex; +class OffsetIndex; + namespace arrow { class FileWriter; class FileReader; } // namespace arrow + +namespace schema { +class ColumnPath; +} // namespace schema + } // namespace parquet diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 5d8ccb861060b..53d2034600a7a 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -158,13 +158,13 @@ DEPENDENCIES=( "ARROW_NLOHMANN_JSON_URL 
nlohmann-json-${ARROW_NLOHMANN_JSON_BUILD_VERSION}.tar.gz https://github.com/nlohmann/json/archive/refs/tags/${ARROW_NLOHMANN_JSON_BUILD_VERSION}.tar.gz" "ARROW_OPENTELEMETRY_URL opentelemetry-cpp-${ARROW_OPENTELEMETRY_BUILD_VERSION}.tar.gz https://github.com/open-telemetry/opentelemetry-cpp/archive/refs/tags/${ARROW_OPENTELEMETRY_BUILD_VERSION}.tar.gz" "ARROW_OPENTELEMETRY_PROTO_URL opentelemetry-proto-${ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION}.tar.gz https://github.com/open-telemetry/opentelemetry-proto/archive/refs/tags/${ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION}.tar.gz" - "ARROW_ORC_URL orc-${ARROW_ORC_BUILD_VERSION}.tar.gz https://archive.apache.org/dist/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz" + "ARROW_ORC_URL orc-${ARROW_ORC_BUILD_VERSION}.tar.gz https://www.apache.org/dyn/closer.lua/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz?action=download" "ARROW_PROTOBUF_URL protobuf-${ARROW_PROTOBUF_BUILD_VERSION}.tar.gz https://github.com/google/protobuf/releases/download/${ARROW_PROTOBUF_BUILD_VERSION}/protobuf-all-${ARROW_PROTOBUF_BUILD_VERSION:1}.tar.gz" "ARROW_RAPIDJSON_URL rapidjson-${ARROW_RAPIDJSON_BUILD_VERSION}.tar.gz https://github.com/miloyip/rapidjson/archive/${ARROW_RAPIDJSON_BUILD_VERSION}.tar.gz" "ARROW_RE2_URL re2-${ARROW_RE2_BUILD_VERSION}.tar.gz https://github.com/google/re2/archive/${ARROW_RE2_BUILD_VERSION}.tar.gz" "ARROW_S2N_TLS_URL s2n-${ARROW_S2N_TLS_BUILD_VERSION}.tar.gz https://github.com/aws/s2n-tls/archive/${ARROW_S2N_TLS_BUILD_VERSION}.tar.gz" "ARROW_SNAPPY_URL snappy-${ARROW_SNAPPY_BUILD_VERSION}.tar.gz https://github.com/google/snappy/archive/${ARROW_SNAPPY_BUILD_VERSION}.tar.gz" - "ARROW_THRIFT_URL thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz https://dlcdn.apache.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "ARROW_THRIFT_URL thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz 
https://www.apache.org/dyn/closer.lua/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz?action=download" "ARROW_UCX_URL ucx-${ARROW_UCX_BUILD_VERSION}.tar.gz https://github.com/openucx/ucx/archive/v${ARROW_UCX_BUILD_VERSION}.tar.gz" "ARROW_UTF8PROC_URL utf8proc-${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz https://github.com/JuliaStrings/utf8proc/archive/${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz" "ARROW_XSIMD_URL xsimd-${ARROW_XSIMD_BUILD_VERSION}.tar.gz https://github.com/xtensor-stack/xsimd/archive/${ARROW_XSIMD_BUILD_VERSION}.tar.gz" diff --git a/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj b/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj index 6a5666d8f06b2..e8c387a1f3946 100644 --- a/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj +++ b/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj @@ -13,7 +13,7 @@ - + diff --git a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj index 2bf25ee756059..cd4d316452363 100644 --- a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj +++ b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj @@ -5,7 +5,7 @@ - + diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj index 944a5add28d7b..20f659176882d 100644 --- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj +++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj @@ -5,9 +5,9 @@ - + - + diff --git a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj index 8347a5fa110a8..175f25c5bcae7 100644 --- a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj +++ b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj @@ -5,7 +5,7 @@ - + diff 
--git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index 0d39f4456a90b..2c08e109dbfe5 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -22,8 +22,8 @@ all runtime; build; native; contentfiles; analyzers - - + + diff --git a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs index db8369fa618e9..d4e06a91faeff 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs @@ -57,14 +57,15 @@ public void Ctor_LeaveOpenTrue_StreamValidOnDispose() } [Theory] - [InlineData(true, 32153)] - [InlineData(false, 32154)] - public void CanWriteToNetworkStream(bool createDictionaryArray, int port) + [InlineData(true)] + [InlineData(false)] + public void CanWriteToNetworkStream(bool createDictionaryArray) { RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray); - TcpListener listener = new TcpListener(IPAddress.Loopback, port); + TcpListener listener = new TcpListener(IPAddress.Loopback, 0); listener.Start(); + int port = ((IPEndPoint)listener.LocalEndpoint).Port; using (TcpClient sender = new TcpClient()) { @@ -92,14 +93,15 @@ public void CanWriteToNetworkStream(bool createDictionaryArray, int port) } [Theory] - [InlineData(true, 32155)] - [InlineData(false, 32156)] - public async Task CanWriteToNetworkStreamAsync(bool createDictionaryArray, int port) + [InlineData(true)] + [InlineData(false)] + public async Task CanWriteToNetworkStreamAsync(bool createDictionaryArray) { RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 100, createDictionaryArray: createDictionaryArray); - TcpListener listener = new TcpListener(IPAddress.Loopback, port); + TcpListener listener = new TcpListener(IPAddress.Loopback, 0); 
listener.Start(); + int port = ((IPEndPoint)listener.LocalEndpoint).Port; using (TcpClient sender = new TcpClient()) { diff --git a/dev/README.md b/dev/README.md index d35dd231bbc42..c813aa6417833 100644 --- a/dev/README.md +++ b/dev/README.md @@ -51,8 +51,7 @@ you'll have to install Python dependencies yourself and then run The merge script requires tokens for access control. There are two options for configuring your tokens: environment variables or a configuration file. -> Note: Arrow only requires a GitHub token. Parquet can use GitHub or -JIRA tokens. +> Note: Arrow and Parquet only requires a GitHub token. #### Pass tokens via Environment Variables @@ -61,12 +60,6 @@ The merge script uses the GitHub REST API. You must set a [Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token). You need to add `workflow` scope to the Personal Access Token. -You can specify the -[Personal Access Token](https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html) -of your JIRA account in the -`APACHE_JIRA_TOKEN` environment variable. -If the variable is not set, the script will ask you for it. 
- #### Pass tokens via configuration file ``` diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 6dfc3a56a73c7..3aa6d8a0733ff 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -733,7 +733,8 @@ def _set_default(opt, default): @click.option('--with-csharp', type=bool, default=False, help='Include C# in integration tests') @click.option('--with-java', type=bool, default=False, - help='Include Java in integration tests') + help='Include Java in integration tests', + envvar="ARCHERY_INTEGRATION_WITH_JAVA") @click.option('--with-js', type=bool, default=False, help='Include JavaScript in integration tests') @click.option('--with-go', type=bool, default=False, diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py index 55269835972ba..faf5c29744522 100644 --- a/dev/archery/archery/docker/core.py +++ b/dev/archery/archery/docker/core.py @@ -24,6 +24,7 @@ from ruamel.yaml import YAML from ..utils.command import Command, default_bin +from ..utils.logger import running_in_ci from ..utils.source import arrow_path from ..compat import _ensure_path @@ -168,6 +169,9 @@ def get(self, service_name): def __getitem__(self, service_name): return self.get(service_name) + def verbosity_args(self): + return ['--quiet'] if running_in_ci() else [] + class Docker(Command): @@ -233,7 +237,7 @@ def _execute_docker(self, *args, **kwargs): def pull(self, service_name, pull_leaf=True, ignore_pull_failures=True): def _pull(service): - args = ['pull', '--quiet'] + args = ['pull'] + self.config.verbosity_args() if service['image'] in self.pull_memory: return @@ -427,10 +431,11 @@ def run(self, service_name, command=None, *, env=None, volumes=None, def push(self, service_name, user=None, password=None): def _push(service): + args = ['push'] + self.config.verbosity_args() if self.config.using_docker: - return self._execute_docker('push', '--quiet', service['image']) + return self._execute_docker(*args, service['image']) 
else: - return self._execute_compose('push', '--quiet', service['name']) + return self._execute_compose(*args, service['name']) if user is not None: try: diff --git a/dev/archery/archery/docker/tests/test_docker.py b/dev/archery/archery/docker/tests/test_docker.py index 5dd4b1bccecbe..432d1c0a35202 100644 --- a/dev/archery/archery/docker/tests/test_docker.py +++ b/dev/archery/archery/docker/tests/test_docker.py @@ -217,6 +217,14 @@ def arrow_compose_path(tmpdir): return create_config(tmpdir, arrow_compose_yml, arrow_compose_env) +@pytest.fixture(autouse=True) +def no_ci_env_variables(monkeypatch): + """Make sure that the tests behave the same on CI as when run locally""" + monkeypatch.delenv("APPVEYOR", raising=False) + monkeypatch.delenv("BUILD_BUILDURI", raising=False) + monkeypatch.delenv("GITHUB_ACTIONS", raising=False) + + def test_config_validation(tmpdir): config_path = create_config(tmpdir, missing_service_compose_yml) msg = "`sub-foo` is defined in `x-hierarchy` bot not in `services`" @@ -270,7 +278,7 @@ def test_compose_default_params_and_env(arrow_compose_path): def test_forwarding_env_variables(arrow_compose_path): expected_calls = [ - "pull --quiet --ignore-pull-failures conda-cpp", + "pull --ignore-pull-failures conda-cpp", "build conda-cpp", ] expected_env = PartialEnv( @@ -286,38 +294,48 @@ def test_forwarding_env_variables(arrow_compose_path): compose.build('conda-cpp') -def test_compose_pull(arrow_compose_path): +def test_compose_pull(arrow_compose_path, monkeypatch): compose = DockerCompose(arrow_compose_path) expected_calls = [ - "pull --quiet --ignore-pull-failures conda-cpp", + "pull --ignore-pull-failures conda-cpp", ] with assert_compose_calls(compose, expected_calls): compose.clear_pull_memory() compose.pull('conda-cpp') expected_calls = [ - "pull --quiet --ignore-pull-failures conda-cpp", - "pull --quiet --ignore-pull-failures conda-python", - "pull --quiet --ignore-pull-failures conda-python-pandas" + "pull --ignore-pull-failures 
conda-cpp", + "pull --ignore-pull-failures conda-python", + "pull --ignore-pull-failures conda-python-pandas" ] with assert_compose_calls(compose, expected_calls): compose.clear_pull_memory() compose.pull('conda-python-pandas') expected_calls = [ - "pull --quiet --ignore-pull-failures conda-cpp", - "pull --quiet --ignore-pull-failures conda-python", + "pull --ignore-pull-failures conda-cpp", + "pull --ignore-pull-failures conda-python", ] with assert_compose_calls(compose, expected_calls): compose.clear_pull_memory() compose.pull('conda-python-pandas', pull_leaf=False) + with monkeypatch.context() as m: + # `--quiet` is passed to `docker` on CI + m.setenv("GITHUB_ACTIONS", "true") + expected_calls = [ + "pull --quiet --ignore-pull-failures conda-cpp", + ] + with assert_compose_calls(compose, expected_calls): + compose.clear_pull_memory() + compose.pull('conda-cpp') + def test_compose_pull_params(arrow_compose_path): expected_calls = [ - "pull --quiet --ignore-pull-failures conda-cpp", - "pull --quiet --ignore-pull-failures conda-python", + "pull --ignore-pull-failures conda-cpp", + "pull --ignore-pull-failures conda-python", ] compose = DockerCompose(arrow_compose_path, params=dict(UBUNTU='18.04')) expected_env = PartialEnv(PYTHON='3.8', PANDAS='latest') @@ -483,7 +501,7 @@ def test_compose_push(arrow_compose_path): for image in ["conda-cpp", "conda-python", "conda-python-pandas"]: expected_calls.append( mock.call(["docker", "compose", f"--file={compose.config.path}", - "push", "--quiet", image], check=True, env=expected_env) + "push", image], check=True, env=expected_env) ) with assert_subprocess_calls(expected_calls): compose.push('conda-python-pandas', user='user', password='pass') diff --git a/dev/archery/archery/integration/tester_java.py b/dev/archery/archery/integration/tester_java.py index 8d207d3393730..cbc76a1825a0e 100644 --- a/dev/archery/archery/integration/tester_java.py +++ b/dev/archery/archery/integration/tester_java.py @@ -24,6 +24,7 @@ from . 
import cdata from .tester import Tester, CDataExporter, CDataImporter from .util import run_cmd, log +from ..utils.source import ARROW_ROOT_DEFAULT ARROW_BUILD_ROOT = os.environ.get( @@ -34,7 +35,7 @@ def load_version_from_pom(): import xml.etree.ElementTree as ET - tree = ET.parse(os.path.join(ARROW_BUILD_ROOT, 'java', 'pom.xml')) + tree = ET.parse(os.path.join(ARROW_ROOT_DEFAULT, 'java', 'pom.xml')) tag_pattern = '{http://maven.apache.org/POM/4.0.0}version' version_tag = list(tree.getroot().findall(tag_pattern))[0] return version_tag.text diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index d6eab45e1804c..bbaba2f648f29 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -25,7 +25,6 @@ from git import Repo from github import Github -from jira import JIRA from semver import VersionInfo as SemVer from ..utils.source import ArrowSources @@ -50,14 +49,6 @@ def __init__(self, released=False, release_date=None, **kwargs): def parse(cls, version, **kwargs): return cls(**SemVer.parse(version).to_dict(), **kwargs) - @classmethod - def from_jira(cls, jira_version): - return cls.parse( - jira_version.name, - released=jira_version.released, - release_date=getattr(jira_version, 'releaseDate', None) - ) - @classmethod def from_milestone(cls, milestone): return cls.parse( @@ -76,14 +67,6 @@ def __init__(self, key, type, summary, github_issue=None): self.github_issue_id = getattr(github_issue, "number", None) self._github_issue = github_issue - @classmethod - def from_jira(cls, jira_issue): - return cls( - key=jira_issue.key, - type=jira_issue.fields.issuetype.name, - summary=jira_issue.fields.summary - ) - @classmethod def from_github(cls, github_issue): return cls( @@ -117,15 +100,6 @@ def is_pr(self): return bool(self._github_issue and self._github_issue.pull_request) -class Jira(JIRA): - - def __init__(self, url='https://issues.apache.org/jira'): - super().__init__(url) - - def issue(self, key): 
- return Issue.from_jira(super().issue(key)) - - class IssueTracker: def __init__(self, github_token=None): @@ -401,10 +375,6 @@ def commits(self): commit_range = f"{lower}..{upper}" return list(map(Commit, self.repo.iter_commits(commit_range))) - @cached_property - def jira_instance(self): - return Jira() - @cached_property def default_branch(self): default_branch_name = os.getenv("ARCHERY_DEFAULT_BRANCH") @@ -459,20 +429,12 @@ def curate(self, minimal=False): else: outside.append( (self.issue_tracker.issue(int(c.issue_id)), c)) - elif c.project == 'ARROW': - if c.issue in release_issues: - within.append((release_issues[c.issue], c)) - else: - outside.append((self.jira_instance.issue(c.issue), c)) - elif c.project == 'PARQUET': - parquet.append((self.jira_instance.issue(c.issue), c)) else: warnings.warn( - f'Issue {c.issue} does not pertain to GH' + - ', ARROW or PARQUET') + f'Issue {c.issue} does not pertain to GH') outside.append((c.issue, c)) - # remaining jira tickets + # remaining tickets within_keys = {i.key for i, c in within} # Take into account that some issues milestoned are prs nopatch = [issue for key, issue in release_issues.items() @@ -488,12 +450,10 @@ def changelog(self): # get organized report for the release curation = self.curate() - # jira tickets having patches in the release + # issues having patches in the release issue_commit_pairs.extend(curation.within) - # parquet patches in the release - issue_commit_pairs.extend(curation.parquet) - # jira tickets without patches + # issues without patches for issue in curation.nopatch: issue_commit_pairs.append((issue, None)) @@ -576,7 +536,7 @@ def cherry_pick_commits(self, recreate_branch=True): logger.info(f"Checking out branch {self.branch}") self.repo.git.checkout(self.branch) - # cherry pick the commits based on the jira tickets + # cherry pick the commits based on the GH issue for commit in self.commits_to_pick(): logger.info(f"Cherry-picking commit {commit.hexsha}") 
self.repo.git.cherry_pick(commit.hexsha) diff --git a/dev/archery/archery/release/tests/test_release.py b/dev/archery/archery/release/tests/test_release.py index 22b43c7cb3bc4..fae2bdcea04a0 100644 --- a/dev/archery/archery/release/tests/test_release.py +++ b/dev/archery/archery/release/tests/test_release.py @@ -21,7 +21,6 @@ Release, MajorRelease, MinorRelease, PatchRelease, IssueTracker, Version, Issue, CommitTitle, Commit ) -from archery.testing import DotDict # subset of issues per revision @@ -141,22 +140,6 @@ def test_issue(fake_issue_tracker): assert i.project == "PARQUET" assert i.number == 1111 - fake_jira_issue = DotDict({ - 'key': 'ARROW-2222', - 'fields': { - 'issuetype': { - 'name': 'Feature' - }, - 'summary': 'Issue title' - } - }) - i = Issue.from_jira(fake_jira_issue) - assert i.key == "ARROW-2222" - assert i.type == "Feature" - assert i.summary == "Issue title" - assert i.project == "ARROW" - assert i.number == 2222 - def test_commit_title(): t = CommitTitle.parse( diff --git a/dev/archery/archery/templates/release_changelog.md.j2 b/dev/archery/archery/templates/release_changelog.md.j2 index 0eedb217a8b84..9fa9a1476af6f 100644 --- a/dev/archery/archery/templates/release_changelog.md.j2 +++ b/dev/archery/archery/templates/release_changelog.md.j2 @@ -23,11 +23,7 @@ ## {{ category }} {% for issue, commit in issue_commit_pairs -%} -{% if issue.project in ('ARROW', 'PARQUET') -%} -* [{{ issue.key }}](https://issues.apache.org/jira/browse/{{ issue.key }}) - {{ commit.title.to_string(with_issue=False) if commit else issue.summary | md }} -{% else -%} * [GH-{{ issue.key }}](https://github.com/apache/arrow/issues/{{ issue.key }}) - {{ commit.title.to_string(with_issue=False) if commit else issue.summary | md }} -{% endif -%} {% endfor %} {% endfor %} diff --git a/dev/archery/archery/testing.py b/dev/archery/archery/testing.py index 471a54d4c72cf..3b1061ac85fa4 100644 --- a/dev/archery/archery/testing.py +++ b/dev/archery/archery/testing.py @@ -21,19 +21,6 
@@ import re -class DotDict(dict): - - def __getattr__(self, key): - try: - item = self[key] - except KeyError: - raise AttributeError(key) - if isinstance(item, dict): - return DotDict(item) - else: - return item - - class PartialEnv(dict): def __eq__(self, other): diff --git a/dev/archery/archery/utils/logger.py b/dev/archery/archery/utils/logger.py index b315a52b7a000..4ab119ea7d951 100644 --- a/dev/archery/archery/utils/logger.py +++ b/dev/archery/archery/utils/logger.py @@ -30,7 +30,23 @@ def __init__(self, quiet=False): ctx = LoggingContext() -in_github_actions = (os.environ.get("GITHUB_ACTIONS") == "true") + +# Note: detection routines for many CI services can be found +# in https://github.com/semantic-release/env-ci +def in_appveyor(): + return os.environ.get("APPVEYOR", "").lower() == "true" + + +def in_azure_pipelines(): + return os.environ.get("BUILD_BUILDURI", "") != "" + + +def in_github_actions(): + return os.environ.get("GITHUB_ACTIONS") == "true" + + +def running_in_ci(): + return in_appveyor() or in_azure_pipelines() or in_github_actions() @contextlib.contextmanager @@ -43,10 +59,10 @@ def group(name, output=None): if output is None: def output(message): print(message, flush=True) - if in_github_actions: + if in_github_actions(): output(f"::group::{name}") try: yield finally: - if in_github_actions: + if in_github_actions(): output("::endgroup::") diff --git a/dev/archery/setup.py b/dev/archery/setup.py index f1e0df6231436..6587e61546b5a 100755 --- a/dev/archery/setup.py +++ b/dev/archery/setup.py @@ -39,9 +39,9 @@ 'lint': ['numpydoc==1.1.0', 'autopep8', 'flake8==6.1.0', 'cython-lint', 'cmake_format==0.6.13', 'sphinx-lint==0.9.1'], 'numpydoc': ['numpydoc==1.1.0'], - 'release': ['pygithub', jinja_req, 'jira', 'semver', 'gitpython'], + 'release': ['pygithub', jinja_req, 'semver', 'gitpython'], } -extras['bot'] = extras['crossbow'] + ['pygithub', 'jira'] +extras['bot'] = extras['crossbow'] + ['pygithub'] extras['all'] = 
list(set(functools.reduce(operator.add, extras.values()))) setup( diff --git a/dev/merge.conf.sample b/dev/merge.conf.sample index c50ef85d70f3e..db436f31eac48 100644 --- a/dev/merge.conf.sample +++ b/dev/merge.conf.sample @@ -18,10 +18,6 @@ # Configuration for the merge_arrow_pr.py tool # Install a copy of this file at ~/.config/arrow/merge.conf -[jira] -# issues.apache.org Jira personal access token -token=abc123 - [github] # GitHub's personal access token. "workflow" scope is needed. api_token=ghp_ABC diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 6694d2373b8f1..fe1dc1e79290e 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -30,7 +30,6 @@ # variables. # # Configuration environment variables: -# - APACHE_JIRA_TOKEN: your Apache JIRA Personal Access Token # - ARROW_GITHUB_API_TOKEN: a GitHub API token to use for API requests # - ARROW_GITHUB_ORG: the GitHub organisation ('apache' by default) # - DEBUG: use for testing to avoid pushing to apache (0 by default) @@ -44,15 +43,6 @@ import requests import getpass -try: - import jira.client - import jira.exceptions -except ImportError: - print("Could not find jira library. 
" - "Run 'pip install jira' to install.") - print("Exiting without trying to close the associated JIRA.") - sys.exit(1) - # Remote name which points to the GitHub site ORG_NAME = ( os.environ.get("ARROW_GITHUB_ORG") or @@ -68,9 +58,6 @@ print("**************** DEBUGGING ****************") -JIRA_API_BASE = "https://issues.apache.org/jira" - - def get_json(url, headers=None): response = requests.get(url, headers=headers) if response.status_code != 200: @@ -135,92 +122,6 @@ def fix_version_from_branch(versions): ) -class JiraIssue(object): - - def __init__(self, jira_con, jira_id, project, cmd): - self.jira_con = jira_con - self.jira_id = jira_id - self.project = project - self.cmd = cmd - - try: - self.issue = jira_con.issue(jira_id) - except Exception as e: - self.cmd.fail("ASF JIRA could not find %s\n%s" % (jira_id, e)) - - @property - def current_fix_versions(self): - return self.issue.fields.fixVersions - - @property - def current_versions(self): - # Only suggest versions starting with a number, like 0.x but not JS-0.x - all_versions = self.jira_con.project_versions(self.project) - unreleased_versions = [x for x in all_versions - if not x.raw['released']] - - mainline_versions = self._filter_mainline_versions(unreleased_versions) - return mainline_versions - - def _filter_mainline_versions(self, versions): - if self.project == 'PARQUET': - mainline_regex = re.compile(r'cpp-\d.*') - else: - mainline_regex = re.compile(r'\d.*') - - return [x for x in versions if mainline_regex.match(x.name)] - - def resolve(self, fix_version, comment, *args): - fields = self.issue.fields - cur_status = fields.status.name - - if cur_status == "Resolved" or cur_status == "Closed": - self.cmd.fail("JIRA issue %s already has status '%s'" - % (self.jira_id, cur_status)) - - resolve = [x for x in self.jira_con.transitions(self.jira_id) - if x['name'] == "Resolve Issue"][0] - - # ARROW-6915: do not overwrite existing fix versions corresponding to - # point releases - fix_versions = [v.raw 
for v in self.jira_con.project_versions( - self.project) if v.name == fix_version] - fix_version_names = set(x['name'] for x in fix_versions) - for version in self.current_fix_versions: - major, minor, patch = version.name.split('.') - if patch != '0' and version.name not in fix_version_names: - fix_versions.append(version.raw) - - if DEBUG: - print("JIRA issue %s untouched -> %s" % - (self.jira_id, [v["name"] for v in fix_versions])) - else: - self.jira_con.transition_issue(self.jira_id, resolve["id"], - comment=comment, - fixVersions=fix_versions) - print("Successfully resolved %s!" % (self.jira_id)) - - self.issue = self.jira_con.issue(self.jira_id) - self.show() - - def show(self): - fields = self.issue.fields - print(format_issue_output("jira", self.jira_id, fields.status.name, - fields.summary, fields.assignee, - fields.components)) - - def github_issue_id(self): - try: - last_jira_comment = self.issue.fields.comment.comments[-1].body - except Exception: - # If no comment found or other issues ignore - return None - matches = MIGRATION_COMMENT_REGEX.search(last_jira_comment) - if matches: - values = matches.groupdict() - return "GH-" + values['issue_id'] - - class GitHubIssue(object): def __init__(self, github_api, github_id, cmd): @@ -328,12 +229,11 @@ def format_issue_output(issue_type, issue_id, status, else: components = ', '.join((getattr(x, "name", x) for x in components)) - if issue_type == "jira": - url = '/'.join((JIRA_API_BASE, 'browse', issue_id)) - else: - url = ( - f'https://github.com/{ORG_NAME}/{PROJECT_NAME}/issues/{issue_id}' - ) + url_id = issue_id + if "GH" in issue_id: + url_id = issue_id.replace("GH-", "") + + url = f'https://github.com/{ORG_NAME}/{PROJECT_NAME}/issues/{url_id}' return """=== {} {} === Summary\t\t{} @@ -476,19 +376,11 @@ def continue_maybe(self, prompt): class PullRequest(object): GITHUB_PR_TITLE_PATTERN = re.compile(r'^GH-([0-9]+)\b.*$') - # We can merge PARQUET patches from JIRA or GH prefixed issues - 
JIRA_SUPPORTED_PROJECTS = ['PARQUET'] - JIRA_PR_TITLE_REGEXEN = [ - (project, re.compile(r'^(' + project + r'-[0-9]+)\b.*$')) - for project in JIRA_SUPPORTED_PROJECTS - ] - JIRA_UNSUPPORTED_ARROW = re.compile(r'^(ARROW-[0-9]+)\b.*$') - - def __init__(self, cmd, github_api, git_remote, jira_con, number): + + def __init__(self, cmd, github_api, git_remote, number): self.cmd = cmd self._github_api = github_api self.git_remote = git_remote - self.con = jira_con self.number = number self._pr_data = github_api.get_pr_data(number) try: @@ -536,28 +428,8 @@ def _get_issue(self): github_id = m.group(1) return GitHubIssue(self._github_api, github_id, self.cmd) - m = self.JIRA_UNSUPPORTED_ARROW.search(self.title) - if m: - old_jira_id = m.group(1) - jira_issue = JiraIssue(self.con, old_jira_id, 'ARROW', self.cmd) - self.cmd.fail("PR titles with ARROW- prefixed tickets on JIRA " - "are unsupported, update the PR title from " - f"{old_jira_id}. Possible GitHub id could be: " - f"{jira_issue.github_issue_id()}") - - for project, regex in self.JIRA_PR_TITLE_REGEXEN: - m = regex.search(self.title) - if m: - jira_id = m.group(1) - return JiraIssue(self.con, jira_id, project, self.cmd) - - options = ' or '.join( - '{0}-XXX'.format(project) - for project in self.JIRA_SUPPORTED_PROJECTS + ["GH"] - ) - self.cmd.fail("PR title should be prefixed by a GitHub ID or a " - "Jira ID, like: {0}, but found {1}".format( - options, self.title)) + self.cmd.fail("PR title should be prefixed by a GitHub ID, like: " + "GH-XXX, but found {0}".format(self.title)) def merge(self): """ @@ -706,31 +578,6 @@ def load_configuration(): return config -def get_credentials(cmd): - token = None - - config = load_configuration() - if "jira" in config.sections(): - token = config["jira"].get("token") - - # Fallback to environment variables - if not token: - token = os.environ.get("APACHE_JIRA_TOKEN") - - # Fallback to user tty prompt - if not token: - token = cmd.prompt("Env APACHE_JIRA_TOKEN not set, " - "please 
enter your Jira API token " - "(Jira personal access token):") - - return token - - -def connect_jira(cmd): - return jira.client.JIRA(options={'server': JIRA_API_BASE}, - token_auth=get_credentials(cmd)) - - def get_pr_num(): if len(sys.argv) == 2: return sys.argv[1] @@ -752,9 +599,7 @@ def cli(): os.chdir(ARROW_HOME) github_api = GitHubAPI(PROJECT_NAME, cmd) - - jira_con = connect_jira(cmd) - pr = PullRequest(cmd, github_api, ORG_NAME, jira_con, pr_num) + pr = PullRequest(cmd, github_api, ORG_NAME, pr_num) if pr.is_merged: print("Pull request %s has already been merged" % pr_num) diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index a6e655cd36b59..2081d7ab9d9b1 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -311,34 +311,6 @@ def test_version_pre_tag ] end - Dir.glob("java/**/pom.xml") do |path| - version = "#{@snapshot_version}" - lines = File.readlines(path, chomp: true) - target_lines = lines.grep(/#{Regexp.escape(version)}/) - hunks = [] - target_lines.each do |line| - new_line = line.gsub(@snapshot_version) do - @release_version - end - hunks << [ - "-#{line}", - "+#{new_line}", - ] - end - tag = "main" - target_lines = lines.grep(/#{Regexp.escape(tag)}/) - target_lines.each do |line| - new_line = line.gsub("main") do - "apache-arrow-#{@release_version}" - end - hunks << [ - "-#{line}", - "+#{new_line}", - ] - end - expected_changes << {hunks: hunks, path: path} - end - Dir.glob("ruby/**/version.rb") do |path| version = " VERSION = \"#{@snapshot_version}\"" new_version = " VERSION = \"#{@release_version}\"" diff --git a/dev/release/02-source-test.rb b/dev/release/02-source-test.rb index 3dec19326f92b..95c1a4f448389 100644 --- a/dev/release/02-source-test.rb +++ b/dev/release/02-source-test.rb @@ -42,7 +42,7 @@ def source(*targets) targets.each do |target| env["SOURCE_#{target}"] = "1" end - sh(env, @tarball_script, @release_version, "0") + sh(env, @tarball_script, @release_version, "0") output = 
sh(env, @script, @release_version, "0") sh("tar", "xf", @archive_name) output @@ -143,13 +143,13 @@ def test_vote #{@current_commit} [2] The source release rc0 is hosted at [3]. -The binary artifacts are hosted at [4][5][6][7][8][9][10][11]. -The changelog is located at [12]. +The binary artifacts are hosted at [4][5][6][7][8][9][10]. +The changelog is located at [11]. Please download, verify checksums and signatures, run the unit tests, -and vote on the release. See [13] for how to validate a release candidate. +and vote on the release. See [12] for how to validate a release candidate. -See also a verification result on GitHub pull request [14]. +See also a verification result on GitHub pull request [13]. The vote will be open for at least 72 hours. @@ -164,13 +164,12 @@ def test_vote [5]: https://apache.jfrog.io/artifactory/arrow/amazon-linux-rc/ [6]: https://apache.jfrog.io/artifactory/arrow/centos-rc/ [7]: https://apache.jfrog.io/artifactory/arrow/debian-rc/ -[8]: https://apache.jfrog.io/artifactory/arrow/java-rc/#{@release_version}-rc0 -[9]: https://apache.jfrog.io/artifactory/arrow/nuget-rc/#{@release_version}-rc0 -[10]: https://apache.jfrog.io/artifactory/arrow/python-rc/#{@release_version}-rc0 -[11]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/ -[12]: https://github.com/apache/arrow/blob/#{@current_commit}/CHANGELOG.md -[13]: https://arrow.apache.org/docs/developers/release_verification.html -[14]: #{verify_pr_url || "null"} +[8]: https://apache.jfrog.io/artifactory/arrow/nuget-rc/#{@release_version}-rc0 +[9]: https://apache.jfrog.io/artifactory/arrow/python-rc/#{@release_version}-rc0 +[10]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/ +[11]: https://github.com/apache/arrow/blob/#{@current_commit}/CHANGELOG.md +[12]: https://arrow.apache.org/docs/developers/release_verification.html +[13]: #{verify_pr_url || "null"} VOTE end end diff --git a/dev/release/02-source.sh b/dev/release/02-source.sh index cc3f5b7cc5251..94fa6a814a61d 100755 --- 
a/dev/release/02-source.sh +++ b/dev/release/02-source.sh @@ -59,7 +59,7 @@ echo "Using commit $release_hash" tarball=apache-arrow-${version}.tar.gz if [ ${SOURCE_DOWNLOAD} -gt 0 ]; then - # Wait for the release candidate workflow to finish before attempting + # Wait for the release candidate workflow to finish before attempting # to download the tarball from the GitHub Release. . $SOURCE_DIR/utils-watch-gh-workflow.sh ${tag} "release_candidate.yml" rm -f ${tarball} @@ -163,13 +163,13 @@ This release candidate is based on commit: ${release_hash} [2] The source release rc${rc} is hosted at [3]. -The binary artifacts are hosted at [4][5][6][7][8][9][10][11]. -The changelog is located at [12]. +The binary artifacts are hosted at [4][5][6][7][8][9][10]. +The changelog is located at [11]. Please download, verify checksums and signatures, run the unit tests, -and vote on the release. See [13] for how to validate a release candidate. +and vote on the release. See [12] for how to validate a release candidate. -See also a verification result on GitHub pull request [14]. +See also a verification result on GitHub pull request [13]. The vote will be open for at least 72 hours. @@ -184,13 +184,12 @@ The vote will be open for at least 72 hours. 
[5]: https://apache.jfrog.io/artifactory/arrow/amazon-linux-rc/ [6]: https://apache.jfrog.io/artifactory/arrow/centos-rc/ [7]: https://apache.jfrog.io/artifactory/arrow/debian-rc/ -[8]: https://apache.jfrog.io/artifactory/arrow/java-rc/${version}-rc${rc} -[9]: https://apache.jfrog.io/artifactory/arrow/nuget-rc/${version}-rc${rc} -[10]: https://apache.jfrog.io/artifactory/arrow/python-rc/${version}-rc${rc} -[11]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/ -[12]: https://github.com/apache/arrow/blob/${release_hash}/CHANGELOG.md -[13]: https://arrow.apache.org/docs/developers/release_verification.html -[14]: ${verify_pr_url} +[8]: https://apache.jfrog.io/artifactory/arrow/nuget-rc/${version}-rc${rc} +[9]: https://apache.jfrog.io/artifactory/arrow/python-rc/${version}-rc${rc} +[10]: https://apache.jfrog.io/artifactory/arrow/ubuntu-rc/ +[11]: https://github.com/apache/arrow/blob/${release_hash}/CHANGELOG.md +[12]: https://arrow.apache.org/docs/developers/release_verification.html +[13]: ${verify_pr_url} MAIL echo "---------------------------------------------------------" fi diff --git a/dev/release/06-java-upload.sh b/dev/release/06-java-upload.sh deleted file mode 100755 index d0fd851da5767..0000000000000 --- a/dev/release/06-java-upload.sh +++ /dev/null @@ -1,153 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -e -set -u -set -o pipefail - -SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -if [ $# -ne 2 ]; then - echo "Usage: $0 " - exit -fi - -version=$1 -rc=$2 - -: ${UPLOAD_DEFAULT=1} -: ${UPLOAD_FORCE_SIGN=${UPLOAD_DEFAULT}} - -if [ ${UPLOAD_FORCE_SIGN} -gt 0 ]; then - pushd "${SOURCE_DIR}" - if [ ! -f .env ]; then - echo "You must create $(pwd)/.env" - echo "You can use $(pwd)/.env.example as template" - exit 1 - fi - . .env - popd -fi - -version_with_rc="${version}-rc${rc}" -crossbow_job_prefix="release-${version_with_rc}" -crossbow_package_dir="${SOURCE_DIR}/../../packages" - -: ${CROSSBOW_JOB_NUMBER:="0"} -: ${CROSSBOW_JOB_ID:="${crossbow_job_prefix}-${CROSSBOW_JOB_NUMBER}"} -: ${ARROW_ARTIFACTS_DIR:="${crossbow_package_dir}/${CROSSBOW_JOB_ID}/java-jars"} - -if [ ! -e "${ARROW_ARTIFACTS_DIR}" ]; then - echo "${ARROW_ARTIFACTS_DIR} does not exist" - exit 1 -fi - -if [ ! 
-d "${ARROW_ARTIFACTS_DIR}" ]; then - echo "${ARROW_ARTIFACTS_DIR} is not a directory" - exit 1 -fi - -pushd "${ARROW_ARTIFACTS_DIR}" - -files= -types= -classifiers= - -sign() { - local path="$1" - local classifier="$2" - local type=$(echo "${path}" | grep -o "[^.]*$") - - local asc_path="${path}.asc" - if [ ${UPLOAD_FORCE_SIGN} -gt 0 ]; then - rm -f "${asc_path}" - gpg \ - --detach-sig \ - --local-user "${GPG_KEY_ID}" \ - --output "${asc_path}" \ - "${path}" - fi - if [ -n "${files}" ]; then - files="${files}," - types="${types}," - classifiers="${classifiers}," - fi - files="${files}${asc_path}" - types="${types}${type}.asc" - classifiers="${classifiers}${classifier}" - - # .md5 and .sha1 are generated automatically on repository side. - # local sha512_path="${path}.sha512" - # shasum --algorithm 512 "${path}" > "${sha512_path}" - # files="${files},${sha512_path}" - # types="${types},${type}.sha512" - # classifiers="${classifiers},${classifier}" -} - -for pom in *.pom; do - base=$(basename ${pom} .pom) - files="" - types="" - classifiers="" - args=() - args+=(deploy:deploy-file) - args+=(-DrepositoryId=apache.releases.https) - args+=(-DretryFailedDeploymentCount=10) - args+=(-Durl=https://repository.apache.org/service/local/staging/deploy/maven2) - pom="${PWD}/${pom}" - args+=(-DpomFile="${pom}") - if [ -f "${base}.jar" ]; then - jar="${PWD}/${base}.jar" - args+=(-Dfile="${jar}") - sign "${jar}" "" - else - args+=(-Dfile="${pom}") - fi - sign "${pom}" "" - if [ "$(echo ${base}-*)" != "${base}-*" ]; then - for other_file in ${base}-*; do - file="${PWD}/${other_file}" - type=$(echo "${other_file}" | grep -o "[^.]*$") - case "${type}" in - asc|sha256|sha512) - continue - ;; - esac - classifier=$(basename "${other_file}" ".${type}" | sed -e "s/${base}-//g") - files="${files},${file}" - types="${types},${type}" - classifiers="${classifiers},${classifier}" - sign "${file}" "${classifier}" - done - fi - args+=(-Dfiles="${files}") - args+=(-Dtypes="${types}") - 
args+=(-Dclassifiers="${classifiers}") - pushd "${SOURCE_DIR}" - mvn "${args[@]}" - popd -done - -popd - -echo "Success!" -echo "Press the 'Close' button manually by Web interface:" -echo " https://repository.apache.org/#stagingRepositories" -echo "It publishes the artifacts to the staging repository:" -echo " https://repository.apache.org/content/repositories/staging/org/apache/arrow/" diff --git a/dev/release/07-matlab-upload.sh b/dev/release/06-matlab-upload.sh similarity index 100% rename from dev/release/07-matlab-upload.sh rename to dev/release/06-matlab-upload.sh diff --git a/dev/release/08-binary-verify.sh b/dev/release/07-binary-verify.sh similarity index 100% rename from dev/release/08-binary-verify.sh rename to dev/release/07-binary-verify.sh diff --git a/dev/release/download_rc_binaries.py b/dev/release/download_rc_binaries.py index 788d1df0ab3eb..473f95ae37e2e 100755 --- a/dev/release/download_rc_binaries.py +++ b/dev/release/download_rc_binaries.py @@ -121,17 +121,29 @@ def _download_url(self, url, dest_path, *, extra_args=None): dest_path, url, ] - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - stdout, stderr = proc.communicate() - if proc.returncode != 0: - try: - # Don't leave possibly partial file around - os.remove(dest_path) - except IOError: - pass - raise Exception(f"Downloading {url} failed\n" - f"stdout: {stdout}\nstderr: {stderr}") + # Retry subprocess in case it fails with OpenSSL Connection errors + # https://issues.apache.org/jira/browse/INFRA-25274 + for attempt in range(5): + if attempt > 0: + delay = attempt * 3 + print(f"Waiting {delay} seconds before retrying {url}") + time.sleep(delay) + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stderr = proc.communicate() + if proc.returncode != 0: + try: + # Don't leave possibly partial file around + os.remove(dest_path) + except IOError: + pass + if "OpenSSL" not in stderr: + # We assume curl has already retried on 
other errors. + break + else: + return + raise Exception(f"Downloading {url} failed\n" + f"stdout: {stdout}\nstderr: {stderr}") def _curl_version(self): cmd = ["curl", "--version"] diff --git a/dev/release/post-04-website.sh b/dev/release/post-04-website.sh index a2b0bd61525eb..dc5c7b58af00e 100755 --- a/dev/release/post-04-website.sh +++ b/dev/release/post-04-website.sh @@ -274,7 +274,6 @@ current: mirrors: 'https://www.apache.org/dyn/closer.lua/arrow/arrow-${version}/' tarball-name: 'apache-arrow-${version}.tar.gz' tarball-url: 'https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/arrow-${version}/apache-arrow-${version}.tar.gz' - java-artifacts: 'https://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.arrow%22%20AND%20v%3A%22${version}%22' asc: '${apache_download_url}/arrow/arrow-${version}/apache-arrow-${version}.tar.gz.asc' sha256: '${apache_download_url}/arrow/arrow-${version}/apache-arrow-${version}.tar.gz.sha256' sha512: '${apache_download_url}/arrow/arrow-${version}/apache-arrow-${version}.tar.gz.sha512' diff --git a/dev/release/post-10-docs.sh b/dev/release/post-10-docs.sh index 70c54c555f839..fdf37ce1a757a 100755 --- a/dev/release/post-10-docs.sh +++ b/dev/release/post-10-docs.sh @@ -64,9 +64,6 @@ done versioned_paths+=("docs/dev/") rm -rf docs/dev/ if [ "$is_major_release" = "yes" ] ; then - # copy the current stable docs to temporary directory - # (remove java reference to reduce size) - rm -rf docs/java/reference/ cp -r docs/ docs_temp/ fi # delete current stable docs and restore all previous versioned docs diff --git a/dev/release/post-12-bump-versions-test.rb b/dev/release/post-12-bump-versions-test.rb index 3fac1819d722c..229dc7e1e1b08 100644 --- a/dev/release/post-12-bump-versions-test.rb +++ b/dev/release/post-12-bump-versions-test.rb @@ -42,6 +42,23 @@ def bump_type (data || {})[:bump_type] end + def released_version + return @release_version if bump_type.nil? 
+ + previous_version_components = @previous_version.split(".") + case bump_type + when :minor + previous_version_components[1].succ! + when :patch + previous_version_components[2].succ! + end + previous_version_components.join(".") + end + + def released_compatible_version + compute_compatible_version(released_version) + end + def bump_versions(*targets) if targets.last.is_a?(Hash) additional_env = targets.pop @@ -55,159 +72,70 @@ def bump_versions(*targets) env = env.merge(additional_env) case bump_type when :minor, :patch - previous_version_components = @previous_version.split(".") - case bump_type - when :minor - previous_version_components[1].succ! - when :patch - previous_version_components[2].succ! - end sh(env, "dev/release/post-12-bump-versions.sh", - previous_version_components.join("."), + released_version, @release_version) else sh(env, "dev/release/post-12-bump-versions.sh", - @release_version, + released_version, @next_version) end end - data(:next_release_type, [:major, :minor, :patch]) + data("X.0.0 -> X.0.1", {next_release_type: :patch}) + data("X.0.0 -> X.1.0", {next_release_type: :minor}) + data("X.0.0 -> ${X+1}.0.0", {next_release_type: :major}) + data("X.0.1 -> ${X+1}.0.0", {bump_type: :patch}) + data("X.1.0 -> ${X+1}.0.0", {bump_type: :minor}) def test_version_post_tag omit_on_release_branch - expected_changes = [ - { - path: "c_glib/meson.build", - hunks: [ - ["-version = '#{@snapshot_version}'", - "+version = '#{@next_snapshot_version}'"], - ], - }, - { - path: "c_glib/vcpkg.json", - hunks: [ - ["- \"version-string\": \"#{@snapshot_version}\",", - "+ \"version-string\": \"#{@next_snapshot_version}\","], - ], - }, - { - path: "ci/scripts/PKGBUILD", - hunks: [ - ["-pkgver=#{@previous_version}.9000", - "+pkgver=#{@release_version}.9000"], - ], - }, - { - path: "cpp/CMakeLists.txt", - hunks: [ - ["-set(ARROW_VERSION \"#{@snapshot_version}\")", - "+set(ARROW_VERSION \"#{@next_snapshot_version}\")"], - ], - }, - { - path: "cpp/vcpkg.json", - hunks: 
[ - ["- \"version-string\": \"#{@snapshot_version}\",", - "+ \"version-string\": \"#{@next_snapshot_version}\","], - ], - }, - { - path: "csharp/Directory.Build.props", - hunks: [ - ["- #{@snapshot_version}", - "+ #{@next_snapshot_version}"], - ], - }, - { - path: "dev/tasks/homebrew-formulae/apache-arrow-glib.rb", - hunks: [ - ["- url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@snapshot_version}/apache-arrow-#{@snapshot_version}.tar.gz\"", - "+ url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@next_snapshot_version}/apache-arrow-#{@next_snapshot_version}.tar.gz\""], - ], - }, - { - path: "dev/tasks/homebrew-formulae/apache-arrow.rb", - hunks: [ - ["- url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@snapshot_version}/apache-arrow-#{@snapshot_version}.tar.gz\"", - "+ url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@next_snapshot_version}/apache-arrow-#{@next_snapshot_version}.tar.gz\""], - ], - }, - ] - unless next_release_type == :patch - expected_changes += [ + case bump_type + when :patch, :minor + expected_changes = [ { - path: "docs/source/_static/versions.json", + path: "ci/scripts/PKGBUILD", hunks: [ - [ - "- \"name\": \"#{@release_compatible_version} (dev)\",", - "+ \"name\": \"#{@next_compatible_version} (dev)\",", - "- \"name\": \"#{@previous_compatible_version} (stable)\",", - "+ \"name\": \"#{@release_compatible_version} (stable)\",", - "+ {", - "+ \"name\": \"#{@previous_compatible_version}\",", - "+ \"version\": \"#{@previous_compatible_version}/\",", - "+ \"url\": \"https://arrow.apache.org/docs/#{@previous_compatible_version}/\"", - "+ },", - ], + ["-pkgver=#{@previous_version}.9000", + "+pkgver=#{released_version}.9000"], ], }, ] - end - expected_changes += [ - { - path: "js/package.json", - hunks: [ - ["- \"version\": \"#{@snapshot_version}\"", - "+ \"version\": \"#{@next_snapshot_version}\""], - ], - }, - { - path: "matlab/CMakeLists.txt", - hunks: [ - ["-set(MLARROW_VERSION 
\"#{@snapshot_version}\")", - "+set(MLARROW_VERSION \"#{@next_snapshot_version}\")"], - ], - }, - { - path: "python/CMakeLists.txt", - hunks: [ - ["-set(PYARROW_VERSION \"#{@snapshot_version}\")", - "+set(PYARROW_VERSION \"#{@next_snapshot_version}\")"], - ], - }, - { - path: "python/pyproject.toml", - hunks: [ - ["-fallback_version = '#{@release_version}a0'", - "+fallback_version = '#{@next_version}a0'"], - ], - }, - { - path: "r/DESCRIPTION", - hunks: [ - ["-Version: #{@previous_version}.9000", - "+Version: #{@release_version}.9000"], - ], - }, - { - path: "r/NEWS.md", - hunks: [ - ["-# arrow #{@previous_version}.9000", - "+# arrow #{@release_version}.9000", - "+", - "+# arrow #{@release_version}",], - ], - }, - ] - if next_release_type == :major + if bump_type == :minor + expected_changes += [ + { + path: "docs/source/_static/versions.json", + hunks: [ + [ + "- \"name\": \"#{@previous_compatible_version} (stable)\",", + "+ \"name\": \"#{released_compatible_version} (stable)\",", + "+ {", + "+ \"name\": \"#{@previous_compatible_version}\",", + "+ \"version\": \"#{@previous_compatible_version}/\",", + "+ \"url\": \"https://arrow.apache.org/docs/#{@previous_compatible_version}/\"", + "+ },", + ], + ], + }, + ] + end expected_changes += [ { - path: "c_glib/tool/generate-version-header.py", + path: "r/DESCRIPTION", hunks: [ - ["+ (#{@next_major_version}, 0),"], + ["-Version: #{@previous_version}.9000", + "+Version: #{released_version}.9000"], + ], + }, + { + path: "r/NEWS.md", + hunks: [ + ["-# arrow #{@previous_version}.9000", + "+# arrow #{released_version}.9000", + "+", + "+# arrow #{released_version}",], ], }, { @@ -216,9 +144,8 @@ def test_version_post_tag [ "-

#{@previous_version}.9000 (dev)

", "-

#{@previous_r_version} (release)

", - "+

#{@release_version}.9000 (dev)

", - "+

#{@release_version} (release)

", - "+

#{@previous_r_version}

", + "+

#{released_version}.9000 (dev)

", + "+

#{released_version} (release)

", ], ], }, @@ -227,73 +154,214 @@ def test_version_post_tag hunks: [ [ "- \"name\": \"#{@previous_version}.9000 (dev)\",", - "+ \"name\": \"#{@release_version}.9000 (dev)\",", + "+ \"name\": \"#{released_version}.9000 (dev)\",", "- \"name\": \"#{@previous_r_version} (release)\",", - "+ \"name\": \"#{@release_version} (release)\",", - "+ {", - "+ \"name\": \"#{@previous_r_version}\",", - "+ \"version\": \"#{@previous_compatible_version}/\"", - "+ },", + "+ \"name\": \"#{released_version} (release)\",", ], ], }, ] else - expected_changes += [ + expected_changes = [ { - path: "r/pkgdown/assets/versions.html", + path: "c_glib/meson.build", hunks: [ - [ - "-

#{@previous_version}.9000 (dev)

", - "-

#{@previous_r_version} (release)

", - "+

#{@release_version}.9000 (dev)

", - "+

#{@release_version} (release)

", - ], + ["-version = '#{@snapshot_version}'", + "+version = '#{@next_snapshot_version}'"], ], }, { - path: "r/pkgdown/assets/versions.json", + path: "c_glib/vcpkg.json", hunks: [ - [ - "- \"name\": \"#{@previous_version}.9000 (dev)\",", - "+ \"name\": \"#{@release_version}.9000 (dev)\",", - "- \"name\": \"#{@previous_r_version} (release)\",", - "+ \"name\": \"#{@release_version} (release)\",", + ["- \"version-string\": \"#{@snapshot_version}\",", + "+ \"version-string\": \"#{@next_snapshot_version}\","], + ], + }, + { + path: "ci/scripts/PKGBUILD", + hunks: [ + ["-pkgver=#{@previous_version}.9000", + "+pkgver=#{@release_version}.9000"], + ], + }, + { + path: "cpp/CMakeLists.txt", + hunks: [ + ["-set(ARROW_VERSION \"#{@snapshot_version}\")", + "+set(ARROW_VERSION \"#{@next_snapshot_version}\")"], + ], + }, + { + path: "cpp/vcpkg.json", + hunks: [ + ["- \"version-string\": \"#{@snapshot_version}\",", + "+ \"version-string\": \"#{@next_snapshot_version}\","], + ], + }, + { + path: "csharp/Directory.Build.props", + hunks: [ + ["- #{@snapshot_version}", + "+ #{@next_snapshot_version}"], + ], + }, + { + path: "dev/tasks/homebrew-formulae/apache-arrow-glib.rb", + hunks: [ + ["- url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@snapshot_version}/apache-arrow-#{@snapshot_version}.tar.gz\"", + "+ url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@next_snapshot_version}/apache-arrow-#{@next_snapshot_version}.tar.gz\""], + ], + }, + { + path: "dev/tasks/homebrew-formulae/apache-arrow.rb", + hunks: [ + ["- url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@snapshot_version}/apache-arrow-#{@snapshot_version}.tar.gz\"", + "+ url \"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-#{@next_snapshot_version}/apache-arrow-#{@next_snapshot_version}.tar.gz\""], + ], + }, + ] + unless next_release_type == :patch + expected_changes += [ + { + path: "docs/source/_static/versions.json", + hunks: [ + [ + "- \"name\": 
\"#{@release_compatible_version} (dev)\",", + "+ \"name\": \"#{@next_compatible_version} (dev)\",", + "- \"name\": \"#{@previous_compatible_version} (stable)\",", + "+ \"name\": \"#{@release_compatible_version} (stable)\",", + "+ {", + "+ \"name\": \"#{@previous_compatible_version}\",", + "+ \"version\": \"#{@previous_compatible_version}/\",", + "+ \"url\": \"https://arrow.apache.org/docs/#{@previous_compatible_version}/\"", + "+ },", + ], ], + }, + ] + end + expected_changes += [ + { + path: "js/package.json", + hunks: [ + ["- \"version\": \"#{@snapshot_version}\"", + "+ \"version\": \"#{@next_snapshot_version}\""], + ], + }, + { + path: "matlab/CMakeLists.txt", + hunks: [ + ["-set(MLARROW_VERSION \"#{@snapshot_version}\")", + "+set(MLARROW_VERSION \"#{@next_snapshot_version}\")"], + ], + }, + { + path: "python/CMakeLists.txt", + hunks: [ + ["-set(PYARROW_VERSION \"#{@snapshot_version}\")", + "+set(PYARROW_VERSION \"#{@next_snapshot_version}\")"], + ], + }, + { + path: "python/pyproject.toml", + hunks: [ + ["-fallback_version = '#{@release_version}a0'", + "+fallback_version = '#{@next_version}a0'"], + ], + }, + { + path: "r/DESCRIPTION", + hunks: [ + ["-Version: #{@previous_version}.9000", + "+Version: #{@release_version}.9000"], + ], + }, + { + path: "r/NEWS.md", + hunks: [ + ["-# arrow #{@previous_version}.9000", + "+# arrow #{@release_version}.9000", + "+", + "+# arrow #{@release_version}",], ], }, ] - end - - Dir.glob("java/**/pom.xml") do |path| - version = "#{@snapshot_version}" - lines = File.readlines(path, chomp: true) - target_lines = lines.grep(/#{Regexp.escape(version)}/) - hunks = [] - target_lines.each do |line| - new_line = line.gsub(@snapshot_version) do - @next_snapshot_version - end - hunks << [ - "-#{line}", - "+#{new_line}", + if next_release_type == :major + expected_changes += [ + { + path: "c_glib/tool/generate-version-header.py", + hunks: [ + ["+ (#{@next_major_version}, 0),"], + ], + }, + { + path: "r/pkgdown/assets/versions.html", + 
hunks: [ + [ + "-

#{@previous_version}.9000 (dev)

", + "-

#{@previous_r_version} (release)

", + "+

#{@release_version}.9000 (dev)

", + "+

#{@release_version} (release)

", + "+

#{@previous_r_version}

", + ], + ], + }, + { + path: "r/pkgdown/assets/versions.json", + hunks: [ + [ + "- \"name\": \"#{@previous_version}.9000 (dev)\",", + "+ \"name\": \"#{@release_version}.9000 (dev)\",", + "- \"name\": \"#{@previous_r_version} (release)\",", + "+ \"name\": \"#{@release_version} (release)\",", + "+ {", + "+ \"name\": \"#{@previous_r_version}\",", + "+ \"version\": \"#{@previous_compatible_version}/\"", + "+ },", + ], + ], + }, + ] + else + expected_changes += [ + { + path: "r/pkgdown/assets/versions.html", + hunks: [ + [ + "-

#{@previous_version}.9000 (dev)

", + "-

#{@previous_r_version} (release)

", + "+

#{@release_version}.9000 (dev)

", + "+

#{@release_version} (release)

", + ], + ], + }, + { + path: "r/pkgdown/assets/versions.json", + hunks: [ + [ + "- \"name\": \"#{@previous_version}.9000 (dev)\",", + "+ \"name\": \"#{@release_version}.9000 (dev)\",", + "- \"name\": \"#{@previous_r_version} (release)\",", + "+ \"name\": \"#{@release_version} (release)\",", + ], + ], + }, ] end - expected_changes << {hunks: hunks, path: path} - end - Dir.glob("ruby/**/version.rb") do |path| - version = " VERSION = \"#{@snapshot_version}\"" - new_version = " VERSION = \"#{@next_snapshot_version}\"" - expected_changes << { - hunks: [ - [ - "-#{version}", - "+#{new_version}", - ] - ], - path: path, - } + Dir.glob("ruby/**/version.rb") do |path| + version = " VERSION = \"#{@snapshot_version}\"" + new_version = " VERSION = \"#{@next_snapshot_version}\"" + expected_changes << { + hunks: [ + [ + "-#{version}", + "+#{new_version}", + ] + ], + path: path, + } + end end stdout = bump_versions("VERSION_POST_TAG") diff --git a/dev/release/setup-rhel-rebuilds.sh b/dev/release/setup-rhel-rebuilds.sh index e8861a19f35b7..ab68a69bcaf7d 100755 --- a/dev/release/setup-rhel-rebuilds.sh +++ b/dev/release/setup-rhel-rebuilds.sh @@ -35,11 +35,9 @@ dnf -y install \ cmake \ git \ gobject-introspection-devel \ - java-11-openjdk-devel \ libcurl-devel \ llvm-devel \ llvm-toolset \ - maven \ ncurses-devel \ ninja-build \ nodejs \ @@ -55,5 +53,3 @@ npm install -g yarn python3 -m ensurepip --upgrade alternatives --set python /usr/bin/python3 -alternatives --set java java-11-openjdk.$(uname -i) -alternatives --set javac java-11-openjdk.$(uname -i) diff --git a/dev/release/setup-ubuntu.sh b/dev/release/setup-ubuntu.sh index ef9d3dde5c1f9..686507d6257a3 100755 --- a/dev/release/setup-ubuntu.sh +++ b/dev/release/setup-ubuntu.sh @@ -22,27 +22,20 @@ set -exu -codename=$(. /etc/os-release && echo ${UBUNTU_CODENAME}) +version=$(. 
/etc/os-release && echo ${VERSION_ID}) -case ${codename} in - *) - nlohmann_json=3 - python=3 - apt-get update -y -q - apt-get install -y -q --no-install-recommends \ - llvm-dev - ;; -esac +apt-get update -y -q -case ${codename} in - focal) - ;; - *) - apt-get update -y -q - apt-get install -y -q --no-install-recommends \ - libxsimd-dev - ;; -esac +if [ ${version} \> "20.04" ]; then + apt-get install -y -q --no-install-recommends \ + libxsimd-dev +fi + +if [ ${version} \> "22.04" ]; then + # Some tests rely on legacy timezone aliases such as "US/Pacific" + apt-get install -y -q --no-install-recommends \ + tzdata-legacy +fi apt-get install -y -q --no-install-recommends \ build-essential \ @@ -57,13 +50,11 @@ apt-get install -y -q --no-install-recommends \ libglib2.0-dev \ libsqlite3-dev \ libssl-dev \ - maven \ ninja-build \ - nlohmann-json${nlohmann_json}-dev \ - openjdk-11-jdk \ + nlohmann-json3-dev \ pkg-config \ - python${python}-dev \ - python${python}-venv \ + python3-dev \ + python3-venv \ python3-pip \ ruby-dev \ tzdata \ diff --git a/dev/release/test-helper.rb b/dev/release/test-helper.rb index 82400bae2793b..9cb8315c2f4c1 100644 --- a/dev/release/test-helper.rb +++ b/dev/release/test-helper.rb @@ -103,6 +103,10 @@ def next_release_type (data || {})[:next_release_type] || :major end + def compute_compatible_version(version) + version.split(".")[0, 2].join(".") + end + def detect_versions top_dir = Pathname(__dir__).parent.parent cpp_cmake_lists = top_dir + "cpp" + "CMakeLists.txt" @@ -123,7 +127,7 @@ def detect_versions raise "unknown release type: #{release_type.inspect}" end @release_version = release_version_components.join(".") - @release_compatible_version = @release_version.split(".")[0, 2].join(".") + @release_compatible_version = compute_compatible_version(@release_version) @so_version = compute_so_version(@release_version) next_version_components = @release_version.split(".") case next_release_type @@ -141,13 +145,13 @@ def detect_versions end 
@next_version = next_version_components.join(".") @next_major_version = @next_version.split(".")[0] - @next_compatible_version = @next_version.split(".")[0, 2].join(".") + @next_compatible_version = compute_compatible_version(@next_version) @next_snapshot_version = "#{@next_version}-SNAPSHOT" @next_so_version = compute_so_version(@next_version) r_description = top_dir + "r" + "DESCRIPTION" @previous_version = r_description.read[/^Version: (.+?)\.9000$/, 1] if @previous_version - @previous_compatible_version = @previous_version.split(".")[0, 2].join(".") + @previous_compatible_version = compute_compatible_version(@previous_version) else @previous_compatible_version = nil end diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh index a4c136acdf6e6..36b34e4353943 100644 --- a/dev/release/utils-prepare.sh +++ b/dev/release/utils-prepare.sh @@ -43,9 +43,11 @@ update_versions() { rm -f meson.build.bak git add meson.build - # Add a new version entry only when the next release is a new major release - if [ "${type}" = "snapshot" -a \ - "${next_version}" = "${major_version}.0.0" ]; then + # Add a new version entry only when the next release is a new major + # release and it doesn't exist yet. + if [ "${type}" = "snapshot" ] && \ + [ "${next_version}" = "${major_version}.0.0" ] && \ + ! grep -q -F "(${major_version}, 0)" tool/generate-version-header.py; then sed -i.bak -E -e \ "s/^ALL_VERSIONS = \[$/&\\n (${major_version}, 0),/" \ tool/generate-version-header.py @@ -82,17 +84,6 @@ update_versions() { git add vcpkg.json popd - pushd "${ARROW_DIR}/java" - mvn versions:set -DnewVersion=${version} -DprocessAllModules -DgenerateBackupPoms=false - if [ "${type}" = "release" ]; then - # versions-maven-plugin:set-scm-tag does not update the whole reactor. 
Invoking separately - mvn versions:set-scm-tag -DnewTag=apache-arrow-${version} -DgenerateBackupPoms=false -pl :arrow-java-root - mvn versions:set-scm-tag -DnewTag=apache-arrow-${version} -DgenerateBackupPoms=false -pl :arrow-bom - fi - git add "pom.xml" - git add "**/pom.xml" - popd - pushd "${ARROW_DIR}/csharp" sed -i.bak -E -e \ "s/^ .+<\/Version>/ ${version}<\/Version>/" \ diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 0305e22843841..52977bc4c0149 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -20,8 +20,6 @@ # Requirements # - Ruby >= 2.3 -# - Maven >= 3.8.7 -# - JDK >= 11 # - gcc >= 4.8 # - Node.js >= 18 # - Go >= 1.22 @@ -439,46 +437,6 @@ maybe_setup_conda() { fi } -install_maven() { - MAVEN_VERSION=3.8.7 - if command -v mvn > /dev/null; then - # --batch-mode is for disabling output color. - SYSTEM_MAVEN_VERSION=$(mvn --batch-mode -v | head -n 1 | awk '{print $3}') - show_info "Found Maven version ${SYSTEM_MAVEN_VERSION} at $(command -v mvn)." - else - SYSTEM_MAVEN_VERSION=0.0.0 - show_info "Maven installation not found." - fi - - if [[ "$MAVEN_VERSION" == "$SYSTEM_MAVEN_VERSION" ]]; then - show_info "System Maven version ${SYSTEM_MAVEN_VERSION} matches required Maven version ${MAVEN_VERSION}. Skipping installation." - else - # Append pipe character to make preview release versions like "X.Y.Z-beta-1" sort - # as older than their corresponding release version "X.Y.Z". This works because - # `sort -V` orders the pipe character lower than any version number character. - older_version=$(printf '%s\n%s\n' "$SYSTEM_MAVEN_VERSION" "$MAVEN_VERSION" | sed 's/$/|/' | sort -V | sed 's/|$//' | head -n1) - if [[ "$older_version" == "$SYSTEM_MAVEN_VERSION" ]]; then - show_info "Installing Maven version ${MAVEN_VERSION}..." 
- APACHE_MIRROR="https://www.apache.org/dyn/closer.lua?action=download&filename=" - curl -sL -o apache-maven-${MAVEN_VERSION}-bin.tar.gz \ - ${APACHE_MIRROR}/maven/maven-3/${MAVEN_VERSION}/binaries/apache-maven-${MAVEN_VERSION}-bin.tar.gz - tar xzf apache-maven-${MAVEN_VERSION}-bin.tar.gz - export PATH=$(pwd)/apache-maven-${MAVEN_VERSION}/bin:$PATH - # --batch-mode is for disabling output color. - show_info "Installed Maven version $(mvn --batch-mode -v | head -n 1 | awk '{print $3}')" - else - show_info "System Maven version ${SYSTEM_MAVEN_VERSION} is newer than minimum version ${MAVEN_VERSION}. Skipping installation." - fi - fi -} - -maybe_setup_maven() { - show_info "Ensuring that Maven is installed..." - if [ "${USE_CONDA}" -eq 0 ]; then - install_maven - fi -} - maybe_setup_virtualenv() { # Optionally setup pip virtualenv with the passed dependencies local env="venv-${VENV_ENV:-source}" @@ -535,66 +493,6 @@ maybe_setup_nodejs() { fi } -test_package_java() { - show_header "Build and test Java libraries" - - maybe_setup_maven - maybe_setup_conda maven openjdk - - pushd java - - if [ ${TEST_INTEGRATION_JAVA} -gt 0 ]; then - # Build JNI for C data interface - local -a cmake_options=() - # Enable only C data interface. - cmake_options+=(-DARROW_JAVA_JNI_ENABLE_C=ON) - cmake_options+=(-DARROW_JAVA_JNI_ENABLE_DEFAULT=OFF) - # Disable Testing because GTest might not be present. - cmake_options+=(-DBUILD_TESTING=OFF) - if [ ! -z "${CMAKE_GENERATOR}" ]; then - cmake_options+=(-G "${CMAKE_GENERATOR}") - fi - local build_dir="${ARROW_TMPDIR}/java-jni-build" - local install_dir="${ARROW_TMPDIR}/java-jni-install" - local dist_dir="${ARROW_TMPDIR}/java-jni-dist" - cmake \ - -S . 
\ - -B "${build_dir}" \ - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-release} \ - -DCMAKE_INSTALL_LIBDIR=lib \ - -DCMAKE_INSTALL_PREFIX="${install_dir}" \ - -DCMAKE_PREFIX_PATH="${ARROW_HOME}" \ - "${cmake_options[@]}" - cmake --build "${build_dir}" - cmake --install "${build_dir}" - - local normalized_arch=$(arch) - case ${normalized_arch} in - aarch64|arm64) - normalized_arch=aarch_64 - ;; - i386) - normalized_arch=x86_64 - ;; - esac - rm -fr ${dist_dir} - mkdir -p ${dist_dir} - mv ${install_dir}/lib/* ${dist_dir} - mvn install \ - -Darrow.c.jni.dist.dir=${dist_dir} \ - -Parrow-c-data - fi - - if [ ${TEST_JAVA} -gt 0 ]; then - mvn test - fi - - # Build jars - mvn package - - popd -} - test_and_install_cpp() { show_header "Build, install and test C++ libraries" @@ -893,13 +791,9 @@ test_integration() { maybe_setup_virtualenv pip install -e dev/archery[integration] - pip install -e dev/archery[integration-java] - JAVA_DIR=$ARROW_SOURCE_DIR/java CPP_BUILD_DIR=$ARROW_TMPDIR/cpp-build - files=( $JAVA_DIR/tools/target/arrow-tools-*-jar-with-dependencies.jar ) - export ARROW_JAVA_INTEGRATION_JAR=${files[0]} export ARROW_CPP_EXE_PATH=$CPP_BUILD_DIR/release INTEGRATION_TEST_ARGS="" @@ -911,7 +805,6 @@ test_integration() { LD_LIBRARY_PATH=$ARROW_CPP_EXE_PATH:$LD_LIBRARY_PATH archery integration \ --run-ipc --run-flight --run-c-data \ --with-cpp=${TEST_INTEGRATION_CPP} \ - --with-java=${TEST_INTEGRATION_JAVA} \ --with-js=${TEST_INTEGRATION_JS} \ $INTEGRATION_TEST_ARGS } @@ -1009,9 +902,6 @@ test_source_distribution() { if [ ${TEST_RUBY} -gt 0 ]; then test_ruby fi - if [ ${BUILD_JAVA} -gt 0 ]; then - test_package_java - fi if [ ${TEST_INTEGRATION} -gt 0 ]; then test_integration fi @@ -1032,9 +922,6 @@ test_binary_distribution() { if [ ${TEST_WHEELS} -gt 0 ]; then test_wheels fi - if [ ${TEST_JARS} -gt 0 ]; then - test_jars - fi } test_linux_wheels() { @@ -1170,30 +1057,6 @@ test_wheels() { popd } -test_jars() { - show_header "Testing Java JNI jars" - - maybe_setup_maven - 
maybe_setup_conda maven python - - local download_dir=${ARROW_TMPDIR}/jars - mkdir -p ${download_dir} - - ${PYTHON:-python3} $SOURCE_DIR/download_rc_binaries.py $VERSION $RC_NUMBER \ - --dest=${download_dir} \ - --package_type=jars - - verify_dir_artifact_signatures ${download_dir} - - # TODO: This should be replaced with real verification by ARROW-15486. - # https://issues.apache.org/jira/browse/ARROW-15486 - # [Release][Java] Verify staged maven artifacts - if [ ! -d "${download_dir}/arrow-memory/${VERSION}" ]; then - echo "Artifacts for ${VERSION} isn't uploaded yet." - return 1 - fi -} - # By default test all functionalities. # To deactivate one test, deactivate the test and all of its dependents # To explicitly select one test, set TEST_DEFAULT=0 TEST_X=1 @@ -1206,12 +1069,10 @@ test_jars() { # Binary verification tasks : ${TEST_APT:=${TEST_BINARIES}} : ${TEST_BINARY:=${TEST_BINARIES}} -: ${TEST_JARS:=${TEST_BINARIES}} : ${TEST_WHEELS:=${TEST_BINARIES}} : ${TEST_YUM:=${TEST_BINARIES}} # Source verification tasks -: ${TEST_JAVA:=${TEST_SOURCE}} : ${TEST_CPP:=${TEST_SOURCE}} : ${TEST_CSHARP:=${TEST_SOURCE}} : ${TEST_GLIB:=${TEST_SOURCE}} @@ -1222,15 +1083,13 @@ test_jars() { # For selective Integration testing, set TEST_DEFAULT=0 TEST_INTEGRATION_X=1 TEST_INTEGRATION_Y=1 : ${TEST_INTEGRATION_CPP:=${TEST_INTEGRATION}} -: ${TEST_INTEGRATION_JAVA:=${TEST_INTEGRATION}} : ${TEST_INTEGRATION_JS:=${TEST_INTEGRATION}} # Automatically build/test if its activated by a dependent TEST_GLIB=$((${TEST_GLIB} + ${TEST_RUBY})) BUILD_CPP=$((${TEST_CPP} + ${TEST_GLIB} + ${TEST_PYTHON} + ${TEST_INTEGRATION_CPP})) -BUILD_JAVA=$((${TEST_JAVA} + ${TEST_INTEGRATION_JAVA})) BUILD_JS=$((${TEST_JS} + ${TEST_INTEGRATION_JS})) -TEST_INTEGRATION=$((${TEST_INTEGRATION} + ${TEST_INTEGRATION_CPP} + ${TEST_INTEGRATION_JAVA} + ${TEST_INTEGRATION_JS})) +TEST_INTEGRATION=$((${TEST_INTEGRATION} + ${TEST_INTEGRATION_CPP} + ${TEST_INTEGRATION_JS})) # Execute tests in a conda environment : 
${USE_CONDA:=0} diff --git a/dev/requirements_merge_arrow_pr.txt b/dev/requirements_merge_arrow_pr.txt index 99909e434a580..f2293605cf1b0 100644 --- a/dev/requirements_merge_arrow_pr.txt +++ b/dev/requirements_merge_arrow_pr.txt @@ -1,2 +1 @@ -jira requests diff --git a/dev/tasks/fuzz-tests/github.oss-fuzz.yml b/dev/tasks/fuzz-tests/github.oss-fuzz.yml index d7cf516266831..e591499b0ef0a 100644 --- a/dev/tasks/fuzz-tests/github.oss-fuzz.yml +++ b/dev/tasks/fuzz-tests/github.oss-fuzz.yml @@ -33,6 +33,12 @@ jobs: run: | git clone --depth=50 https://github.com/google/oss-fuzz.git + - uses: actions/setup-python@v5 + # Use a Python version that's compatible with the pinned requirements + # for dependencies below. + with: + python-version: '3.11' + - name: Install dependencies working-directory: oss-fuzz run: | diff --git a/dev/tasks/java-jars/README.md b/dev/tasks/java-jars/README.md deleted file mode 100644 index 216c7198d3239..0000000000000 --- a/dev/tasks/java-jars/README.md +++ /dev/null @@ -1,29 +0,0 @@ - - -# Java Jars Task - -This directory is responsible to generate the jar files for the Arrow components that depend on C++ shared libraries to execute. - -The Arrow C++ libraries are compiled both on macOS and Linux distributions, with their dependencies linked statically, and they are added -in the jars at the end, so the file can be used on both systems. - -## Linux Docker Image -To compile the C++ libraries in Linux, a docker image is used. -It is created used the **ci/docker/java-bundled-jars.dockerfile** file. -If it is necessary to add any new dependency, you need to change that file. \ No newline at end of file diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml deleted file mode 100644 index affa26a1d9332..0000000000000 --- a/dev/tasks/java-jars/github.yml +++ /dev/null @@ -1,278 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -{% import 'macros.jinja' as macros with context %} - -{{ macros.github_header() }} - -permissions: - packages: write - -jobs: - - build-cpp-ubuntu: - {% set arch = '${{ matrix.platform.arch }}' %} - name: Build C++ libraries Ubuntu {{ arch }} - runs-on: {{ '${{ matrix.platform.runs_on }}' }} - env: - # architecture name used for archery build - ARCH: {{ '${{ matrix.platform.archery_arch }}' }} - ARCH_ALIAS: {{ '${{ matrix.platform.archery_arch_alias }}' }} - ARCH_SHORT: {{ '${{ matrix.platform.archery_arch_short }}' }} - strategy: - fail-fast: false - matrix: - platform: - - runs_on: ["ubuntu-latest"] - arch: "x86_64" - archery_arch: "amd64" - archery_arch_alias: "x86_64" - archery_arch_short: "amd64" - - runs_on: ["self-hosted", "Linux", "arm64"] - arch: "aarch_64" - archery_arch: "arm64v8" - archery_arch_alias: "aarch64" - archery_arch_short: "arm64" - steps: - {{ macros.github_checkout_arrow()|indent }} - {{ macros.github_free_space()|indent }} - {{ macros.github_install_archery()|indent }} - - name: Build C++ libraries - env: - {{ macros.github_set_sccache_envvars()|indent(8) }} - GITHUB_TOKEN: {{ '${{ secrets.GITHUB_TOKEN }}' }} - run: | - if [ "${ARCH}" = "arm64v8" ]; then - # We can't use NuGet on manylinux2014_aarch64 because Mono is old. 
- : - else - export VCPKG_BINARY_SOURCES="clear;nuget,GitHub,readwrite" - fi - archery docker run \ - -e ARROW_JAVA_BUILD=OFF \ - -e ARROW_JAVA_TEST=OFF \ - java-jni-manylinux-2014 - - name: Compress into single artifact to keep directory structure - run: tar -cvzf arrow-shared-libs-linux-{{ arch }}.tar.gz arrow/java-dist/ - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - name: ubuntu-shared-lib-{{ arch }} - path: arrow-shared-libs-linux-{{ arch }}.tar.gz - {% if arrow.is_default_branch() %} - {{ macros.github_login_dockerhub()|indent }} - - name: Push Docker image - shell: bash - run: archery docker push java-jni-manylinux-2014 - {% endif %} - - build-cpp-macos: - {% set arch = '${{ matrix.platform.arch }}' %} - name: Build C++ libraries macOS {{ arch }} - runs-on: {{ '${{ matrix.platform.runs_on }}' }} - strategy: - fail-fast: false - matrix: - platform: - - { runs_on: ["macos-13"], arch: "x86_64"} - - { runs_on: ["macos-14"], arch: "aarch_64" } - env: - MACOSX_DEPLOYMENT_TARGET: "14.0" - steps: - {{ macros.github_checkout_arrow()|indent }} - - name: Set up Python - uses: actions/setup-python@v4 - with: - cache: 'pip' - python-version: 3.12 - - name: Install Archery - shell: bash - run: pip install -e arrow/dev/archery[all] - - name: Install dependencies - run: | - # We want to use llvm@14 to avoid shared z3 - # dependency. llvm@14 doesn't depend on z3 and llvm depends - # on z3. And Homebrew's z3 provides only shared library. It - # doesn't provides static z3 because z3's CMake doesn't accept - # building both shared and static libraries at once. - # See also: Z3_BUILD_LIBZ3_SHARED in - # https://github.com/Z3Prover/z3/blob/master/README-CMake.md - # - # If llvm is installed, Apache Arrow C++ uses llvm rather than - # llvm@14 because llvm is newer than llvm@14. - brew uninstall llvm || : - - # Ensure updating python@XXX with the "--overwrite" option. - # If python@XXX is updated without "--overwrite", it causes - # a conflict error. 
Because Python 3 installed not by - # Homebrew exists in /usr/local on GitHub Actions. If - # Homebrew's python@XXX is updated without "--overwrite", it - # tries to replace /usr/local/bin/2to3 and so on and causes - # a conflict error. - brew update - for python_package in $(brew list | grep python@); do - brew install --overwrite ${python_package} - done - brew install --overwrite python - - if [ "$(uname -m)" = "arm64" ]; then - # pkg-config formula is deprecated but it's still installed - # in GitHub Actions runner now. We can remove this once - # pkg-config formula is removed from GitHub Actions runner. - brew uninstall pkg-config || : - brew uninstall pkg-config@0.29.2 || : - fi - - brew bundle --file=arrow/cpp/Brewfile - # We want to link aws-sdk-cpp statically but Homebrew's - # aws-sdk-cpp provides only shared library. If we have - # Homebrew's aws-sdk-cpp, our build mix Homebrew's - # aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's - # aws-sdk-cpp to ensure using only bundled aws-sdk-cpp. - brew uninstall aws-sdk-cpp - # We want to use bundled RE2 for static linking. If - # Homebrew's RE2 is installed, its header file may be used. - # We uninstall Homebrew's RE2 to ensure using bundled RE2. - brew uninstall grpc || : # gRPC depends on RE2 - brew uninstall grpc@1.54 || : # gRPC 1.54 may be installed too - brew uninstall re2 - # We want to use bundled Protobuf for static linking. If - # Homebrew's Protobuf is installed, its library file may be - # used on test We uninstall Homebrew's Protobuf to ensure using - # bundled Protobuf. 
- brew uninstall protobuf - - brew bundle --file=arrow/java/Brewfile - - name: Build C++ libraries - env: - {{ macros.github_set_sccache_envvars()|indent(8) }} - run: | - set -e - # make brew Java available to CMake - export JAVA_HOME=$(brew --prefix openjdk@11)/libexec/openjdk.jdk/Contents/Home - arrow/ci/scripts/java_jni_macos_build.sh \ - $GITHUB_WORKSPACE/arrow \ - $GITHUB_WORKSPACE/arrow/cpp-build \ - $GITHUB_WORKSPACE/arrow/java-dist - - name: Compress into single artifact to keep directory structure - run: tar -cvzf arrow-shared-libs-macos-{{ arch }}.tar.gz arrow/java-dist/ - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - name: macos-shared-lib-{{ arch }} - path: arrow-shared-libs-macos-{{ arch }}.tar.gz - - build-cpp-windows: - name: Build C++ libraries Windows - runs-on: windows-2019 - steps: - {{ macros.github_checkout_arrow()|indent }} - - name: Set up Java - uses: actions/setup-java@v3 - with: - java-version: '11' - distribution: 'temurin' - - name: Download Timezone Database - shell: bash - run: arrow/ci/scripts/download_tz_database.sh - - name: Install sccache - shell: bash - run: arrow/ci/scripts/install_sccache.sh pc-windows-msvc $(pwd)/sccache - - name: Build C++ libraries - shell: cmd - env: - {{ macros.github_set_sccache_envvars()|indent(8) }} - run: | - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 - REM For ORC - set TZDIR=/c/msys64/usr/share/zoneinfo - bash -c "arrow/ci/scripts/java_jni_windows_build.sh $(pwd)/arrow $(pwd)/arrow/cpp-build $(pwd)/arrow/java-dist" - - name: Compress into single artifact to keep directory structure - shell: bash - run: tar -cvzf arrow-shared-libs-windows.tar.gz arrow/java-dist/ - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - name: windows-shared-lib - path: arrow-shared-libs-windows.tar.gz - - package-jars: - name: Build jar files - runs-on: macos-latest - needs: - - build-cpp-ubuntu - - build-cpp-macos - - 
build-cpp-windows - steps: - {{ macros.github_checkout_arrow(fetch_depth=0)|indent }} - - name: Download Libraries - uses: actions/download-artifact@v4 - with: - path: artifacts - - name: Decompress artifacts - run: | - mv artifacts/*/*.tar.gz . - tar -xvzf arrow-shared-libs-linux-x86_64.tar.gz - tar -xvzf arrow-shared-libs-linux-aarch_64.tar.gz - tar -xvzf arrow-shared-libs-macos-x86_64.tar.gz - tar -xvzf arrow-shared-libs-macos-aarch_64.tar.gz - tar -xvzf arrow-shared-libs-windows.tar.gz - - name: Test that shared libraries exist - run: | - set -x - - test -f arrow/java-dist/arrow_cdata_jni/x86_64/libarrow_cdata_jni.so - test -f arrow/java-dist/arrow_dataset_jni/x86_64/libarrow_dataset_jni.so - test -f arrow/java-dist/arrow_orc_jni/x86_64/libarrow_orc_jni.so - test -f arrow/java-dist/gandiva_jni/x86_64/libgandiva_jni.so - - test -f arrow/java-dist/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.so - test -f arrow/java-dist/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.so - test -f arrow/java-dist/arrow_orc_jni/aarch_64/libarrow_orc_jni.so - test -f arrow/java-dist/gandiva_jni/aarch_64/libgandiva_jni.so - - test -f arrow/java-dist/arrow_cdata_jni/x86_64/libarrow_cdata_jni.dylib - test -f arrow/java-dist/arrow_dataset_jni/x86_64/libarrow_dataset_jni.dylib - test -f arrow/java-dist/arrow_orc_jni/x86_64/libarrow_orc_jni.dylib - test -f arrow/java-dist/gandiva_jni/x86_64/libgandiva_jni.dylib - - test -f arrow/java-dist/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib - test -f arrow/java-dist/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.dylib - test -f arrow/java-dist/arrow_orc_jni/aarch_64/libarrow_orc_jni.dylib - test -f arrow/java-dist/gandiva_jni/aarch_64/libgandiva_jni.dylib - - test -f arrow/java-dist/arrow_cdata_jni/x86_64/arrow_cdata_jni.dll - test -f arrow/java-dist/arrow_dataset_jni/x86_64/arrow_dataset_jni.dll - test -f arrow/java-dist/arrow_orc_jni/x86_64/arrow_orc_jni.dll - - name: Build bundled jar - env: - MAVEN_ARGS: >- - --no-transfer-progress - 
run: | - set -e - pushd arrow/java - mvn versions:set -DnewVersion={{ arrow.no_rc_snapshot_version }} - mvn versions:set -DnewVersion={{ arrow.no_rc_snapshot_version }} -f bom - popd - arrow/ci/scripts/java_full_build.sh \ - $GITHUB_WORKSPACE/arrow \ - $GITHUB_WORKSPACE/arrow/java-dist - {{ macros.github_upload_releases(["arrow/java-dist/*.jar", - "arrow/java-dist/*.json", - "arrow/java-dist/*.pom", - "arrow/java-dist/*.xml", - "arrow/java-dist/*.zip"])|indent }} diff --git a/dev/tasks/python-wheels/github.linux.yml b/dev/tasks/python-wheels/github.linux.yml index f083b7c0c8f61..ec5b9b31da8e1 100644 --- a/dev/tasks/python-wheels/github.linux.yml +++ b/dev/tasks/python-wheels/github.linux.yml @@ -50,6 +50,15 @@ jobs: {{ macros.github_install_archery()|indent }} {{ macros.github_login_dockerhub()|indent }} + - name: Prepare + run: | + if [ "${PYTHON_ABI_TAG}" = "cp313t" ]; then + test_image_prefix=python-free-threaded + else + test_image_prefix=python + fi + echo "TEST_IMAGE_PREFIX=${test_image_prefix}" >> ${GITHUB_ENV} + - name: Build wheel shell: bash env: @@ -72,23 +81,11 @@ jobs: # TODO(kszucs): auditwheel show - name: Test wheel - if: | - '{{ python_abi_tag }}' != 'cp313t' shell: bash run: | source arrow/ci/scripts/util_enable_core_dumps.sh - archery docker run python-wheel-manylinux-test-imports - archery docker run python-wheel-manylinux-test-unittests - - # Free-threaded wheels need to be tested using a different Docker Compose service - - name: Test free-threaded wheel - if: | - '{{ python_abi_tag }}' == 'cp313t' - shell: bash - run: | - source arrow/ci/scripts/util_enable_core_dumps.sh - archery docker run python-free-threaded-wheel-manylinux-test-imports - archery docker run python-free-threaded-wheel-manylinux-test-unittests + archery docker run ${TEST_IMAGE_PREFIX}-wheel-manylinux-test-imports + archery docker run ${TEST_IMAGE_PREFIX}-wheel-manylinux-test-unittests - name: Test wheel on AlmaLinux 8 shell: bash @@ -136,14 +133,29 @@ jobs: -e TEST_WHEELS=1 
\ ubuntu-verify-rc + - name: Test wheel on Ubuntu 24.04 + shell: bash + if: | + '{{ python_version }}' == '3.12' + env: + UBUNTU: "24.04" + run: | + archery docker run \ + -e TEST_DEFAULT=0 \ + -e TEST_PYARROW_VERSION={{ arrow.no_rc_version }} \ + -e TEST_PYTHON_VERSIONS={{ python_version }} \ + -e TEST_WHEEL_PLATFORM_TAGS={{ wheel_platform_tag }} \ + -e TEST_WHEELS=1 \ + ubuntu-verify-rc + {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }} {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }} {{ macros.github_upload_wheel_scientific_python("arrow/python/repaired_wheels/*.whl")|indent }} {% if arrow.is_default_branch() %} - - name: Push Docker Image + - name: Push Docker images shell: bash run: | archery docker push python-wheel-manylinux-{{ manylinux_version }} - archery docker push python-wheel-manylinux-test-unittests + archery docker push ${TEST_IMAGE_PREFIX}-wheel-manylinux-test-unittests {% endif %} diff --git a/dev/tasks/python-wheels/github.osx.yml b/dev/tasks/python-wheels/github.osx.yml index 1799bd6ad6b6f..031bad94227e8 100644 --- a/dev/tasks/python-wheels/github.osx.yml +++ b/dev/tasks/python-wheels/github.osx.yml @@ -89,6 +89,7 @@ jobs: --x-feature=flight \ --x-feature=gcs \ --x-feature=json \ + --x-feature=orc \ --x-feature=parquet \ --x-feature=s3 diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 839e3d5341070..181e978569104 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -40,7 +40,7 @@ jobs: - uses: r-lib/actions/setup-r@v2 with: - install-r: false + install-r: true - name: Build R source package shell: bash @@ -447,7 +447,7 @@ jobs: - name: Install R uses: r-lib/actions/setup-r@v2 with: - install-r: false + install-r: true - name: Rename artifacts shell: Rscript {0} run: | diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 69235bf0cf4fe..c8c311f5137df 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ 
-59,7 +59,6 @@ groups: - centos-* - conan-* - debian-* - - java-jars - matlab - nuget - python-sdist @@ -83,10 +82,6 @@ groups: c-glib: - test-*c-glib* - java: - - "*java*" - - test-*spark* - python: - test-*python* - example-*python* @@ -102,7 +97,6 @@ groups: vcpkg: - test-*vcpkg* - wheel-* - - java-jars integration: - test-*dask* @@ -154,7 +148,6 @@ groups: - centos-* - conan-* - conda-* - - java-jars - homebrew-cpp - nuget - test-* @@ -184,7 +177,6 @@ groups: - ~conda-linux-x64-cuda-py3 - ~conda-osx-arm64-cpu-py3 - conan-* - - java-jars - homebrew-cpp - nuget - wheel-* @@ -695,173 +687,6 @@ tasks: artifacts: - matlab-arrow-{no_rc_no_dev_version}.mltbx - ############################## Arrow JAR's ################################## - - java-jars: - # Build jar's that contains cpp libraries dependencies - ci: github - template: java-jars/github.yml - artifacts: - - arrow-algorithm-{no_rc_snapshot_version}-cyclonedx.json - - arrow-algorithm-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-algorithm-{no_rc_snapshot_version}-javadoc.jar - - arrow-algorithm-{no_rc_snapshot_version}-sources.jar - - arrow-algorithm-{no_rc_snapshot_version}-tests.jar - - arrow-algorithm-{no_rc_snapshot_version}.jar - - arrow-algorithm-{no_rc_snapshot_version}.pom - - arrow-avro-{no_rc_snapshot_version}-cyclonedx.json - - arrow-avro-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-avro-{no_rc_snapshot_version}-javadoc.jar - - arrow-avro-{no_rc_snapshot_version}-sources.jar - - arrow-avro-{no_rc_snapshot_version}-tests.jar - - arrow-avro-{no_rc_snapshot_version}.jar - - arrow-avro-{no_rc_snapshot_version}.pom - - arrow-bom-{no_rc_snapshot_version}.pom - - arrow-c-data-{no_rc_snapshot_version}-cyclonedx.json - - arrow-c-data-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-c-data-{no_rc_snapshot_version}-javadoc.jar - - arrow-c-data-{no_rc_snapshot_version}-sources.jar - - arrow-c-data-{no_rc_snapshot_version}-tests.jar - - arrow-c-data-{no_rc_snapshot_version}.jar - - 
arrow-c-data-{no_rc_snapshot_version}.pom - - arrow-compression-{no_rc_snapshot_version}-cyclonedx.json - - arrow-compression-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-compression-{no_rc_snapshot_version}-javadoc.jar - - arrow-compression-{no_rc_snapshot_version}-sources.jar - - arrow-compression-{no_rc_snapshot_version}-tests.jar - - arrow-compression-{no_rc_snapshot_version}.jar - - arrow-compression-{no_rc_snapshot_version}.pom - - arrow-dataset-{no_rc_snapshot_version}-cyclonedx.json - - arrow-dataset-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-dataset-{no_rc_snapshot_version}-javadoc.jar - - arrow-dataset-{no_rc_snapshot_version}-sources.jar - - arrow-dataset-{no_rc_snapshot_version}-tests.jar - - arrow-dataset-{no_rc_snapshot_version}.jar - - arrow-dataset-{no_rc_snapshot_version}.pom - - arrow-flight-{no_rc_snapshot_version}-cyclonedx.json - - arrow-flight-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-flight-{no_rc_snapshot_version}.pom - - arrow-format-{no_rc_snapshot_version}-cyclonedx.json - - arrow-format-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-format-{no_rc_snapshot_version}-javadoc.jar - - arrow-format-{no_rc_snapshot_version}-sources.jar - - arrow-format-{no_rc_snapshot_version}-tests.jar - - arrow-format-{no_rc_snapshot_version}.jar - - arrow-format-{no_rc_snapshot_version}.pom - - arrow-gandiva-{no_rc_snapshot_version}-cyclonedx.json - - arrow-gandiva-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-gandiva-{no_rc_snapshot_version}-javadoc.jar - - arrow-gandiva-{no_rc_snapshot_version}-sources.jar - - arrow-gandiva-{no_rc_snapshot_version}-tests.jar - - arrow-gandiva-{no_rc_snapshot_version}.jar - - arrow-gandiva-{no_rc_snapshot_version}.pom - - arrow-java-root-{no_rc_snapshot_version}-cyclonedx.json - - arrow-java-root-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-java-root-{no_rc_snapshot_version}-source-release.zip - - arrow-java-root-{no_rc_snapshot_version}.pom - - arrow-jdbc-{no_rc_snapshot_version}-cyclonedx.json - 
- arrow-jdbc-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-jdbc-{no_rc_snapshot_version}-javadoc.jar - - arrow-jdbc-{no_rc_snapshot_version}-sources.jar - - arrow-jdbc-{no_rc_snapshot_version}-tests.jar - - arrow-jdbc-{no_rc_snapshot_version}.jar - - arrow-jdbc-{no_rc_snapshot_version}.pom - - arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.json - - arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-memory-core-{no_rc_snapshot_version}-javadoc.jar - - arrow-memory-core-{no_rc_snapshot_version}-sources.jar - - arrow-memory-core-{no_rc_snapshot_version}-tests.jar - - arrow-memory-core-{no_rc_snapshot_version}.jar - - arrow-memory-core-{no_rc_snapshot_version}.pom - - arrow-memory-netty-{no_rc_snapshot_version}-cyclonedx.json - - arrow-memory-netty-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-memory-netty-{no_rc_snapshot_version}-javadoc.jar - - arrow-memory-netty-{no_rc_snapshot_version}-sources.jar - - arrow-memory-netty-{no_rc_snapshot_version}-tests.jar - - arrow-memory-netty-{no_rc_snapshot_version}.jar - - arrow-memory-netty-{no_rc_snapshot_version}.pom - - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-cyclonedx.json - - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-javadoc.jar - - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-sources.jar - - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}-tests.jar - - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}.jar - - arrow-memory-netty-buffer-patch-{no_rc_snapshot_version}.pom - - arrow-memory-unsafe-{no_rc_snapshot_version}-cyclonedx.json - - arrow-memory-unsafe-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-memory-unsafe-{no_rc_snapshot_version}-javadoc.jar - - arrow-memory-unsafe-{no_rc_snapshot_version}-sources.jar - - arrow-memory-unsafe-{no_rc_snapshot_version}-tests.jar - - arrow-memory-unsafe-{no_rc_snapshot_version}.jar - - 
arrow-memory-unsafe-{no_rc_snapshot_version}.pom - - arrow-memory-{no_rc_snapshot_version}-cyclonedx.json - - arrow-memory-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-memory-{no_rc_snapshot_version}.pom - - arrow-orc-{no_rc_snapshot_version}-cyclonedx.json - - arrow-orc-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-orc-{no_rc_snapshot_version}-javadoc.jar - - arrow-orc-{no_rc_snapshot_version}-sources.jar - - arrow-orc-{no_rc_snapshot_version}-tests.jar - - arrow-orc-{no_rc_snapshot_version}.jar - - arrow-orc-{no_rc_snapshot_version}.pom - - arrow-performance-{no_rc_snapshot_version}-cyclonedx.json - - arrow-performance-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-performance-{no_rc_snapshot_version}-sources.jar - - arrow-performance-{no_rc_snapshot_version}-tests.jar - - arrow-performance-{no_rc_snapshot_version}.jar - - arrow-performance-{no_rc_snapshot_version}.pom - - arrow-tools-{no_rc_snapshot_version}-cyclonedx.json - - arrow-tools-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-tools-{no_rc_snapshot_version}-jar-with-dependencies.jar - - arrow-tools-{no_rc_snapshot_version}-javadoc.jar - - arrow-tools-{no_rc_snapshot_version}-sources.jar - - arrow-tools-{no_rc_snapshot_version}-tests.jar - - arrow-tools-{no_rc_snapshot_version}.jar - - arrow-tools-{no_rc_snapshot_version}.pom - - arrow-vector-{no_rc_snapshot_version}-cyclonedx.json - - arrow-vector-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-vector-{no_rc_snapshot_version}-javadoc.jar - - arrow-vector-{no_rc_snapshot_version}-shade-format-flatbuffers.jar - - arrow-vector-{no_rc_snapshot_version}-sources.jar - - arrow-vector-{no_rc_snapshot_version}-tests.jar - - arrow-vector-{no_rc_snapshot_version}.jar - - arrow-vector-{no_rc_snapshot_version}.pom - - flight-core-{no_rc_snapshot_version}-cyclonedx.json - - flight-core-{no_rc_snapshot_version}-cyclonedx.xml - - flight-core-{no_rc_snapshot_version}-javadoc.jar - - flight-core-{no_rc_snapshot_version}-sources.jar - - 
flight-core-{no_rc_snapshot_version}-tests.jar - - flight-core-{no_rc_snapshot_version}.jar - - flight-core-{no_rc_snapshot_version}.pom - - flight-integration-tests-{no_rc_snapshot_version}-cyclonedx.json - - flight-integration-tests-{no_rc_snapshot_version}-cyclonedx.xml - - flight-integration-tests-{no_rc_snapshot_version}-jar-with-dependencies.jar - - flight-integration-tests-{no_rc_snapshot_version}-javadoc.jar - - flight-integration-tests-{no_rc_snapshot_version}-sources.jar - - flight-integration-tests-{no_rc_snapshot_version}-tests.jar - - flight-integration-tests-{no_rc_snapshot_version}.jar - - flight-integration-tests-{no_rc_snapshot_version}.pom - - flight-sql-{no_rc_snapshot_version}-cyclonedx.json - - flight-sql-{no_rc_snapshot_version}-cyclonedx.xml - - flight-sql-{no_rc_snapshot_version}-javadoc.jar - - flight-sql-{no_rc_snapshot_version}-sources.jar - - flight-sql-{no_rc_snapshot_version}-tests.jar - - flight-sql-{no_rc_snapshot_version}.jar - - flight-sql-{no_rc_snapshot_version}.pom - - flight-sql-jdbc-core-{no_rc_snapshot_version}-cyclonedx.json - - flight-sql-jdbc-core-{no_rc_snapshot_version}-cyclonedx.xml - - flight-sql-jdbc-core-{no_rc_snapshot_version}-javadoc.jar - - flight-sql-jdbc-core-{no_rc_snapshot_version}-sources.jar - - flight-sql-jdbc-core-{no_rc_snapshot_version}-tests.jar - - flight-sql-jdbc-core-{no_rc_snapshot_version}.jar - - flight-sql-jdbc-core-{no_rc_snapshot_version}.pom - - flight-sql-jdbc-driver-{no_rc_snapshot_version}-cyclonedx.json - - flight-sql-jdbc-driver-{no_rc_snapshot_version}-cyclonedx.xml - - flight-sql-jdbc-driver-{no_rc_snapshot_version}-sources.jar - - flight-sql-jdbc-driver-{no_rc_snapshot_version}-tests.jar - - flight-sql-jdbc-driver-{no_rc_snapshot_version}.jar - - flight-sql-jdbc-driver-{no_rc_snapshot_version}.pom - ############################## NuGet packages ############################### nuget: @@ -928,7 +753,6 @@ tasks: {% for target in ["cpp", "csharp", "integration", - "java", "js", "python", 
"ruby"] %} @@ -988,7 +812,6 @@ tasks: {% for target in ["cpp", "csharp", "integration", - "java", "js", "python", "ruby"] %} @@ -1013,7 +836,6 @@ tasks: env: ARROW_FLIGHT: 0 ARROW_GANDIVA: 0 - TEST_INTEGRATION_JAVA: 0 PYTEST_ADDOPTS: "-k 'not test_cancellation'" target: {{ target }} github_runner: "macos-14" @@ -1354,7 +1176,7 @@ tasks: test-r-extra-packages: ci: github - template: r/github.linux.extra.packages.yml + template: r/github.linux.extra.packages.yml test-r-linux-as-cran: ci: github diff --git a/dev/test_merge_arrow_pr.py b/dev/test_merge_arrow_pr.py index 0067c10414c65..1db07ca91a401 100755 --- a/dev/test_merge_arrow_pr.py +++ b/dev/test_merge_arrow_pr.py @@ -25,99 +25,77 @@ FakeIssue = namedtuple('issue', ['fields']) -FakeFields = namedtuple('fields', ['status', 'summary', 'assignee', - 'components', 'fixVersions', 'milestone']) -FakeAssignee = namedtuple('assignee', ['displayName']) -FakeStatus = namedtuple('status', ['name']) -FakeComponent = namedtuple('component', ['name']) -FakeVersion = namedtuple('version', ['name', 'raw']) -FakeMilestone = namedtuple('milestone', ['state']) +FakeFields = namedtuple( + 'fields', ['assignees', 'labels', 'milestone', 'state', 'title']) +FakeAssignee = namedtuple('assignees', ['login']) +FakeLabel = namedtuple('label', ['name']) +FakeMilestone = namedtuple('milestone', ['title', 'state']) RAW_VERSION_JSON = [ - {'name': 'JS-0.4.0', 'released': False}, - {'name': '1.0.0', 'released': False}, - {'name': '2.0.0', 'released': False}, - {'name': '0.9.0', 'released': False}, - {'name': '0.10.0', 'released': False}, - {'name': '0.8.0', 'released': True}, - {'name': '0.7.0', 'released': True} + {'title': 'JS-0.4.0', 'state': 'open'}, + {'title': '1.0.0', 'state': 'open'}, + {'title': '2.0.0', 'state': 'open'}, + {'title': '0.9.0', 'state': 'open'}, + {'title': '0.10.0', 'state': 'open'}, + {'title': '0.8.0', 'state': 'closed'}, + {'title': '0.7.0', 'state': 'closed'} ] - -SOURCE_VERSIONS = [FakeVersion(raw['name'], raw) 
+SOURCE_VERSIONS = [FakeMilestone(raw['title'], raw['state']) for raw in RAW_VERSION_JSON] - -TRANSITIONS = [{'name': 'Resolve Issue', 'id': 1}] - -jira_id = 'ARROW-1234' -status = FakeStatus('In Progress') -fields = FakeFields(status, 'issue summary', FakeAssignee('groundhog'), - [FakeComponent('C++'), FakeComponent('Format')], - [], FakeMilestone('closed')._asdict()) +fake_issue_id = 'GH-1234' +fields = FakeFields([FakeAssignee('groundhog')._asdict()], + [FakeLabel('Component: C++')._asdict(), + FakeLabel('Component: Format')._asdict()], + FakeMilestone('', 'open')._asdict(), + 'open', '[C++][Format] The issue Title') FAKE_ISSUE_1 = FakeIssue(fields) -class FakeJIRA: +class FakeGitHub: - def __init__(self, issue=None, project_versions=None, transitions=None, - current_fix_versions=None): - self._issue = issue + def __init__(self, issues=None, project_versions=None, state='open'): + self._issues = issues self._project_versions = project_versions - self._transitions = transitions - - def issue(self, jira_id): - return self._issue + self._state = state + self._transitions = [] - def transitions(self, jira_id): - return self._transitions - - def transition_issue(self, jira_id, transition_id, comment=None, - fixVersions=None): - self.captured_transition = { - 'jira_id': jira_id, - 'transition_id': transition_id, - 'comment': comment, - 'fixVersions': fixVersions - } + @property + def issue(self): + return self._issues[fake_issue_id].fields._asdict() @property def current_versions(self): - all_versions = self._project_versions or SOURCE_VERSIONS return [ - v for v in all_versions if not v.raw.get("released") - ] + ['0.11.0'] + v.title for v in self._project_versions if not v.state == 'closed' + ] @property def current_fix_versions(self): return 'JS-0.4.0' - def project_versions(self, project): - return self._project_versions - + @property + def state(self): + return self._state -class FakeGitHub: + def get_issue_data(self, issue_id): + return 
self._issues.get(issue_id).fields._asdict() - def __init__(self, issue=None, project_versions=None): - self._issue = issue - self._project_versions = project_versions + def get_milestones(self): + return [v._asdict() for v in self._project_versions] - @property - def issue(self): - return self._issue.fields._asdict() + def assign_milestone(self, issue_id, milestone): + self._transitions.append( + {'action': 'assign_milestone', 'issue_id': issue_id, + 'milestone': milestone}) - @property - def current_versions(self): - all_versions = self._project_versions or SOURCE_VERSIONS - return [ - v for v in all_versions if not v.raw.get("released") - ] + ['0.11.0'] + def close_issue(self, issue_id, comment): + self._transitions.append( + {'action': 'close_issue', 'issue_id': issue_id, 'comment': comment}) @property - def current_fix_versions(self): - return 'JS-0.4.0' - - def project_versions(self, project): - return self._project_versions + def captured_transitions(self): + return self._transitions class FakeCLI: @@ -135,23 +113,23 @@ def fail(self, msg): raise Exception(msg) -def test_jira_fix_versions(): - jira = FakeJIRA(project_versions=SOURCE_VERSIONS, - transitions=TRANSITIONS) +def test_gh_fix_versions(): + gh = FakeGitHub(issues={fake_issue_id: FAKE_ISSUE_1}, + project_versions=SOURCE_VERSIONS) - issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI()) + issue = merge_arrow_pr.GitHubIssue(gh, fake_issue_id, FakeCLI()) fix_version = merge_arrow_pr.get_candidate_fix_version( issue.current_versions ) assert fix_version == '1.0.0' -def test_jira_fix_versions_filters_maintenance(): +def test_gh_fix_versions_filters_maintenance(): maintenance_branches = ["maint-1.0.0"] - jira = FakeJIRA(project_versions=SOURCE_VERSIONS, - transitions=TRANSITIONS) + gh = FakeGitHub(issues={fake_issue_id: FAKE_ISSUE_1}, + project_versions=SOURCE_VERSIONS) - issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI()) + issue = merge_arrow_pr.GitHubIssue(gh, 
fake_issue_id, FakeCLI()) fix_version = merge_arrow_pr.get_candidate_fix_version( issue.current_versions, maintenance_branches=maintenance_branches @@ -159,102 +137,72 @@ def test_jira_fix_versions_filters_maintenance(): assert fix_version == '2.0.0' -def test_jira_only_suggest_major_release(): +def test_gh_only_suggest_major_release(): versions_json = [ - {'name': '0.9.1', 'released': False}, - {'name': '0.10.0', 'released': False}, - {'name': '1.0.0', 'released': False}, + {'name': '0.9.1', 'state': "open"}, + {'name': '0.10.0', 'state': "open"}, + {'name': '1.0.0', 'state': "open"}, ] - versions = [FakeVersion(raw['name'], raw) for raw in versions_json] + versions = [FakeMilestone(raw['name'], raw['state']) for raw in versions_json] - jira = FakeJIRA(project_versions=versions, transitions=TRANSITIONS) - issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI()) + gh = FakeGitHub(issues={fake_issue_id: FAKE_ISSUE_1}, project_versions=versions) + issue = merge_arrow_pr.GitHubIssue(gh, fake_issue_id, FakeCLI()) fix_version = merge_arrow_pr.get_candidate_fix_version( issue.current_versions ) assert fix_version == '1.0.0' -def test_jira_parquet_no_suggest_non_cpp(): - # ARROW-7351 - versions_json = [ - {'name': 'cpp-1.5.0', 'released': True}, - {'name': 'cpp-1.6.0', 'released': False}, - {'name': 'cpp-1.7.0', 'released': False}, - {'name': 'cpp-2.0.0', 'released': False}, - {'name': '1.11.0', 'released': False}, - {'name': '1.12.0', 'released': False}, - {'name': '2.0.0', 'released': False} - ] - - versions = [FakeVersion(raw['name'], raw) - for raw in versions_json] - - jira = FakeJIRA(project_versions=versions, transitions=TRANSITIONS) - issue = merge_arrow_pr.JiraIssue(jira, 'PARQUET-1713', 'PARQUET', - FakeCLI()) - fix_version = merge_arrow_pr.get_candidate_fix_version( - issue.current_versions - ) - assert fix_version == 'cpp-2.0.0' - - -def test_jira_invalid_issue(): +def test_gh_invalid_issue(): class Mock: - def issue(self, jira_id): + def 
issue(self, gh_id): raise Exception("not found") with pytest.raises(Exception): - merge_arrow_pr.JiraIssue(Mock(), 'ARROW-1234', 'ARROW', FakeCLI()) + merge_arrow_pr.GitHubIssue(Mock(), fake_issue_id, FakeCLI()) -def test_jira_resolve(): - jira = FakeJIRA(issue=FAKE_ISSUE_1, - project_versions=SOURCE_VERSIONS, - transitions=TRANSITIONS) +def test_gh_resolve(): + gh = FakeGitHub(issues={fake_issue_id: FAKE_ISSUE_1}, + project_versions=SOURCE_VERSIONS) my_comment = 'my comment' fix_version = "0.10.0" - issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI()) - issue.resolve(fix_version, my_comment) + issue = merge_arrow_pr.GitHubIssue(gh, fake_issue_id, FakeCLI()) + issue.resolve(fix_version, my_comment, pr_body="") - assert jira.captured_transition == { - 'jira_id': 'ARROW-1234', - 'transition_id': 1, - 'comment': my_comment, - 'fixVersions': [{'name': '0.10.0', 'released': False}] - } + assert len(gh.captured_transitions) == 2 + assert gh.captured_transitions[0]['action'] == 'assign_milestone' + assert gh.captured_transitions[1]['action'] == 'close_issue' + assert gh.captured_transitions[1]['comment'] == my_comment + assert gh.captured_transitions[0]['milestone'] == fix_version -def test_jira_resolve_non_mainline(): - jira = FakeJIRA(issue=FAKE_ISSUE_1, - project_versions=SOURCE_VERSIONS, - transitions=TRANSITIONS) +def test_gh_resolve_non_mainline(): + gh = FakeGitHub(issues={fake_issue_id: FAKE_ISSUE_1}, + project_versions=SOURCE_VERSIONS) my_comment = 'my comment' fix_version = "JS-0.4.0" - issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI()) - issue.resolve(fix_version, my_comment) + issue = merge_arrow_pr.GitHubIssue(gh, fake_issue_id, FakeCLI()) + issue.resolve(fix_version, my_comment, "") - assert jira.captured_transition == { - 'jira_id': 'ARROW-1234', - 'transition_id': 1, - 'comment': my_comment, - 'fixVersions': [{'name': 'JS-0.4.0', 'released': False}] - } + assert len(gh.captured_transitions) == 2 + assert 
gh.captured_transitions[1]['comment'] == my_comment + assert gh.captured_transitions[0]['milestone'] == fix_version -def test_jira_resolve_released_fix_version(): +def test_gh_resolve_released_fix_version(): # ARROW-5083 - jira = FakeGitHub(issue=FAKE_ISSUE_1, - project_versions=SOURCE_VERSIONS) + gh = FakeGitHub(issues={fake_issue_id: FAKE_ISSUE_1}, + project_versions=SOURCE_VERSIONS) cmd = FakeCLI(responses=['1.0.0']) - fix_versions_json = merge_arrow_pr.prompt_for_fix_version(cmd, jira) + fix_versions_json = merge_arrow_pr.prompt_for_fix_version(cmd, gh) assert fix_versions_json == "1.0.0" @@ -283,107 +231,58 @@ def test_multiple_authors_bad_input(): assert distinct_other_authors == [a0, a1] -def test_jira_already_resolved(): - status = FakeStatus('Resolved') - fields = FakeFields(status, 'issue summary', FakeAssignee('groundhog'), - [FakeComponent('Java')], [], None) +def test_gh_already_resolved(): + fields = FakeFields([FakeAssignee('groundhog')._asdict()], + [FakeLabel('Component: Java')._asdict()], + FakeMilestone('', 'open')._asdict(), + 'closed', '[Java] The issue Title') issue = FakeIssue(fields) - jira = FakeJIRA(issue=issue, - project_versions=SOURCE_VERSIONS, - transitions=TRANSITIONS) + gh = FakeGitHub(issues={fake_issue_id: issue}, + project_versions=SOURCE_VERSIONS) - fix_versions = [SOURCE_VERSIONS[0].raw] - issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI()) + fix_versions = [SOURCE_VERSIONS[0]._asdict()] + issue = merge_arrow_pr.GitHubIssue(gh, fake_issue_id, FakeCLI()) with pytest.raises(Exception, - match="ARROW-1234 already has status 'Resolved'"): - issue.resolve(fix_versions, "") - - -def test_no_unset_point_release_fix_version(): - # ARROW-6915: We have had the problem of issues marked with a point release - # having their fix versions overwritten by the merge tool. 
This verifies - # that existing patch release versions are carried over - status = FakeStatus('In Progress') - - versions_json = { - '0.14.2': {'name': '0.14.2', 'id': 1}, - '0.15.1': {'name': '0.15.1', 'id': 2}, - '0.16.0': {'name': '0.16.0', 'id': 3}, - '0.17.0': {'name': '0.17.0', 'id': 4} - } - - fields = FakeFields(status, 'summary', FakeAssignee('someone'), - [FakeComponent('Java')], - [FakeVersion(v, versions_json[v]) - for v in ['0.17.0', '0.15.1', '0.14.2']], None) - issue = FakeIssue(fields) - - jira = FakeJIRA( - issue=issue, - project_versions=[ - FakeVersion(v, vdata) for v, vdata in versions_json.items() - ], - transitions=TRANSITIONS - ) - - issue = merge_arrow_pr.JiraIssue(jira, 'ARROW-1234', 'ARROW', FakeCLI()) - issue.resolve('0.16.0', "a comment") - - assert jira.captured_transition == { - 'jira_id': 'ARROW-1234', - 'transition_id': 1, - 'comment': 'a comment', - 'fixVersions': [versions_json[v] - for v in ['0.16.0', '0.15.1', '0.14.2']] - } - - issue.resolve([versions_json['0.15.1']], "a comment") - - assert jira.captured_transition == { - 'jira_id': 'ARROW-1234', - 'transition_id': 1, - 'comment': 'a comment', - 'fixVersions': [versions_json[v] for v in ['0.15.1', '0.14.2']] - } + match="GitHub issue GH-1234 already has status 'closed'"): + issue.resolve(fix_versions, "", "") -def test_jira_output_no_components(): +def test_gh_output_no_components(): # ARROW-5472 status = 'Interesting work' - components = [] output = merge_arrow_pr.format_issue_output( - "jira", 'ARROW-1234', 'Resolved', status, - FakeAssignee('Foo Bar'), components + 'github', 'GH-1234', 'Resolved', status, + 'username', [] ) - assert output == """=== JIRA ARROW-1234 === + assert output == """=== GITHUB GH-1234 === Summary\t\tInteresting work -Assignee\tFoo Bar +Assignee\tusername Components\tNO COMPONENTS!!! 
Status\t\tResolved -URL\t\thttps://issues.apache.org/jira/browse/ARROW-1234""" +URL\t\thttps://github.com/apache/arrow/issues/1234""" output = merge_arrow_pr.format_issue_output( - "jira", 'ARROW-1234', 'Resolved', status, FakeAssignee('Foo Bar'), - [FakeComponent('C++'), FakeComponent('Python')] + 'github', 'GH-1234', 'Resolved', status, 'username', + [FakeLabel('C++'), FakeLabel('Python')] ) - assert output == """=== JIRA ARROW-1234 === + assert output == """=== GITHUB GH-1234 === Summary\t\tInteresting work -Assignee\tFoo Bar +Assignee\tusername Components\tC++, Python Status\t\tResolved -URL\t\thttps://issues.apache.org/jira/browse/ARROW-1234""" +URL\t\thttps://github.com/apache/arrow/issues/1234""" def test_sorting_versions(): versions_json = [ - {'name': '11.0.0', 'released': False}, - {'name': '9.0.0', 'released': False}, - {'name': '10.0.0', 'released': False}, + {'title': '11.0.0', 'state': 'open'}, + {'title': '9.0.0', 'state': 'open'}, + {'title': '10.0.0', 'state': 'open'}, ] - versions = [FakeVersion(raw['name'], raw) for raw in versions_json] - fix_version = merge_arrow_pr.get_candidate_fix_version(versions) + versions = [FakeMilestone(raw['title'], raw['state']) for raw in versions_json] + fix_version = merge_arrow_pr.get_candidate_fix_version([x.title for x in versions]) assert fix_version == "9.0.0" diff --git a/docker-compose.yml b/docker-compose.yml index 6ee1bf4d116c6..7aabbb43b491a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -127,8 +127,6 @@ x-hierarchy: - conda-python-dask - conda-python-emscripten - conda-python-hdfs - - conda-python-java-integration - - conda-python-jpype - conda-python-no-numpy - conda-python-spark - conda-python-substrait @@ -142,7 +140,6 @@ x-hierarchy: - debian-js - fedora-cpp: - fedora-python - - java - python-sdist - ubuntu-cpp: - ubuntu-cpp-static @@ -173,8 +170,7 @@ x-hierarchy: # helper services - impala - postgres - - python-wheel-manylinux-2014: - - java-jni-manylinux-2014 + - 
python-wheel-manylinux-2014 - python-wheel-manylinux-2-28 - python-wheel-manylinux-test-imports - python-free-threaded-wheel-manylinux-test-imports @@ -643,6 +639,7 @@ services: ARROW_FLIGHT_SQL: "OFF" ARROW_FUZZING: "ON" # Check fuzz regressions ARROW_JEMALLOC: "OFF" + ARROW_MIMALLOC: "OFF" ARROW_ORC: "OFF" ARROW_S3: "OFF" ARROW_USE_ASAN: "ON" @@ -681,6 +678,7 @@ services: ARROW_FLIGHT: "OFF" ARROW_FLIGHT_SQL: "OFF" ARROW_JEMALLOC: "OFF" + ARROW_MIMALLOC: "OFF" ARROW_ORC: "OFF" ARROW_USE_TSAN: "ON" command: *cpp-command @@ -1301,28 +1299,6 @@ services: target: "C:/arrow" command: arrow\\ci\\scripts\\python_wheel_windows_test.bat - java-jni-manylinux-2014: - image: ${REPO}:${ARCH}-java-jni-manylinux-2014-vcpkg-${VCPKG} - build: - args: - base: ${REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG} - java: 11 - context: . - dockerfile: ci/docker/java-jni-manylinux-201x.dockerfile - cache_from: - - ${REPO}:${ARCH}-java-jni-manylinux-2014-vcpkg-${VCPKG} - environment: - <<: [*common, *ccache] - volumes: - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated - - ${DOCKER_VOLUME_PREFIX}python-wheel-manylinux2014-ccache:/ccache:delegated - command: - ["pip install -e /arrow/dev/archery && \ - /arrow/ci/scripts/java_jni_manylinux_build.sh /arrow /build /arrow/java-dist && \ - /arrow/ci/scripts/java_build.sh /arrow /build /arrow/java-dist && \ - /arrow/ci/scripts/java_test.sh /arrow /build /arrow/java-dist"] - ############################## Integration ################################# conda-python-pandas: @@ -1478,79 +1454,6 @@ services: /arrow/ci/scripts/python_build.sh /arrow /build && /arrow/ci/scripts/integration_substrait.sh"] - conda-python-jpype: - # Usage: - # docker compose build conda - # docker compose build conda-cpp - # docker compose build conda-python - # docker compose build conda-python-jpype - # docker compose run --rm conda-python-jpype - image: ${REPO}:${ARCH}-conda-python-${PYTHON}-jpype - build: - context: . 
- dockerfile: ci/docker/conda-python-jpype.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda-python-${PYTHON}-jpype - args: - repo: ${REPO} - arch: ${ARCH} - python: ${PYTHON} - shm_size: *shm-size - environment: - <<: [*common, *ccache] - ARROW_FLIGHT: "OFF" - ARROW_FLIGHT_SQL: "OFF" - ARROW_GANDIVA: "OFF" - volumes: *conda-volumes - command: - ["/arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/java_build.sh /arrow /build && - /arrow/ci/scripts/python_test.sh /arrow"] - - conda-python-java-integration: - # Usage: - # docker compose build conda - # docker compose build conda-cpp - # docker compose build conda-python - # docker compose build conda-python-java-integration - # docker compose run --rm conda-python-java-integration - image: ${REPO}:${ARCH}-conda-python-${PYTHON}-java-integration - build: - context: . - dockerfile: ci/docker/conda-python-jpype.dockerfile - cache_from: - - ${REPO}:${ARCH}-conda-python-${PYTHON}-java-integration - args: - repo: ${REPO} - arch: ${ARCH} - python: ${PYTHON} - llvm: ${LLVM} - shm_size: *shm-size - environment: - <<: [*common, *ccache] - ARROW_ACERO: "OFF" - ARROW_DATASET: "OFF" - ARROW_FLIGHT: "OFF" - ARROW_FLIGHT_SQL: "OFF" - ARROW_GANDIVA: "OFF" - ARROW_JAVA_CDATA: "ON" - ARROW_ORC: "OFF" - ARROW_PARQUET: "OFF" - JAVA_JNI_CMAKE_ARGS: >- - -DARROW_JAVA_JNI_ENABLE_DEFAULT=OFF - -DARROW_JAVA_JNI_ENABLE_C=ON - volumes: - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated - - ${DOCKER_VOLUME_PREFIX}debian-ccache:/ccache:delegated - command: - [ "/arrow/ci/scripts/cpp_build.sh /arrow /build && - /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/java_jni_build.sh /arrow $${ARROW_HOME} /build /tmp/dist/java/ && - /arrow/ci/scripts/java_build.sh /arrow /build /tmp/dist/java && - /arrow/ci/scripts/java_cdata_integration.sh /arrow /build" ] - conda-python-cython2: # Usage: # docker compose build conda @@ -1836,25 
+1739,6 @@ services: /arrow/ci/scripts/csharp_test.sh /arrow && /arrow/ci/scripts/csharp_pack.sh /arrow" - ################################ Java ####################################### - - java: - # Usage: - # docker compose build java - # docker compose run java - # Parameters: - # MAVEN: 3.9.5 - # JDK: 11, 17, 21 - image: ${ARCH}/maven:${MAVEN}-eclipse-temurin-${JDK} - shm_size: *shm-size - volumes: &java-volumes - - .:/arrow:delegated - - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated - command: &java-command > - /bin/bash -c " - /arrow/ci/scripts/java_build.sh /arrow /build && - /arrow/ci/scripts/java_test.sh /arrow /build" - ############################## Integration ################################## conda-integration: @@ -1924,7 +1808,6 @@ services: ARROW_SUBSTRAIT: "ON" BUILD_DOCS_C_GLIB: "ON" BUILD_DOCS_CPP: "ON" - BUILD_DOCS_JAVA: "ON" BUILD_DOCS_JS: "ON" BUILD_DOCS_PYTHON: "ON" BUILD_DOCS_R: "ON" @@ -1937,8 +1820,7 @@ services: /arrow/ci/scripts/python_build.sh /arrow /build && /arrow/ci/scripts/c_glib_build.sh /arrow /build && /arrow/ci/scripts/r_build.sh /arrow /build && - /arrow/ci/scripts/js_build.sh /arrow /build && - /arrow/ci/scripts/java_build.sh /arrow /build" + /arrow/ci/scripts/js_build.sh /arrow /build" ################################# Tools ##################################### @@ -2070,8 +1952,7 @@ services: command: ["/arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/java_build.sh /arrow /build && - /arrow/ci/scripts/integration_spark.sh /arrow /spark ${TEST_PYARROW_ONLY:-false}"] + /arrow/ci/scripts/integration_spark.sh /arrow /spark"] ################################# Source Verification ##################################### diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 093b160d8e9a0..ec53fb04688bc 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -476,6 +476,8 @@ Mixed time resolution temporal inputs 
will be cast to finest input resolution. +------------------+--------+-------------------------+---------------------------+-------+ | exp | Unary | Numeric | Float32/Float64 | | +------------------+--------+-------------------------+---------------------------+-------+ +| expm1 | Unary | Numeric | Float32/Float64 | | ++------------------+--------+-------------------------+---------------------------+-------+ | multiply | Binary | Numeric/Temporal | Numeric/Temporal | \(1) | +------------------+--------+-------------------------+---------------------------+-------+ | multiply_checked | Binary | Numeric/Temporal | Numeric/Temporal | \(1) | @@ -719,6 +721,35 @@ Decimal values are accepted, but are cast to Float64 first. | tan_checked | Unary | Float32/Float64/Decimal | Float32/Float64 | +--------------------------+------------+-------------------------+---------------------+ +Hyperbolic trigonometric functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Hyperbolic trigonometric functions are also supported, and, where applicable, also offer +``_checked`` variants that check for domain errors if needed. + +Decimal values are accepted, but are cast to Float64 first. 
+ ++--------------------------+------------+-------------------------+---------------------+ +| Function name | Arity | Input types | Output type | ++==========================+============+=========================+=====================+ +| acosh | Unary | Float32/Float64/Decimal | Float32/Float64 | ++--------------------------+------------+-------------------------+---------------------+ +| acosh_checked | Unary | Float32/Float64/Decimal | Float32/Float64 | ++--------------------------+------------+-------------------------+---------------------+ +| asinh | Unary | Float32/Float64/Decimal | Float32/Float64 | ++--------------------------+------------+-------------------------+---------------------+ +| atanh | Unary | Float32/Float64/Decimal | Float32/Float64 | ++--------------------------+------------+-------------------------+---------------------+ +| atanh_checked | Unary | Float32/Float64/Decimal | Float32/Float64 | ++--------------------------+------------+-------------------------+---------------------+ +| cosh | Unary | Float32/Float64/Decimal | Float32/Float64 | ++--------------------------+------------+-------------------------+---------------------+ +| sinh | Unary | Float32/Float64/Decimal | Float32/Float64 | ++--------------------------+------------+-------------------------+---------------------+ +| tanh | Unary | Float32/Float64/Decimal | Float32/Float64 | ++--------------------------+------------+-------------------------+---------------------+ + + Comparisons ~~~~~~~~~~~ diff --git a/docs/source/cpp/flight.rst b/docs/source/cpp/flight.rst index a1e9420bfd34e..c076e0b8c1a67 100644 --- a/docs/source/cpp/flight.rst +++ b/docs/source/cpp/flight.rst @@ -362,38 +362,4 @@ Closing unresponsive connections .. _ARROW-16697: https://issues.apache.org/jira/browse/ARROW-16697 .. _ARROW-6062: https://issues.apache.org/jira/browse/ARROW-6062 - -Alternative Transports -====================== - -The standard transport for Arrow Flight is gRPC_. 
The C++ -implementation also experimentally supports a transport based on -UCX_. To use it, use the protocol scheme ``ucx:`` when starting a -server or creating a client. - -UCX Transport -------------- - -Not all features of the gRPC transport are supported. See -:ref:`status-flight-rpc` for details. Also note these specific -caveats: - -- The server creates an independent UCP worker for each client. This - consumes more resources but provides better throughput. -- The client creates an independent UCP worker for each RPC - call. Again, this trades off resource consumption for - performance. This also means that unlike with gRPC, it is - essentially equivalent to make all calls with a single client or - with multiple clients. -- The UCX transport attempts to avoid copies where possible. In some - cases, it can directly reuse UCX-allocated buffers to back - :class:`arrow::Buffer` objects, however, this will also extend the - lifetime of associated UCX resources beyond the lifetime of the - Flight client or server object. -- Depending on the transport that UCX itself selects, you may find - that increasing ``UCX_MM_SEG_SIZE`` from the default (around 8KB) to - around 60KB improves performance (UCX will copy more data in a - single call). - .. _gRPC: https://grpc.io/ -.. _UCX: https://openucx.org/ diff --git a/docs/source/developers/cpp/emscripten.rst b/docs/source/developers/cpp/emscripten.rst index b4c563aae1a3b..bfa0c5bc35021 100644 --- a/docs/source/developers/cpp/emscripten.rst +++ b/docs/source/developers/cpp/emscripten.rst @@ -33,7 +33,9 @@ activate it using the commands below (see https://emscripten.org/docs/getting_st git clone https://github.com/emscripten-core/emsdk.git cd emsdk # replace with the desired EMSDK version. - # e.g. for Pyodide 0.24, you need EMSDK version 3.1.45 + # e.g. 
for Pyodide 0.26, you need EMSDK version 3.1.58 + # the versions can be found in the Makefile.envs file in the Pyodide repo: + # https://github.com/pyodide/pyodide/blob/10b484cfe427e076c929a55dc35cfff01ea8d3bc/Makefile.envs ./emsdk install ./emsdk activate source ./emsdk_env.sh @@ -46,8 +48,8 @@ versions of emsdk tools. .. code:: shell # install Pyodide build tools. - # e.g. for version 0.24 of Pyodide: - pip install pyodide-build==0.24 + # e.g., for version 0.26 of Pyodide, pyodide-build 0.26 and later work + pip install "pyodide-build>=0.26" Then build with the ``ninja-release-emscripten`` CMake preset, like below: @@ -69,8 +71,7 @@ go to ``arrow/python`` and run pyodide build It should make a wheel targeting the currently enabled version of -Pyodide (i.e. the version corresponding to the currently installed -``pyodide-build``) in the ``dist`` subdirectory. +Pyodide in the ``dist`` subdirectory. Manual Build @@ -85,9 +86,8 @@ you will need to override. In particular you will need: #. ``CMAKE_TOOLCHAIN_FILE`` set by using ``emcmake cmake`` instead of just ``cmake``. -#. You will quite likely need to set ``ARROW_ENABLE_THREADING`` to ``OFF`` - for builds targeting single threaded Emscripten environments such as - Pyodide. +#. You will need to set ``ARROW_ENABLE_THREADING`` to ``OFF`` for builds + targeting single-threaded Emscripten environments such as Pyodide. #. ``ARROW_FLIGHT`` and anything else that uses network probably won't work. diff --git a/docs/source/developers/release.rst b/docs/source/developers/release.rst index 53550752d497b..52f4a751dcc81 100644 --- a/docs/source/developers/release.rst +++ b/docs/source/developers/release.rst @@ -55,13 +55,6 @@ generated properly. default-key ${YOUR_GPG_KEY_ID} - The GPG key needs to be added to this `SVN repo `_ and `this one `_. - - Configure Maven to `publish artifacts to Apache repositories `_. 
You will need to `setup a master password `_ at ``~/.m2/settings-security.xml`` and ``settings.xml`` as specified on the `Apache guide `_. It can be tested with the following command: - - .. code-block:: - - # You might need to export GPG_TTY=$(tty) to properly prompt for a passphrase - mvn clean install -Papache-release - - Have the build requirements for cpp and c_glib installed. - Set the ``CROSSBOW_GITHUB_TOKEN`` environment variable to automatically create the verify release Pull Request. - Install ``en_US.UTF-8`` locale. You can confirm available locales by ``locale -a``. @@ -220,20 +213,13 @@ Build source and binaries and submit them # otherwise I got errors referencing "ioctl" errors. dev/release/05-binary-upload.sh - # Sign and upload the Java artifacts - # - # Note that you need to press the "Close" button manually by Web interface - # after you complete the script: - # https://repository.apache.org/#stagingRepositories - dev/release/06-java-upload.sh - # Sign and upload MATLAB artifacts to the GitHub Releases area. # # Note that you need to have GitHub CLI installed to run this script. - dev/release/07-matlab-upload.sh + dev/release/06-matlab-upload.sh # Start verifications for binaries and wheels - dev/release/08-binary-verify.sh + dev/release/07-binary-verify.sh Verify the Release ------------------ @@ -276,7 +262,6 @@ Be sure to go through on the following checklist: #. Upload C# packages #. Update conda recipes #. Upload wheels/sdist to pypi -#. Publish Maven artifacts #. Update R packages #. Update vcpkg port #. Update Conan recipe @@ -532,15 +517,6 @@ Be sure to go through on the following checklist: # dev/release/post-11-python.sh 10.0.0 dev/release/post-11-python.sh -.. 
dropdown:: Publish Maven packages - :animate: fade-in-slide-down - :class-title: sd-fs-5 - :class-container: sd-shadow-md - - - Logon to the Apache repository: https://repository.apache.org/#stagingRepositories - - Select the Arrow staging repository you created for RC: ``orgapachearrow-XXXX`` - - Click the ``release`` button - .. dropdown:: Update R packages :animate: fade-in-slide-down :class-title: sd-fs-5 diff --git a/docs/source/format/Flight.rst b/docs/source/format/Flight.rst index 2c5487d857ea4..2a34db0f1ba50 100644 --- a/docs/source/format/Flight.rst +++ b/docs/source/format/Flight.rst @@ -333,9 +333,14 @@ schemes for the given transports: +----------------------------+--------------------------------+ | (reuse connection) | arrow-flight-reuse-connection: | +----------------------------+--------------------------------+ -| UCX_ (plaintext) | ucx: | +| UCX_ (plaintext) (1) | ucx: | +----------------------------+--------------------------------+ +Notes: + +* \(1) Flight UCX transport has been deprecated on the 19.0.0 release. + The :ref:`dissociated-ipc` section proposes an alternative solution. + .. _UCX: https://openucx.org/ Connection Reuse diff --git a/docs/source/java/memory.rst b/docs/source/java/memory.rst index 8014a27444ac9..28ff01fb9447f 100644 --- a/docs/source/java/memory.rst +++ b/docs/source/java/memory.rst @@ -107,7 +107,7 @@ Child allocators can also be named, which makes it easier to tell where an Arrow Reference counting ------------------ -Because direct memory is expensive to allocate and deallocate, allocators may share direct buffers. To managed shared buffers +Because direct memory is expensive to allocate and deallocate, allocators may share direct buffers. To manage shared buffers deterministically, we use manual reference counting instead of the garbage collector. 
This simply means that each buffer has a counter keeping track of the number of references to the buffer, and the user is responsible for properly incrementing/decrementing the counter as the buffer is used. diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst index 4ad35b190cdd0..dc24be8bd06d8 100644 --- a/docs/source/python/api/arrays.rst +++ b/docs/source/python/api/arrays.rst @@ -72,7 +72,10 @@ may expose data type-specific methods or properties. TimestampArray DurationArray MonthDayNanoIntervalArray + Decimal32Array + Decimal64Array Decimal128Array + Decimal256Array DictionaryArray ListArray FixedSizeListArray @@ -86,6 +89,9 @@ may expose data type-specific methods or properties. ExtensionArray FixedShapeTensorArray OpaqueArray + JsonArray + UuidArray + Bool8Array .. _api.scalar: @@ -112,6 +118,7 @@ classes may expose data type-specific methods or properties. Int16Scalar Int32Scalar Int64Scalar + NullScalar UInt8Scalar UInt16Scalar UInt32Scalar @@ -134,9 +141,11 @@ classes may expose data type-specific methods or properties. DurationScalar MonthDayNanoIntervalScalar Decimal128Scalar + Decimal256Scalar DictionaryScalar RunEndEncodedScalar ListScalar + FixedSizeListScalar LargeListScalar ListViewScalar LargeListViewScalar @@ -146,3 +155,6 @@ classes may expose data type-specific methods or properties. ExtensionScalar FixedShapeTensorScalar OpaqueScalar + JsonScalar + UuidScalar + Bool8Scalar diff --git a/docs/source/python/api/datatypes.rst b/docs/source/python/api/datatypes.rst index 86c29296873e5..5e151a1f93af5 100644 --- a/docs/source/python/api/datatypes.rst +++ b/docs/source/python/api/datatypes.rst @@ -68,7 +68,13 @@ These should be used to create Arrow data types and schemas. dictionary run_end_encoded fixed_shape_tensor + union + dense_union + sparse_union opaque + bool8 + uuid + json_ field schema from_numpy_dtype @@ -96,14 +102,22 @@ functions above. 
DataType DictionaryType ListType + ListViewType + FixedSizeListType LargeListType + LargeListViewType MapType StructType UnionType + DenseUnionType + SparseUnionType TimestampType Time32Type Time64Type + DurationType FixedSizeBinaryType + Decimal32Type + Decimal64Type Decimal128Type Decimal256Type Field @@ -115,8 +129,10 @@ Specific classes and functions for extension types. .. autosummary:: :toctree: ../generated/ + BaseExtensionType ExtensionType PyExtensionType + UnknownExtensionType register_extension_type unregister_extension_type @@ -128,6 +144,9 @@ implemented by PyArrow. FixedShapeTensorType OpaqueType + JsonType + UuidType + Bool8Type .. _api.types.checking: .. currentmodule:: pyarrow.types diff --git a/docs/source/status.rst b/docs/source/status.rst index 83bee8975bbf7..c5883afa8f345 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -202,7 +202,7 @@ Flight RPC +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ | gRPC + TLS transport (grpc+tls:) | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ -| UCX_ transport (ucx:) | ✓ | | | | | | | | +| UCX_ transport (ucx:) (1) | ✓ | | | | | | | | +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ Supported features in the gRPC transport: @@ -212,13 +212,13 @@ Supported features in the gRPC transport: +============================================+=======+=======+=======+====+=======+=======+=======+=======+ | All RPC methods | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ -| Authentication handlers | ✓ | ✓ | ✓ | | ✓ (1) | ✓ | | | +| Authentication handlers | ✓ | ✓ | ✓ | | ✓ (2) | ✓ | | | +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ | Call timeouts | ✓ | ✓ 
| ✓ | | | ✓ | | | +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ | Call cancellation | ✓ | ✓ | ✓ | | | ✓ | | | +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ -| Concurrent client calls (2) | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +| Concurrent client calls (3) | ✓ | ✓ | ✓ | | ✓ | ✓ | | | +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ | Custom middleware | ✓ | ✓ | ✓ | | | ✓ | | | +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ @@ -230,7 +230,7 @@ Supported features in the UCX transport: +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ | Flight RPC Feature | C++ | Java | Go | JS | C# | Rust | Julia | Swift | +============================================+=======+=======+=======+====+=======+=======+=======+=======+ -| All RPC methods | ✓ (3) | | | | | | | | +| All RPC methods | ✓ (4) | | | | | | | | +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ | Authentication handlers | | | | | | | | | +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ @@ -238,7 +238,7 @@ Supported features in the UCX transport: +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ | Call cancellation | | | | | | | | | +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ -| Concurrent client calls | ✓ (4) | | | | | | | | +| Concurrent client calls | ✓ (5) | | | | | | | | +--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ | Custom middleware | | | | | | | | | 
+--------------------------------------------+-------+-------+-------+----+-------+-------+-------+-------+ @@ -247,10 +247,11 @@ Supported features in the UCX transport: Notes: -* \(1) Support using AspNetCore authentication handlers. -* \(2) Whether a single client can support multiple concurrent calls. -* \(3) Only support for DoExchange, DoGet, DoPut, and GetFlightInfo. -* \(4) Each concurrent call is a separate connection to the server +* \(1) Flight UCX transport has been deprecated on the 19.0.0 release. +* \(2) Support using AspNetCore authentication handlers. +* \(3) Whether a single client can support multiple concurrent calls. +* \(4) Only support for DoExchange, DoGet, DoPut, and GetFlightInfo. +* \(5) Each concurrent call is a separate connection to the server (unlike gRPC where concurrent calls are multiplexed over a single connection). This will generally provide better throughput but consumes more resources both on the server and the client. diff --git a/format/Schema.fbs b/format/Schema.fbs index e8e14b112a771..f902b6bc1e56d 100644 --- a/format/Schema.fbs +++ b/format/Schema.fbs @@ -61,8 +61,8 @@ enum MetadataVersion:short { /// forward compatibility guarantees). /// 2. A means of negotiating between a client and server /// what features a stream is allowed to use. The enums -/// values here are intented to represent higher level -/// features, additional details maybe negotiated +/// values here are intended to represent higher level +/// features, additional details may be negotiated /// with key-value pairs specific to the protocol. /// /// Enums added to this list should be assigned power-of-two values @@ -421,7 +421,7 @@ table Interval { // An absolute length of time unrelated to any calendar artifacts. // // For the purposes of Arrow Implementations, adding this value to a Timestamp -// ("t1") naively (i.e. simply summing the two number) is acceptable even +// ("t1") naively (i.e. 
simply summing the two numbers) is acceptable even // though in some cases the resulting Timestamp (t2) would not account for // leap-seconds during the elapsed time between "t1" and "t2". Similarly, // representing the difference between two Unix timestamp is acceptable, but @@ -510,7 +510,7 @@ table DictionaryEncoding { /// nested type. table Field { - /// Name is not required, in i.e. a List + /// Name is not required (e.g., in a List) name: string; /// Whether or not this field can contain nulls. Should be true in general. diff --git a/java/.gitattributes b/java/.gitattributes deleted file mode 100644 index 366d3c2b3cdf6..0000000000000 --- a/java/.gitattributes +++ /dev/null @@ -1,3 +0,0 @@ -.gitattributes export-ignore -.gitignore export-ignore -* text=auto eol=lf diff --git a/java/.gitignore b/java/.gitignore deleted file mode 100644 index 63c90af7b7d4c..0000000000000 --- a/java/.gitignore +++ /dev/null @@ -1,23 +0,0 @@ -*.DS_Store -*.iml -*.lck -*.log -*.patch -*~ -.buildpath -.checkstyle -.classpath -.factorypath -.idea/ -.project -.settings/ -/build/ -/*-build/ -CMakeCache.txt -CMakeFiles/ -Makefile -TAGS -arrow-git.properties -cmake_install.cmake -install_manifest.txt -target/ diff --git a/java/.mvn/develocity.xml b/java/.mvn/develocity.xml deleted file mode 100644 index df3cbccd2b6cb..0000000000000 --- a/java/.mvn/develocity.xml +++ /dev/null @@ -1,45 +0,0 @@ - - - - - https://ge.apache.org - false - - - - true - true - true - - #{isFalse(env['CI'])} - true - true - - #{{'0.0.0.0'}} - - - - - false - - - diff --git a/java/.mvn/extensions.xml b/java/.mvn/extensions.xml deleted file mode 100644 index 0836fc47d0100..0000000000000 --- a/java/.mvn/extensions.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - - - com.gradle - develocity-maven-extension - 1.22.2 - - - com.gradle - common-custom-user-data-maven-extension - 2.0.1 - - diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt deleted file mode 100644 index 8b29f37d80a1b..0000000000000 --- a/java/CMakeLists.txt 
+++ /dev/null @@ -1,105 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -cmake_minimum_required(VERSION 3.16) -message(STATUS "Building using CMake version: ${CMAKE_VERSION}") - -# find_package() uses _ROOT variables. -# https://cmake.org/cmake/help/latest/policy/CMP0074.html -if(POLICY CMP0074) - cmake_policy(SET CMP0074 NEW) -endif() - -project(arrow-java-jni) - -if("${CMAKE_CXX_STANDARD}" STREQUAL "") - set(CMAKE_CXX_STANDARD 17) -endif() -set(CMAKE_CXX_STANDARD_REQUIRED ON) - -# Components -option(ARROW_JAVA_JNI_ENABLE_DEFAULT "Whether enable components by default or not" ON) -option(ARROW_JAVA_JNI_ENABLE_C "Enable C data interface" ${ARROW_JAVA_JNI_ENABLE_DEFAULT}) -option(ARROW_JAVA_JNI_ENABLE_DATASET "Enable dataset" ${ARROW_JAVA_JNI_ENABLE_DEFAULT}) -option(ARROW_JAVA_JNI_ENABLE_GANDIVA "Enable Gandiva" ${ARROW_JAVA_JNI_ENABLE_DEFAULT}) -option(ARROW_JAVA_JNI_ENABLE_ORC "Enable ORC" ${ARROW_JAVA_JNI_ENABLE_DEFAULT}) - -include(GNUInstallDirs) - -# ccache -option(ARROW_JAVA_JNI_USE_CCACHE "Use ccache when compiling (if available)" ON) -if(ARROW_USE_CCACHE - AND NOT CMAKE_C_COMPILER_LAUNCHER - AND NOT CMAKE_CXX_COMPILER_LAUNCHER) - find_program(CCACHE ccache) - if(CCACHE) - message(STATUS "Using ccache: ${CCACHE}") - 
set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE}) - set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE}) - # ARROW-3985: let ccache preserve C++ comments, because some of them may be - # meaningful to the compiler - set(ENV{CCACHE_COMMENTS} "1") - endif() -endif() - -# Build -find_package(Java REQUIRED) -find_package(JNI REQUIRED) - -include(UseJava) - -add_library(jni INTERFACE IMPORTED) -set_target_properties(jni PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${JNI_INCLUDE_DIRS}") - -include(CTest) -if(BUILD_TESTING) - find_package(ArrowTesting REQUIRED) - find_package(GTest REQUIRED) - add_library(arrow_java_test INTERFACE IMPORTED) - target_link_libraries(arrow_java_test INTERFACE ArrowTesting::arrow_testing_static - GTest::gtest_main) -endif() - -# The ARROW_JAVA_JNI_ARCH_DIR will automatically be derived the normalized -# operating system from system processor. The user can override this variable -# if auto-detection fails. -if("${ARROW_JAVA_JNI_ARCH_DIR}" STREQUAL "") - if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64") - set(ARROW_JAVA_JNI_ARCH_DIR "aarch_64") - elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i386") - set(ARROW_JAVA_JNI_ARCH_DIR "x86_64") - elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64") - set(ARROW_JAVA_JNI_ARCH_DIR "aarch_64") - elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64") - set(ARROW_JAVA_JNI_ARCH_DIR "x86_64") - else() - set(ARROW_JAVA_JNI_ARCH_DIR "${CMAKE_SYSTEM_PROCESSOR}") - endif() -endif() - -if(ARROW_JAVA_JNI_ENABLE_C) - add_subdirectory(c) -endif() -if(ARROW_JAVA_JNI_ENABLE_DATASET) - add_subdirectory(dataset) -endif() -if(ARROW_JAVA_JNI_ENABLE_GANDIVA) - add_subdirectory(gandiva) -endif() -if(ARROW_JAVA_JNI_ENABLE_ORC) - add_subdirectory(adapter/orc) -endif() diff --git a/java/README.md b/java/README.md deleted file mode 100644 index 9f1b1c63c8f41..0000000000000 --- a/java/README.md +++ /dev/null @@ -1,139 +0,0 @@ - - -# Arrow Java - -## Getting Started - -The following guides explain the fundamental data structures used in the Java 
implementation of Apache Arrow. - -- [ValueVector](https://arrow.apache.org/docs/java/vector.html) is an abstraction that is used to store a sequence of values having the same type in an individual column. -- [VectorSchemaRoot](https://arrow.apache.org/docs/java/vector_schema_root.html) is a container that can hold multiple vectors based on a schema. -- The [Reading/Writing IPC formats](https://arrow.apache.org/docs/java/ipc.html) guide explains how to stream record batches as well as serializing record batches to files. - -Generated javadoc documentation is available [here](https://arrow.apache.org/docs/java/). - -## Building from source - -Refer to [Building Apache Arrow](https://arrow.apache.org/docs/dev/developers/java/building.html) for documentation of environment setup and build instructions. - -## Flatbuffers dependency - -Arrow uses Google's Flatbuffers to transport metadata. The java version of the library -requires the generated flatbuffer classes can only be used with the same version that -generated them. Arrow packages a version of the arrow-vector module that shades flatbuffers -and arrow-format into a single JAR. Using the classifier "shade-format-flatbuffers" in your -`pom.xml` will make use of this JAR, you can then exclude/resolve the original dependency to -a version of your choosing. - -### Updating the flatbuffers generated code - -1. Verify that your version of flatc matches the declared dependency: - -```bash -$ flatc --version -flatc version 24.3.25 - -$ grep "dep.fbs.version" java/pom.xml - 24.3.25 -``` - -2. Generate the flatbuffer java files by performing the following: - -```bash -cd $ARROW_HOME - -# remove the existing files -rm -rf java/format/src - -# regenerate from the .fbs files -flatc --java -o java/format/src/main/java format/*.fbs - -# prepend license header -mvn spotless:apply -pl :arrow-format -``` - -## Performance Tuning - -There are several system/environmental variables that users can configure. 
These trade off safety (they turn off checking) for speed. Typically they are only used in production settings after the code has been thoroughly tested without using them. - -* Bounds Checking for memory accesses: Bounds checking is on by default. You can disable it by setting either the -system property(`arrow.enable_unsafe_memory_access`) or the environmental variable -(`ARROW_ENABLE_UNSAFE_MEMORY_ACCESS`) to `true`. When both the system property and the environmental -variable are set, the system property takes precedence. - -* null checking for gets: `ValueVector` get methods (not `getObject`) methods by default verify the slot is not null. You can disable it by setting either the -system property(`arrow.enable_null_check_for_get`) or the environmental variable -(`ARROW_ENABLE_NULL_CHECK_FOR_GET`) to `false`. When both the system property and the environmental -variable are set, the system property takes precedence. - -## Java Properties - - * `-Dio.netty.tryReflectionSetAccessible=true` should be set. -This fixes `java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.(long, int) not available`. thrown by Netty. - * To support duplicate fields in a `StructVector` enable `-Darrow.struct.conflict.policy=CONFLICT_APPEND`. -Duplicate fields are ignored (`CONFLICT_REPLACE`) by default and overwritten. To support different policies for -conflicting or duplicate fields set this JVM flag or use the correct static constructor methods for `StructVector`s. - -## Java Code Style Guide - -Arrow Java follows the Google style guide [here][3] with the following -differences: - -* Imports are grouped, from top to bottom, in this order: static imports, -standard Java, org.\*, com.\* -* Line length can be up to 120 characters -* Operators for line wrapping are at end-of-line -* Naming rules for methods, parameters, etc. 
have been relaxed -* Disabled `NoFinalizer`, `OverloadMethodsDeclarationOrder`, and -`VariableDeclarationUsageDistance` due to the existing code base. These rules -should be followed when possible. - -Refer to [checkstyle.xml](dev/checkstyle/checkstyle.xml) for rule specifics. - -## Test Logging Configuration - -When running tests, Arrow Java uses the Logback logger with SLF4J. By default, -it uses the `logback.xml` present in the corresponding module's `src/test/resources` -directory, which has the default log level set to `INFO`. -Arrow Java can be built with an alternate logback configuration file using the -following command run in the project root directory: - -```bash -mvn -Dlogback.configurationFile=file: -``` - -See [Logback Configuration][1] for more details. - -## Integration Tests - -Integration tests which require more time or more memory can be run by activating -the `integration-tests` profile. This activates the [maven failsafe][4] plugin -and any class prefixed with `IT` will be run during the testing phase. The integration -tests currently require a larger amount of memory (>4GB) and time to complete. To activate -the profile: - -```bash -mvn -Pintegration-tests -``` - -[1]: https://logback.qos.ch/manual/configuration.html -[2]: https://github.com/apache/arrow/blob/main/cpp/README.md -[3]: http://google.github.io/styleguide/javaguide.html -[4]: https://maven.apache.org/surefire/maven-failsafe-plugin/ diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml deleted file mode 100644 index 827d19f2a2060..0000000000000 --- a/java/adapter/avro/pom.xml +++ /dev/null @@ -1,73 +0,0 @@ - - - - 4.0.0 - - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - ../../pom.xml - - - arrow-avro - Arrow AVRO Adapter - (Contrib/Experimental) A library for converting Avro data to Arrow data. 
- http://maven.apache.org - - - - org.apache.arrow - arrow-memory-core - - - org.apache.arrow - arrow-memory-netty - runtime - - - org.apache.arrow - arrow-vector - - - org.immutables - value-annotations - - - org.apache.avro - avro - ${dep.avro.version} - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - -Werror - - - - - - diff --git a/java/adapter/avro/src/main/java/module-info.java b/java/adapter/avro/src/main/java/module-info.java deleted file mode 100644 index 5c6204be60e9c..0000000000000 --- a/java/adapter/avro/src/main/java/module-info.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -module org.apache.arrow.adapter.avro { - exports org.apache.arrow.adapter.avro.consumers; - exports org.apache.arrow.adapter.avro.consumers.logical; - exports org.apache.arrow.adapter.avro; - - requires org.apache.arrow.memory.core; - requires org.apache.arrow.vector; - requires org.apache.avro; -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrow.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrow.java deleted file mode 100644 index 2392c36f94cee..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrow.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro; - -import java.io.IOException; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.avro.Schema; -import org.apache.avro.io.Decoder; - -/** Utility class to convert Avro objects to columnar Arrow format objects. */ -public class AvroToArrow { - - /** - * Fetch the data from {@link Decoder} and convert it to Arrow objects. Only for testing purpose. - * - * @param schema avro schema. 
- * @param decoder avro decoder - * @param config configuration of the conversion. - * @return Arrow Data Objects {@link VectorSchemaRoot} - */ - static VectorSchemaRoot avroToArrow(Schema schema, Decoder decoder, AvroToArrowConfig config) - throws IOException { - Preconditions.checkNotNull(schema, "Avro schema object cannot be null"); - Preconditions.checkNotNull(decoder, "Avro decoder object cannot be null"); - Preconditions.checkNotNull(config, "config cannot be null"); - - return AvroToArrowUtils.avroToArrowVectors(schema, decoder, config); - } - - /** - * Fetch the data from {@link Decoder} and iteratively convert it to Arrow objects. - * - * @param schema avro schema - * @param decoder avro decoder - * @param config configuration of the conversion. - * @throws IOException on error - */ - public static AvroToArrowVectorIterator avroToArrowIterator( - Schema schema, Decoder decoder, AvroToArrowConfig config) throws IOException { - - Preconditions.checkNotNull(schema, "Avro schema object cannot be null"); - Preconditions.checkNotNull(decoder, "Avro decoder object cannot be null"); - Preconditions.checkNotNull(config, "config cannot be null"); - - return AvroToArrowVectorIterator.create(decoder, schema, config); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfig.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfig.java deleted file mode 100644 index 290d1a77d956c..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfig.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro; - -import java.util.Set; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.dictionary.DictionaryProvider; - -/** This class configures the Avro-to-Arrow conversion process. */ -public class AvroToArrowConfig { - - private final BufferAllocator allocator; - /** - * The maximum rowCount to read each time when partially convert data. Default value is 1024 and - * -1 means read all data into one vector. - */ - private final int targetBatchSize; - - /** - * The dictionary provider used for enum type. If avro schema has enum type, will create - * dictionary and update this provider. - */ - private final DictionaryProvider.MapDictionaryProvider provider; - - /** The field names which to skip when reading decoder values. */ - private final Set skipFieldNames; - - /** - * Instantiate an instance. - * - * @param allocator The memory allocator to construct the Arrow vectors with. - * @param targetBatchSize The maximum rowCount to read each time when partially convert data. - * @param provider The dictionary provider used for enum type, adapter will update this provider. - * @param skipFieldNames Field names which to skip. 
- */ - AvroToArrowConfig( - BufferAllocator allocator, - int targetBatchSize, - DictionaryProvider.MapDictionaryProvider provider, - Set skipFieldNames) { - - Preconditions.checkArgument( - targetBatchSize == AvroToArrowVectorIterator.NO_LIMIT_BATCH_SIZE || targetBatchSize > 0, - "invalid targetBatchSize: %s", - targetBatchSize); - - this.allocator = allocator; - this.targetBatchSize = targetBatchSize; - this.provider = provider; - this.skipFieldNames = skipFieldNames; - } - - public BufferAllocator getAllocator() { - return allocator; - } - - public int getTargetBatchSize() { - return targetBatchSize; - } - - public DictionaryProvider.MapDictionaryProvider getProvider() { - return provider; - } - - public Set getSkipFieldNames() { - return skipFieldNames; - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfigBuilder.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfigBuilder.java deleted file mode 100644 index 1fa176a7fea38..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowConfigBuilder.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.avro; - -import java.util.HashSet; -import java.util.Set; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.dictionary.DictionaryProvider; - -/** This class builds {@link AvroToArrowConfig}s. */ -public class AvroToArrowConfigBuilder { - - private BufferAllocator allocator; - - private int targetBatchSize; - - private DictionaryProvider.MapDictionaryProvider provider; - - private Set skipFieldNames; - - /** Default constructor for the {@link AvroToArrowConfigBuilder}. */ - public AvroToArrowConfigBuilder(BufferAllocator allocator) { - this.allocator = allocator; - this.targetBatchSize = AvroToArrowVectorIterator.DEFAULT_BATCH_SIZE; - this.provider = new DictionaryProvider.MapDictionaryProvider(); - this.skipFieldNames = new HashSet<>(); - } - - public AvroToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) { - this.targetBatchSize = targetBatchSize; - return this; - } - - public AvroToArrowConfigBuilder setProvider(DictionaryProvider.MapDictionaryProvider provider) { - this.provider = provider; - return this; - } - - public AvroToArrowConfigBuilder setSkipFieldNames(Set skipFieldNames) { - this.skipFieldNames = skipFieldNames; - return this; - } - - /** This builds the {@link AvroToArrowConfig} from the provided params. */ - public AvroToArrowConfig build() { - return new AvroToArrowConfig(allocator, targetBatchSize, provider, skipFieldNames); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java deleted file mode 100644 index b39121cfd1ae7..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java +++ /dev/null @@ -1,827 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro; - -import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; -import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; - -import java.io.EOFException; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; -import org.apache.arrow.adapter.avro.consumers.AvroArraysConsumer; -import org.apache.arrow.adapter.avro.consumers.AvroBooleanConsumer; -import org.apache.arrow.adapter.avro.consumers.AvroBytesConsumer; -import org.apache.arrow.adapter.avro.consumers.AvroDoubleConsumer; -import org.apache.arrow.adapter.avro.consumers.AvroEnumConsumer; -import org.apache.arrow.adapter.avro.consumers.AvroFixedConsumer; -import org.apache.arrow.adapter.avro.consumers.AvroFloatConsumer; -import org.apache.arrow.adapter.avro.consumers.AvroIntConsumer; -import org.apache.arrow.adapter.avro.consumers.AvroLongConsumer; -import org.apache.arrow.adapter.avro.consumers.AvroMapConsumer; -import org.apache.arrow.adapter.avro.consumers.AvroNullConsumer; -import 
org.apache.arrow.adapter.avro.consumers.AvroStringConsumer; -import org.apache.arrow.adapter.avro.consumers.AvroStructConsumer; -import org.apache.arrow.adapter.avro.consumers.AvroUnionsConsumer; -import org.apache.arrow.adapter.avro.consumers.CompositeAvroConsumer; -import org.apache.arrow.adapter.avro.consumers.Consumer; -import org.apache.arrow.adapter.avro.consumers.SkipConsumer; -import org.apache.arrow.adapter.avro.consumers.SkipFunction; -import org.apache.arrow.adapter.avro.consumers.logical.AvroDateConsumer; -import org.apache.arrow.adapter.avro.consumers.logical.AvroDecimalConsumer; -import org.apache.arrow.adapter.avro.consumers.logical.AvroTimeMicroConsumer; -import org.apache.arrow.adapter.avro.consumers.logical.AvroTimeMillisConsumer; -import org.apache.arrow.adapter.avro.consumers.logical.AvroTimestampMicrosConsumer; -import org.apache.arrow.adapter.avro.consumers.logical.AvroTimestampMillisConsumer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import 
org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryEncoder; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.apache.arrow.vector.util.ValueVectorUtility; -import org.apache.avro.LogicalType; -import org.apache.avro.LogicalTypes; -import org.apache.avro.Schema; -import org.apache.avro.io.Decoder; - -/** - * Class that does most of the work to convert Avro data into Arrow columnar format Vector objects. - */ -public class AvroToArrowUtils { - - /** - * Creates a {@link Consumer} from the {@link Schema} - * - *

This method currently performs following type mapping for Avro data types to corresponding - * Arrow data types. - * - *

    - *
  • STRING --> ArrowType.Utf8 - *
  • INT --> ArrowType.Int(32, signed) - *
  • LONG --> ArrowType.Int(64, signed) - *
  • FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) - *
  • DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) - *
  • BOOLEAN --> ArrowType.Bool - *
  • BYTES --> ArrowType.Binary - *
  • ARRAY --> ArrowType.List - *
  • MAP --> ArrowType.Map - *
  • FIXED --> ArrowType.FixedSizeBinary - *
  • RECORD --> ArrowType.Struct - *
  • UNION --> ArrowType.Union - *
  • ENUM--> ArrowType.Int - *
  • DECIMAL --> ArrowType.Decimal - *
  • Date --> ArrowType.Date(DateUnit.DAY) - *
  • TimeMillis --> ArrowType.Time(TimeUnit.MILLISECOND, 32) - *
  • TimeMicros --> ArrowType.Time(TimeUnit.MICROSECOND, 64) - *
  • TimestampMillis --> ArrowType.Timestamp(TimeUnit.MILLISECOND, null) - *
  • TimestampMicros --> ArrowType.Timestamp(TimeUnit.MICROSECOND, null) - *
- */ - private static Consumer createConsumer(Schema schema, String name, AvroToArrowConfig config) { - return createConsumer(schema, name, false, config, null); - } - - private static Consumer createConsumer( - Schema schema, String name, AvroToArrowConfig config, FieldVector vector) { - return createConsumer(schema, name, false, config, vector); - } - - /** - * Create a consumer with the given Avro schema. - * - * @param schema avro schema - * @param name arrow field name - * @param consumerVector vector to keep in consumer, if v == null, will create a new vector via - * field. - * @return consumer - */ - private static Consumer createConsumer( - Schema schema, - String name, - boolean nullable, - AvroToArrowConfig config, - FieldVector consumerVector) { - - Preconditions.checkNotNull(schema, "Avro schema object can't be null"); - Preconditions.checkNotNull(config, "Config can't be null"); - - final BufferAllocator allocator = config.getAllocator(); - - final Schema.Type type = schema.getType(); - final LogicalType logicalType = schema.getLogicalType(); - - final ArrowType arrowType; - final FieldType fieldType; - final FieldVector vector; - final Consumer consumer; - - switch (type) { - case UNION: - consumer = createUnionConsumer(schema, name, config, consumerVector); - break; - case ARRAY: - consumer = createArrayConsumer(schema, name, config, consumerVector); - break; - case MAP: - consumer = createMapConsumer(schema, name, config, consumerVector); - break; - case RECORD: - consumer = createStructConsumer(schema, name, config, consumerVector); - break; - case ENUM: - consumer = createEnumConsumer(schema, name, config, consumerVector); - break; - case STRING: - arrowType = new ArrowType.Utf8(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroStringConsumer((VarCharVector) vector); - break; - case FIXED: - Map extProps = 
createExternalProps(schema); - if (logicalType instanceof LogicalTypes.Decimal) { - arrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); - fieldType = - new FieldType( - nullable, arrowType, /*dictionary=*/ null, getMetaData(schema, extProps)); - vector = createVector(consumerVector, fieldType, name, allocator); - consumer = - new AvroDecimalConsumer.FixedDecimalConsumer( - (DecimalVector) vector, schema.getFixedSize()); - } else { - arrowType = new ArrowType.FixedSizeBinary(schema.getFixedSize()); - fieldType = - new FieldType( - nullable, arrowType, /*dictionary=*/ null, getMetaData(schema, extProps)); - vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroFixedConsumer((FixedSizeBinaryVector) vector, schema.getFixedSize()); - } - break; - case INT: - if (logicalType instanceof LogicalTypes.Date) { - arrowType = new ArrowType.Date(DateUnit.DAY); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroDateConsumer((DateDayVector) vector); - } else if (logicalType instanceof LogicalTypes.TimeMillis) { - arrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroTimeMillisConsumer((TimeMilliVector) vector); - } else { - arrowType = new ArrowType.Int(32, /*isSigned=*/ true); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroIntConsumer((IntVector) vector); - } - break; - case BOOLEAN: - arrowType = new ArrowType.Bool(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = createVector(consumerVector, fieldType, name, allocator); - 
consumer = new AvroBooleanConsumer((BitVector) vector); - break; - case LONG: - if (logicalType instanceof LogicalTypes.TimeMicros) { - arrowType = new ArrowType.Time(TimeUnit.MICROSECOND, 64); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroTimeMicroConsumer((TimeMicroVector) vector); - } else if (logicalType instanceof LogicalTypes.TimestampMillis) { - arrowType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroTimestampMillisConsumer((TimeStampMilliVector) vector); - } else if (logicalType instanceof LogicalTypes.TimestampMicros) { - arrowType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroTimestampMicrosConsumer((TimeStampMicroVector) vector); - } else { - arrowType = new ArrowType.Int(64, /*isSigned=*/ true); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroLongConsumer((BigIntVector) vector); - } - break; - case FLOAT: - arrowType = new ArrowType.FloatingPoint(SINGLE); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroFloatConsumer((Float4Vector) vector); - break; - case DOUBLE: - arrowType = new ArrowType.FloatingPoint(DOUBLE); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = createVector(consumerVector, fieldType, name, allocator); - 
consumer = new AvroDoubleConsumer((Float8Vector) vector); - break; - case BYTES: - if (logicalType instanceof LogicalTypes.Decimal) { - arrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroDecimalConsumer.BytesDecimalConsumer((DecimalVector) vector); - } else { - arrowType = new ArrowType.Binary(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroBytesConsumer((VarBinaryVector) vector); - } - break; - case NULL: - arrowType = new ArrowType.Null(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); - vector = fieldType.createNewSingleVector(name, allocator, /*schemaCallBack=*/ null); - consumer = new AvroNullConsumer((NullVector) vector); - break; - default: - // no-op, shouldn't get here - throw new UnsupportedOperationException( - "Can't convert avro type %s to arrow type." 
+ type.getName()); - } - return consumer; - } - - private static ArrowType createDecimalArrowType(LogicalTypes.Decimal logicalType) { - final int scale = logicalType.getScale(); - final int precision = logicalType.getPrecision(); - Preconditions.checkArgument( - precision > 0 && precision <= 38, "Precision must be in range of 1 to 38"); - Preconditions.checkArgument(scale >= 0 && scale <= 38, "Scale must be in range of 0 to 38."); - Preconditions.checkArgument( - scale <= precision, - "Invalid decimal scale: %s (greater than precision: %s)", - scale, - precision); - - return new ArrowType.Decimal(precision, scale, 128); - } - - private static Consumer createSkipConsumer(Schema schema) { - - SkipFunction skipFunction; - Schema.Type type = schema.getType(); - - switch (type) { - case UNION: - List unionDelegates = - schema.getTypes().stream().map(s -> createSkipConsumer(s)).collect(Collectors.toList()); - skipFunction = decoder -> unionDelegates.get(decoder.readInt()).consume(decoder); - - break; - case ARRAY: - Consumer elementDelegate = createSkipConsumer(schema.getElementType()); - skipFunction = - decoder -> { - for (long i = decoder.skipArray(); i != 0; i = decoder.skipArray()) { - for (long j = 0; j < i; j++) { - elementDelegate.consume(decoder); - } - } - }; - break; - case MAP: - Consumer valueDelegate = createSkipConsumer(schema.getValueType()); - skipFunction = - decoder -> { - for (long i = decoder.skipMap(); i != 0; i = decoder.skipMap()) { - for (long j = 0; j < i; j++) { - decoder.skipString(); // Discard key - valueDelegate.consume(decoder); - } - } - }; - break; - case RECORD: - List delegates = - schema.getFields().stream() - .map(field -> createSkipConsumer(field.schema())) - .collect(Collectors.toList()); - - skipFunction = - decoder -> { - for (Consumer consumer : delegates) { - consumer.consume(decoder); - } - }; - - break; - case ENUM: - skipFunction = decoder -> decoder.readEnum(); - break; - case STRING: - skipFunction = decoder -> 
decoder.skipString(); - break; - case FIXED: - skipFunction = decoder -> decoder.skipFixed(schema.getFixedSize()); - break; - case INT: - skipFunction = decoder -> decoder.readInt(); - break; - case BOOLEAN: - skipFunction = decoder -> decoder.skipFixed(1); - break; - case LONG: - skipFunction = decoder -> decoder.readLong(); - break; - case FLOAT: - skipFunction = decoder -> decoder.readFloat(); - break; - case DOUBLE: - skipFunction = decoder -> decoder.readDouble(); - break; - case BYTES: - skipFunction = decoder -> decoder.skipBytes(); - break; - case NULL: - skipFunction = decoder -> {}; - break; - default: - // no-op, shouldn't get here - throw new UnsupportedOperationException("Invalid avro type: " + type.getName()); - } - - return new SkipConsumer(skipFunction); - } - - static CompositeAvroConsumer createCompositeConsumer(Schema schema, AvroToArrowConfig config) { - - List consumers = new ArrayList<>(); - final Set skipFieldNames = config.getSkipFieldNames(); - - Schema.Type type = schema.getType(); - if (type == Schema.Type.RECORD) { - for (Schema.Field field : schema.getFields()) { - if (skipFieldNames.contains(field.name())) { - consumers.add(createSkipConsumer(field.schema())); - } else { - Consumer consumer = createConsumer(field.schema(), field.name(), config); - consumers.add(consumer); - } - } - } else { - Consumer consumer = createConsumer(schema, "", config); - consumers.add(consumer); - } - - return new CompositeAvroConsumer(consumers); - } - - private static FieldVector createVector( - FieldVector consumerVector, FieldType fieldType, String name, BufferAllocator allocator) { - return consumerVector != null - ? 
consumerVector - : fieldType.createNewSingleVector(name, allocator, null); - } - - private static String getDefaultFieldName(ArrowType type) { - Types.MinorType minorType = Types.getMinorTypeForArrowType(type); - return minorType.name().toLowerCase(Locale.ROOT); - } - - private static Field avroSchemaToField(Schema schema, String name, AvroToArrowConfig config) { - return avroSchemaToField(schema, name, config, null); - } - - private static Field avroSchemaToField( - Schema schema, String name, AvroToArrowConfig config, Map externalProps) { - - final Schema.Type type = schema.getType(); - final LogicalType logicalType = schema.getLogicalType(); - final List children = new ArrayList<>(); - final FieldType fieldType; - - switch (type) { - case UNION: - for (int i = 0; i < schema.getTypes().size(); i++) { - Schema childSchema = schema.getTypes().get(i); - // Union child vector should use default name - children.add(avroSchemaToField(childSchema, null, config)); - } - fieldType = - createFieldType(new ArrowType.Union(UnionMode.Sparse, null), schema, externalProps); - break; - case ARRAY: - Schema elementSchema = schema.getElementType(); - children.add(avroSchemaToField(elementSchema, elementSchema.getName(), config)); - fieldType = createFieldType(new ArrowType.List(), schema, externalProps); - break; - case MAP: - // MapVector internal struct field and key field should be non-nullable - FieldType keyFieldType = - new FieldType(/*nullable=*/ false, new ArrowType.Utf8(), /*dictionary=*/ null); - Field keyField = new Field("key", keyFieldType, /*children=*/ null); - Field valueField = avroSchemaToField(schema.getValueType(), "value", config); - - FieldType structFieldType = - new FieldType(false, new ArrowType.Struct(), /*dictionary=*/ null); - Field structField = - new Field("internal", structFieldType, Arrays.asList(keyField, valueField)); - children.add(structField); - fieldType = - createFieldType(new ArrowType.Map(/*keysSorted=*/ false), schema, externalProps); - 
break; - case RECORD: - final Set skipFieldNames = config.getSkipFieldNames(); - for (int i = 0; i < schema.getFields().size(); i++) { - final Schema.Field field = schema.getFields().get(i); - Schema childSchema = field.schema(); - String fullChildName = String.format("%s.%s", name, field.name()); - if (!skipFieldNames.contains(fullChildName)) { - final Map extProps = new HashMap<>(); - String doc = field.doc(); - Set aliases = field.aliases(); - if (doc != null) { - extProps.put("doc", doc); - } - if (aliases != null) { - extProps.put("aliases", convertAliases(aliases)); - } - children.add(avroSchemaToField(childSchema, fullChildName, config, extProps)); - } - } - fieldType = createFieldType(new ArrowType.Struct(), schema, externalProps); - break; - case ENUM: - DictionaryProvider.MapDictionaryProvider provider = config.getProvider(); - int current = provider.getDictionaryIds().size(); - int enumCount = schema.getEnumSymbols().size(); - ArrowType.Int indexType = DictionaryEncoder.getIndexType(enumCount); - - fieldType = - createFieldType( - indexType, - schema, - externalProps, - new DictionaryEncoding(current, /*ordered=*/ false, /*indexType=*/ indexType)); - break; - - case STRING: - fieldType = createFieldType(new ArrowType.Utf8(), schema, externalProps); - break; - case FIXED: - final ArrowType fixedArrowType; - if (logicalType instanceof LogicalTypes.Decimal) { - fixedArrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); - } else { - fixedArrowType = new ArrowType.FixedSizeBinary(schema.getFixedSize()); - } - fieldType = createFieldType(fixedArrowType, schema, externalProps); - break; - case INT: - final ArrowType intArrowType; - if (logicalType instanceof LogicalTypes.Date) { - intArrowType = new ArrowType.Date(DateUnit.DAY); - } else if (logicalType instanceof LogicalTypes.TimeMillis) { - intArrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32); - } else { - intArrowType = new ArrowType.Int(32, /*isSigned=*/ true); - } - fieldType = 
createFieldType(intArrowType, schema, externalProps); - break; - case BOOLEAN: - fieldType = createFieldType(new ArrowType.Bool(), schema, externalProps); - break; - case LONG: - final ArrowType longArrowType; - if (logicalType instanceof LogicalTypes.TimeMicros) { - longArrowType = new ArrowType.Time(TimeUnit.MICROSECOND, 64); - } else if (logicalType instanceof LogicalTypes.TimestampMillis) { - longArrowType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); - } else if (logicalType instanceof LogicalTypes.TimestampMicros) { - longArrowType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null); - } else { - longArrowType = new ArrowType.Int(64, /*isSigned=*/ true); - } - fieldType = createFieldType(longArrowType, schema, externalProps); - break; - case FLOAT: - fieldType = createFieldType(new ArrowType.FloatingPoint(SINGLE), schema, externalProps); - break; - case DOUBLE: - fieldType = createFieldType(new ArrowType.FloatingPoint(DOUBLE), schema, externalProps); - break; - case BYTES: - final ArrowType bytesArrowType; - if (logicalType instanceof LogicalTypes.Decimal) { - bytesArrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); - } else { - bytesArrowType = new ArrowType.Binary(); - } - fieldType = createFieldType(bytesArrowType, schema, externalProps); - break; - case NULL: - fieldType = createFieldType(ArrowType.Null.INSTANCE, schema, externalProps); - break; - default: - // no-op, shouldn't get here - throw new UnsupportedOperationException(); - } - - if (name == null) { - name = getDefaultFieldName(fieldType.getType()); - } - return new Field(name, fieldType, children.size() == 0 ? 
null : children); - } - - private static Consumer createArrayConsumer( - Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { - - ListVector listVector; - if (consumerVector == null) { - final Field field = avroSchemaToField(schema, name, config); - listVector = (ListVector) field.createVector(config.getAllocator()); - } else { - listVector = (ListVector) consumerVector; - } - - FieldVector dataVector = listVector.getDataVector(); - - // create delegate - Schema childSchema = schema.getElementType(); - Consumer delegate = createConsumer(childSchema, childSchema.getName(), config, dataVector); - - return new AvroArraysConsumer(listVector, delegate); - } - - private static Consumer createStructConsumer( - Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { - - final Set skipFieldNames = config.getSkipFieldNames(); - - StructVector structVector; - if (consumerVector == null) { - final Field field = avroSchemaToField(schema, name, config, createExternalProps(schema)); - structVector = (StructVector) field.createVector(config.getAllocator()); - } else { - structVector = (StructVector) consumerVector; - } - - Consumer[] delegates = new Consumer[schema.getFields().size()]; - int vectorIndex = 0; - for (int i = 0; i < schema.getFields().size(); i++) { - Schema.Field childField = schema.getFields().get(i); - Consumer delegate; - // use full name to distinguish fields have same names between parent and child fields. 
- final String fullChildName = String.format("%s.%s", name, childField.name()); - if (skipFieldNames.contains(fullChildName)) { - delegate = createSkipConsumer(childField.schema()); - } else { - delegate = - createConsumer( - childField.schema(), - fullChildName, - config, - structVector.getChildrenFromFields().get(vectorIndex++)); - } - - delegates[i] = delegate; - } - - return new AvroStructConsumer(structVector, delegates); - } - - private static Consumer createEnumConsumer( - Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { - - BaseIntVector indexVector; - if (consumerVector == null) { - final Field field = avroSchemaToField(schema, name, config, createExternalProps(schema)); - indexVector = (BaseIntVector) field.createVector(config.getAllocator()); - } else { - indexVector = (BaseIntVector) consumerVector; - } - - final int valueCount = schema.getEnumSymbols().size(); - VarCharVector dictVector = new VarCharVector(name, config.getAllocator()); - dictVector.allocateNewSafe(); - dictVector.setValueCount(valueCount); - for (int i = 0; i < valueCount; i++) { - dictVector.set(i, schema.getEnumSymbols().get(i).getBytes(StandardCharsets.UTF_8)); - } - Dictionary dictionary = new Dictionary(dictVector, indexVector.getField().getDictionary()); - config.getProvider().put(dictionary); - - return new AvroEnumConsumer(indexVector); - } - - private static Consumer createMapConsumer( - Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { - - MapVector mapVector; - if (consumerVector == null) { - final Field field = avroSchemaToField(schema, name, config); - mapVector = (MapVector) field.createVector(config.getAllocator()); - } else { - mapVector = (MapVector) consumerVector; - } - - // create delegate struct consumer - StructVector structVector = (StructVector) mapVector.getDataVector(); - - // keys in avro map are always assumed to be strings. 
- Consumer keyConsumer = - new AvroStringConsumer((VarCharVector) structVector.getChildrenFromFields().get(0)); - Consumer valueConsumer = - createConsumer( - schema.getValueType(), - schema.getValueType().getName(), - config, - structVector.getChildrenFromFields().get(1)); - - AvroStructConsumer internalConsumer = - new AvroStructConsumer(structVector, new Consumer[] {keyConsumer, valueConsumer}); - - return new AvroMapConsumer(mapVector, internalConsumer); - } - - private static Consumer createUnionConsumer( - Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { - final int size = schema.getTypes().size(); - - final boolean nullable = - schema.getTypes().stream().anyMatch(t -> t.getType() == Schema.Type.NULL); - - UnionVector unionVector; - if (consumerVector == null) { - final Field field = avroSchemaToField(schema, name, config); - unionVector = (UnionVector) field.createVector(config.getAllocator()); - } else { - unionVector = (UnionVector) consumerVector; - } - - List childVectors = unionVector.getChildrenFromFields(); - - Consumer[] delegates = new Consumer[size]; - Types.MinorType[] types = new Types.MinorType[size]; - - for (int i = 0; i < size; i++) { - FieldVector child = childVectors.get(i); - Schema subSchema = schema.getTypes().get(i); - Consumer delegate = createConsumer(subSchema, subSchema.getName(), nullable, config, child); - delegates[i] = delegate; - types[i] = child.getMinorType(); - } - return new AvroUnionsConsumer(unionVector, delegates, types); - } - - /** - * Read data from {@link Decoder} and generate a {@link VectorSchemaRoot}. 
- * - * @param schema avro schema - * @param decoder avro decoder to read data from - */ - static VectorSchemaRoot avroToArrowVectors( - Schema schema, Decoder decoder, AvroToArrowConfig config) throws IOException { - - List vectors = new ArrayList<>(); - List consumers = new ArrayList<>(); - final Set skipFieldNames = config.getSkipFieldNames(); - - Schema.Type type = schema.getType(); - if (type == Schema.Type.RECORD) { - for (Schema.Field field : schema.getFields()) { - if (skipFieldNames.contains(field.name())) { - consumers.add(createSkipConsumer(field.schema())); - } else { - Consumer consumer = createConsumer(field.schema(), field.name(), config); - consumers.add(consumer); - vectors.add(consumer.getVector()); - } - } - } else { - Consumer consumer = createConsumer(schema, "", config); - consumers.add(consumer); - vectors.add(consumer.getVector()); - } - - long validConsumerCount = consumers.stream().filter(c -> !c.skippable()).count(); - Preconditions.checkArgument( - vectors.size() == validConsumerCount, "vectors size not equals consumers size."); - - List fields = vectors.stream().map(t -> t.getField()).collect(Collectors.toList()); - - VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 0); - - CompositeAvroConsumer compositeConsumer = new CompositeAvroConsumer(consumers); - - int valueCount = 0; - try { - while (true) { - ValueVectorUtility.ensureCapacity(root, valueCount + 1); - compositeConsumer.consume(decoder); - valueCount++; - } - } catch (EOFException eof) { - // reach the end of encoder stream. 
- root.setRowCount(valueCount); - } catch (Exception e) { - compositeConsumer.close(); - throw new UnsupportedOperationException("Error occurs while consume process.", e); - } - - return root; - } - - private static Map getMetaData(Schema schema) { - Map metadata = new HashMap<>(); - schema.getObjectProps().forEach((k, v) -> metadata.put(k, v.toString())); - return metadata; - } - - private static Map getMetaData(Schema schema, Map externalProps) { - Map metadata = getMetaData(schema); - if (externalProps != null) { - metadata.putAll(externalProps); - } - return metadata; - } - - /** Parse avro attributes and convert them to metadata. */ - private static Map createExternalProps(Schema schema) { - final Map extProps = new HashMap<>(); - String doc = schema.getDoc(); - Set aliases = schema.getAliases(); - if (doc != null) { - extProps.put("doc", doc); - } - if (aliases != null) { - extProps.put("aliases", convertAliases(aliases)); - } - return extProps; - } - - private static FieldType createFieldType( - ArrowType arrowType, Schema schema, Map externalProps) { - return createFieldType(arrowType, schema, externalProps, /*dictionary=*/ null); - } - - private static FieldType createFieldType( - ArrowType arrowType, - Schema schema, - Map externalProps, - DictionaryEncoding dictionary) { - - return new FieldType( - /*nullable=*/ false, arrowType, dictionary, getMetaData(schema, externalProps)); - } - - private static String convertAliases(Set aliases) { - JsonStringArrayList jsonList = new JsonStringArrayList(); - aliases.stream().forEach(a -> jsonList.add(a)); - return jsonList.toString(); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java deleted file mode 100644 index 4123370061794..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java +++ /dev/null @@ 
-1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro; - -import java.io.EOFException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.arrow.adapter.avro.consumers.CompositeAvroConsumer; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.util.ValueVectorUtility; -import org.apache.avro.Schema; -import org.apache.avro.io.Decoder; - -/** VectorSchemaRoot iterator for partially converting avro data. */ -public class AvroToArrowVectorIterator implements Iterator, AutoCloseable { - - public static final int NO_LIMIT_BATCH_SIZE = -1; - public static final int DEFAULT_BATCH_SIZE = 1024; - - private final Decoder decoder; - private final Schema schema; - - private final AvroToArrowConfig config; - - private CompositeAvroConsumer compositeConsumer; - - private org.apache.arrow.vector.types.pojo.Schema rootSchema; - - private VectorSchemaRoot nextBatch; - - private final int targetBatchSize; - - /** Construct an instance. 
*/ - private AvroToArrowVectorIterator(Decoder decoder, Schema schema, AvroToArrowConfig config) { - - this.decoder = decoder; - this.schema = schema; - this.config = config; - this.targetBatchSize = config.getTargetBatchSize(); - } - - /** Create a ArrowVectorIterator to partially convert data. */ - public static AvroToArrowVectorIterator create( - Decoder decoder, Schema schema, AvroToArrowConfig config) { - - AvroToArrowVectorIterator iterator = new AvroToArrowVectorIterator(decoder, schema, config); - try { - iterator.initialize(); - return iterator; - } catch (Exception e) { - iterator.close(); - throw new RuntimeException("Error occurs while creating iterator.", e); - } - } - - private void initialize() { - // create consumers - compositeConsumer = AvroToArrowUtils.createCompositeConsumer(schema, config); - List vectors = new ArrayList<>(); - compositeConsumer.getConsumers().forEach(c -> vectors.add(c.getVector())); - List fields = vectors.stream().map(t -> t.getField()).collect(Collectors.toList()); - VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 0); - rootSchema = root.getSchema(); - - load(root); - } - - private void consumeData(VectorSchemaRoot root) { - int readRowCount = 0; - try { - while ((targetBatchSize == NO_LIMIT_BATCH_SIZE || readRowCount < targetBatchSize)) { - compositeConsumer.consume(decoder); - readRowCount++; - } - - if (targetBatchSize == NO_LIMIT_BATCH_SIZE) { - while (true) { - ValueVectorUtility.ensureCapacity(root, readRowCount + 1); - compositeConsumer.consume(decoder); - readRowCount++; - } - } else { - while (readRowCount < targetBatchSize) { - compositeConsumer.consume(decoder); - readRowCount++; - } - } - - root.setRowCount(readRowCount); - } catch (EOFException eof) { - // reach the end of encoder stream. 
- root.setRowCount(readRowCount); - } catch (Exception e) { - compositeConsumer.close(); - throw new RuntimeException("Error occurs while consuming data.", e); - } - } - - // Loads the next schema root or null if no more rows are available. - private void load(VectorSchemaRoot root) { - final int targetBatchSize = config.getTargetBatchSize(); - if (targetBatchSize != NO_LIMIT_BATCH_SIZE) { - ValueVectorUtility.preAllocate(root, targetBatchSize); - } - - long validConsumerCount = - compositeConsumer.getConsumers().stream().filter(c -> !c.skippable()).count(); - Preconditions.checkArgument( - root.getFieldVectors().size() == validConsumerCount, - "Schema root vectors size not equals to consumers size."); - - compositeConsumer.resetConsumerVectors(root); - - // consume data - consumeData(root); - - if (root.getRowCount() == 0) { - root.close(); - nextBatch = null; - } else { - nextBatch = root; - } - } - - @Override - public boolean hasNext() { - return nextBatch != null; - } - - /** Gets the next vector. The user is responsible for freeing its resources. */ - @Override - public VectorSchemaRoot next() { - Preconditions.checkArgument(hasNext()); - VectorSchemaRoot returned = nextBatch; - try { - load(VectorSchemaRoot.create(rootSchema, config.getAllocator())); - } catch (Exception e) { - returned.close(); - throw new RuntimeException("Error occurs while getting next schema root.", e); - } - return returned; - } - - /** Clean up resources. 
*/ - @Override - public void close() { - if (nextBatch != null) { - nextBatch.close(); - } - compositeConsumer.close(); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java deleted file mode 100644 index 4555ce7a295f7..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume array type values from avro decoder. Write the data to {@link ListVector}. - */ -public class AvroArraysConsumer extends BaseAvroConsumer { - - private final Consumer delegate; - - /** Instantiate a ArrayConsumer. 
*/ - public AvroArraysConsumer(ListVector vector, Consumer delegate) { - super(vector); - this.delegate = delegate; - } - - @Override - public void consume(Decoder decoder) throws IOException { - - vector.startNewValue(currentIndex); - long totalCount = 0; - for (long count = decoder.readArrayStart(); count != 0; count = decoder.arrayNext()) { - totalCount += count; - ensureInnerVectorCapacity(totalCount); - for (int element = 0; element < count; element++) { - delegate.consume(decoder); - } - } - vector.endValue(currentIndex, (int) totalCount); - currentIndex++; - } - - @Override - public void close() throws Exception { - super.close(); - delegate.close(); - } - - @Override - public boolean resetValueVector(ListVector vector) { - this.delegate.resetValueVector(vector.getDataVector()); - return super.resetValueVector(vector); - } - - void ensureInnerVectorCapacity(long targetCapacity) { - while (vector.getDataVector().getValueCapacity() < targetCapacity) { - vector.getDataVector().reAlloc(); - } - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java deleted file mode 100644 index 09eb5f3b255d5..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.vector.BitVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume boolean type values from avro decoder. Write the data to {@link - * BitVector}. - */ -public class AvroBooleanConsumer extends BaseAvroConsumer { - - /** Instantiate a AvroBooleanConsumer. */ - public AvroBooleanConsumer(BitVector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - vector.set(currentIndex, decoder.readBoolean() ? 1 : 0); - currentIndex++; - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java deleted file mode 100644 index 86b6cbb13d881..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import java.nio.ByteBuffer; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume bytes type values from avro decoder. Write the data to {@link - * VarBinaryVector}. - */ -public class AvroBytesConsumer extends BaseAvroConsumer { - - private ByteBuffer cacheBuffer; - - /** Instantiate a AvroBytesConsumer. */ - public AvroBytesConsumer(VarBinaryVector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - // cacheBuffer is initialized null and create in the first consume, - // if its capacity < size to read, decoder will create a new one with new capacity. - cacheBuffer = decoder.readBytes(cacheBuffer); - vector.setSafe(currentIndex, cacheBuffer, 0, cacheBuffer.limit()); - currentIndex++; - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java deleted file mode 100644 index 011cbccc09c5b..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.vector.Float8Vector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume double type values from avro decoder. Write the data to {@link - * Float8Vector}. - */ -public class AvroDoubleConsumer extends BaseAvroConsumer { - - /** Instantiate a AvroDoubleConsumer. */ - public AvroDoubleConsumer(Float8Vector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - vector.set(currentIndex++, decoder.readDouble()); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java deleted file mode 100644 index f47988fb962a1..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.IntVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume enum type values from avro decoder. Write the data to {@link IntVector}. - */ -public class AvroEnumConsumer extends BaseAvroConsumer { - - /** Instantiate a AvroEnumConsumer. */ - public AvroEnumConsumer(BaseIntVector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - vector.setWithPossibleTruncate(currentIndex++, decoder.readEnum()); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java deleted file mode 100644 index 6b78afd3c95d4..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume fixed type values from avro decoder. Write the data to {@link - * org.apache.arrow.vector.FixedSizeBinaryVector}. - */ -public class AvroFixedConsumer extends BaseAvroConsumer { - - private final byte[] reuseBytes; - - /** Instantiate a AvroFixedConsumer. */ - public AvroFixedConsumer(FixedSizeBinaryVector vector, int size) { - super(vector); - reuseBytes = new byte[size]; - } - - @Override - public void consume(Decoder decoder) throws IOException { - decoder.readFixed(reuseBytes); - vector.setSafe(currentIndex++, reuseBytes); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java deleted file mode 100644 index 2c6d4aa5a05f6..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.vector.Float4Vector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume float type values from avro decoder. Write the data to {@link - * Float4Vector}. - */ -public class AvroFloatConsumer extends BaseAvroConsumer { - - /** Instantiate a AvroFloatConsumer. */ - public AvroFloatConsumer(Float4Vector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - vector.set(currentIndex++, decoder.readFloat()); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java deleted file mode 100644 index 22c7b10aa65f7..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.vector.IntVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume int type values from avro decoder. Write the data to {@link IntVector}. - */ -public class AvroIntConsumer extends BaseAvroConsumer { - - /** Instantiate a AvroIntConsumer. */ - public AvroIntConsumer(IntVector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - vector.set(currentIndex++, decoder.readInt()); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java deleted file mode 100644 index 90c5313417d7c..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.vector.BigIntVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume long type values from avro decoder. Write the data to {@link - * BigIntVector}. - */ -public class AvroLongConsumer extends BaseAvroConsumer { - - /** Instantiate a AvroLongConsumer. */ - public AvroLongConsumer(BigIntVector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - vector.set(currentIndex++, decoder.readLong()); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java deleted file mode 100644 index 543471533ec01..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume map type values from avro decoder. Write the data to {@link MapVector}. - */ -public class AvroMapConsumer extends BaseAvroConsumer { - - private final Consumer delegate; - - /** Instantiate a AvroMapConsumer. */ - public AvroMapConsumer(MapVector vector, Consumer delegate) { - super(vector); - this.delegate = delegate; - } - - @Override - public void consume(Decoder decoder) throws IOException { - - vector.startNewValue(currentIndex); - long totalCount = 0; - for (long count = decoder.readMapStart(); count != 0; count = decoder.mapNext()) { - totalCount += count; - ensureInnerVectorCapacity(totalCount); - for (int element = 0; element < count; element++) { - delegate.consume(decoder); - } - } - vector.endValue(currentIndex, (int) totalCount); - currentIndex++; - } - - @Override - public void close() throws Exception { - super.close(); - delegate.close(); - } - - @Override - public boolean resetValueVector(MapVector vector) { - this.delegate.resetValueVector(vector.getDataVector()); - return super.resetValueVector(vector); - } - - void ensureInnerVectorCapacity(long targetCapacity) { - StructVector innerVector = (StructVector) vector.getDataVector(); - for (FieldVector v : innerVector.getChildrenFromFields()) { - while (v.getValueCapacity() < 
targetCapacity) { - v.reAlloc(); - } - } - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java deleted file mode 100644 index 0f80c2b7b2db3..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.vector.NullVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume null type values from avro decoder. Corresponding to {@link - * org.apache.arrow.vector.NullVector}. 
- */ -public class AvroNullConsumer extends BaseAvroConsumer { - - public AvroNullConsumer(NullVector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - currentIndex++; - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java deleted file mode 100644 index 164d595e9c6ac..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import java.nio.ByteBuffer; -import org.apache.arrow.vector.VarCharVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume string type values from avro decoder. Write the data to {@link - * VarCharVector}. - */ -public class AvroStringConsumer extends BaseAvroConsumer { - - private ByteBuffer cacheBuffer; - - /** Instantiate a AvroStringConsumer. 
*/ - public AvroStringConsumer(VarCharVector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - // cacheBuffer is initialized null and create in the first consume, - // if its capacity < size to read, decoder will create a new one with new capacity. - cacheBuffer = decoder.readBytes(cacheBuffer); - vector.setSafe(currentIndex++, cacheBuffer, 0, cacheBuffer.limit()); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java deleted file mode 100644 index 94c2f611e84b7..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume nested record type values from avro decoder. 
Write the data to {@link - * org.apache.arrow.vector.complex.StructVector}. - */ -public class AvroStructConsumer extends BaseAvroConsumer { - - private final Consumer[] delegates; - - /** Instantiate a AvroStructConsumer. */ - public AvroStructConsumer(StructVector vector, Consumer[] delegates) { - super(vector); - this.delegates = delegates; - } - - @Override - public void consume(Decoder decoder) throws IOException { - - ensureInnerVectorCapacity(currentIndex + 1); - for (int i = 0; i < delegates.length; i++) { - delegates[i].consume(decoder); - } - vector.setIndexDefined(currentIndex); - currentIndex++; - } - - @Override - public void close() throws Exception { - super.close(); - AutoCloseables.close(delegates); - } - - @Override - public boolean resetValueVector(StructVector vector) { - for (int i = 0; i < delegates.length; i++) { - delegates[i].resetValueVector(vector.getChildrenFromFields().get(i)); - } - return super.resetValueVector(vector); - } - - void ensureInnerVectorCapacity(long targetCapacity) { - for (FieldVector v : vector.getChildrenFromFields()) { - while (v.getValueCapacity() < targetCapacity) { - v.reAlloc(); - } - } - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java deleted file mode 100644 index 5a8e23e62892c..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.types.Types; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume unions type values from avro decoder. Write the data to {@link - * org.apache.arrow.vector.complex.UnionVector}. - */ -public class AvroUnionsConsumer extends BaseAvroConsumer { - - private Consumer[] delegates; - private Types.MinorType[] types; - - /** Instantiate an AvroUnionConsumer. */ - public AvroUnionsConsumer(UnionVector vector, Consumer[] delegates, Types.MinorType[] types) { - - super(vector); - this.delegates = delegates; - this.types = types; - } - - @Override - public void consume(Decoder decoder) throws IOException { - int fieldIndex = decoder.readInt(); - - ensureInnerVectorCapacity(currentIndex + 1, fieldIndex); - Consumer delegate = delegates[fieldIndex]; - - vector.setType(currentIndex, types[fieldIndex]); - // In UnionVector we need to set sub vector writer position before consume a value - // because in the previous iterations we might not have written to the specific union sub - // vector. 
- delegate.setPosition(currentIndex); - delegate.consume(decoder); - - currentIndex++; - } - - @Override - public void close() throws Exception { - super.close(); - AutoCloseables.close(delegates); - } - - @Override - public boolean resetValueVector(UnionVector vector) { - for (int i = 0; i < delegates.length; i++) { - delegates[i].resetValueVector(vector.getChildrenFromFields().get(i)); - } - return super.resetValueVector(vector); - } - - void ensureInnerVectorCapacity(long targetCapacity, int fieldIndex) { - ValueVector fieldVector = vector.getChildrenFromFields().get(fieldIndex); - if (fieldVector.getMinorType() == Types.MinorType.NULL) { - return; - } - while (fieldVector.getValueCapacity() < targetCapacity) { - fieldVector.reAlloc(); - } - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java deleted file mode 100644 index 9430d83cb4372..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.avro.consumers; - -import org.apache.arrow.vector.FieldVector; - -/** - * Base class for non-skippable avro consumers. - * - * @param vector type. - */ -public abstract class BaseAvroConsumer implements Consumer { - - protected T vector; - protected int currentIndex; - - /** - * Constructs a base avro consumer. - * - * @param vector the vector to consume. - */ - public BaseAvroConsumer(T vector) { - this.vector = vector; - } - - @Override - public void addNull() { - currentIndex++; - } - - @Override - public void setPosition(int index) { - currentIndex = index; - } - - @Override - public FieldVector getVector() { - return vector; - } - - @Override - public void close() throws Exception { - vector.close(); - } - - @Override - public boolean resetValueVector(T vector) { - this.vector = vector; - this.currentIndex = 0; - return true; - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java deleted file mode 100644 index 11c1f7712ef19..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import java.util.List; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.avro.io.Decoder; - -/** Composite consumer which hold all consumers. It manages the consume and cleanup process. */ -public class CompositeAvroConsumer implements AutoCloseable { - - private final List consumers; - - public List getConsumers() { - return consumers; - } - - public CompositeAvroConsumer(List consumers) { - this.consumers = consumers; - } - - /** Consume decoder data. */ - public void consume(Decoder decoder) throws IOException { - for (Consumer consumer : consumers) { - consumer.consume(decoder); - } - } - - /** Reset vector of consumers with the given {@link VectorSchemaRoot}. */ - public void resetConsumerVectors(VectorSchemaRoot root) { - int index = 0; - for (Consumer consumer : consumers) { - if (consumer.resetValueVector(root.getFieldVectors().get(index))) { - index++; - } - } - } - - @Override - public void close() { - // clean up - try { - AutoCloseables.close(consumers); - } catch (Exception e) { - throw new RuntimeException("Error occurs in close.", e); - } - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java deleted file mode 100644 index 0c07f90bf5f39..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.vector.FieldVector; -import org.apache.avro.io.Decoder; - -/** - * Interface that is used to consume values from avro decoder. - * - * @param The vector within consumer or its delegate, used for partially consume purpose. - */ -public interface Consumer extends AutoCloseable { - - /** - * Consume a specific type value from avro decoder and write it to vector. - * - * @param decoder avro decoder to read data - * @throws IOException on error - */ - void consume(Decoder decoder) throws IOException; - - /** Add null value to vector by making writer position + 1. */ - void addNull(); - - /** Set the position to write value into vector. */ - void setPosition(int index); - - /** Get the vector within the consumer. */ - FieldVector getVector(); - - /** Close this consumer when occurs exception to avoid potential leak. */ - @Override - void close() throws Exception; - - /** - * Reset the vector within consumer for partial read purpose. - * - * @return true if reset is successful, false if reset is not needed. - */ - boolean resetValueVector(T vector); - - /** Indicates whether the consumer is type of {@link SkipConsumer}. 
*/ - default boolean skippable() { - return false; - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java deleted file mode 100644 index 2c104728ce620..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.arrow.vector.FieldVector; -import org.apache.avro.io.Decoder; - -/** Consumer which skip (throw away) data from the decoder. 
*/ -public class SkipConsumer implements Consumer { - - private final SkipFunction skipFunction; - - public SkipConsumer(SkipFunction skipFunction) { - this.skipFunction = skipFunction; - } - - @Override - public void consume(Decoder decoder) throws IOException { - skipFunction.apply(decoder); - } - - @Override - public void addNull() {} - - @Override - public void setPosition(int index) {} - - @Override - public FieldVector getVector() { - return null; - } - - @Override - public void close() throws Exception {} - - @Override - public boolean resetValueVector(FieldVector vector) { - return false; - } - - @Override - public boolean skippable() { - return true; - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java deleted file mode 100644 index 3d72d03104f3c..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.avro.consumers; - -import java.io.IOException; -import org.apache.avro.io.Decoder; - -/** Adapter function to skip (throw away) data from the decoder. */ -@FunctionalInterface -public interface SkipFunction { - void apply(Decoder decoder) throws IOException; -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java deleted file mode 100644 index 0f557297a3cb7..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers.logical; - -import java.io.IOException; -import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; -import org.apache.arrow.vector.DateDayVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume date type values from avro decoder. Write the data to {@link - * DateDayVector}. - */ -public class AvroDateConsumer extends BaseAvroConsumer { - - /** Instantiate a AvroDateConsumer. 
*/ - public AvroDateConsumer(DateDayVector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - vector.set(currentIndex++, decoder.readInt()); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java deleted file mode 100644 index fa1a12ac8a6ed..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers.logical; - -import java.io.IOException; -import java.nio.ByteBuffer; -import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.DecimalVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume decimal type values from avro decoder. Write the data to {@link - * DecimalVector}. - */ -public abstract class AvroDecimalConsumer extends BaseAvroConsumer { - - /** Instantiate a AvroDecimalConsumer. 
*/ - public AvroDecimalConsumer(DecimalVector vector) { - super(vector); - } - - /** Consumer for decimal logical type with original bytes type. */ - public static class BytesDecimalConsumer extends AvroDecimalConsumer { - - private ByteBuffer cacheBuffer; - - /** Instantiate a BytesDecimalConsumer. */ - public BytesDecimalConsumer(DecimalVector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - cacheBuffer = decoder.readBytes(cacheBuffer); - byte[] bytes = new byte[cacheBuffer.limit()]; - Preconditions.checkArgument(bytes.length <= 16, "Decimal bytes length should <= 16."); - cacheBuffer.get(bytes); - vector.setBigEndian(currentIndex++, bytes); - } - } - - /** Consumer for decimal logical type with original fixed type. */ - public static class FixedDecimalConsumer extends AvroDecimalConsumer { - - private byte[] reuseBytes; - - /** Instantiate a FixedDecimalConsumer. */ - public FixedDecimalConsumer(DecimalVector vector, int size) { - super(vector); - Preconditions.checkArgument(size <= 16, "Decimal bytes length should <= 16."); - reuseBytes = new byte[size]; - } - - @Override - public void consume(Decoder decoder) throws IOException { - decoder.readFixed(reuseBytes); - vector.setBigEndian(currentIndex++, reuseBytes); - } - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java deleted file mode 100644 index 60e7d15bf16d6..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers.logical; - -import java.io.IOException; -import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume date time-micro values from avro decoder. Write the data to {@link - * TimeMicroVector}. - */ -public class AvroTimeMicroConsumer extends BaseAvroConsumer { - - /** Instantiate a AvroTimeMicroConsumer. */ - public AvroTimeMicroConsumer(TimeMicroVector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - vector.set(currentIndex++, decoder.readLong()); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java deleted file mode 100644 index e0b232e9abd5e..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers.logical; - -import java.io.IOException; -import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume date time-millis values from avro decoder. Write the data to {@link - * TimeMilliVector}. - */ -public class AvroTimeMillisConsumer extends BaseAvroConsumer { - - /** Instantiate a AvroTimeMilliConsumer. */ - public AvroTimeMillisConsumer(TimeMilliVector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - vector.set(currentIndex++, decoder.readInt()); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java deleted file mode 100644 index 88acf7b329569..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers.logical; - -import java.io.IOException; -import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume date timestamp-micro values from avro decoder. Write the data to {@link - * TimeStampMicroVector}. - */ -public class AvroTimestampMicrosConsumer extends BaseAvroConsumer { - - /** Instantiate a AvroTimestampMicroConsumer. */ - public AvroTimestampMicrosConsumer(TimeStampMicroVector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - vector.set(currentIndex++, decoder.readLong()); - } -} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java deleted file mode 100644 index ec50d7902319c..0000000000000 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro.consumers.logical; - -import java.io.IOException; -import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.avro.io.Decoder; - -/** - * Consumer which consume date timestamp-millis values from avro decoder. Write the data to {@link - * TimeStampMilliVector}. - */ -public class AvroTimestampMillisConsumer extends BaseAvroConsumer { - - /** Instantiate a AvroTimestampMillisConsumer. */ - public AvroTimestampMillisConsumer(TimeStampMilliVector vector) { - super(vector); - } - - @Override - public void consume(Decoder decoder) throws IOException { - vector.set(currentIndex++, decoder.readLong()); - } -} diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java deleted file mode 100644 index 173cc855b1eb1..0000000000000 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro; - -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.math.BigDecimal; -import java.nio.ByteBuffer; -import java.time.LocalDateTime; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.util.DateUtility; -import org.apache.avro.Conversions; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericFixed; -import org.junit.jupiter.api.Test; - -public class AvroLogicalTypesTest extends AvroTestBase { - - @Test - public void testTimestampMicros() throws Exception { - Schema schema = getSchema("logical/test_timestamp_micros.avsc"); - - List data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L); - List expected = - Arrays.asList( - DateUtility.getLocalDateTimeFromEpochMicro(10000), - DateUtility.getLocalDateTimeFromEpochMicro(20000), - DateUtility.getLocalDateTimeFromEpochMicro(30000), - DateUtility.getLocalDateTimeFromEpochMicro(40000), - DateUtility.getLocalDateTimeFromEpochMicro(50000)); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(expected, 
vector); - } - - @Test - public void testTimestampMillis() throws Exception { - Schema schema = getSchema("logical/test_timestamp_millis.avsc"); - - List data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L); - List expected = - Arrays.asList( - DateUtility.getLocalDateTimeFromEpochMilli(10000), - DateUtility.getLocalDateTimeFromEpochMilli(20000), - DateUtility.getLocalDateTimeFromEpochMilli(30000), - DateUtility.getLocalDateTimeFromEpochMilli(40000), - DateUtility.getLocalDateTimeFromEpochMilli(50000)); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(expected, vector); - } - - @Test - public void testTimeMicros() throws Exception { - Schema schema = getSchema("logical/test_time_micros.avsc"); - - List data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(data, vector); - } - - @Test - public void testTimeMillis() throws Exception { - Schema schema = getSchema("logical/test_time_millis.avsc"); - - List data = Arrays.asList(100, 200, 300, 400, 500); - List expected = - Arrays.asList( - DateUtility.getLocalDateTimeFromEpochMilli(100), - DateUtility.getLocalDateTimeFromEpochMilli(200), - DateUtility.getLocalDateTimeFromEpochMilli(300), - DateUtility.getLocalDateTimeFromEpochMilli(400), - DateUtility.getLocalDateTimeFromEpochMilli(500)); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(expected, vector); - } - - @Test - public void testDate() throws Exception { - Schema schema = getSchema("logical/test_date.avsc"); - - List data = Arrays.asList(100, 200, 300, 400, 500); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(data, vector); - } - - @Test - public void 
testDecimalWithOriginalBytes() throws Exception { - Schema schema = getSchema("logical/test_decimal_with_original_bytes.avsc"); - List data = new ArrayList<>(); - List expected = new ArrayList<>(); - - Conversions.DecimalConversion conversion = new Conversions.DecimalConversion(); - - for (int i = 0; i < 5; i++) { - BigDecimal value = new BigDecimal(i * i).setScale(2); - ByteBuffer buffer = conversion.toBytes(value, schema, schema.getLogicalType()); - data.add(buffer); - expected.add(value); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - checkPrimitiveResult(expected, vector); - } - - @Test - public void testDecimalWithOriginalFixed() throws Exception { - Schema schema = getSchema("logical/test_decimal_with_original_fixed.avsc"); - - List data = new ArrayList<>(); - List expected = new ArrayList<>(); - - Conversions.DecimalConversion conversion = new Conversions.DecimalConversion(); - - for (int i = 0; i < 5; i++) { - BigDecimal value = new BigDecimal(i * i).setScale(2); - GenericFixed fixed = conversion.toFixed(value, schema, schema.getLogicalType()); - data.add(fixed); - expected.add(value); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - checkPrimitiveResult(expected, vector); - } - - @Test - public void testInvalidDecimalPrecision() throws Exception { - Schema schema = getSchema("logical/test_decimal_invalid1.avsc"); - List data = new ArrayList<>(); - - Conversions.DecimalConversion conversion = new Conversions.DecimalConversion(); - - for (int i = 0; i < 5; i++) { - BigDecimal value = new BigDecimal(i * i).setScale(2); - ByteBuffer buffer = conversion.toBytes(value, schema, schema.getLogicalType()); - data.add(buffer); - } - - IllegalArgumentException e = - assertThrows(IllegalArgumentException.class, () -> writeAndRead(schema, data)); - assertTrue(e.getMessage().contains("Precision must be in range of 1 to 38")); - } - - 
@Test - public void testFailedToCreateDecimalLogicalType() throws Exception { - // For decimal logical type, if avro validate schema failed, it will not create logical type, - // and the schema will be treated as its original type. - - // java.lang.IllegalArgumentException: Invalid decimal scale: -1 (must be positive) - Schema schema1 = getSchema("logical/test_decimal_invalid2.avsc"); - assertNull(schema1.getLogicalType()); - - // java.lang.IllegalArgumentException: Invalid decimal scale: 40 (greater than precision: 20) - Schema schema2 = getSchema("logical/test_decimal_invalid3.avsc"); - assertNull(schema2.getLogicalType()); - - // java.lang.IllegalArgumentException: fixed(1) cannot store 30 digits (max 2) - Schema schema3 = getSchema("logical/test_decimal_invalid4.avsc"); - assertNull(schema3.getLogicalType()); - } -} diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java deleted file mode 100644 index 3120c26638c6d..0000000000000 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java +++ /dev/null @@ -1,685 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Set; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.types.Types; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericRecord; -import org.junit.jupiter.api.Test; - -public class AvroSkipFieldTest extends AvroTestBase { - - @Test - public void testSkipUnionWithOneField() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f0"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_union_before.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_union_one_field_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, "test" + i); - record.put(1, i % 2 == 0 ? "test" + i : null); - record.put(2, i % 2 == 0 ? 
"test" + i : i); - record.put(3, i); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, record.get(1)); - expectedRecord.put(1, record.get(2)); - expectedRecord.put(2, record.get(3)); - expectedData.add(expectedRecord); - } - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipUnionWithNullableOneField() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f1"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_union_before.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_union_nullable_field_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, "test" + i); - record.put(1, i % 2 == 0 ? "test" + i : null); - record.put(2, i % 2 == 0 ? 
"test" + i : i); - record.put(3, i); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, record.get(0)); - expectedRecord.put(1, record.get(2)); - expectedRecord.put(2, record.get(3)); - expectedData.add(expectedRecord); - } - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipUnionWithMultiFields() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f2"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_union_before.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_union_multi_fields_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, "test" + i); - record.put(1, i % 2 == 0 ? "test" + i : null); - record.put(2, i % 2 == 0 ? 
"test" + i : i); - record.put(3, i); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, record.get(0)); - expectedRecord.put(1, record.get(1)); - expectedRecord.put(2, record.get(3)); - expectedData.add(expectedRecord); - } - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipMapField() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f1"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_map_before.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_map_expected.avsc"); - - HashMap map = new HashMap(); - map.put("key1", "value1"); - map.put("key2", "value3"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, "test" + i); - record.put(1, map); - record.put(2, i % 2 == 0); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, record.get(0)); - expectedRecord.put(1, record.get(2)); - expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipArrayField() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f1"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_array_before.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_array_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - GenericRecord record = 
new GenericData.Record(schema); - record.put(0, "test" + i); - record.put(1, Arrays.asList("test" + i, "test" + i)); - record.put(2, i % 2 == 0); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, record.get(0)); - expectedRecord.put(1, record.get(2)); - expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipMultiFields() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f1"); - skipFieldNames.add("f2"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("test_record.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_multi_fields_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, "test" + i); - record.put(1, i); - record.put(2, i % 2 == 0); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, record.get(0)); - expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipStringField() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f2"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_base1.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_string_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - final byte[] testBytes = ("test" + 
i).getBytes(StandardCharsets.UTF_8); - GenericRecord record = new GenericData.Record(schema); - GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); - fixed.bytes(testBytes); - record.put(0, fixed); - GenericData.EnumSymbol symbol = - new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); - record.put(1, symbol); - record.put(2, "testtest" + i); - record.put(3, ByteBuffer.wrap(testBytes)); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, testBytes); - expectedRecord.put(1, (byte) i % 2); - expectedRecord.put(2, testBytes); - expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipBytesField() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f3"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_base1.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_bytes_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - final byte[] testBytes = ("test" + i).getBytes(StandardCharsets.UTF_8); - GenericRecord record = new GenericData.Record(schema); - GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); - fixed.bytes(testBytes); - record.put(0, fixed); - GenericData.EnumSymbol symbol = - new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); - record.put(1, symbol); - record.put(2, "testtest" + i); - record.put(3, ByteBuffer.wrap(testBytes)); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, testBytes); - expectedRecord.put(1, (byte) i % 2); - expectedRecord.put(2, record.get(2)); - 
expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipFixedField() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f0"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_base1.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_fixed_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - final byte[] testBytes = ("test" + i).getBytes(StandardCharsets.UTF_8); - GenericRecord record = new GenericData.Record(schema); - GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); - fixed.bytes(testBytes); - record.put(0, fixed); - GenericData.EnumSymbol symbol = - new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); - record.put(1, symbol); - record.put(2, "testtest" + i); - record.put(3, ByteBuffer.wrap(testBytes)); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, (byte) i % 2); - expectedRecord.put(1, record.get(2)); - expectedRecord.put(2, record.get(3)); - expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipEnumField() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f1"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_base1.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_fixed_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) 
{ - final byte[] testBytes = ("test" + i).getBytes(StandardCharsets.UTF_8); - GenericRecord record = new GenericData.Record(schema); - GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); - fixed.bytes(testBytes); - record.put(0, fixed); - GenericData.EnumSymbol symbol = - new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); - record.put(1, symbol); - record.put(2, "testtest" + i); - record.put(3, ByteBuffer.wrap(testBytes)); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, testBytes); - expectedRecord.put(1, record.get(2)); - expectedRecord.put(2, record.get(3)); - expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipBooleanField() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f0"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_base2.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_boolean_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0); - record.put(1, i); - record.put(2, (long) i); - record.put(3, (float) i); - record.put(4, (double) i); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, record.get(1)); - expectedRecord.put(1, record.get(2)); - expectedRecord.put(2, record.get(3)); - expectedRecord.put(3, record.get(4)); - - expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipIntField() 
throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f1"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_base2.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_int_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0); - record.put(1, i); - record.put(2, (long) i); - record.put(3, (float) i); - record.put(4, (double) i); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, record.get(0)); - expectedRecord.put(1, record.get(2)); - expectedRecord.put(2, record.get(3)); - expectedRecord.put(3, record.get(4)); - - expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipLongField() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f2"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_base2.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_long_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0); - record.put(1, i); - record.put(2, (long) i); - record.put(3, (float) i); - record.put(4, (double) i); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, record.get(0)); - expectedRecord.put(1, record.get(1)); - expectedRecord.put(2, record.get(3)); - expectedRecord.put(3, record.get(4)); - 
- expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipFloatField() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f3"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_base2.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_float_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0); - record.put(1, i); - record.put(2, (long) i); - record.put(3, (float) i); - record.put(4, (double) i); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, record.get(0)); - expectedRecord.put(1, record.get(1)); - expectedRecord.put(2, record.get(2)); - expectedRecord.put(3, record.get(4)); - - expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipDoubleField() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f4"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_base2.avsc"); - Schema expectedSchema = getSchema("skip/test_skip_double_expected.avsc"); - - ArrayList data = new ArrayList<>(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0); - record.put(1, i); - record.put(2, (long) i); - record.put(3, (float) i); - record.put(4, (double) i); - data.add(record); - - GenericRecord 
expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, record.get(0)); - expectedRecord.put(1, record.get(1)); - expectedRecord.put(2, record.get(2)); - expectedRecord.put(3, record.get(3)); - - expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipRecordField() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f0"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("skip/test_skip_record_before.avsc"); - Schema nestedSchema = schema.getFields().get(0).schema(); - ArrayList data = new ArrayList<>(); - - Schema expectedSchema = getSchema("skip/test_skip_record_expected.avsc"); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - GenericRecord nestedRecord = new GenericData.Record(nestedSchema); - nestedRecord.put(0, "test" + i); - nestedRecord.put(1, i); - record.put(0, nestedRecord); - record.put(1, i); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - expectedRecord.put(0, i); - expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipNestedFields() throws Exception { - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f0.f0"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - Schema schema = getSchema("test_nested_record.avsc"); - Schema nestedSchema = schema.getFields().get(0).schema(); - ArrayList data = new ArrayList<>(); - - Schema expectedSchema = getSchema("skip/test_skip_second_level_expected.avsc"); - Schema expectedNestedSchema = 
expectedSchema.getFields().get(0).schema(); - ArrayList expectedData = new ArrayList<>(); - - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - GenericRecord nestedRecord = new GenericData.Record(nestedSchema); - nestedRecord.put(0, "test" + i); - nestedRecord.put(1, i); - record.put(0, nestedRecord); - data.add(record); - - GenericRecord expectedRecord = new GenericData.Record(expectedSchema); - GenericRecord expectedNestedRecord = new GenericData.Record(expectedNestedSchema); - expectedNestedRecord.put(0, nestedRecord.get(1)); - expectedRecord.put(0, expectedNestedRecord); - expectedData.add(expectedRecord); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkNestedRecordResult(expectedSchema, expectedData, root); - } - - @Test - public void testSkipThirdLevelField() throws Exception { - Schema firstLevelSchema = getSchema("skip/test_skip_third_level_expected.avsc"); - Schema secondLevelSchema = firstLevelSchema.getFields().get(0).schema(); - Schema thirdLevelSchema = secondLevelSchema.getFields().get(0).schema(); - - ArrayList data = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord firstLevelRecord = new GenericData.Record(firstLevelSchema); - GenericRecord secondLevelRecord = new GenericData.Record(secondLevelSchema); - GenericRecord thirdLevelRecord = new GenericData.Record(thirdLevelSchema); - - thirdLevelRecord.put(0, i); - thirdLevelRecord.put(1, "test" + i); - thirdLevelRecord.put(2, i % 2 == 0); - - secondLevelRecord.put(0, thirdLevelRecord); - firstLevelRecord.put(0, secondLevelRecord); - data.add(firstLevelRecord); - } - - // do not skip any fields first - VectorSchemaRoot root1 = writeAndRead(firstLevelSchema, data); - - assertEquals(1, root1.getFieldVectors().size()); - assertEquals(Types.MinorType.STRUCT, root1.getFieldVectors().get(0).getMinorType()); - StructVector secondLevelVector = (StructVector) root1.getFieldVectors().get(0); - assertEquals(1, 
secondLevelVector.getChildrenFromFields().size()); - assertEquals( - Types.MinorType.STRUCT, secondLevelVector.getChildrenFromFields().get(0).getMinorType()); - StructVector thirdLevelVector = (StructVector) secondLevelVector.getChildrenFromFields().get(0); - assertEquals(3, thirdLevelVector.getChildrenFromFields().size()); - - // skip third level field and validate - Set skipFieldNames = new HashSet<>(); - skipFieldNames.add("f0.f0.f0"); - config = - new AvroToArrowConfigBuilder(config.getAllocator()) - .setSkipFieldNames(skipFieldNames) - .build(); - VectorSchemaRoot root2 = writeAndRead(firstLevelSchema, data); - - assertEquals(1, root2.getFieldVectors().size()); - assertEquals(Types.MinorType.STRUCT, root2.getFieldVectors().get(0).getMinorType()); - StructVector secondStruct = (StructVector) root2.getFieldVectors().get(0); - assertEquals(1, secondStruct.getChildrenFromFields().size()); - assertEquals( - Types.MinorType.STRUCT, secondStruct.getChildrenFromFields().get(0).getMinorType()); - StructVector thirdStruct = (StructVector) secondStruct.getChildrenFromFields().get(0); - assertEquals(2, thirdStruct.getChildrenFromFields().size()); - - assertEquals(Types.MinorType.INT, thirdStruct.getChildrenFromFields().get(0).getMinorType()); - assertEquals(Types.MinorType.BIT, thirdStruct.getChildrenFromFields().get(1).getMinorType()); - } -} diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java deleted file mode 100644 index 44c4186c52d9e..0000000000000 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.InputStream; -import java.lang.reflect.Method; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.util.Text; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.BinaryDecoder; -import org.apache.avro.io.BinaryEncoder; -import org.apache.avro.io.DatumWriter; -import org.apache.avro.io.DecoderFactory; -import org.apache.avro.io.EncoderFactory; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.io.TempDir; - -public class AvroTestBase { - - @TempDir public File TMP; - - protected AvroToArrowConfig config; - - @BeforeEach - public void init() { - BufferAllocator allocator = new 
RootAllocator(Long.MAX_VALUE); - config = new AvroToArrowConfigBuilder(allocator).build(); - } - - public static Schema getSchema(String schemaName) throws Exception { - try { - // Attempt to use JDK 9 behavior of getting the module then the resource stream from the - // module. - // Note that this code is caller-sensitive. - Method getModuleMethod = Class.class.getMethod("getModule"); - Object module = getModuleMethod.invoke(TestWriteReadAvroRecord.class); - Method getResourceAsStreamFromModule = - module.getClass().getMethod("getResourceAsStream", String.class); - try (InputStream is = - (InputStream) getResourceAsStreamFromModule.invoke(module, "/schema/" + schemaName)) { - return new Schema.Parser().parse(is); - } - } catch (NoSuchMethodException ex) { - // Use JDK8 behavior. - try (InputStream is = - TestWriteReadAvroRecord.class.getResourceAsStream("/schema/" + schemaName)) { - return new Schema.Parser().parse(is); - } - } - } - - protected VectorSchemaRoot writeAndRead(Schema schema, List data) throws Exception { - File dataFile = new File(TMP, "test.avro"); - - try (FileOutputStream fos = new FileOutputStream(dataFile); - FileInputStream fis = new FileInputStream(dataFile)) { - - BinaryEncoder encoder = new EncoderFactory().directBinaryEncoder(fos, null); - DatumWriter writer = new GenericDatumWriter<>(schema); - BinaryDecoder decoder = new DecoderFactory().directBinaryDecoder(fis, null); - - for (Object value : data) { - writer.write(value, encoder); - } - - return AvroToArrow.avroToArrow(schema, decoder, config); - } - } - - protected void checkArrayResult(List> expected, ListVector vector) { - assertEquals(expected.size(), vector.getValueCount()); - for (int i = 0; i < expected.size(); i++) { - checkArrayElement(expected.get(i), vector.getObject(i)); - } - } - - protected void checkArrayElement(List expected, List actual) { - assertEquals(expected.size(), actual.size()); - for (int i = 0; i < expected.size(); i++) { - Object value1 = expected.get(i); - 
Object value2 = actual.get(i); - if (value1 == null) { - assertTrue(value2 == null); - continue; - } - if (value2 instanceof byte[]) { - value2 = ByteBuffer.wrap((byte[]) value2); - } else if (value2 instanceof Text) { - value2 = value2.toString(); - } - assertEquals(value1, value2); - } - } - - protected void checkPrimitiveResult(List data, FieldVector vector) { - assertEquals(data.size(), vector.getValueCount()); - for (int i = 0; i < data.size(); i++) { - Object value1 = data.get(i); - Object value2 = vector.getObject(i); - if (value1 == null) { - assertTrue(value2 == null); - continue; - } - if (value2 instanceof byte[]) { - value2 = ByteBuffer.wrap((byte[]) value2); - if (value1 instanceof byte[]) { - value1 = ByteBuffer.wrap((byte[]) value1); - } - } else if (value2 instanceof Text) { - value2 = value2.toString(); - } else if (value2 instanceof Byte) { - value2 = ((Byte) value2).intValue(); - } - assertEquals(value1, value2); - } - } - - protected void checkRecordResult(Schema schema, List data, VectorSchemaRoot root) { - assertEquals(data.size(), root.getRowCount()); - assertEquals(schema.getFields().size(), root.getFieldVectors().size()); - - for (int i = 0; i < schema.getFields().size(); i++) { - ArrayList fieldData = new ArrayList(); - for (GenericRecord record : data) { - fieldData.add(record.get(i)); - } - - checkPrimitiveResult(fieldData, root.getFieldVectors().get(i)); - } - } - - protected void checkNestedRecordResult( - Schema schema, List data, VectorSchemaRoot root) { - assertEquals(data.size(), root.getRowCount()); - assertTrue(schema.getFields().size() == 1); - - final Schema nestedSchema = schema.getFields().get(0).schema(); - final StructVector structVector = (StructVector) root.getFieldVectors().get(0); - - for (int i = 0; i < nestedSchema.getFields().size(); i++) { - ArrayList fieldData = new ArrayList(); - for (GenericRecord record : data) { - GenericRecord nestedRecord = (GenericRecord) record.get(0); - fieldData.add(nestedRecord.get(i)); 
- } - - checkPrimitiveResult(fieldData, structVector.getChildrenFromFields().get(i)); - } - } - - // belows are for iterator api - - protected void checkArrayResult(List> expected, List vectors) { - int valueCount = vectors.stream().mapToInt(v -> v.getValueCount()).sum(); - assertEquals(expected.size(), valueCount); - - int index = 0; - for (ListVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - checkArrayElement(expected.get(index++), vector.getObject(i)); - } - } - } - - protected void checkRecordResult( - Schema schema, List data, List roots) { - roots.forEach( - root -> { - assertEquals(schema.getFields().size(), root.getFieldVectors().size()); - }); - - for (int i = 0; i < schema.getFields().size(); i++) { - List fieldData = new ArrayList(); - List vectors = new ArrayList<>(); - for (GenericRecord record : data) { - fieldData.add(record.get(i)); - } - final int columnIndex = i; - roots.forEach(root -> vectors.add(root.getFieldVectors().get(columnIndex))); - - checkPrimitiveResult(fieldData, vectors); - } - } - - protected void checkPrimitiveResult(List data, List vectors) { - int valueCount = vectors.stream().mapToInt(v -> v.getValueCount()).sum(); - assertEquals(data.size(), valueCount); - - int index = 0; - for (FieldVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - Object value1 = data.get(index++); - Object value2 = vector.getObject(i); - if (value1 == null) { - assertNull(value2); - continue; - } - if (value2 instanceof byte[]) { - value2 = ByteBuffer.wrap((byte[]) value2); - if (value1 instanceof byte[]) { - value1 = ByteBuffer.wrap((byte[]) value1); - } - } else if (value2 instanceof Text) { - value2 = value2.toString(); - } - assertEquals(value1, value2); - } - } - } -} diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java deleted file mode 100644 
index 44ccbc74511dd..0000000000000 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.EOFException; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.BinaryDecoder; -import org.apache.avro.io.BinaryEncoder; -import org.apache.avro.io.DatumWriter; -import org.apache.avro.io.Decoder; -import 
org.apache.avro.io.DecoderFactory; -import org.apache.avro.io.EncoderFactory; -import org.apache.avro.util.Utf8; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class AvroToArrowIteratorTest extends AvroTestBase { - - @BeforeEach - @Override - public void init() { - final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - this.config = new AvroToArrowConfigBuilder(allocator).setTargetBatchSize(3).build(); - } - - private void writeDataToFile(Schema schema, List data, File dataFile) throws Exception { - try (FileOutputStream fos = new FileOutputStream(dataFile)) { - BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(fos, null); - DatumWriter writer = new GenericDatumWriter<>(schema); - - for (Object value : data) { - writer.write(value, encoder); - } - encoder.flush(); - } - } - - private AvroToArrowVectorIterator convert(Schema schema, FileInputStream fis) throws Exception { - BinaryDecoder decoder = DecoderFactory.get().directBinaryDecoder(fis, null); - return AvroToArrow.avroToArrowIterator(schema, decoder, config); - } - - @Test - public void testStringType() throws Exception { - Schema schema = getSchema("test_primitive_string.avsc"); - List data = Arrays.asList("v1", "v2", "v3", "v4", "v5"); - - File dataFile = new File(TMP, "test.avro"); - writeDataToFile(schema, data, dataFile); - - List roots = new ArrayList<>(); - List vectors = new ArrayList<>(); - try (FileInputStream fis = new FileInputStream(dataFile); - AvroToArrowVectorIterator iterator = convert(schema, fis)) { - while (iterator.hasNext()) { - VectorSchemaRoot root = iterator.next(); - FieldVector vector = root.getFieldVectors().get(0); - roots.add(root); - vectors.add(vector); - } - } - checkPrimitiveResult(data, vectors); - AutoCloseables.close(roots); - } - - @Test - public void testNullableStringType() throws Exception { - Schema schema = getSchema("test_nullable_string.avsc"); - - List data = new ArrayList<>(); - List expected 
= new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - String value = i % 2 == 0 ? "test" + i : null; - record.put(0, value); - expected.add(value); - data.add(record); - } - - File dataFile = new File(TMP, "test.avro"); - writeDataToFile(schema, data, dataFile); - - List roots = new ArrayList<>(); - List vectors = new ArrayList<>(); - try (FileInputStream fis = new FileInputStream(dataFile); - AvroToArrowVectorIterator iterator = convert(schema, fis)) { - while (iterator.hasNext()) { - VectorSchemaRoot root = iterator.next(); - FieldVector vector = root.getFieldVectors().get(0); - roots.add(root); - vectors.add(vector); - } - } - checkPrimitiveResult(expected, vectors); - AutoCloseables.close(roots); - } - - @Test - public void testRecordType() throws Exception { - Schema schema = getSchema("test_record.avsc"); - ArrayList data = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, "test" + i); - record.put(1, i); - record.put(2, i % 2 == 0); - data.add(record); - } - - File dataFile = new File(TMP, "test.avro"); - writeDataToFile(schema, data, dataFile); - - List roots = new ArrayList<>(); - try (FileInputStream fis = new FileInputStream(dataFile); - AvroToArrowVectorIterator iterator = convert(schema, fis)) { - while (iterator.hasNext()) { - roots.add(iterator.next()); - } - } - checkRecordResult(schema, data, roots); - AutoCloseables.close(roots); - } - - @Test - public void testArrayType() throws Exception { - Schema schema = getSchema("test_array.avsc"); - List> data = - Arrays.asList( - Arrays.asList("11", "222", "999"), - Arrays.asList("12222", "2333", "1000"), - Arrays.asList("1rrr", "2ggg"), - Arrays.asList("1vvv", "2bbb"), - Arrays.asList("1fff", "2")); - - File dataFile = new File(TMP, "test.avro"); - writeDataToFile(schema, data, dataFile); - - List roots = new ArrayList<>(); - List vectors = new ArrayList<>(); - try 
(FileInputStream fis = new FileInputStream(dataFile); - AvroToArrowVectorIterator iterator = convert(schema, fis)) { - while (iterator.hasNext()) { - VectorSchemaRoot root = iterator.next(); - roots.add(root); - vectors.add((ListVector) root.getFieldVectors().get(0)); - } - } - checkArrayResult(data, vectors); - AutoCloseables.close(roots); - } - - @Test - public void runLargeNumberOfRows() throws Exception { - Schema schema = getSchema("test_large_data.avsc"); - int x = 0; - final int targetRows = 600000; - Decoder fakeDecoder = new FakeDecoder(targetRows); - try (AvroToArrowVectorIterator iter = - AvroToArrow.avroToArrowIterator( - schema, fakeDecoder, new AvroToArrowConfigBuilder(config.getAllocator()).build())) { - while (iter.hasNext()) { - VectorSchemaRoot root = iter.next(); - x += root.getRowCount(); - root.close(); - } - } - - assertEquals(targetRows, x); - } - - /** Fake avro decoder to test large data. */ - private static class FakeDecoder extends Decoder { - - private int numRows; - - FakeDecoder(int numRows) { - this.numRows = numRows; - } - - // note that Decoder has no hasNext() API, assume enum is the first type in schema - // and fixed is the last type in schema and they are unique. 
- private void validate() throws EOFException { - if (numRows <= 0) { - throw new EOFException(); - } - } - - @Override - public void readNull() throws IOException {} - - @Override - public boolean readBoolean() throws IOException { - return false; - } - - @Override - public int readInt() throws IOException { - return 0; - } - - @Override - public long readLong() throws IOException { - return 0; - } - - @Override - public float readFloat() throws IOException { - return 0; - } - - @Override - public double readDouble() throws IOException { - return 0; - } - - @Override - public Utf8 readString(Utf8 old) throws IOException { - return new Utf8("test123test123" + numRows); - } - - @Override - public String readString() throws IOException { - return "test123test123" + numRows; - } - - @Override - public void skipString() throws IOException {} - - @Override - public ByteBuffer readBytes(ByteBuffer old) throws IOException { - return ByteBuffer.allocate(0); - } - - @Override - public void skipBytes() throws IOException {} - - @Override - public void readFixed(byte[] bytes, int start, int length) throws IOException { - // fixed type is last column, after read value, decrease numRows - numRows--; - } - - @Override - public void skipFixed(int length) throws IOException {} - - @Override - public int readEnum() throws IOException { - // enum type is first column, validate numRows first. 
- validate(); - return 0; - } - - @Override - public long readArrayStart() throws IOException { - return 5; - } - - @Override - public long arrayNext() throws IOException { - return 0; - } - - @Override - public long skipArray() throws IOException { - return 0; - } - - @Override - public long readMapStart() throws IOException { - return 5; - } - - @Override - public long mapNext() throws IOException { - return 0; - } - - @Override - public long skipMap() throws IOException { - return 0; - } - - @Override - public int readIndex() throws IOException { - return 0; - } - } -} diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java deleted file mode 100644 index 899ea40301322..0000000000000 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java +++ /dev/null @@ -1,477 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.avro; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericRecord; -import org.junit.jupiter.api.Test; - -public class AvroToArrowTest extends AvroTestBase { - - @Test - public void testStringType() throws Exception { - Schema schema = getSchema("test_primitive_string.avsc"); - List data = Arrays.asList("v1", "v2", "v3", "v4", "v5"); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(data, vector); - } - - @Test - public void testNullableStringType() throws Exception { - Schema schema = getSchema("test_nullable_string.avsc"); - - ArrayList data = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0 ? 
"test" + i : null); - data.add(record); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(schema, data, root); - } - - @Test - public void testRecordType() throws Exception { - Schema schema = getSchema("test_record.avsc"); - ArrayList data = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, "test" + i); - record.put(1, i); - record.put(2, i % 2 == 0); - data.add(record); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(schema, data, root); - } - - @Test - public void testFixedAttributes() throws Exception { - Schema schema = getSchema("attrs/test_fixed_attr.avsc"); - - List data = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - byte[] value = ("value" + i).getBytes(StandardCharsets.UTF_8); - GenericData.Fixed fixed = new GenericData.Fixed(schema); - fixed.bytes(value); - data.add(fixed); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - Map metadata = vector.getField().getMetadata(); - assertEquals("fixed doc", metadata.get("doc")); - assertEquals("[\"alias1\",\"alias2\"]", metadata.get("aliases")); - } - - @Test - public void testEnumAttributes() throws Exception { - Schema schema = getSchema("attrs/test_enum_attrs.avsc"); - List data = - Arrays.asList( - new GenericData.EnumSymbol(schema, "SPADES"), - new GenericData.EnumSymbol(schema, "HEARTS"), - new GenericData.EnumSymbol(schema, "DIAMONDS"), - new GenericData.EnumSymbol(schema, "CLUBS"), - new GenericData.EnumSymbol(schema, "SPADES")); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - Map metadata = vector.getField().getMetadata(); - assertEquals("enum doc", metadata.get("doc")); - assertEquals("[\"alias1\",\"alias2\"]", metadata.get("aliases")); - } - - @Test - public void testRecordAttributes() throws Exception { - Schema schema = 
getSchema("attrs/test_record_attrs.avsc"); - Schema nestedSchema = schema.getFields().get(0).schema(); - ArrayList data = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - GenericRecord nestedRecord = new GenericData.Record(nestedSchema); - nestedRecord.put(0, "test" + i); - nestedRecord.put(1, i); - record.put(0, nestedRecord); - - data.add(record); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - - StructVector structVector = (StructVector) root.getFieldVectors().get(0); - Map structMeta = structVector.getField().getMetadata(); - Map childMeta1 = structVector.getChildByOrdinal(0).getField().getMetadata(); - Map childMeta2 = structVector.getChildByOrdinal(1).getField().getMetadata(); - - assertEquals("f0 doc", structMeta.get("doc")); - assertEquals("[\"f0.a1\"]", structMeta.get("aliases")); - assertEquals("f1 doc", childMeta1.get("doc")); - assertEquals("[\"f1.a1\",\"f1.a2\"]", childMeta1.get("aliases")); - assertEquals("f2 doc", childMeta2.get("doc")); - assertEquals("[\"f2.a1\",\"f2.a2\"]", childMeta2.get("aliases")); - } - - @Test - public void testNestedRecordType() throws Exception { - Schema schema = getSchema("test_nested_record.avsc"); - Schema nestedSchema = schema.getFields().get(0).schema(); - ArrayList data = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - GenericRecord nestedRecord = new GenericData.Record(nestedSchema); - nestedRecord.put(0, "test" + i); - nestedRecord.put(1, i); - record.put(0, nestedRecord); - - data.add(record); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkNestedRecordResult(schema, data, root); - } - - @Test - public void testEnumType() throws Exception { - Schema schema = getSchema("test_primitive_enum.avsc"); - List data = - Arrays.asList( - new GenericData.EnumSymbol(schema, "SPADES"), - new GenericData.EnumSymbol(schema, "HEARTS"), - new GenericData.EnumSymbol(schema, 
"DIAMONDS"), - new GenericData.EnumSymbol(schema, "CLUBS"), - new GenericData.EnumSymbol(schema, "SPADES")); - - List expectedIndices = Arrays.asList(0, 1, 2, 3, 0); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(expectedIndices, vector); - - VarCharVector dictVector = (VarCharVector) config.getProvider().lookup(0).getVector(); - assertEquals(4, dictVector.getValueCount()); - - assertEquals("SPADES", dictVector.getObject(0).toString()); - assertEquals("HEARTS", dictVector.getObject(1).toString()); - assertEquals("DIAMONDS", dictVector.getObject(2).toString()); - assertEquals("CLUBS", dictVector.getObject(3).toString()); - } - - @Test - public void testIntType() throws Exception { - Schema schema = getSchema("test_primitive_int.avsc"); - List data = Arrays.asList(1, 2, 3, 4, 5); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(data, vector); - } - - @Test - public void testNullableIntType() throws Exception { - Schema schema = getSchema("test_nullable_int.avsc"); - - ArrayList data = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0 ? 
i : null); - data.add(record); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(schema, data, root); - } - - @Test - public void testLongType() throws Exception { - Schema schema = getSchema("test_primitive_long.avsc"); - List data = Arrays.asList(1L, 2L, 3L, 4L, 5L); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(data, vector); - } - - @Test - public void testNullableLongType() throws Exception { - Schema schema = getSchema("test_nullable_long.avsc"); - - ArrayList data = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0 ? (long) i : null); - data.add(record); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(schema, data, root); - } - - @Test - public void testFloatType() throws Exception { - Schema schema = getSchema("test_primitive_float.avsc"); - List data = Arrays.asList(1.1f, 2.2f, 3.3f, 4.4f, 5.5f); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(data, vector); - } - - @Test - public void testNullableFloatType() throws Exception { - Schema schema = getSchema("test_nullable_float.avsc"); - - ArrayList data = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0 ? 
i + 0.1f : null); - data.add(record); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(schema, data, root); - } - - @Test - public void testDoubleType() throws Exception { - Schema schema = getSchema("test_primitive_double.avsc"); - List data = Arrays.asList(1.1, 2.2, 3.3, 4.4, 5.5); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(data, vector); - } - - @Test - public void testNullableDoubleType() throws Exception { - Schema schema = getSchema("test_nullable_double.avsc"); - - ArrayList data = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0 ? i + 0.1 : null); - data.add(record); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(schema, data, root); - } - - @Test - public void testBytesType() throws Exception { - Schema schema = getSchema("test_primitive_bytes.avsc"); - List data = - Arrays.asList( - ByteBuffer.wrap("value1".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value2".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value3".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value4".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value5".getBytes(StandardCharsets.UTF_8))); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(data, vector); - } - - @Test - public void testNullableBytesType() throws Exception { - Schema schema = getSchema("test_nullable_bytes.avsc"); - - ArrayList data = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put( - 0, i % 2 == 0 ? 
ByteBuffer.wrap(("test" + i).getBytes(StandardCharsets.UTF_8)) : null); - data.add(record); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(schema, data, root); - } - - @Test - public void testBooleanType() throws Exception { - Schema schema = getSchema("test_primitive_boolean.avsc"); - List data = Arrays.asList(true, false, true, false, true); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(data, vector); - } - - @Test - public void testNullableBooleanType() throws Exception { - Schema schema = getSchema("test_nullable_boolean.avsc"); - - ArrayList data = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0 ? true : null); - data.add(record); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - checkRecordResult(schema, data, root); - } - - @Test - public void testArrayType() throws Exception { - Schema schema = getSchema("test_array.avsc"); - List> data = - Arrays.asList( - Arrays.asList("11", "222", "999"), - Arrays.asList("12222", "2333", "1000"), - Arrays.asList("1rrr", "2ggg"), - Arrays.asList("1vvv", "2bbb"), - Arrays.asList("1fff", "2")); - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkArrayResult(data, (ListVector) vector); - } - - @Test - public void testMapType() throws Exception { - Schema schema = getSchema("test_map.avsc"); - - List keys = Arrays.asList("key1", "key2", "key3", "key4", "key5", "key6"); - List vals = Arrays.asList("val1", "val2", "val3", "val4", "val5", "val6"); - - List data = new ArrayList<>(); - LinkedHashMap map1 = new LinkedHashMap(); - map1.put(keys.get(0), vals.get(0)); - map1.put(keys.get(1), vals.get(1)); - data.add(map1); - - LinkedHashMap map2 = new LinkedHashMap(); - map2.put(keys.get(2), vals.get(2)); - map2.put(keys.get(3), vals.get(3)); - 
data.add(map2); - - LinkedHashMap map3 = new LinkedHashMap(); - map3.put(keys.get(4), vals.get(4)); - map3.put(keys.get(5), vals.get(5)); - data.add(map3); - - VectorSchemaRoot root = writeAndRead(schema, data); - MapVector vector = (MapVector) root.getFieldVectors().get(0); - - checkPrimitiveResult(keys, vector.getDataVector().getChildrenFromFields().get(0)); - checkPrimitiveResult(vals, vector.getDataVector().getChildrenFromFields().get(1)); - assertEquals(0, vector.getOffsetBuffer().getInt(0)); - assertEquals(2, vector.getOffsetBuffer().getInt(1 * 4)); - assertEquals(4, vector.getOffsetBuffer().getInt(2 * 4)); - assertEquals(6, vector.getOffsetBuffer().getInt(3 * 4)); - } - - @Test - public void testFixedType() throws Exception { - Schema schema = getSchema("test_fixed.avsc"); - - List data = new ArrayList<>(); - List expected = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - byte[] value = ("value" + i).getBytes(StandardCharsets.UTF_8); - expected.add(value); - GenericData.Fixed fixed = new GenericData.Fixed(schema); - fixed.bytes(value); - data.add(fixed); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(expected, vector); - } - - @Test - public void testUnionType() throws Exception { - Schema schema = getSchema("test_union.avsc"); - ArrayList data = new ArrayList<>(); - ArrayList expected = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0 ? "test" + i : i); - expected.add(i % 2 == 0 ? 
"test" + i : i); - data.add(record); - } - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(expected, vector); - } - - @Test - public void testNullableUnionType() throws Exception { - Schema schema = getSchema("test_nullable_union.avsc"); - ArrayList data = new ArrayList<>(); - ArrayList expected = new ArrayList<>(); - for (int i = 0; i < 5; i++) { - GenericRecord record = new GenericData.Record(schema); - if (i % 3 == 0) { - record.put(0, "test" + i); - expected.add("test" + i); - data.add(record); - } else if (i % 3 == 1) { - record.put(0, i); - expected.add(i); - data.add(record); - } else { - record.put(0, null); - expected.add(null); - data.add(record); - } - } - - VectorSchemaRoot root = writeAndRead(schema, data); - FieldVector vector = root.getFieldVectors().get(0); - - checkPrimitiveResult(expected, vector); - } -} diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java deleted file mode 100644 index c318214f5c0a7..0000000000000 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.avro; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.File; -import java.util.ArrayList; -import java.util.List; -import org.apache.avro.Schema; -import org.apache.avro.file.DataFileReader; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.DatumReader; -import org.apache.avro.io.DatumWriter; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -public class TestWriteReadAvroRecord { - - @TempDir public static File TMP; - - @Test - public void testWriteAndRead() throws Exception { - File dataFile = new File(TMP, "test.avro"); - Schema schema = AvroTestBase.getSchema("test.avsc"); - - // write data to disk - GenericRecord user1 = new GenericData.Record(schema); - user1.put("name", "Alyssa"); - user1.put("favorite_number", 256); - - GenericRecord user2 = new GenericData.Record(schema); - user2.put("name", "Ben"); - user2.put("favorite_number", 7); - user2.put("favorite_color", "red"); - - DatumWriter datumWriter = new GenericDatumWriter(schema); - try (DataFileWriter dataFileWriter = - new DataFileWriter(datumWriter)) { - dataFileWriter.create(schema, dataFile); - dataFileWriter.append(user1); - dataFileWriter.append(user2); - } - - // read data from disk - DatumReader datumReader = new GenericDatumReader(schema); - List result = new 
ArrayList<>(); - try (DataFileReader dataFileReader = - new DataFileReader(dataFile, datumReader)) { - while (dataFileReader.hasNext()) { - GenericRecord user = dataFileReader.next(); - result.add(user); - } - } - - assertEquals(2, result.size()); - GenericRecord deUser1 = result.get(0); - assertEquals("Alyssa", deUser1.get("name").toString()); - assertEquals(256, deUser1.get("favorite_number")); - assertEquals(null, deUser1.get("favorite_color")); - - GenericRecord deUser2 = result.get(1); - assertEquals("Ben", deUser2.get("name").toString()); - assertEquals(7, deUser2.get("favorite_number")); - assertEquals("red", deUser2.get("favorite_color").toString()); - } -} diff --git a/java/adapter/avro/src/test/resources/logback.xml b/java/adapter/avro/src/test/resources/logback.xml deleted file mode 100644 index 4c54d18a210ff..0000000000000 --- a/java/adapter/avro/src/test/resources/logback.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - - diff --git a/java/adapter/avro/src/test/resources/schema/attrs/test_enum_attrs.avsc b/java/adapter/avro/src/test/resources/schema/attrs/test_enum_attrs.avsc deleted file mode 100644 index afd00b8d9f7df..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/attrs/test_enum_attrs.avsc +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "type": "enum", - "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"], - "name": "testEnum", - "doc" : "enum doc", - "aliases" : ["alias1", "alias2"] -} diff --git a/java/adapter/avro/src/test/resources/schema/attrs/test_fixed_attr.avsc b/java/adapter/avro/src/test/resources/schema/attrs/test_fixed_attr.avsc deleted file mode 100644 index 55e504def1759..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/attrs/test_fixed_attr.avsc +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "type": "fixed", - "size": 6, - "name": "testFixed", - "doc" : "fixed doc", - "aliases" : ["alias1", "alias2"] -} diff --git a/java/adapter/avro/src/test/resources/schema/attrs/test_record_attrs.avsc b/java/adapter/avro/src/test/resources/schema/attrs/test_record_attrs.avsc deleted file mode 100644 index 2e2e311a9d542..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/attrs/test_record_attrs.avsc +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testAttrs", - "fields": [ - { - "name" : "f0", - "type" : { - "type" : "record", - "name" : "nestedInRecord", - "doc" : "f0 doc", - "aliases" : ["f0.a1"], - "fields": [ - {"name": "f1", "type": "string", "doc": "f1 doc", "aliases" : ["f1.a1", "f1.a2"]}, - {"name": "f2", "type": "int", "doc": "f2 doc", "aliases" : ["f2.a1", "f2.a2"]} - ] - } - } - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/logical/test_date.avsc b/java/adapter/avro/src/test/resources/schema/logical/test_date.avsc deleted file mode 100644 index f661e65062dba..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/logical/test_date.avsc +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "name": "test", - "type": "int", - "logicalType" : "date" -} diff --git a/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid1.avsc b/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid1.avsc deleted file mode 100644 index 18d7d63fc7330..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid1.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "name": "test", - "type": "bytes", - "logicalType" : "decimal", - "precision": 39, - "scale": 2 -} diff --git a/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid2.avsc b/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid2.avsc deleted file mode 100644 index eed7bd7811d1b..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid2.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "name": "test", - "type": "bytes", - "logicalType" : "decimal", - "precision": 20, - "scale": -1 -} diff --git a/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid3.avsc b/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid3.avsc deleted file mode 100644 index 1667b8aff87d8..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid3.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "name": "test", - "type": "bytes", - "logicalType" : "decimal", - "precision": 20, - "scale": 40 -} diff --git a/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid4.avsc b/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid4.avsc deleted file mode 100644 index e1f710416f91e..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid4.avsc +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "name": "test", - "type": "fixed", - "size" : 1, - "logicalType" : "decimal", - "precision": 30, - "scale": 2 -} diff --git a/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_bytes.avsc b/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_bytes.avsc deleted file mode 100644 index 944b5d85d6df2..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_bytes.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "name": "test", - "type": "bytes", - "logicalType" : "decimal", - "precision": 10, - "scale": 2 -} diff --git a/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_fixed.avsc b/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_fixed.avsc deleted file mode 100644 index 1901f90a975f9..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_fixed.avsc +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "name": "test", - "type": "fixed", - "size" : 10, - "logicalType" : "decimal", - "precision": 10, - "scale": 2 -} diff --git a/java/adapter/avro/src/test/resources/schema/logical/test_time_micros.avsc b/java/adapter/avro/src/test/resources/schema/logical/test_time_micros.avsc deleted file mode 100644 index ee7d4e9378aa3..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/logical/test_time_micros.avsc +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "name": "test", - "type": "long", - "logicalType" : "time-micros" -} diff --git a/java/adapter/avro/src/test/resources/schema/logical/test_time_millis.avsc b/java/adapter/avro/src/test/resources/schema/logical/test_time_millis.avsc deleted file mode 100644 index 54877babc81b3..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/logical/test_time_millis.avsc +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "name": "test", - "type": "int", - "logicalType" : "time-millis" -} diff --git a/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_micros.avsc b/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_micros.avsc deleted file mode 100644 index 15c0bf53dca88..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_micros.avsc +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "name": "test", - "type": "long", - "logicalType" : "timestamp-micros" -} diff --git a/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_millis.avsc b/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_millis.avsc deleted file mode 100644 index 822a2c360c5fc..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_millis.avsc +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "name": "test", - "type": "long", - "logicalType" : "timestamp-millis" -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_before.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_before.avsc deleted file mode 100644 index e836aa7685363..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_before.avsc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "test", - "fields": [ - {"name": "f0", "type": "string"}, - {"name": "f1", "type": {"type" : "array", "items": "string"}}, - {"name": "f2", "type": "boolean"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_expected.avsc deleted file mode 100644 index 36e7fdfb066fc..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_expected.avsc +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "test", - "fields": [ - {"name": "f0", "type": "string"}, - {"name": "f2", "type": "boolean"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_base1.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_base1.avsc deleted file mode 100644 index 5338253f45f7f..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_base1.avsc +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testRecord", - "fields": [ - {"name": "f0", "type": {"type" : "fixed", "size":5, "name" : "fix"}}, - {"name": "f1", "type": {"type" : "enum", "name" : "enum", "symbols": ["TEST0", "TEST1"]}}, - {"name": "f2", "type": "string"}, - {"name": "f3", "type": "bytes"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_base2.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_base2.avsc deleted file mode 100644 index 50655a70e1295..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_base2.avsc +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testRecord", - "fields": [ - {"name": "f0", "type": "boolean"}, - {"name": "f1", "type": "int"}, - {"name": "f2", "type": "long"}, - {"name": "f3", "type": "float"}, - {"name": "f4", "type": "double"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_boolean_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_boolean_expected.avsc deleted file mode 100644 index 9b62e3149ffc1..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_boolean_expected.avsc +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testRecord", - "fields": [ - {"name": "f1", "type": "int"}, - {"name": "f2", "type": "long"}, - {"name": "f3", "type": "float"}, - {"name": "f4", "type": "double"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_bytes_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_bytes_expected.avsc deleted file mode 100644 index 8a1903b34da83..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_bytes_expected.avsc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testRecord", - "fields": [ - {"name": "f0", "type": {"type" : "fixed", "size":5, "name" : "fix"}}, - {"name": "f1", "type": {"type" : "enum", "name" : "enum", "symbols": ["TEST0", "TEST1"]}}, - {"name": "f2", "type": "string"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_double_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_double_expected.avsc deleted file mode 100644 index 6021c445413c0..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_double_expected.avsc +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testRecord", - "fields": [ - {"name": "f0", "type": "boolean"}, - {"name": "f1", "type": "int"}, - {"name": "f2", "type": "long"}, - {"name": "f3", "type": "float"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_enum_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_enum_expected.avsc deleted file mode 100644 index f5ed86a2892dc..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_enum_expected.avsc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testRecord", - "fields": [ - {"name": "f0", "type": {"type" : "fixed", "size":5, "name" : "fix"}}, - {"name": "f2", "type": "string"}, - {"name": "f3", "type": "bytes"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_fixed_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_fixed_expected.avsc deleted file mode 100644 index 5423a7977c679..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_fixed_expected.avsc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testRecord", - "fields": [ - {"name": "f1", "type": {"type" : "enum", "name" : "enum", "symbols": ["TEST0", "TEST1"]}}, - {"name": "f2", "type": "string"}, - {"name": "f3", "type": "bytes"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_float_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_float_expected.avsc deleted file mode 100644 index dea106331a9e3..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_float_expected.avsc +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testRecord", - "fields": [ - {"name": "f0", "type": "boolean"}, - {"name": "f1", "type": "int"}, - {"name": "f2", "type": "long"}, - {"name": "f4", "type": "double"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_int_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_int_expected.avsc deleted file mode 100644 index 53d4f1025b4a7..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_int_expected.avsc +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testRecord", - "fields": [ - {"name": "f0", "type": "boolean"}, - {"name": "f2", "type": "long"}, - {"name": "f3", "type": "float"}, - {"name": "f4", "type": "double"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_long_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_long_expected.avsc deleted file mode 100644 index bf16601dd458e..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_long_expected.avsc +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testRecord", - "fields": [ - {"name": "f0", "type": "boolean"}, - {"name": "f1", "type": "int"}, - {"name": "f3", "type": "float"}, - {"name": "f4", "type": "double"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_before.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_before.avsc deleted file mode 100644 index 8cbb1a1d72061..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_before.avsc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "test", - "fields": [ - {"name": "f0", "type": "string"}, - {"name": "f1", "type": {"type" : "map", "values": "string"}}, - {"name": "f2", "type": "boolean"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_expected.avsc deleted file mode 100644 index 36e7fdfb066fc..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_expected.avsc +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "test", - "fields": [ - {"name": "f0", "type": "string"}, - {"name": "f2", "type": "boolean"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_multi_fields_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_multi_fields_expected.avsc deleted file mode 100644 index b5d637b1daaec..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_multi_fields_expected.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testSkip", - "fields": [ - {"name": "f0", "type": "string"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_before.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_before.avsc deleted file mode 100644 index 7aee92b924e31..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_before.avsc +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "test", - "fields": [ - { - "name" : "f0", - "type" : { - "type" : "record", - "name" : "nestedInRecord", - "fields": [ - {"name": "f00", "type": "string"}, - {"name": "f01", "type": "int"} - ] - } - }, - { - "name" : "f1", "type" : "int" - } - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_expected.avsc deleted file mode 100644 index 3e24952035174..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_expected.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "test", - "fields": [ - { "name" : "f1", "type" : "int"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_second_level_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_second_level_expected.avsc deleted file mode 100644 index f3b7f8c097404..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_second_level_expected.avsc +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testSkipNested", - "fields": [ - { - "name" : "nested", - "type" : { - "type" : "record", - "name" : "nestedInRecord", - "fields": [ - {"name": "f1", "type": "int"} - ] - } - } - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_single_field_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_single_field_expected.avsc deleted file mode 100644 index 553525847d0ab..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_single_field_expected.avsc +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testSkip", - "fields": [ - {"name": "f0", "type": "string"}, - {"name": "f2", "type": "boolean"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_string_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_string_expected.avsc deleted file mode 100644 index 2d2c0817434c2..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_string_expected.avsc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testRecord", - "fields": [ - {"name": "f0", "type": {"type" : "fixed", "size":5, "name" : "fix"}}, - {"name": "f1", "type": {"type" : "enum", "name" : "enum", "symbols": ["TEST0", "TEST1"]}}, - {"name": "f3", "type": "bytes"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_third_level_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_third_level_expected.avsc deleted file mode 100644 index 6f42da893daab..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_third_level_expected.avsc +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "firstLevel", - "fields": [ - { - "name" : "f0", - "type" : { - "type" : "record", - "name" : "secondLevel", - "fields": [ - { - "name" : "f0", - "type" : { - "type" : "record", - "name" : "thirdLevel", - "fields" : [ - {"name": "f1", "type": "int"}, - {"name": "f0", "type": "string"}, - {"name": "f2", "type": "boolean"} - ] - } - } - ] - } - } - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_before.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_before.avsc deleted file mode 100644 index fc1105911dd8a..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_before.avsc +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "test", - "fields": [ - {"name": "f0", "type": ["string"]}, - {"name": "f1", "type": ["string", "null"]}, - {"name": "f2", "type": ["string", "int"]}, - {"name": "f3", "type": "int"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_multi_fields_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_multi_fields_expected.avsc deleted file mode 100644 index 308e027a26e07..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_multi_fields_expected.avsc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "test", - "fields": [ - {"name": "f0", "type": ["string"]}, - {"name": "f1", "type": ["string", "null"]}, - {"name": "f3", "type": "int"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_nullable_field_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_nullable_field_expected.avsc deleted file mode 100644 index cbc83e5666e14..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_nullable_field_expected.avsc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "test", - "fields": [ - {"name": "f0", "type": ["string"]}, - {"name": "f2", "type": ["string", "int"]}, - {"name": "f3", "type": "int"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_one_field_expected.avsc b/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_one_field_expected.avsc deleted file mode 100644 index 0f72fb432fbce..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_one_field_expected.avsc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "test", - "fields": [ - {"name": "f1", "type": ["string", "null"]}, - {"name": "f2", "type": ["string", "int"]}, - {"name": "f3", "type": ["string", "int"]} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/test.avsc b/java/adapter/avro/src/test/resources/schema/test.avsc deleted file mode 100644 index 92c0873de1da9..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test.avsc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "User", - "fields": [ - {"name": "name", "type": "string"}, - {"name": "favorite_number", "type": ["int", "null"]}, - {"name": "favorite_color", "type": ["string", "null"]} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/test_array.avsc b/java/adapter/avro/src/test/resources/schema/test_array.avsc deleted file mode 100644 index 5b75a4031d89e..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_array.avsc +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "array", - "items": "string", - "name": "testArray" -} diff --git a/java/adapter/avro/src/test/resources/schema/test_fixed.avsc b/java/adapter/avro/src/test/resources/schema/test_fixed.avsc deleted file mode 100644 index a4d96e9ab550a..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_fixed.avsc +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "fixed", - "size": 6, - "name": "testFixed" -} diff --git a/java/adapter/avro/src/test/resources/schema/test_large_data.avsc b/java/adapter/avro/src/test/resources/schema/test_large_data.avsc deleted file mode 100644 index f784ae62337a4..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_large_data.avsc +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testLargeData", - "fields": [ - { - "name": "f0", - "type": { - "name" : "f0", - "type" : "enum", - "symbols" : ["value1", "value2", "value3", "value4", "value5"] - } - }, - { - "name" : "f1", - "type" : { - "type" : "record", - "name" : "nestedRecord", - "fields": [ - {"name": "f1_0", "type": "string"}, - {"name": "f1_1", "type": "int"} - ] - } - }, - - {"name": "f2", "type": "string"}, - {"name": "f3", "type": "int"}, - {"name": "f4", "type": "boolean"}, - {"name": "f5", "type": "float"}, - {"name": "f6", "type": "double"}, - {"name": "f7", "type": "bytes"}, - {"name": "f8", "type": ["string", "int"]}, - { - "name": "f9", - "type": { - "name" : "f9", - "type" : "array", - "items" : "string" - } - }, - { - "name": "f10", - "type": { - "name" : "f10", - "type" : "map", - "values" : "string" - } - }, - { - "name": "f11", - "type": { - "type" : "fixed", - "name" : "f11", - "size" : 5 - } - } - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/test_map.avsc b/java/adapter/avro/src/test/resources/schema/test_map.avsc deleted file mode 100644 index 0dfa3a595bb25..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_map.avsc +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "map", - "values": "string", - "name": "testMap" -} diff --git a/java/adapter/avro/src/test/resources/schema/test_nested_record.avsc b/java/adapter/avro/src/test/resources/schema/test_nested_record.avsc deleted file mode 100644 index 29dddfd1adc66..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_nested_record.avsc +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testNestedRecord", - "fields": [ - { - "name" : "f0", - "type" : { - "type" : "record", - "name" : "nestedInRecord", - "fields": [ - {"name": "f0", "type": "string"}, - {"name": "f1", "type": "int"} - ] - } - } - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/test_nullable_boolean.avsc b/java/adapter/avro/src/test/resources/schema/test_nullable_boolean.avsc deleted file mode 100644 index 62af1a85d8201..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_nullable_boolean.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "nullableBoolean", - "fields": [ - {"name": "f0", "type": ["null", "boolean"]} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/test_nullable_bytes.avsc b/java/adapter/avro/src/test/resources/schema/test_nullable_bytes.avsc deleted file mode 100644 index 002bc7ce2c365..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_nullable_bytes.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "nullableBytes", - "fields": [ - {"name": "f0", "type": ["null", "bytes"]} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/test_nullable_double.avsc b/java/adapter/avro/src/test/resources/schema/test_nullable_double.avsc deleted file mode 100644 index 642b7aa1622fe..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_nullable_double.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "nullableDouble", - "fields": [ - {"name": "f0", "type": ["null", "double"]} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/test_nullable_float.avsc b/java/adapter/avro/src/test/resources/schema/test_nullable_float.avsc deleted file mode 100644 index dff285909b130..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_nullable_float.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "nullableFloat", - "fields": [ - {"name": "f0", "type": ["null", "float"]} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/test_nullable_int.avsc b/java/adapter/avro/src/test/resources/schema/test_nullable_int.avsc deleted file mode 100644 index abb2fc48a684f..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_nullable_int.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "nullableInt", - "fields": [ - {"name": "f0", "type": ["null", "int"]} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/test_nullable_long.avsc b/java/adapter/avro/src/test/resources/schema/test_nullable_long.avsc deleted file mode 100644 index 0624d2737114e..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_nullable_long.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "nullableLong", - "fields": [ - {"name": "f0", "type": ["null", "long"]} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/test_nullable_string.avsc b/java/adapter/avro/src/test/resources/schema/test_nullable_string.avsc deleted file mode 100644 index 347808ce6da15..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_nullable_string.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "nullableString", - "fields": [ - {"name": "f0", "type": ["null", "string"]} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/test_nullable_union.avsc b/java/adapter/avro/src/test/resources/schema/test_nullable_union.avsc deleted file mode 100644 index af94812d76373..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_nullable_union.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testNullableUnions", - "fields": [ - {"name": "f0", "type": ["string", "int", "null"]} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_boolean.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_boolean.avsc deleted file mode 100644 index 7652ce72385d2..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_primitive_boolean.avsc +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "boolean", - "name": "TestBoolean" -} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_bytes.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_bytes.avsc deleted file mode 100644 index 5102430b65aa0..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_primitive_bytes.avsc +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "bytes", - "name": "TestBytes" -} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_double.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_double.avsc deleted file mode 100644 index d1ae0b605a93a..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_primitive_double.avsc +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "double", - "name": "TestDouble" -} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_enum.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_enum.avsc deleted file mode 100644 index bd8df61020ebd..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_primitive_enum.avsc +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "enum", - "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"], - "name": "testEnum" -} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_float.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_float.avsc deleted file mode 100644 index 675d1090d8695..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_primitive_float.avsc +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "float", - "name": "TestFloat" -} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_int.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_int.avsc deleted file mode 100644 index 8fc8488281ad9..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_primitive_int.avsc +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "int", - "name": "TestInt" -} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_long.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_long.avsc deleted file mode 100644 index b9706107c09a2..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_primitive_long.avsc +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "long", - "name": "TestLong" -} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_string.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_string.avsc deleted file mode 100644 index b4a89a7f62cfb..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_primitive_string.avsc +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "string", - "name": "TestString" -} diff --git a/java/adapter/avro/src/test/resources/schema/test_record.avsc b/java/adapter/avro/src/test/resources/schema/test_record.avsc deleted file mode 100644 index e83cf1180d201..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_record.avsc +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testRecord", - "fields": [ - {"name": "f0", "type": "string"}, - {"name": "f1", "type": "int"}, - {"name": "f2", "type": "boolean"} - ] -} diff --git a/java/adapter/avro/src/test/resources/schema/test_union.avsc b/java/adapter/avro/src/test/resources/schema/test_union.avsc deleted file mode 100644 index f181e36e3c120..0000000000000 --- a/java/adapter/avro/src/test/resources/schema/test_union.avsc +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.avro", - "type": "record", - "name": "testUnions", - "fields": [ - {"name": "f0", "type": ["string", "int"]} - ] -} diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml deleted file mode 100644 index 2f621d7a05a80..0000000000000 --- a/java/adapter/jdbc/pom.xml +++ /dev/null @@ -1,130 +0,0 @@ - - - - 4.0.0 - - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - ../../pom.xml - - - arrow-jdbc - Arrow JDBC Adapter - (Contrib/Experimental)A library for converting JDBC data to Arrow data. 
- http://maven.apache.org - - - - - org.apache.arrow - arrow-memory-core - - - - org.apache.arrow - arrow-memory-netty - runtime - - - - org.apache.arrow - arrow-vector - ${arrow.vector.classifier} - - - - org.immutables - value-annotations - - - - com.h2database - h2 - 2.3.232 - test - - - - com.fasterxml.jackson.dataformat - jackson-dataformat-yaml - test - - - - com.fasterxml.jackson.core - jackson-databind - - - - com.fasterxml.jackson.core - jackson-core - - - - com.fasterxml.jackson.core - jackson-annotations - - - - org.assertj - assertj-core - test - - - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - - analyze - verify - - - com.fasterxml.jackson.core:jackson-annotations - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - - --add-reads=org.apache.arrow.adapter.jdbc=com.fasterxml.jackson.dataformat.yaml --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED -Duser.timezone=UTC - - - - org.apache.maven.plugins - maven-compiler-plugin - - - -Werror - - - - - - diff --git a/java/adapter/jdbc/src/main/java/module-info.java b/java/adapter/jdbc/src/main/java/module-info.java deleted file mode 100644 index 04977222c1530..0000000000000 --- a/java/adapter/jdbc/src/main/java/module-info.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -module org.apache.arrow.adapter.jdbc { - exports org.apache.arrow.adapter.jdbc.consumer; - exports org.apache.arrow.adapter.jdbc; - exports org.apache.arrow.adapter.jdbc.binder; - - requires com.fasterxml.jackson.annotation; - requires com.fasterxml.jackson.databind; - requires java.sql; - requires jdk.unsupported; - requires org.apache.arrow.memory.core; - requires org.apache.arrow.vector; -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java deleted file mode 100644 index d30cf32a04996..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc; - -import static org.apache.arrow.adapter.jdbc.JdbcToArrowUtils.isColumnNullable; - -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.Iterator; -import org.apache.arrow.adapter.jdbc.consumer.CompositeJdbcConsumer; -import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer; -import org.apache.arrow.adapter.jdbc.consumer.exceptions.JdbcConsumerException; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.ValueVectorUtility; - -/** VectorSchemaRoot iterator for partially converting JDBC data. */ -public class ArrowVectorIterator implements Iterator, AutoCloseable { - - private final ResultSet resultSet; - private final JdbcToArrowConfig config; - - private final Schema schema; - private final ResultSetMetaData rsmd; - - private final JdbcConsumer[] consumers; - final CompositeJdbcConsumer compositeConsumer; - - // this is used only if resuing vector schema root is enabled. - private VectorSchemaRoot nextBatch; - - private final int targetBatchSize; - - // This is used to track whether the ResultSet has been fully read, and is needed specifically for - // cases where there - // is a ResultSet having zero rows (empty): - private boolean readComplete = false; - - /** Construct an instance. 
*/ - private ArrowVectorIterator(ResultSet resultSet, JdbcToArrowConfig config) throws SQLException { - this.resultSet = resultSet; - this.config = config; - this.schema = JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config); - this.targetBatchSize = config.getTargetBatchSize(); - - rsmd = resultSet.getMetaData(); - consumers = new JdbcConsumer[rsmd.getColumnCount()]; - this.compositeConsumer = new CompositeJdbcConsumer(consumers); - this.nextBatch = config.isReuseVectorSchemaRoot() ? createVectorSchemaRoot() : null; - } - - /** Create a ArrowVectorIterator to partially convert data. */ - public static ArrowVectorIterator create(ResultSet resultSet, JdbcToArrowConfig config) - throws SQLException { - ArrowVectorIterator iterator = null; - try { - iterator = new ArrowVectorIterator(resultSet, config); - } catch (Throwable e) { - AutoCloseables.close(e, iterator); - throw new RuntimeException("Error occurred while creating iterator.", e); - } - return iterator; - } - - private void consumeData(VectorSchemaRoot root) { - // consume data - try { - int readRowCount = 0; - if (targetBatchSize == JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) { - while (resultSet.next()) { - ValueVectorUtility.ensureCapacity(root, readRowCount + 1); - compositeConsumer.consume(resultSet); - readRowCount++; - } - readComplete = true; - } else { - while ((readRowCount < targetBatchSize) && !readComplete) { - if (resultSet.next()) { - compositeConsumer.consume(resultSet); - readRowCount++; - } else { - readComplete = true; - } - } - } - - root.setRowCount(readRowCount); - } catch (Throwable e) { - compositeConsumer.close(); - if (e instanceof JdbcConsumerException) { - throw (JdbcConsumerException) e; - } else { - throw new RuntimeException("Error occurred while consuming data.", e); - } - } - } - - private VectorSchemaRoot createVectorSchemaRoot() throws SQLException { - VectorSchemaRoot root = null; - try { - root = VectorSchemaRoot.create(schema, config.getAllocator()); - if 
(config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) { - ValueVectorUtility.preAllocate(root, config.getTargetBatchSize()); - } - } catch (Throwable e) { - if (root != null) { - root.close(); - } - throw new RuntimeException("Error occurred while creating schema root.", e); - } - initialize(root); - return root; - } - - private void initialize(VectorSchemaRoot root) throws SQLException { - for (int i = 1; i <= consumers.length; i++) { - final JdbcFieldInfo columnFieldInfo = - JdbcToArrowUtils.getJdbcFieldInfoForColumn(rsmd, i, config); - ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo); - consumers[i - 1] = - config - .getJdbcConsumerGetter() - .apply( - arrowType, - i, - isColumnNullable(resultSet.getMetaData(), i, columnFieldInfo), - root.getVector(i - 1), - config); - } - } - - // Loads the next schema root or null if no more rows are available. - private void load(VectorSchemaRoot root) { - for (int i = 0; i < consumers.length; i++) { - FieldVector vec = root.getVector(i); - if (config.isReuseVectorSchemaRoot()) { - // if we are reusing the vector schema root, - // we must reset the vector before populating it with data. - vec.reset(); - } - consumers[i].resetValueVector(vec); - } - - consumeData(root); - } - - @Override - public boolean hasNext() { - return !readComplete; - } - - /** - * Gets the next vector. If {@link JdbcToArrowConfig#isReuseVectorSchemaRoot()} is false, the - * client is responsible for freeing its resources. - * - * @throws JdbcConsumerException on error from VectorConsumer - */ - @Override - public VectorSchemaRoot next() { - Preconditions.checkArgument(hasNext()); - try { - VectorSchemaRoot ret = - config.isReuseVectorSchemaRoot() ? 
nextBatch : createVectorSchemaRoot(); - load(ret); - return ret; - } catch (Exception e) { - close(); - if (e instanceof JdbcConsumerException) { - throw (JdbcConsumerException) e; - } else { - throw new RuntimeException("Error occurred while getting next schema root.", e); - } - } - } - - /** - * Clean up resources ONLY WHEN THE {@link VectorSchemaRoot} HOLDING EACH BATCH IS REUSED. If a - * new VectorSchemaRoot is created for each batch, each root must be closed manually by the client - * code. - */ - @Override - public void close() { - if (config.isReuseVectorSchemaRoot()) { - nextBatch.close(); - compositeConsumer.close(); - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java deleted file mode 100644 index 30e734a68d511..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -/** String constants used for metadata returned on Vectors. 
*/ -public class Constants { - private Constants() {} - - public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME"; - public static final String SQL_SCHEMA_NAME_KEY = "SQL_SCHEMA_NAME"; - public static final String SQL_TABLE_NAME_KEY = "SQL_TABLE_NAME"; - public static final String SQL_COLUMN_NAME_KEY = "SQL_COLUMN_NAME"; - public static final String SQL_TYPE_KEY = "SQL_TYPE"; -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java deleted file mode 100644 index 6becac0bbc10c..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Types; -import org.apache.arrow.util.Preconditions; - -/** - * This class represents the information about a JDBC ResultSet Field that is needed to construct an - * {@link org.apache.arrow.vector.types.pojo.ArrowType}. Currently, this is: - * - *
    - *
  • The JDBC {@link java.sql.Types} type. - *
  • The nullability. - *
  • The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link - * java.sql.Types#NUMERIC} types). - *
  • The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link - * java.sql.Types#NUMERIC} types). - *
- */ -public class JdbcFieldInfo { - private final int column; - private final int jdbcType; - private final int nullability; - private final int precision; - private final int scale; - private final String typeName; - private final int displaySize; - - /** - * Builds a JdbcFieldInfo using only the {@link java.sql.Types} type. Do not use this - * constructor if the field type is {@link java.sql.Types#DECIMAL} or {@link - * java.sql.Types#NUMERIC}; the precision and scale will be set to 0. - * - * @param jdbcType The {@link java.sql.Types} type. - * @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link - * java.sql.Types#NUMERIC}. - */ - public JdbcFieldInfo(int jdbcType) { - Preconditions.checkArgument( - (jdbcType != Types.DECIMAL && jdbcType != Types.NUMERIC), - "DECIMAL and NUMERIC types require a precision and scale; please use another constructor."); - this.column = 0; - this.jdbcType = jdbcType; - this.nullability = ResultSetMetaData.columnNullableUnknown; - this.precision = 0; - this.scale = 0; - this.typeName = ""; - this.displaySize = 0; - } - - /** - * Builds a JdbcFieldInfo from the {@link java.sql.Types} type, precision, and scale. - * Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} - * types. - * - * @param jdbcType The {@link java.sql.Types} type. - * @param precision The field's numeric precision. - * @param scale The field's numeric scale. - */ - public JdbcFieldInfo(int jdbcType, int precision, int scale) { - this.column = 0; - this.jdbcType = jdbcType; - this.nullability = ResultSetMetaData.columnNullableUnknown; - this.precision = precision; - this.scale = scale; - this.typeName = ""; - this.displaySize = 0; - } - - /** - * Builds a JdbcFieldInfo from the {@link java.sql.Types} type, nullability, - * precision, and scale. - * - * @param jdbcType The {@link java.sql.Types} type. - * @param nullability The nullability. 
Must be one of {@link ResultSetMetaData#columnNoNulls}, - * {@link ResultSetMetaData#columnNullable}, or {@link - * ResultSetMetaData#columnNullableUnknown}. - * @param precision The field's numeric precision. - * @param scale The field's numeric scale. - */ - public JdbcFieldInfo(int jdbcType, int nullability, int precision, int scale) { - this.column = 0; - this.jdbcType = jdbcType; - this.nullability = nullability; - this.precision = precision; - this.scale = scale; - this.typeName = ""; - this.displaySize = 0; - } - - /** - * Builds a JdbcFieldInfo from the corresponding {@link java.sql.ResultSetMetaData} - * column. - * - * @param rsmd The {@link java.sql.ResultSetMetaData} to get the field information from. - * @param column The column to get the field information for (on a 1-based index). - * @throws SQLException If the column information cannot be retrieved. - * @throws NullPointerException if rsmd is null. - * @throws IllegalArgumentException if column is out of bounds. - */ - public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException { - Preconditions.checkNotNull(rsmd, "ResultSetMetaData cannot be null."); - Preconditions.checkArgument( - column > 0, "ResultSetMetaData columns have indices starting at 1."); - Preconditions.checkArgument( - column <= rsmd.getColumnCount(), - "The index must be within the number of columns (1 to %s, inclusive)", - rsmd.getColumnCount()); - - this.column = column; - this.jdbcType = rsmd.getColumnType(column); - this.nullability = rsmd.isNullable(column); - this.precision = rsmd.getPrecision(column); - this.scale = rsmd.getScale(column); - this.typeName = rsmd.getColumnTypeName(column); - this.displaySize = rsmd.getColumnDisplaySize(column); - } - - /** - * Builds a JdbcFieldInfo from the corresponding row from a {@link - * java.sql.DatabaseMetaData#getColumns} ResultSet. - * - * @param rs The {@link java.sql.ResultSet} to get the field information from. 
- * @throws SQLException If the column information cannot be retrieved. - */ - public JdbcFieldInfo(ResultSet rs) throws SQLException { - this.column = rs.getInt("ORDINAL_POSITION"); - this.jdbcType = rs.getInt("DATA_TYPE"); - this.nullability = rs.getInt("NULLABLE"); - this.precision = rs.getInt("COLUMN_SIZE"); - this.scale = rs.getInt("DECIMAL_DIGITS"); - this.typeName = rs.getString("TYPE_NAME"); - this.displaySize = rs.getInt("CHAR_OCTET_LENGTH"); - } - - /** The {@link java.sql.Types} type. */ - public int getJdbcType() { - return jdbcType; - } - - /** The nullability. */ - public int isNullable() { - return nullability; - } - - /** - * The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} - * types. - */ - public int getPrecision() { - return precision; - } - - /** - * The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. - */ - public int getScale() { - return scale; - } - - /** The column index for query column. */ - public int getColumn() { - return column; - } - - /** The type name as reported by the database. */ - public String getTypeName() { - return typeName; - } - - /** The max number of characters for the column. */ - public int getDisplaySize() { - return displaySize; - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java deleted file mode 100644 index fd4721bcd9c4e..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.util.HashMap; -import java.util.Map; -import org.apache.arrow.adapter.jdbc.binder.ColumnBinder; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.VectorSchemaRoot; - -/** - * A binder binds JDBC prepared statement parameters to rows of Arrow data from a VectorSchemaRoot. - * - *

Each row of the VectorSchemaRoot will be bound to the configured parameters of the - * PreparedStatement. One row of data is bound at a time. - */ -public class JdbcParameterBinder { - private final PreparedStatement statement; - private final VectorSchemaRoot root; - private final ColumnBinder[] binders; - private final int[] parameterIndices; - private int nextRowIndex; - - /** - * Create a new parameter binder. - * - * @param statement The statement to bind parameters to. - * @param root The VectorSchemaRoot to pull data from. - * @param binders Column binders to translate from Arrow data to JDBC parameters, one per - * parameter. - * @param parameterIndices For each binder in binders, the index of the parameter to bind - * to. - */ - private JdbcParameterBinder( - final PreparedStatement statement, - final VectorSchemaRoot root, - final ColumnBinder[] binders, - int[] parameterIndices) { - Preconditions.checkArgument( - binders.length == parameterIndices.length, - "Number of column binders (%s) must equal number of parameter indices (%s)", - binders.length, - parameterIndices.length); - this.statement = statement; - this.root = root; - this.binders = binders; - this.parameterIndices = parameterIndices; - this.nextRowIndex = 0; - } - - /** - * Initialize a binder with a builder. - * - * @param statement The statement to bind to. The binder does not maintain ownership of the - * statement. - * @param root The {@link VectorSchemaRoot} to pull data from. The binder does not maintain - * ownership of the vector schema root. - */ - public static Builder builder(final PreparedStatement statement, final VectorSchemaRoot root) { - return new Builder(statement, root); - } - - /** Reset the binder (so the root can be updated with new data). */ - public void reset() { - nextRowIndex = 0; - } - - /** - * Bind the next row of data to the parameters of the statement. - * - *

After this, the application should call the desired method on the prepared statement, such - * as {@link PreparedStatement#executeUpdate()}, or {@link PreparedStatement#addBatch()}. - * - * @return true if a row was bound, false if rows were exhausted - */ - public boolean next() throws SQLException { - if (nextRowIndex >= root.getRowCount()) { - return false; - } - for (int i = 0; i < parameterIndices.length; i++) { - final int parameterIndex = parameterIndices[i]; - binders[i].bind(statement, parameterIndex, nextRowIndex); - } - nextRowIndex++; - return true; - } - - /** A builder for a {@link JdbcParameterBinder}. */ - public static class Builder { - private final PreparedStatement statement; - private final VectorSchemaRoot root; - private final Map bindings; - - Builder(PreparedStatement statement, VectorSchemaRoot root) { - this.statement = statement; - this.root = root; - this.bindings = new HashMap<>(); - } - - /** Bind each column to the corresponding parameter in order. */ - public Builder bindAll() { - for (int i = 0; i < root.getFieldVectors().size(); i++) { - bind(/*parameterIndex=*/ i + 1, /*columnIndex=*/ i); - } - return this; - } - - /** Bind the given parameter to the given column using the default binder. */ - public Builder bind(int parameterIndex, int columnIndex) { - return bind(parameterIndex, ColumnBinder.forVector(root.getVector(columnIndex))); - } - - /** Bind the given parameter using the given binder. */ - public Builder bind(int parameterIndex, ColumnBinder binder) { - Preconditions.checkArgument( - parameterIndex > 0, "parameterIndex %d must be positive", parameterIndex); - bindings.put(parameterIndex, binder); - return this; - } - - /** Build the binder. 
*/ - public JdbcParameterBinder build() { - ColumnBinder[] binders = new ColumnBinder[bindings.size()]; - int[] parameterIndices = new int[bindings.size()]; - int index = 0; - for (Map.Entry entry : bindings.entrySet()) { - binders[index] = entry.getValue(); - parameterIndices[index] = entry.getKey(); - index++; - } - return new JdbcParameterBinder(statement, root, binders, parameterIndices); - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java deleted file mode 100644 index 493e53056f945..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -import java.io.IOException; -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; - -/** - * Utility class to convert JDBC objects to columnar Arrow format objects. - * - *

This utility uses following data mapping to map JDBC/SQL datatype to Arrow data types. - * - *

CHAR --> ArrowType.Utf8 NCHAR --> ArrowType.Utf8 VARCHAR --> ArrowType.Utf8 NVARCHAR --> - * ArrowType.Utf8 LONGVARCHAR --> ArrowType.Utf8 LONGNVARCHAR --> ArrowType.Utf8 NUMERIC --> - * ArrowType.Decimal(precision, scale) DECIMAL --> ArrowType.Decimal(precision, scale) BIT --> - * ArrowType.Bool TINYINT --> ArrowType.Int(8, signed) SMALLINT --> ArrowType.Int(16, signed) - * INTEGER --> ArrowType.Int(32, signed) BIGINT --> ArrowType.Int(64, signed) REAL --> - * ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) FLOAT --> - * ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) DOUBLE --> - * ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) BINARY --> ArrowType.Binary VARBINARY --> - * ArrowType.Binary LONGVARBINARY --> ArrowType.Binary DATE --> ArrowType.Date(DateUnit.MILLISECOND) - * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32) TIMESTAMP --> - * ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null) CLOB --> ArrowType.Utf8 BLOB --> - * ArrowType.Binary - * - * @since 0.10.0 - */ -public class JdbcToArrow { - - /*----------------------------------------------------------------* - | | - | Partial Convert API | - | | - *----------------------------------------------------------------*/ - - /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow - * objects. Note here uses the default targetBatchSize = 1024. 
- * - * @param resultSet ResultSet to use to fetch the data from underlying database - * @param allocator Memory allocator - * @return Arrow Data Objects {@link ArrowVectorIterator} - * @throws SQLException on error - */ - public static ArrowVectorIterator sqlToArrowVectorIterator( - ResultSet resultSet, BufferAllocator allocator) throws SQLException, IOException { - Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null"); - - JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); - return sqlToArrowVectorIterator(resultSet, config); - } - - /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow - * objects. Note if not specify {@link JdbcToArrowConfig#targetBatchSize}, will use default value - * 1024. - * - * @param resultSet ResultSet to use to fetch the data from underlying database - * @param config Configuration of the conversion from JDBC to Arrow. - * @return Arrow Data Objects {@link ArrowVectorIterator} - * @throws SQLException on error - */ - public static ArrowVectorIterator sqlToArrowVectorIterator( - ResultSet resultSet, JdbcToArrowConfig config) throws SQLException, IOException { - Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); - Preconditions.checkNotNull(config, "The configuration cannot be null"); - return ArrowVectorIterator.create(resultSet, config); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java deleted file mode 100644 index 1bfcfc8fe00aa..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java +++ /dev/null @@ -1,405 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -import java.math.RoundingMode; -import java.util.Calendar; -import java.util.Map; -import java.util.function.Function; -import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** - * This class configures the JDBC-to-Arrow conversion process. - * - *

The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot}, and - * the calendar is used to define the time zone of any {@link - * org.apache.arrow.vector.types.pojo.ArrowType.Timestamp} fields that are created during the - * conversion. Neither field may be null. - * - *

If the includeMetadata flag is set, the Arrow field metadata will contain - * information from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the - * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding {@link - * org.apache.arrow.vector.FieldVector}. - * - *

If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the - * corresponding {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, - * the sub-type information cannot be retrieved from all JDBC implementations (H2 for example, - * returns {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The - * column index or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the - * conversion. - */ -public final class JdbcToArrowConfig { - - public static final int DEFAULT_TARGET_BATCH_SIZE = 1024; - public static final int NO_LIMIT_BATCH_SIZE = -1; - private final Calendar calendar; - private final BufferAllocator allocator; - private final boolean includeMetadata; - private final boolean reuseVectorSchemaRoot; - private final Map arraySubTypesByColumnIndex; - private final Map arraySubTypesByColumnName; - private final Map explicitTypesByColumnIndex; - private final Map explicitTypesByColumnName; - private final Map schemaMetadata; - private final Map> columnMetadataByColumnIndex; - private final RoundingMode bigDecimalRoundingMode; - /** - * The maximum rowCount to read each time when partially convert data. Default value is 1024 and - * -1 means disable partial read. default is -1 which means disable partial read. Note that this - * flag only useful for {@link JdbcToArrow#sqlToArrowVectorIterator} 1) if targetBatchSize != -1, - * it will convert full data into multiple vectors with valueCount no more than targetBatchSize. - * 2) if targetBatchSize == -1, it will convert full data into a single vector in {@link - * ArrowVectorIterator} - */ - private final int targetBatchSize; - - private final Function jdbcToArrowTypeConverter; - private final JdbcConsumerFactory jdbcConsumerGetter; - - /** - * Constructs a new configuration from the provided allocator and calendar. 
The allocator - * is used when constructing the Arrow vectors from the ResultSet, and the calendar is - * used to define Arrow Timestamp fields, and to read time-based fields from the JDBC - * ResultSet. - * - * @param allocator The memory allocator to construct the Arrow vectors with. - * @param calendar The calendar to use when constructing Timestamp fields and reading time-based - * results. - */ - JdbcToArrowConfig(BufferAllocator allocator, Calendar calendar) { - this( - allocator, - calendar, - /* include metadata */ false, - /* reuse vector schema root */ false, - /* array sub-types by column index */ null, - /* array sub-types by column name */ null, - DEFAULT_TARGET_BATCH_SIZE, - null, - null); - } - - JdbcToArrowConfig( - BufferAllocator allocator, - Calendar calendar, - boolean includeMetadata, - boolean reuseVectorSchemaRoot, - Map arraySubTypesByColumnIndex, - Map arraySubTypesByColumnName, - int targetBatchSize, - Function jdbcToArrowTypeConverter) { - this( - allocator, - calendar, - includeMetadata, - reuseVectorSchemaRoot, - arraySubTypesByColumnIndex, - arraySubTypesByColumnName, - targetBatchSize, - jdbcToArrowTypeConverter, - null); - } - - /** - * Constructs a new configuration from the provided allocator and calendar. The allocator - * is used when constructing the Arrow vectors from the ResultSet, and the calendar is - * used to define Arrow Timestamp fields, and to read time-based fields from the JDBC - * ResultSet. - * - * @param allocator The memory allocator to construct the Arrow vectors with. - * @param calendar The calendar to use when constructing Timestamp fields and reading time-based - * results. - * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field - * metadata. - * @param reuseVectorSchemaRoot Whether to reuse the vector schema root for each data load. - * @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based). 
- * @param arraySubTypesByColumnName The type of the JDBC array at the column name. - * @param targetBatchSize The target batch size to be used in preallocation of the resulting - * vectors. - * @param jdbcToArrowTypeConverter The function that maps JDBC field type information to arrow - * type. If set to null, the default mapping will be used, which is defined as: - *

    - *
  • CHAR --> ArrowType.Utf8 - *
  • NCHAR --> ArrowType.Utf8 - *
  • VARCHAR --> ArrowType.Utf8 - *
  • NVARCHAR --> ArrowType.Utf8 - *
  • LONGVARCHAR --> ArrowType.Utf8 - *
  • LONGNVARCHAR --> ArrowType.Utf8 - *
  • NUMERIC --> ArrowType.Decimal(precision, scale) - *
  • DECIMAL --> ArrowType.Decimal(precision, scale) - *
  • BIT --> ArrowType.Bool - *
  • TINYINT --> ArrowType.Int(8, signed) - *
  • SMALLINT --> ArrowType.Int(16, signed) - *
  • INTEGER --> ArrowType.Int(32, signed) - *
  • BIGINT --> ArrowType.Int(64, signed) - *
  • REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) - *
  • FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) - *
  • DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) - *
  • BINARY --> ArrowType.Binary - *
  • VARBINARY --> ArrowType.Binary - *
  • LONGVARBINARY --> ArrowType.Binary - *
  • DATE --> ArrowType.Date(DateUnit.DAY) - *
  • TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32) - *
  • TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone) - *
  • CLOB --> ArrowType.Utf8 - *
  • BLOB --> ArrowType.Binary - *
  • ARRAY --> ArrowType.List - *
  • STRUCT --> ArrowType.Struct - *
  • NULL --> ArrowType.Null - *
- * - * @param bigDecimalRoundingMode The java.math.RoundingMode to be used in coercion of a BigDecimal - * from a ResultSet having a scale which does not match that of the target vector. Use null - * (default value) to require strict scale matching. - */ - JdbcToArrowConfig( - BufferAllocator allocator, - Calendar calendar, - boolean includeMetadata, - boolean reuseVectorSchemaRoot, - Map arraySubTypesByColumnIndex, - Map arraySubTypesByColumnName, - int targetBatchSize, - Function jdbcToArrowTypeConverter, - RoundingMode bigDecimalRoundingMode) { - - this( - allocator, - calendar, - includeMetadata, - reuseVectorSchemaRoot, - arraySubTypesByColumnIndex, - arraySubTypesByColumnName, - targetBatchSize, - jdbcToArrowTypeConverter, - null, - null, - null, - null, - bigDecimalRoundingMode); - } - - JdbcToArrowConfig( - BufferAllocator allocator, - Calendar calendar, - boolean includeMetadata, - boolean reuseVectorSchemaRoot, - Map arraySubTypesByColumnIndex, - Map arraySubTypesByColumnName, - int targetBatchSize, - Function jdbcToArrowTypeConverter, - Map explicitTypesByColumnIndex, - Map explicitTypesByColumnName, - Map schemaMetadata, - Map> columnMetadataByColumnIndex, - RoundingMode bigDecimalRoundingMode) { - this( - allocator, - calendar, - includeMetadata, - reuseVectorSchemaRoot, - arraySubTypesByColumnIndex, - arraySubTypesByColumnName, - targetBatchSize, - jdbcToArrowTypeConverter, - null, - explicitTypesByColumnIndex, - explicitTypesByColumnName, - schemaMetadata, - columnMetadataByColumnIndex, - bigDecimalRoundingMode); - } - - JdbcToArrowConfig( - BufferAllocator allocator, - Calendar calendar, - boolean includeMetadata, - boolean reuseVectorSchemaRoot, - Map arraySubTypesByColumnIndex, - Map arraySubTypesByColumnName, - int targetBatchSize, - Function jdbcToArrowTypeConverter, - JdbcConsumerFactory jdbcConsumerGetter, - Map explicitTypesByColumnIndex, - Map explicitTypesByColumnName, - Map schemaMetadata, - Map> columnMetadataByColumnIndex, - 
RoundingMode bigDecimalRoundingMode) { - Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); - this.allocator = allocator; - this.calendar = calendar; - this.includeMetadata = includeMetadata; - this.reuseVectorSchemaRoot = reuseVectorSchemaRoot; - this.arraySubTypesByColumnIndex = arraySubTypesByColumnIndex; - this.arraySubTypesByColumnName = arraySubTypesByColumnName; - this.targetBatchSize = targetBatchSize; - this.explicitTypesByColumnIndex = explicitTypesByColumnIndex; - this.explicitTypesByColumnName = explicitTypesByColumnName; - this.schemaMetadata = schemaMetadata; - this.columnMetadataByColumnIndex = columnMetadataByColumnIndex; - this.bigDecimalRoundingMode = bigDecimalRoundingMode; - - // set up type converter - this.jdbcToArrowTypeConverter = - jdbcToArrowTypeConverter != null - ? jdbcToArrowTypeConverter - : (jdbcFieldInfo) -> JdbcToArrowUtils.getArrowTypeFromJdbcType(jdbcFieldInfo, calendar); - - this.jdbcConsumerGetter = - jdbcConsumerGetter != null ? jdbcConsumerGetter : JdbcToArrowUtils::getConsumer; - } - - /** - * The calendar to use when defining Arrow Timestamp fields and retrieving {@link java.sql.Date}, - * {@link java.sql.Time}, or {@link java.sql.Timestamp} data types from the {@link - * java.sql.ResultSet}, or null if not converting. - * - * @return the calendar. - */ - public Calendar getCalendar() { - return calendar; - } - - /** - * The Arrow memory allocator. - * - * @return the allocator. - */ - public BufferAllocator getAllocator() { - return allocator; - } - - /** - * Whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata. - * - * @return true to include field metadata, false to exclude it. - */ - public boolean shouldIncludeMetadata() { - return includeMetadata; - } - - /** Get the target batch size for partial read. */ - public int getTargetBatchSize() { - return targetBatchSize; - } - - /** Get whether it is allowed to reuse the vector schema root. 
*/ - public boolean isReuseVectorSchemaRoot() { - return reuseVectorSchemaRoot; - } - - /** Gets the mapping between JDBC type information to Arrow type. */ - public Function getJdbcToArrowTypeConverter() { - return jdbcToArrowTypeConverter; - } - - /** Gets the JDBC consumer getter. */ - public JdbcConsumerFactory getJdbcConsumerGetter() { - return jdbcConsumerGetter; - } - - /** - * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index. - * - * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link - * java.sql.Types#ARRAY} type. - * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not - * defined. - */ - public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) { - if (arraySubTypesByColumnIndex == null) { - return null; - } else { - return arraySubTypesByColumnIndex.get(index); - } - } - - /** - * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name. - * - * @param name The {@link java.sql.ResultSetMetaData} column name of an {@link - * java.sql.Types#ARRAY} type. - * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not - * defined. - */ - public JdbcFieldInfo getArraySubTypeByColumnName(String name) { - if (arraySubTypesByColumnName == null) { - return null; - } else { - return arraySubTypesByColumnName.get(name); - } - } - - /** - * Returns the type {@link JdbcFieldInfo} explicitly defined for the provided column index. - * - * @param index The {@link java.sql.ResultSetMetaData} column index to evaluate for explicit type - * mapping. - * @return The {@link JdbcFieldInfo} defined for the column, or null if not defined. - */ - public JdbcFieldInfo getExplicitTypeByColumnIndex(int index) { - if (explicitTypesByColumnIndex == null) { - return null; - } else { - return explicitTypesByColumnIndex.get(index); - } - } - - /** - * Returns the type {@link JdbcFieldInfo} explicitly defined for the provided column name. 
- * - * @param name The {@link java.sql.ResultSetMetaData} column name to evaluate for explicit type - * mapping. - * @return The {@link JdbcFieldInfo} defined for the column, or null if not defined. - */ - public JdbcFieldInfo getExplicitTypeByColumnName(String name) { - if (explicitTypesByColumnName == null) { - return null; - } else { - return explicitTypesByColumnName.get(name); - } - } - - /** Return schema level metadata or null if not provided. */ - public Map getSchemaMetadata() { - return schemaMetadata; - } - - /** Return metadata from columnIndex->meta map on per field basis or null if not provided. */ - public Map> getColumnMetadataByColumnIndex() { - return columnMetadataByColumnIndex; - } - - public RoundingMode getBigDecimalRoundingMode() { - return bigDecimalRoundingMode; - } - - /** Interface for a function that gets a JDBC consumer for the given values. */ - @FunctionalInterface - public interface JdbcConsumerFactory { - JdbcConsumer apply( - ArrowType arrowType, - int columnIndex, - boolean nullable, - FieldVector vector, - JdbcToArrowConfig config); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java deleted file mode 100644 index ea9ffe55d334a..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -import static org.apache.arrow.adapter.jdbc.JdbcToArrowConfig.DEFAULT_TARGET_BATCH_SIZE; - -import java.math.RoundingMode; -import java.util.Calendar; -import java.util.Map; -import java.util.function.Function; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** This class builds {@link JdbcToArrowConfig}s. */ -public class JdbcToArrowConfigBuilder { - private Calendar calendar; - private BufferAllocator allocator; - private boolean includeMetadata; - private boolean reuseVectorSchemaRoot; - private Map arraySubTypesByColumnIndex; - private Map arraySubTypesByColumnName; - private Map explicitTypesByColumnIndex; - private Map explicitTypesByColumnName; - private Map schemaMetadata; - private Map> columnMetadataByColumnIndex; - private int targetBatchSize; - private Function jdbcToArrowTypeConverter; - private JdbcToArrowConfig.JdbcConsumerFactory jdbcConsumerGetter; - private RoundingMode bigDecimalRoundingMode; - - /** - * Default constructor for the JdbcToArrowConfigBuilder}. Use the setter methods for - * the allocator and calendar; the allocator must be set. Otherwise, {@link #build()} will throw a - * {@link NullPointerException}. 
- */ - public JdbcToArrowConfigBuilder() { - this.allocator = null; - this.calendar = null; - this.includeMetadata = false; - this.reuseVectorSchemaRoot = false; - this.arraySubTypesByColumnIndex = null; - this.arraySubTypesByColumnName = null; - this.explicitTypesByColumnIndex = null; - this.explicitTypesByColumnName = null; - this.schemaMetadata = null; - this.columnMetadataByColumnIndex = null; - this.bigDecimalRoundingMode = null; - } - - /** - * Constructor for the JdbcToArrowConfigBuilder. The allocator is required, and a - * {@link NullPointerException} will be thrown if it is null. - * - *

The allocator is used to construct Arrow vectors from the JDBC ResultSet. The calendar is - * used to determine the time zone of {@link java.sql.Timestamp} fields and convert {@link - * java.sql.Date}, {@link java.sql.Time}, and {@link java.sql.Timestamp} fields to a single, - * common time zone when reading from the result set. - * - * @param allocator The Arrow Vector memory allocator. - * @param calendar The calendar to use when constructing timestamp fields. - */ - public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar) { - this(); - - Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); - - this.allocator = allocator; - this.calendar = calendar; - this.includeMetadata = false; - this.reuseVectorSchemaRoot = false; - this.targetBatchSize = DEFAULT_TARGET_BATCH_SIZE; - } - - /** - * Constructor for the JdbcToArrowConfigBuilder. Both the allocator and calendar are - * required. A {@link NullPointerException} will be thrown if either of those arguments is - * null. - * - *

The allocator is used to construct Arrow vectors from the JDBC ResultSet. The calendar is - * used to determine the time zone of {@link java.sql.Timestamp} fields and convert {@link - * java.sql.Date}, {@link java.sql.Time}, and {@link java.sql.Timestamp} fields to a single, - * common time zone when reading from the result set. - * - *

The includeMetadata argument, if true will cause various - * information about each database field to be added to the Vector Schema's field metadata. - * - * @param allocator The Arrow Vector memory allocator. - * @param calendar The calendar to use when constructing timestamp fields. - */ - public JdbcToArrowConfigBuilder( - BufferAllocator allocator, Calendar calendar, boolean includeMetadata) { - this(allocator, calendar); - this.includeMetadata = includeMetadata; - } - - /** - * Sets the memory allocator to use when constructing the Arrow vectors from the ResultSet. - * - * @param allocator the allocator to set. - * @exception NullPointerException if allocator is null. - */ - public JdbcToArrowConfigBuilder setAllocator(BufferAllocator allocator) { - Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); - this.allocator = allocator; - return this; - } - - /** - * Sets the {@link Calendar} to use when constructing timestamp fields in the Arrow schema, and - * reading time-based fields from the JDBC ResultSet. - * - * @param calendar the calendar to set. - */ - public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) { - this.calendar = calendar; - return this; - } - - /** - * Sets whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata. - * - * @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field - * metadata. - * @return This instance of the JdbcToArrowConfig, for chaining. - */ - public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { - this.includeMetadata = includeMetadata; - return this; - } - - /** - * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link - * java.sql.Types#ARRAY}. The column index is 1-based, to match the JDBC column index. - * - * @param map The mapping. - * @return This instance of the JdbcToArrowConfig, for chaining. 
- */ - public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(Map map) { - this.arraySubTypesByColumnIndex = map; - return this; - } - - /** - * Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for columns of type {@link - * java.sql.Types#ARRAY}. - * - * @param map The mapping. - * @return This instance of the JdbcToArrowConfig, for chaining. - */ - public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map map) { - this.arraySubTypesByColumnName = map; - return this; - } - - /** - * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for column types. - * - *

This can be useful to override type information from JDBC drivers that provide incomplete - * type info, e.g. DECIMAL with precision = scale = 0. - * - *

The column index is 1-based, to match the JDBC column index. - * - * @param map The mapping. - */ - public JdbcToArrowConfigBuilder setExplicitTypesByColumnIndex(Map map) { - this.explicitTypesByColumnIndex = map; - return this; - } - - /** - * Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for column types. - * - *

This can be useful to override type information from JDBC drivers that provide incomplete - * type info, e.g. DECIMAL with precision = scale = 0. - * - * @param map The mapping. - */ - public JdbcToArrowConfigBuilder setExplicitTypesByColumnName(Map map) { - this.explicitTypesByColumnName = map; - return this; - } - - /** - * Set the target number of rows to convert at once. - * - *

Use {@link JdbcToArrowConfig#NO_LIMIT_BATCH_SIZE} to read all rows at once. - */ - public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) { - this.targetBatchSize = targetBatchSize; - return this; - } - - /** - * Set the function used to convert JDBC types to Arrow types. - * - *

Defaults to wrapping {@link JdbcToArrowUtils#getArrowTypeFromJdbcType(JdbcFieldInfo, - * Calendar)}. - * - * @see JdbcToArrowUtils#reportUnsupportedTypesAsUnknown(Function) - */ - public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter( - Function jdbcToArrowTypeConverter) { - this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter; - return this; - } - - /** - * Set the function used to get a JDBC consumer for a given type. - * - *

Defaults to wrapping {@link JdbcToArrowUtils#getConsumer(ArrowType, Integer, Boolean, - * FieldVector, JdbcToArrowConfig)}. - */ - public JdbcToArrowConfigBuilder setJdbcConsumerGetter( - JdbcToArrowConfig.JdbcConsumerFactory jdbcConsumerGetter) { - this.jdbcConsumerGetter = jdbcConsumerGetter; - return this; - } - - /** - * Set whether to use the same {@link org.apache.arrow.vector.VectorSchemaRoot} instance on each - * iteration, or to allocate a new one. - */ - public JdbcToArrowConfigBuilder setReuseVectorSchemaRoot(boolean reuseVectorSchemaRoot) { - this.reuseVectorSchemaRoot = reuseVectorSchemaRoot; - return this; - } - - /** Set metadata for schema. */ - public JdbcToArrowConfigBuilder setSchemaMetadata(Map schemaMetadata) { - this.schemaMetadata = schemaMetadata; - return this; - } - - /** Set metadata from columnIndex->meta map on per field basis. */ - public JdbcToArrowConfigBuilder setColumnMetadataByColumnIndex( - Map> columnMetadataByColumnIndex) { - this.columnMetadataByColumnIndex = columnMetadataByColumnIndex; - return this; - } - - /** - * Set the rounding mode used when the scale of the actual value does not match the declared - * scale. - * - *

By default, an error is raised in such cases. - */ - public JdbcToArrowConfigBuilder setBigDecimalRoundingMode(RoundingMode bigDecimalRoundingMode) { - this.bigDecimalRoundingMode = bigDecimalRoundingMode; - return this; - } - - /** - * This builds the {@link JdbcToArrowConfig} from the provided {@link BufferAllocator} and {@link - * Calendar}. - * - * @return The built {@link JdbcToArrowConfig} - * @throws NullPointerException if either the allocator or calendar was not set. - */ - public JdbcToArrowConfig build() { - return new JdbcToArrowConfig( - allocator, - calendar, - includeMetadata, - reuseVectorSchemaRoot, - arraySubTypesByColumnIndex, - arraySubTypesByColumnName, - targetBatchSize, - jdbcToArrowTypeConverter, - jdbcConsumerGetter, - explicitTypesByColumnIndex, - explicitTypesByColumnName, - schemaMetadata, - columnMetadataByColumnIndex, - bigDecimalRoundingMode); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java deleted file mode 100644 index aecb734a8bbf7..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java +++ /dev/null @@ -1,567 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; -import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; -import static org.apache.arrow.vector.types.Types.MinorType; - -import java.io.IOException; -import java.math.RoundingMode; -import java.sql.Date; -import java.sql.ParameterMetaData; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Time; -import java.sql.Timestamp; -import java.sql.Types; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Calendar; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.TimeZone; -import java.util.function.Function; -import org.apache.arrow.adapter.jdbc.consumer.ArrayConsumer; -import org.apache.arrow.adapter.jdbc.consumer.BigIntConsumer; -import org.apache.arrow.adapter.jdbc.consumer.BinaryConsumer; -import org.apache.arrow.adapter.jdbc.consumer.BitConsumer; -import org.apache.arrow.adapter.jdbc.consumer.CompositeJdbcConsumer; -import org.apache.arrow.adapter.jdbc.consumer.DateConsumer; -import org.apache.arrow.adapter.jdbc.consumer.Decimal256Consumer; -import org.apache.arrow.adapter.jdbc.consumer.DecimalConsumer; -import org.apache.arrow.adapter.jdbc.consumer.DoubleConsumer; -import org.apache.arrow.adapter.jdbc.consumer.FloatConsumer; -import org.apache.arrow.adapter.jdbc.consumer.IntConsumer; -import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer; -import org.apache.arrow.adapter.jdbc.consumer.MapConsumer; -import org.apache.arrow.adapter.jdbc.consumer.NullConsumer; -import org.apache.arrow.adapter.jdbc.consumer.SmallIntConsumer; -import org.apache.arrow.adapter.jdbc.consumer.TimeConsumer; -import org.apache.arrow.adapter.jdbc.consumer.TimestampConsumer; -import 
org.apache.arrow.adapter.jdbc.consumer.TimestampTZConsumer; -import org.apache.arrow.adapter.jdbc.consumer.TinyIntConsumer; -import org.apache.arrow.adapter.jdbc.consumer.VarCharConsumer; -import org.apache.arrow.adapter.jdbc.consumer.exceptions.JdbcConsumerException; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.extension.OpaqueType; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.ValueVectorUtility; - -/** - * Class that does most of the work to convert JDBC ResultSet data into Arrow columnar format Vector - * objects. 
- * - * @since 0.10.0 - */ -public class JdbcToArrowUtils { - - private static final int JDBC_ARRAY_VALUE_COLUMN = 2; - - /** Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale. */ - public static Calendar getUtcCalendar() { - return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); - } - - /** - * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. - * - * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. - * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from. - * @return {@link Schema} - * @throws SQLException on error - */ - public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) - throws SQLException { - Preconditions.checkNotNull(calendar, "Calendar object can't be null"); - - return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar)); - } - - /** - * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. - * - * @param parameterMetaData The ResultSetMetaData containing the results, to read the JDBC - * metadata from. - * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from. 
- * @return {@link Schema} - * @throws SQLException on error - */ - public static Schema jdbcToArrowSchema( - final ParameterMetaData parameterMetaData, final Calendar calendar) throws SQLException { - Preconditions.checkNotNull(calendar, "Calendar object can't be null"); - Preconditions.checkNotNull(parameterMetaData); - final List parameterFields = new ArrayList<>(parameterMetaData.getParameterCount()); - for (int parameterCounter = 1; - parameterCounter <= parameterMetaData.getParameterCount(); - parameterCounter++) { - final int jdbcDataType = parameterMetaData.getParameterType(parameterCounter); - final int jdbcIsNullable = parameterMetaData.isNullable(parameterCounter); - final boolean arrowIsNullable = jdbcIsNullable != ParameterMetaData.parameterNoNulls; - final int precision = parameterMetaData.getPrecision(parameterCounter); - final int scale = parameterMetaData.getScale(parameterCounter); - final ArrowType arrowType = - getArrowTypeFromJdbcType(new JdbcFieldInfo(jdbcDataType, precision, scale), calendar); - final FieldType fieldType = new FieldType(arrowIsNullable, arrowType, /*dictionary=*/ null); - parameterFields.add(new Field(null, fieldType, null)); - } - - return new Schema(parameterFields); - } - - /** - * Converts the provided JDBC type to its respective {@link ArrowType} counterpart. - * - * @param fieldInfo the {@link JdbcFieldInfo} with information about the original JDBC type. - * @param calendar the {@link Calendar} to use for datetime data types. - * @return a new {@link ArrowType}. 
- */ - public static ArrowType getArrowTypeFromJdbcType( - final JdbcFieldInfo fieldInfo, final Calendar calendar) { - switch (fieldInfo.getJdbcType()) { - case Types.BOOLEAN: - case Types.BIT: - return new ArrowType.Bool(); - case Types.TINYINT: - return new ArrowType.Int(8, true); - case Types.SMALLINT: - return new ArrowType.Int(16, true); - case Types.INTEGER: - return new ArrowType.Int(32, true); - case Types.BIGINT: - return new ArrowType.Int(64, true); - case Types.NUMERIC: - case Types.DECIMAL: - int precision = fieldInfo.getPrecision(); - int scale = fieldInfo.getScale(); - if (precision > 38) { - return new ArrowType.Decimal(precision, scale, 256); - } else { - return new ArrowType.Decimal(precision, scale, 128); - } - case Types.REAL: - case Types.FLOAT: - return new ArrowType.FloatingPoint(SINGLE); - case Types.DOUBLE: - return new ArrowType.FloatingPoint(DOUBLE); - case Types.CHAR: - case Types.NCHAR: - case Types.VARCHAR: - case Types.NVARCHAR: - case Types.LONGVARCHAR: - case Types.LONGNVARCHAR: - case Types.CLOB: - return new ArrowType.Utf8(); - case Types.DATE: - return new ArrowType.Date(DateUnit.DAY); - case Types.TIME: - return new ArrowType.Time(TimeUnit.MILLISECOND, 32); - case Types.TIMESTAMP: - final String timezone; - if (calendar != null) { - timezone = calendar.getTimeZone().getID(); - } else { - timezone = null; - } - return new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone); - case Types.BINARY: - case Types.VARBINARY: - case Types.LONGVARBINARY: - case Types.BLOB: - return new ArrowType.Binary(); - case Types.ARRAY: - return new ArrowType.List(); - case Types.NULL: - return new ArrowType.Null(); - case Types.STRUCT: - return new ArrowType.Struct(); - default: - throw new UnsupportedOperationException("Unmapped JDBC type: " + fieldInfo.getJdbcType()); - } - } - - /** - * Wrap a JDBC to Arrow type converter such that {@link UnsupportedOperationException} becomes - * {@link OpaqueType}. 
- * - * @param typeConverter The type converter to wrap. - * @param vendorName The database name to report as the Opaque type's vendor name. - */ - public static Function reportUnsupportedTypesAsOpaque( - Function typeConverter, String vendorName) { - return (final JdbcFieldInfo fieldInfo) -> { - try { - return typeConverter.apply(fieldInfo); - } catch (UnsupportedOperationException e) { - return new OpaqueType(MinorType.NULL.getType(), fieldInfo.getTypeName(), vendorName); - } - }; - } - - /** - * Create Arrow {@link Schema} object for the given JDBC {@link java.sql.ResultSetMetaData}. - * - *

If {@link JdbcToArrowConfig#shouldIncludeMetadata()} returns true, the - * following fields will be added to the {@link FieldType#getMetadata()}: - * - *

    - *
  • {@link Constants#SQL_CATALOG_NAME_KEY} representing {@link - * ResultSetMetaData#getCatalogName(int)} - *
  • {@link Constants#SQL_TABLE_NAME_KEY} representing {@link - * ResultSetMetaData#getTableName(int)} - *
  • {@link Constants#SQL_COLUMN_NAME_KEY} representing {@link - * ResultSetMetaData#getColumnLabel(int)} - *
  • {@link Constants#SQL_TYPE_KEY} representing {@link - * ResultSetMetaData#getColumnTypeName(int)} - *
- * - *

If any columns are of type {@link java.sql.Types#ARRAY}, the configuration object will be - * used to look up the array sub-type field. The {@link - * JdbcToArrowConfig#getArraySubTypeByColumnIndex(int)} method will be checked first, followed by - * the {@link JdbcToArrowConfig#getArraySubTypeByColumnName(String)} method. - * - * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. - * @param config The configuration to use when constructing the schema. - * @return {@link Schema} - * @throws SQLException on error - * @throws IllegalArgumentException if rsmd contains an {@link java.sql.Types#ARRAY} - * but the config does not have a sub-type definition for it. - */ - public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) - throws SQLException { - Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); - Preconditions.checkNotNull(config, "The configuration object must not be null"); - - List fields = new ArrayList<>(); - int columnCount = rsmd.getColumnCount(); - for (int i = 1; i <= columnCount; i++) { - final String columnName = rsmd.getColumnLabel(i); - - final Map columnMetadata = - config.getColumnMetadataByColumnIndex() != null - ? 
config.getColumnMetadataByColumnIndex().get(i) - : null; - final Map metadata; - if (config.shouldIncludeMetadata()) { - metadata = new HashMap<>(); - metadata.put(Constants.SQL_CATALOG_NAME_KEY, rsmd.getCatalogName(i)); - metadata.put(Constants.SQL_SCHEMA_NAME_KEY, rsmd.getSchemaName(i)); - metadata.put(Constants.SQL_TABLE_NAME_KEY, rsmd.getTableName(i)); - metadata.put(Constants.SQL_COLUMN_NAME_KEY, columnName); - metadata.put(Constants.SQL_TYPE_KEY, rsmd.getColumnTypeName(i)); - if (columnMetadata != null && !columnMetadata.isEmpty()) { - metadata.putAll(columnMetadata); - } - } else { - if (columnMetadata != null && !columnMetadata.isEmpty()) { - metadata = columnMetadata; - } else { - metadata = null; - } - } - - final JdbcFieldInfo columnFieldInfo = getJdbcFieldInfoForColumn(rsmd, i, config); - final ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo); - if (arrowType != null) { - final FieldType fieldType = - new FieldType( - isColumnNullable(rsmd, i, columnFieldInfo), - arrowType, /* dictionary encoding */ - null, - metadata); - - List children = null; - if (arrowType.getTypeID() == ArrowType.List.TYPE_TYPE) { - final JdbcFieldInfo arrayFieldInfo = getJdbcFieldInfoForArraySubType(rsmd, i, config); - if (arrayFieldInfo == null) { - throw new IllegalArgumentException( - "Configuration does not provide a mapping for array column " + i); - } - children = new ArrayList(); - final ArrowType childType = config.getJdbcToArrowTypeConverter().apply(arrayFieldInfo); - children.add(new Field("child", FieldType.nullable(childType), null)); - } else if (arrowType.getTypeID() == ArrowType.ArrowTypeID.Map) { - FieldType mapType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - FieldType keyType = new FieldType(false, new ArrowType.Utf8(), null, null); - FieldType valueType = new FieldType(false, new ArrowType.Utf8(), null, null); - children = new ArrayList<>(); - children.add( - new Field( - "child", - mapType, - Arrays.asList( 
- new Field(MapVector.KEY_NAME, keyType, null), - new Field(MapVector.VALUE_NAME, valueType, null)))); - } - - fields.add(new Field(columnName, fieldType, children)); - } - } - return new Schema(fields, config.getSchemaMetadata()); - } - - static JdbcFieldInfo getJdbcFieldInfoForColumn( - ResultSetMetaData rsmd, int arrayColumn, JdbcToArrowConfig config) throws SQLException { - Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null"); - Preconditions.checkNotNull(config, "Configuration must not be null"); - Preconditions.checkArgument( - arrayColumn > 0, "ResultSetMetaData columns start with 1; column cannot be less than 1"); - Preconditions.checkArgument( - arrayColumn <= rsmd.getColumnCount(), - "Column number cannot be more than the number of columns"); - - JdbcFieldInfo fieldInfo = config.getExplicitTypeByColumnIndex(arrayColumn); - if (fieldInfo == null) { - fieldInfo = config.getExplicitTypeByColumnName(rsmd.getColumnLabel(arrayColumn)); - } - if (fieldInfo != null) { - return fieldInfo; - } - return new JdbcFieldInfo(rsmd, arrayColumn); - } - - /* Uses the configuration to determine what the array sub-type JdbcFieldInfo is. - * If no sub-type can be found, returns null. 
- */ - private static JdbcFieldInfo getJdbcFieldInfoForArraySubType( - ResultSetMetaData rsmd, int arrayColumn, JdbcToArrowConfig config) throws SQLException { - - Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null"); - Preconditions.checkNotNull(config, "Configuration must not be null"); - Preconditions.checkArgument( - arrayColumn > 0, "ResultSetMetaData columns start with 1; column cannot be less than 1"); - Preconditions.checkArgument( - arrayColumn <= rsmd.getColumnCount(), - "Column number cannot be more than the number of columns"); - - JdbcFieldInfo fieldInfo = config.getArraySubTypeByColumnIndex(arrayColumn); - if (fieldInfo == null) { - fieldInfo = config.getArraySubTypeByColumnName(rsmd.getColumnLabel(arrayColumn)); - } - return fieldInfo; - } - - /** - * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate - * the given Arrow Vector objects. - * - * @param rs ResultSet to use to fetch the data from underlying database - * @param root Arrow {@link VectorSchemaRoot} object to populate - * @param calendar The calendar to use when reading {@link Date}, {@link Time}, or {@link - * Timestamp} data types from the {@link ResultSet}, or null if not converting. 
- * @throws SQLException on error - */ - public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar) - throws SQLException, IOException { - - Preconditions.checkNotNull(calendar, "Calendar object can't be null"); - - jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar)); - } - - static boolean isColumnNullable( - ResultSetMetaData resultSetMetadata, int index, JdbcFieldInfo info) throws SQLException { - int nullableValue; - if (info != null && info.isNullable() != ResultSetMetaData.columnNullableUnknown) { - nullableValue = info.isNullable(); - } else { - nullableValue = resultSetMetadata.isNullable(index); - } - return nullableValue == ResultSetMetaData.columnNullable - || nullableValue == ResultSetMetaData.columnNullableUnknown; - } - - /** - * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate - * the given Arrow Vector objects. - * - * @param rs ResultSet to use to fetch the data from underlying database - * @param root Arrow {@link VectorSchemaRoot} object to populate - * @param config The configuration to use when reading the data. 
- * @throws SQLException on error - * @throws JdbcConsumerException on error from VectorConsumer - */ - public static void jdbcToArrowVectors( - ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config) - throws SQLException, IOException { - - ResultSetMetaData rsmd = rs.getMetaData(); - int columnCount = rsmd.getColumnCount(); - - JdbcConsumer[] consumers = new JdbcConsumer[columnCount]; - for (int i = 1; i <= columnCount; i++) { - FieldVector vector = root.getVector(rsmd.getColumnLabel(i)); - final JdbcFieldInfo columnFieldInfo = getJdbcFieldInfoForColumn(rsmd, i, config); - consumers[i - 1] = - getConsumer( - vector.getField().getType(), - i, - isColumnNullable(rsmd, i, columnFieldInfo), - vector, - config); - } - - CompositeJdbcConsumer compositeConsumer = null; - // Only clean resources when occurs error, - // vectors within consumers are useful and users are responsible for its close. - try { - compositeConsumer = new CompositeJdbcConsumer(consumers); - int readRowCount = 0; - if (config.getTargetBatchSize() == JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) { - while (rs.next()) { - ValueVectorUtility.ensureCapacity(root, readRowCount + 1); - compositeConsumer.consume(rs); - readRowCount++; - } - } else { - while (readRowCount < config.getTargetBatchSize() && rs.next()) { - compositeConsumer.consume(rs); - readRowCount++; - } - } - - root.setRowCount(readRowCount); - } catch (Exception e) { - // error occurs and clean up resources. - if (compositeConsumer != null) { - compositeConsumer.close(); - } - throw e; - } - } - - /** - * Default function used for JdbcConsumerFactory. This function gets a JdbcConsumer for the given - * column based on the Arrow type and provided vector. - * - * @param arrowType Arrow type for the column. 
- * @param columnIndex Column index to fetch from the ResultSet - * @param nullable Whether the value is nullable or not - * @param vector Vector to store the consumed value - * @param config Associated JdbcToArrowConfig, used mainly for the Calendar. - * @return {@link JdbcConsumer} - */ - public static JdbcConsumer getConsumer( - ArrowType arrowType, - int columnIndex, - boolean nullable, - FieldVector vector, - JdbcToArrowConfig config) { - final Calendar calendar = config.getCalendar(); - - switch (arrowType.getTypeID()) { - case Bool: - return BitConsumer.createConsumer((BitVector) vector, columnIndex, nullable); - case Int: - switch (((ArrowType.Int) arrowType).getBitWidth()) { - case 8: - return TinyIntConsumer.createConsumer((TinyIntVector) vector, columnIndex, nullable); - case 16: - return SmallIntConsumer.createConsumer((SmallIntVector) vector, columnIndex, nullable); - case 32: - return IntConsumer.createConsumer((IntVector) vector, columnIndex, nullable); - case 64: - return BigIntConsumer.createConsumer((BigIntVector) vector, columnIndex, nullable); - default: - return null; - } - case Decimal: - final RoundingMode bigDecimalRoundingMode = config.getBigDecimalRoundingMode(); - if (((ArrowType.Decimal) arrowType).getBitWidth() == 256) { - return Decimal256Consumer.createConsumer( - (Decimal256Vector) vector, columnIndex, nullable, bigDecimalRoundingMode); - } else { - return DecimalConsumer.createConsumer( - (DecimalVector) vector, columnIndex, nullable, bigDecimalRoundingMode); - } - case FloatingPoint: - switch (((ArrowType.FloatingPoint) arrowType).getPrecision()) { - case SINGLE: - return FloatConsumer.createConsumer((Float4Vector) vector, columnIndex, nullable); - case DOUBLE: - return DoubleConsumer.createConsumer((Float8Vector) vector, columnIndex, nullable); - default: - return null; - } - case Utf8: - case LargeUtf8: - return VarCharConsumer.createConsumer((VarCharVector) vector, columnIndex, nullable); - case Binary: - case LargeBinary: - 
return BinaryConsumer.createConsumer((VarBinaryVector) vector, columnIndex, nullable); - case Date: - return DateConsumer.createConsumer((DateDayVector) vector, columnIndex, nullable, calendar); - case Time: - return TimeConsumer.createConsumer( - (TimeMilliVector) vector, columnIndex, nullable, calendar); - case Timestamp: - if (config.getCalendar() == null) { - return TimestampConsumer.createConsumer( - (TimeStampMilliVector) vector, columnIndex, nullable); - } else { - return TimestampTZConsumer.createConsumer( - (TimeStampMilliTZVector) vector, columnIndex, nullable, calendar); - } - case List: - FieldVector childVector = ((ListVector) vector).getDataVector(); - JdbcConsumer delegate = - getConsumer( - childVector.getField().getType(), - JDBC_ARRAY_VALUE_COLUMN, - childVector.getField().isNullable(), - childVector, - config); - return ArrayConsumer.createConsumer((ListVector) vector, delegate, columnIndex, nullable); - case Map: - return MapConsumer.createConsumer((MapVector) vector, columnIndex, nullable); - case Null: - return new NullConsumer((NullVector) vector); - default: - // no-op, shouldn't get here - throw new UnsupportedOperationException("No consumer for Arrow type: " + arrowType); - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java deleted file mode 100644 index d7b62c43acf6f..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import org.apache.arrow.vector.FieldVector; - -/** - * Base class for ColumnBinder implementations. - * - * @param The concrete FieldVector subtype. - */ -public abstract class BaseColumnBinder implements ColumnBinder { - protected final V vector; - protected final int jdbcType; - - public BaseColumnBinder(V vector, int jdbcType) { - this.vector = vector; - this.jdbcType = jdbcType; - } - - @Override - public int getJdbcType() { - return jdbcType; - } - - @Override - public V getVector() { - return vector; - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java deleted file mode 100644 index b9dfcb0d6c956..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import org.apache.arrow.vector.BigIntVector; - -/** A column binder for 8-bit integers. */ -public class BigIntBinder extends BaseColumnBinder { - public BigIntBinder(BigIntVector vector) { - this(vector, Types.BIGINT); - } - - public BigIntBinder(BigIntVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - final long value = vector.getDataBuffer().getLong((long) rowIndex * BigIntVector.TYPE_WIDTH); - statement.setLong(parameterIndex, value); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java deleted file mode 100644 index c9db194f652ff..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import org.apache.arrow.vector.BitVector; - -/** A column binder for booleans. */ -public class BitBinder extends BaseColumnBinder { - public BitBinder(BitVector vector) { - this(vector, Types.BOOLEAN); - } - - public BitBinder(BitVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - // See BitVector#getBit - final int byteIndex = rowIndex >> 3; - final byte b = vector.getDataBuffer().getByte(byteIndex); - final int bitIndex = rowIndex & 7; - final int value = (b >> bitIndex) & 0x01; - statement.setBoolean(parameterIndex, value != 0); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java deleted file mode 100644 index c38db68234ecf..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import org.apache.arrow.vector.FieldVector; - -/** A helper to bind values from a wrapped Arrow vector to a JDBC PreparedStatement. */ -public interface ColumnBinder { - /** - * Bind the given row to the given parameter. - * - * @param statement The statement to bind to. - * @param parameterIndex The parameter to bind to (1-indexed) - * @param rowIndex The row to bind values from (0-indexed) - * @throws SQLException if an error occurs - */ - void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException; - - /** - * Get the JDBC type code used by this binder. - * - * @return A type code from {@link java.sql.Types}. - */ - int getJdbcType(); - - /** Get the vector used by this binder. */ - FieldVector getVector(); - - /** Create a column binder for a vector, using the default JDBC type code for null values. */ - static ColumnBinder forVector(FieldVector vector) { - return forVector(vector, /*jdbcType*/ null); - } - - /** - * Create a column binder for a vector, overriding the JDBC type code used for null values. - * - * @param vector The vector that the column binder will wrap. - * @param jdbcType The JDBC type code to use (or null to use the default). 
- */ - static ColumnBinder forVector(FieldVector vector, Integer jdbcType) { - final ColumnBinder binder = - vector.getField().getType().accept(new ColumnBinderArrowTypeVisitor(vector, jdbcType)); - if (vector.getField().isNullable()) { - return new NullableColumnBinder(binder); - } - return binder; - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java deleted file mode 100644 index a3d615a7e1958..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java +++ /dev/null @@ -1,297 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.Types; -import java.time.ZoneId; -import java.util.Calendar; -import java.util.TimeZone; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** - * Visitor to create the base ColumnBinder for a vector. - * - *

To handle null values, wrap the returned binder in a {@link NullableColumnBinder}. - */ -public class ColumnBinderArrowTypeVisitor implements ArrowType.ArrowTypeVisitor { - private final FieldVector vector; - private final Integer jdbcType; - - /** - * Create a binder using a custom JDBC type code. - * - * @param vector The vector that the binder will wrap. - * @param jdbcType The JDBC type code (or null to use the default). - */ - public ColumnBinderArrowTypeVisitor(FieldVector vector, Integer jdbcType) { - this.vector = vector; - this.jdbcType = jdbcType; - } - - @Override - public ColumnBinder visit(ArrowType.Null type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.Struct type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.List type) { - return new ListBinder((ListVector) vector); - } - - @Override - public ColumnBinder visit(ArrowType.LargeList type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.FixedSizeList type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.Union type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.RunEndEncoded type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.Map type) { - return new MapBinder((MapVector) vector); - } - - @Override - public ColumnBinder visit(ArrowType.Int type) { - if (!type.getIsSigned()) { - throw new UnsupportedOperationException( - "No column binder implemented for unsigned type " + type); - } - switch 
(type.getBitWidth()) { - case 8: - return jdbcType == null - ? new TinyIntBinder((TinyIntVector) vector) - : new TinyIntBinder((TinyIntVector) vector, jdbcType); - case 16: - return jdbcType == null - ? new SmallIntBinder((SmallIntVector) vector) - : new SmallIntBinder((SmallIntVector) vector, jdbcType); - case 32: - return jdbcType == null - ? new IntBinder((IntVector) vector) - : new IntBinder((IntVector) vector, jdbcType); - case 64: - return jdbcType == null - ? new BigIntBinder((BigIntVector) vector) - : new BigIntBinder((BigIntVector) vector, jdbcType); - default: - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - } - - @Override - public ColumnBinder visit(ArrowType.FloatingPoint type) { - switch (type.getPrecision()) { - case SINGLE: - return jdbcType == null - ? new Float4Binder((Float4Vector) vector) - : new Float4Binder((Float4Vector) vector, jdbcType); - case DOUBLE: - return jdbcType == null - ? new Float8Binder((Float8Vector) vector) - : new Float8Binder((Float8Vector) vector, jdbcType); - default: - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - } - - @Override - public ColumnBinder visit(ArrowType.Utf8 type) { - VarCharVector varChar = (VarCharVector) vector; - return jdbcType == null - ? new VarCharBinder<>(varChar, Types.VARCHAR) - : new VarCharBinder<>(varChar, jdbcType); - } - - @Override - public ColumnBinder visit(ArrowType.Utf8View type) { - throw new UnsupportedOperationException( - "Column binder implemented for type " + type + " is not supported"); - } - - @Override - public ColumnBinder visit(ArrowType.LargeUtf8 type) { - LargeVarCharVector varChar = (LargeVarCharVector) vector; - return jdbcType == null - ? 
new VarCharBinder<>(varChar, Types.LONGVARCHAR) - : new VarCharBinder<>(varChar, jdbcType); - } - - @Override - public ColumnBinder visit(ArrowType.Binary type) { - VarBinaryVector varBinary = (VarBinaryVector) vector; - return jdbcType == null - ? new VarBinaryBinder<>(varBinary, Types.VARBINARY) - : new VarBinaryBinder<>(varBinary, jdbcType); - } - - @Override - public ColumnBinder visit(ArrowType.BinaryView type) { - throw new UnsupportedOperationException( - "Column binder implemented for type " + type + " is not supported"); - } - - @Override - public ColumnBinder visit(ArrowType.LargeBinary type) { - LargeVarBinaryVector varBinary = (LargeVarBinaryVector) vector; - return jdbcType == null - ? new VarBinaryBinder<>(varBinary, Types.LONGVARBINARY) - : new VarBinaryBinder<>(varBinary, jdbcType); - } - - @Override - public ColumnBinder visit(ArrowType.FixedSizeBinary type) { - FixedSizeBinaryVector binary = (FixedSizeBinaryVector) vector; - return jdbcType == null - ? new FixedSizeBinaryBinder(binary, Types.BINARY) - : new FixedSizeBinaryBinder(binary, jdbcType); - } - - @Override - public ColumnBinder visit(ArrowType.Bool type) { - return jdbcType == null - ? new BitBinder((BitVector) vector) - : new BitBinder((BitVector) vector, jdbcType); - } - - @Override - public ColumnBinder visit(ArrowType.Decimal type) { - if (type.getBitWidth() == 128) { - DecimalVector decimalVector = (DecimalVector) vector; - return jdbcType == null - ? new Decimal128Binder(decimalVector) - : new Decimal128Binder(decimalVector, jdbcType); - } else if (type.getBitWidth() == 256) { - Decimal256Vector decimalVector = (Decimal256Vector) vector; - return jdbcType == null - ? new Decimal256Binder(decimalVector) - : new Decimal256Binder(decimalVector, jdbcType); - } - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.Date type) { - switch (type.getUnit()) { - case DAY: - return jdbcType == null - ? 
new DateDayBinder((DateDayVector) vector) - : new DateDayBinder((DateDayVector) vector, /*calendar*/ null, jdbcType); - case MILLISECOND: - return jdbcType == null - ? new DateMilliBinder((DateMilliVector) vector) - : new DateMilliBinder((DateMilliVector) vector, /*calendar*/ null, jdbcType); - default: - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - } - - @Override - public ColumnBinder visit(ArrowType.Time type) { - switch (type.getUnit()) { - case SECOND: - return jdbcType == null - ? new Time32Binder((TimeSecVector) vector) - : new Time32Binder((TimeSecVector) vector, jdbcType); - case MILLISECOND: - return jdbcType == null - ? new Time32Binder((TimeMilliVector) vector) - : new Time32Binder((TimeMilliVector) vector, jdbcType); - case MICROSECOND: - return jdbcType == null - ? new Time64Binder((TimeMicroVector) vector) - : new Time64Binder((TimeMicroVector) vector, jdbcType); - case NANOSECOND: - return jdbcType == null - ? new Time64Binder((TimeNanoVector) vector) - : new Time64Binder((TimeNanoVector) vector, jdbcType); - default: - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - } - - @Override - public ColumnBinder visit(ArrowType.Timestamp type) { - Calendar calendar = null; - final String timezone = type.getTimezone(); - if (timezone != null && !timezone.isEmpty()) { - calendar = Calendar.getInstance(TimeZone.getTimeZone(ZoneId.of(timezone))); - } - return new TimeStampBinder((TimeStampVector) vector, calendar); - } - - @Override - public ColumnBinder visit(ArrowType.Interval type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.Duration type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.ListView type) { - throw new UnsupportedOperationException("No column 
binder implemented for type " + type); - } - - @Override - public ColumnBinder visit(ArrowType.LargeListView type) { - throw new UnsupportedOperationException("No column binder implemented for type " + type); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java deleted file mode 100644 index b9eae464c8aa2..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.Date; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import java.util.Calendar; -import org.apache.arrow.vector.DateDayVector; - -/** A column binder for 32-bit dates. 
*/ -public class DateDayBinder extends BaseColumnBinder { - private static final long MILLIS_PER_DAY = 86_400_000; - private final Calendar calendar; - - public DateDayBinder(DateDayVector vector) { - this(vector, null, Types.DATE); - } - - public DateDayBinder(DateDayVector vector, Calendar calendar) { - this(vector, calendar, Types.DATE); - } - - public DateDayBinder(DateDayVector vector, Calendar calendar, int jdbcType) { - super(vector, jdbcType); - this.calendar = calendar; - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - // TODO: multiply with overflow - final long index = (long) rowIndex * DateDayVector.TYPE_WIDTH; - final Date value = new Date(vector.getDataBuffer().getInt(index) * MILLIS_PER_DAY); - if (calendar == null) { - statement.setDate(parameterIndex, value); - } else { - statement.setDate(parameterIndex, value, calendar); - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java deleted file mode 100644 index f320391fbed5b..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.Date; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import java.util.Calendar; -import org.apache.arrow.vector.DateMilliVector; - -/** A column binder for 64-bit dates. */ -public class DateMilliBinder extends BaseColumnBinder { - private final Calendar calendar; - - public DateMilliBinder(DateMilliVector vector) { - this(vector, null, Types.DATE); - } - - public DateMilliBinder(DateMilliVector vector, Calendar calendar) { - this(vector, calendar, Types.DATE); - } - - public DateMilliBinder(DateMilliVector vector, Calendar calendar, int jdbcType) { - super(vector, jdbcType); - this.calendar = calendar; - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - final long index = (long) rowIndex * DateMilliVector.TYPE_WIDTH; - final Date value = new Date(vector.getDataBuffer().getLong(index)); - if (calendar == null) { - statement.setDate(parameterIndex, value); - } else { - statement.setDate(parameterIndex, value, calendar); - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java deleted file mode 100644 index 07ef52f2e594c..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) 
under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.math.BigDecimal; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.util.DecimalUtility; - -/** A binder for 128-bit decimals. 
*/ -public class Decimal128Binder extends BaseColumnBinder { - public Decimal128Binder(DecimalVector vector) { - this(vector, Types.DECIMAL); - } - - public Decimal128Binder(DecimalVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - final BigDecimal value = - DecimalUtility.getBigDecimalFromArrowBuf( - vector.getDataBuffer(), rowIndex, vector.getScale(), DecimalVector.TYPE_WIDTH); - statement.setBigDecimal(parameterIndex, value); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java deleted file mode 100644 index 5a4222f6b84db..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.math.BigDecimal; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.util.DecimalUtility; - -/** A binder for 256-bit decimals. */ -public class Decimal256Binder extends BaseColumnBinder { - public Decimal256Binder(Decimal256Vector vector) { - this(vector, Types.DECIMAL); - } - - public Decimal256Binder(Decimal256Vector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - final BigDecimal value = - DecimalUtility.getBigDecimalFromArrowBuf( - vector.getDataBuffer(), rowIndex, vector.getScale(), Decimal256Vector.TYPE_WIDTH); - statement.setBigDecimal(parameterIndex, value); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java deleted file mode 100644 index 4f74b1fa8cfd4..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import org.apache.arrow.vector.FixedSizeBinaryVector; - -/** A binder for fixed-width binary types. */ -public class FixedSizeBinaryBinder extends BaseColumnBinder { - /** - * Create a binder for the given vector using the given JDBC type for null values. - * - * @param vector The vector to draw values from. - * @param jdbcType The JDBC type code. - */ - public FixedSizeBinaryBinder(FixedSizeBinaryVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - byte[] binaryData = new byte[vector.getByteWidth()]; - vector - .getDataBuffer() - .getBytes((long) rowIndex * binaryData.length, binaryData, 0, binaryData.length); - statement.setBytes(parameterIndex, binaryData); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java deleted file mode 100644 index 466a67a2dbc89..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import org.apache.arrow.vector.Float4Vector; - -/** A binder for 32-bit floats. */ -public class Float4Binder extends BaseColumnBinder { - public Float4Binder(Float4Vector vector) { - this(vector, Types.REAL); - } - - public Float4Binder(Float4Vector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - final float value = vector.getDataBuffer().getFloat((long) rowIndex * Float4Vector.TYPE_WIDTH); - statement.setFloat(parameterIndex, value); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java deleted file mode 100644 index 222bebf115372..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import org.apache.arrow.vector.Float8Vector; - -/** A binder for 64-bit floats. */ -public class Float8Binder extends BaseColumnBinder { - public Float8Binder(Float8Vector vector) { - this(vector, Types.DOUBLE); - } - - public Float8Binder(Float8Vector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - final double value = - vector.getDataBuffer().getDouble((long) rowIndex * Float8Vector.TYPE_WIDTH); - statement.setDouble(parameterIndex, value); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java deleted file mode 100644 index 6b49eeb5352b1..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import org.apache.arrow.vector.IntVector; - -/** A column binder for 32-bit integers. */ -public class IntBinder extends BaseColumnBinder { - public IntBinder(IntVector vector) { - this(vector, Types.INTEGER); - } - - public IntBinder(IntVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - final int value = vector.getDataBuffer().getInt((long) rowIndex * IntVector.TYPE_WIDTH); - statement.setInt(parameterIndex, value); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java deleted file mode 100644 index 25172c0c1f0aa..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.lang.reflect.Array; -import java.util.ArrayList; -import java.util.Arrays; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.impl.UnionListReader; -import org.apache.arrow.vector.util.Text; - -/** A column binder for list of primitive values. */ -public class ListBinder extends BaseColumnBinder { - - private final UnionListReader listReader; - private final Class arrayElementClass; - private final boolean isTextColumn; - - public ListBinder(ListVector vector) { - this(vector, java.sql.Types.ARRAY); - } - - /** - * Init ListBinder and determine type of data vector. 
- * - * @param vector corresponding data vector from arrow buffer for binding - * @param jdbcType parameter jdbc type - */ - public ListBinder(ListVector vector, int jdbcType) { - super(vector, jdbcType); - listReader = vector.getReader(); - Class dataVectorClass = vector.getDataVector().getClass(); - try { - arrayElementClass = dataVectorClass.getMethod("getObject", Integer.TYPE).getReturnType(); - } catch (NoSuchMethodException e) { - final String message = - String.format( - "Issue to determine type for getObject method of data vector class %s ", - dataVectorClass.getName()); - throw new RuntimeException(message); - } - isTextColumn = arrayElementClass.isAssignableFrom(Text.class); - } - - @Override - public void bind(java.sql.PreparedStatement statement, int parameterIndex, int rowIndex) - throws java.sql.SQLException { - listReader.setPosition(rowIndex); - ArrayList sourceArray = (ArrayList) listReader.readObject(); - Object array; - if (!isTextColumn) { - array = Array.newInstance(arrayElementClass, sourceArray.size()); - Arrays.setAll((Object[]) array, sourceArray::get); - } else { - array = new String[sourceArray.size()]; - Arrays.setAll( - (Object[]) array, - idx -> sourceArray.get(idx) != null ? sourceArray.get(idx).toString() : null); - } - statement.setObject(parameterIndex, array); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java deleted file mode 100644 index e94f186453581..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Objects; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.impl.UnionMapReader; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.util.JsonStringHashMap; - -/** A column binder for map of primitive values. */ -public class MapBinder extends BaseColumnBinder { - - private UnionMapReader reader; - private final boolean isTextKey; - private final boolean isTextValue; - - public MapBinder(MapVector vector) { - this(vector, Types.VARCHAR); - } - - /** - * Init MapBinder and determine type of data vector. 
- * - * @param vector corresponding data vector from arrow buffer for binding - * @param jdbcType parameter jdbc type - */ - public MapBinder(MapVector vector, int jdbcType) { - super(vector, jdbcType); - reader = vector.getReader(); - List structField = Objects.requireNonNull(vector.getField()).getChildren(); - if (structField.size() != 1) { - throw new IllegalArgumentException("Expected Struct field metadata inside Map field"); - } - List keyValueFields = Objects.requireNonNull(structField.get(0)).getChildren(); - if (keyValueFields.size() != 2) { - throw new IllegalArgumentException( - "Expected two children fields " + "inside nested Struct field in Map"); - } - ArrowType keyType = Objects.requireNonNull(keyValueFields.get(0)).getType(); - ArrowType valueType = Objects.requireNonNull(keyValueFields.get(1)).getType(); - isTextKey = ArrowType.Utf8.INSTANCE.equals(keyType); - isTextValue = ArrowType.Utf8.INSTANCE.equals(valueType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - reader.setPosition(rowIndex); - LinkedHashMap tags = new JsonStringHashMap<>(); - while (reader.next()) { - Object key = reader.key().readObject(); - Object value = reader.value().readObject(); - tags.put( - isTextKey && key != null ? key.toString() : key, - isTextValue && value != null ? 
value.toString() : value); - } - switch (jdbcType) { - case Types.VARCHAR: - statement.setString(parameterIndex, tags.toString()); - break; - case Types.OTHER: - default: - statement.setObject(parameterIndex, tags); - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java deleted file mode 100644 index bf5288b173341..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import org.apache.arrow.vector.FieldVector; - -/** A ColumnBinder that checks for nullability before deferring to a type-specific binder. 
*/ -public class NullableColumnBinder implements ColumnBinder { - private final ColumnBinder wrapped; - - public NullableColumnBinder(ColumnBinder wrapped) { - this.wrapped = wrapped; - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - if (wrapped.getVector().isNull(rowIndex)) { - statement.setNull(parameterIndex, wrapped.getJdbcType()); - } else { - wrapped.bind(statement, parameterIndex, rowIndex); - } - } - - @Override - public int getJdbcType() { - return wrapped.getJdbcType(); - } - - @Override - public FieldVector getVector() { - return wrapped.getVector(); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java deleted file mode 100644 index aa636c9336f55..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import org.apache.arrow.vector.SmallIntVector; - -/** A column binder for 8-bit integers. */ -public class SmallIntBinder extends BaseColumnBinder { - public SmallIntBinder(SmallIntVector vector) { - this(vector, Types.SMALLINT); - } - - public SmallIntBinder(SmallIntVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - final short value = - vector.getDataBuffer().getShort((short) rowIndex * SmallIntVector.TYPE_WIDTH); - statement.setShort(parameterIndex, value); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java deleted file mode 100644 index 4e09c3be23264..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Time; -import java.sql.Types; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeSecVector; - -/** A binder for 32-bit time types. */ -public class Time32Binder extends BaseColumnBinder { - private static final long TYPE_WIDTH = 4; - - private final long factor; - - public Time32Binder(TimeSecVector vector) { - this(vector, Types.TIME); - } - - public Time32Binder(TimeMilliVector vector) { - this(vector, Types.TIME); - } - - public Time32Binder(TimeSecVector vector, int jdbcType) { - this(vector, /*factor*/ 1_000, jdbcType); - } - - public Time32Binder(TimeMilliVector vector, int jdbcType) { - this(vector, /*factor*/ 1, jdbcType); - } - - Time32Binder(BaseFixedWidthVector vector, long factor, int jdbcType) { - super(vector, jdbcType); - this.factor = factor; - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - // TODO: multiply with overflow - // TODO: take in a Calendar as well? - final Time value = new Time(vector.getDataBuffer().getInt(rowIndex * TYPE_WIDTH) * factor); - statement.setTime(parameterIndex, value); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java deleted file mode 100644 index 01c85fb32f1b5..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Time; -import java.sql.Types; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeNanoVector; - -/** A binder for 64-bit time types. */ -public class Time64Binder extends BaseColumnBinder { - private static final long TYPE_WIDTH = 8; - - private final long factor; - - public Time64Binder(TimeMicroVector vector) { - this(vector, Types.TIME); - } - - public Time64Binder(TimeNanoVector vector) { - this(vector, Types.TIME); - } - - public Time64Binder(TimeMicroVector vector, int jdbcType) { - this(vector, /*factor*/ 1_000, jdbcType); - } - - public Time64Binder(TimeNanoVector vector, int jdbcType) { - this(vector, /*factor*/ 1_000_000, jdbcType); - } - - Time64Binder(BaseFixedWidthVector vector, long factor, int jdbcType) { - super(vector, jdbcType); - this.factor = factor; - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - // TODO: option to throw on truncation (vendor Guava IntMath#multiply) - final Time value = new Time(vector.getDataBuffer().getLong(rowIndex * TYPE_WIDTH) / factor); - statement.setTime(parameterIndex, value); - } -} diff --git 
a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java deleted file mode 100644 index 942d7ae58dcd5..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Timestamp; -import java.sql.Types; -import java.util.Calendar; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** A column binder for timestamps. */ -public class TimeStampBinder extends BaseColumnBinder { - private final Calendar calendar; - private final long unitsPerSecond; - private final long nanosPerUnit; - - /** Create a binder for a timestamp vector using the default JDBC type code. */ - public TimeStampBinder(TimeStampVector vector, Calendar calendar) { - this( - vector, - calendar, - isZoned(vector.getField().getType()) ? 
Types.TIMESTAMP_WITH_TIMEZONE : Types.TIMESTAMP); - } - - /** - * Create a binder for a timestamp vector. - * - * @param vector The vector to pull values from. - * @param calendar Optionally, the calendar to pass to JDBC. - * @param jdbcType The JDBC type code to use for null values. - */ - public TimeStampBinder(TimeStampVector vector, Calendar calendar, int jdbcType) { - super(vector, jdbcType); - this.calendar = calendar; - - final ArrowType.Timestamp type = (ArrowType.Timestamp) vector.getField().getType(); - switch (type.getUnit()) { - case SECOND: - this.unitsPerSecond = 1; - this.nanosPerUnit = 1_000_000_000; - break; - case MILLISECOND: - this.unitsPerSecond = 1_000; - this.nanosPerUnit = 1_000_000; - break; - case MICROSECOND: - this.unitsPerSecond = 1_000_000; - this.nanosPerUnit = 1_000; - break; - case NANOSECOND: - this.unitsPerSecond = 1_000_000_000; - this.nanosPerUnit = 1; - break; - default: - throw new IllegalArgumentException("Invalid time unit in " + type); - } - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - // TODO: option to throw on truncation (vendor Guava IntMath#multiply) or overflow - final long rawValue = - vector.getDataBuffer().getLong((long) rowIndex * TimeStampVector.TYPE_WIDTH); - final long seconds = rawValue / unitsPerSecond; - final int nanos = (int) ((rawValue - (seconds * unitsPerSecond)) * nanosPerUnit); - final Timestamp value = new Timestamp(seconds * 1_000); - value.setNanos(nanos); - if (calendar != null) { - // Timestamp == Date == UTC timestamp (confusingly). Arrow's timestamp with timezone is a UTC - // value with a - // zone offset, so we don't need to do any conversion. - statement.setTimestamp(parameterIndex, value, calendar); - } else { - // Arrow timestamp without timezone isn't strictly convertible to any timezone. So this is - // technically wrong, - // but there is no 'correct' interpretation here. 
The application should provide a calendar. - statement.setTimestamp(parameterIndex, value); - } - } - - private static boolean isZoned(ArrowType type) { - final String timezone = ((ArrowType.Timestamp) type).getTimezone(); - return timezone != null && !timezone.isEmpty(); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java deleted file mode 100644 index 0580456d37983..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Types; -import org.apache.arrow.vector.TinyIntVector; - -/** A column binder for 8-bit integers. 
*/ -public class TinyIntBinder extends BaseColumnBinder { - public TinyIntBinder(TinyIntVector vector) { - this(vector, Types.TINYINT); - } - - public TinyIntBinder(TinyIntVector vector, int jdbcType) { - super(vector, jdbcType); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - final byte value = vector.getDataBuffer().getByte((long) rowIndex * TinyIntVector.TYPE_WIDTH); - statement.setByte(parameterIndex, value); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java deleted file mode 100644 index 41807efc611b1..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.vector.ElementAddressableVector; -import org.apache.arrow.vector.FieldVector; - -/** - * A binder for variable-width binary types. - * - * @param The binary vector. - */ -public class VarBinaryBinder - extends BaseColumnBinder { - private final ArrowBufPointer element; - - /** - * Create a binder for the given vector using the given JDBC type for null values. - * - * @param vector The vector to draw values from. - * @param jdbcType The JDBC type code. - */ - public VarBinaryBinder(T vector, int jdbcType) { - super(vector, jdbcType); - this.element = new ArrowBufPointer(); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - vector.getDataPointer(rowIndex, element); - if (element.getBuf() == null) { - statement.setNull(parameterIndex, jdbcType); - return; - } - if (element.getLength() > (long) Integer.MAX_VALUE) { - final String message = - String.format( - "Length of value at index %d (%d) exceeds Integer.MAX_VALUE", - rowIndex, element.getLength()); - throw new RuntimeException(message); - } - byte[] binaryData = new byte[(int) element.getLength()]; - element.getBuf().getBytes(element.getOffset(), binaryData); - statement.setBytes(parameterIndex, binaryData); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java deleted file mode 100644 index 926e1da28c9a0..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.binder; - -import java.nio.charset.StandardCharsets; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VariableWidthVector; - -/** - * A binder for variable-width string types. - * - * @param The text vector. - */ -public class VarCharBinder - extends BaseColumnBinder { - private final ArrowBufPointer element; - - /** - * Create a binder for the given vector using the given JDBC type for null values. - * - * @param vector The vector to draw values from. - * @param jdbcType The JDBC type code. 
- */ - public VarCharBinder(T vector, int jdbcType) { - super(vector, jdbcType); - this.element = new ArrowBufPointer(); - } - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - vector.getDataPointer(rowIndex, element); - if (element.getBuf() == null) { - statement.setNull(parameterIndex, jdbcType); - return; - } - if (element.getLength() > (long) Integer.MAX_VALUE) { - final String message = - String.format( - "Length of value at index %d (%d) exceeds Integer.MAX_VALUE", - rowIndex, element.getLength()); - throw new RuntimeException(message); - } - byte[] utf8Bytes = new byte[(int) element.getLength()]; - element.getBuf().getBytes(element.getOffset(), utf8Bytes); - statement.setString(parameterIndex, new String(utf8Bytes, StandardCharsets.UTF_8)); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java deleted file mode 100644 index 945c3c9f84fa8..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Utilities to bind Arrow data as JDBC prepared statement parameters. */ -package org.apache.arrow.adapter.jdbc.binder; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java deleted file mode 100644 index 4676e8204eed4..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.io.IOException; -import java.sql.Array; -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.ListVector; - -/** - * Consumer which consume array type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.complex.ListVector}. 
- */ -public abstract class ArrayConsumer extends BaseConsumer { - - /** Creates a consumer for {@link ListVector}. */ - public static ArrayConsumer createConsumer( - ListVector vector, JdbcConsumer delegate, int index, boolean nullable) { - if (nullable) { - return new ArrayConsumer.NullableArrayConsumer(vector, delegate, index); - } else { - return new ArrayConsumer.NonNullableArrayConsumer(vector, delegate, index); - } - } - - protected final JdbcConsumer delegate; - - private final ValueVector innerVector; - - protected int innerVectorIndex = 0; - - /** Instantiate a ArrayConsumer. */ - public ArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { - super(vector, index); - this.delegate = delegate; - this.innerVector = vector.getDataVector(); - } - - @Override - public void close() throws Exception { - this.vector.close(); - this.delegate.close(); - } - - @Override - public void resetValueVector(ListVector vector) { - super.resetValueVector(vector); - - FieldVector childVector = vector.getDataVector(); - this.delegate.resetValueVector(childVector); - - innerVectorIndex = 0; - } - - void ensureInnerVectorCapacity(int targetCapacity) { - while (innerVector.getValueCapacity() < targetCapacity) { - innerVector.reAlloc(); - } - } - - /** Nullable consumer for {@link ListVector}. */ - static class NullableArrayConsumer extends ArrayConsumer { - - /** Instantiate a nullable array consumer. 
*/ - public NullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { - super(vector, delegate, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException, IOException { - final Array array = resultSet.getArray(columnIndexInResultSet); - if (!resultSet.wasNull()) { - vector.startNewValue(currentIndex); - int count = 0; - try (ResultSet rs = array.getResultSet()) { - while (rs.next()) { - ensureInnerVectorCapacity(innerVectorIndex + count + 1); - delegate.consume(rs); - count++; - } - } - vector.endValue(currentIndex, count); - innerVectorIndex += count; - } - currentIndex++; - } - } - - /** Non-nullable consumer for {@link ListVector}. */ - static class NonNullableArrayConsumer extends ArrayConsumer { - - /** Instantiate a nullable array consumer. */ - public NonNullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { - super(vector, delegate, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException, IOException { - final Array array = resultSet.getArray(columnIndexInResultSet); - vector.startNewValue(currentIndex); - int count = 0; - try (ResultSet rs = array.getResultSet()) { - while (rs.next()) { - ensureInnerVectorCapacity(innerVectorIndex + count + 1); - delegate.consume(rs); - count++; - } - } - vector.endValue(currentIndex, count); - innerVectorIndex += count; - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java deleted file mode 100644 index 9ca3c98a7eb98..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import org.apache.arrow.vector.ValueVector; - -/** - * Base class for all consumers. - * - * @param vector type. - */ -public abstract class BaseConsumer implements JdbcConsumer { - - protected V vector; - - protected final int columnIndexInResultSet; - - protected int currentIndex; - - /** - * Constructs a new consumer. - * - * @param vector the underlying vector for the consumer. - * @param index the column id for the consumer. - */ - public BaseConsumer(V vector, int index) { - this.vector = vector; - this.columnIndexInResultSet = index; - } - - @Override - public void close() throws Exception { - this.vector.close(); - } - - @Override - public void resetValueVector(V vector) { - this.vector = vector; - this.currentIndex = 0; - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java deleted file mode 100644 index b7c547a9391b6..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.BigIntVector; - -/** - * Consumer which consume bigint type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.BigIntVector}. - */ -public class BigIntConsumer { - - /** Creates a consumer for {@link BigIntVector}. */ - public static JdbcConsumer createConsumer( - BigIntVector vector, int index, boolean nullable) { - if (nullable) { - return new NullableBigIntConsumer(vector, index); - } else { - return new NonNullableBigIntConsumer(vector, index); - } - } - - /** Nullable consumer for big int. */ - static class NullableBigIntConsumer extends BaseConsumer { - - /** Instantiate a BigIntConsumer. */ - public NullableBigIntConsumer(BigIntVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - long value = resultSet.getLong(columnIndexInResultSet); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - } - currentIndex++; - } - } - - /** Non-nullable consumer for big int. 
*/ - static class NonNullableBigIntConsumer extends BaseConsumer { - - /** Instantiate a BigIntConsumer. */ - public NonNullableBigIntConsumer(BigIntVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - long value = resultSet.getLong(columnIndexInResultSet); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java deleted file mode 100644 index edbc6360df6bf..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.io.IOException; -import java.io.InputStream; -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.VarBinaryVector; - -/** - * Consumer which consume binary type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.VarBinaryVector}. - */ -public abstract class BinaryConsumer extends BaseConsumer { - - /** Creates a consumer for {@link VarBinaryVector}. */ - public static BinaryConsumer createConsumer(VarBinaryVector vector, int index, boolean nullable) { - if (nullable) { - return new NullableBinaryConsumer(vector, index); - } else { - return new NonNullableBinaryConsumer(vector, index); - } - } - - private final byte[] reuseBytes = new byte[1024]; - - /** Instantiate a BinaryConsumer. */ - public BinaryConsumer(VarBinaryVector vector, int index) { - super(vector, index); - if (vector != null) { - vector.allocateNewSafe(); - } - } - - /** consume a InputStream. 
*/ - public void consume(InputStream is) throws IOException { - if (is != null) { - while (currentIndex >= vector.getValueCapacity()) { - vector.reallocValidityAndOffsetBuffers(); - } - final int startOffset = vector.getStartOffset(currentIndex); - final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - int dataLength = 0; - int read; - while ((read = is.read(reuseBytes)) != -1) { - while (vector.getDataBuffer().capacity() < (startOffset + dataLength + read)) { - vector.reallocDataBuffer(); - } - vector.getDataBuffer().setBytes(startOffset + dataLength, reuseBytes, 0, read); - dataLength += read; - } - offsetBuffer.setInt( - (currentIndex + 1) * ((long) VarBinaryVector.OFFSET_WIDTH), startOffset + dataLength); - BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); - vector.setLastSet(currentIndex); - } - } - - public void moveWriterPosition() { - currentIndex++; - } - - @Override - public void resetValueVector(VarBinaryVector vector) { - this.vector = vector; - this.vector.allocateNewSafe(); - this.currentIndex = 0; - } - - /** Consumer for nullable binary data. */ - static class NullableBinaryConsumer extends BinaryConsumer { - - /** Instantiate a BinaryConsumer. */ - public NullableBinaryConsumer(VarBinaryVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException, IOException { - InputStream is = resultSet.getBinaryStream(columnIndexInResultSet); - if (!resultSet.wasNull()) { - consume(is); - } - moveWriterPosition(); - } - } - - /** Consumer for non-nullable binary data. */ - static class NonNullableBinaryConsumer extends BinaryConsumer { - - /** Instantiate a BinaryConsumer. 
*/ - public NonNullableBinaryConsumer(VarBinaryVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException, IOException { - InputStream is = resultSet.getBinaryStream(columnIndexInResultSet); - consume(is); - moveWriterPosition(); - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java deleted file mode 100644 index 287b9509b5054..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.BitVector; - -/** - * Consumer which consume bit type values from {@link ResultSet}. Write the data to {@link - * BitVector}. - */ -public class BitConsumer { - - /** Creates a consumer for {@link BitVector}. 
*/ - public static JdbcConsumer createConsumer( - BitVector vector, int index, boolean nullable) { - if (nullable) { - return new NullableBitConsumer(vector, index); - } else { - return new NonNullableBitConsumer(vector, index); - } - } - - /** Nullable consumer for {@link BitVector}. */ - static class NullableBitConsumer extends BaseConsumer { - - /** Instantiate a BitConsumer. */ - public NullableBitConsumer(BitVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - boolean value = resultSet.getBoolean(columnIndexInResultSet); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value ? 1 : 0); - } - currentIndex++; - } - } - - /** Non-nullable consumer for {@link BitVector}. */ - static class NonNullableBitConsumer extends BaseConsumer { - - /** Instantiate a BitConsumer. */ - public NonNullableBitConsumer(BitVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - boolean value = resultSet.getBoolean(columnIndexInResultSet); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value ? 1 : 0); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java deleted file mode 100644 index a4fc789494e0f..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.io.IOException; -import java.sql.Blob; -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.VarBinaryVector; - -/** - * Consumer which consume blob type values from {@link ResultSet}. Write the data to {@link - * VarBinaryVector}. - */ -public class BlobConsumer extends BaseConsumer { - - private BinaryConsumer delegate; - - private final boolean nullable; - - /** Creates a consumer for {@link VarBinaryVector}. */ - public static BlobConsumer createConsumer(BinaryConsumer delegate, int index, boolean nullable) { - return new BlobConsumer(delegate, index, nullable); - } - - /** Instantiate a BlobConsumer. 
*/ - public BlobConsumer(BinaryConsumer delegate, int index, boolean nullable) { - super(null, index); - this.delegate = delegate; - this.nullable = nullable; - } - - @Override - public void consume(ResultSet resultSet) throws SQLException, IOException { - Blob blob = resultSet.getBlob(columnIndexInResultSet); - if (blob != null) { - delegate.consume(blob.getBinaryStream()); - } - delegate.moveWriterPosition(); - } - - @Override - public void close() throws Exception { - delegate.close(); - } - - @Override - public void resetValueVector(VarBinaryVector vector) { - delegate = BinaryConsumer.createConsumer(vector, columnIndexInResultSet, nullable); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java deleted file mode 100644 index 9fcdd42414dfa..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.nio.charset.StandardCharsets; -import java.sql.Clob; -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.util.MemoryUtil; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.VarCharVector; - -/** - * Consumer which consume clob type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.VarCharVector}. - */ -public abstract class ClobConsumer extends BaseConsumer { - - /** Creates a consumer for {@link VarCharVector}. */ - public static ClobConsumer createConsumer(VarCharVector vector, int index, boolean nullable) { - if (nullable) { - return new NullableClobConsumer(vector, index); - } else { - return new NonNullableClobConsumer(vector, index); - } - } - - private static final int BUFFER_SIZE = 256; - - /** Instantiate a ClobConsumer. */ - public ClobConsumer(VarCharVector vector, int index) { - super(vector, index); - if (vector != null) { - vector.allocateNewSafe(); - } - } - - @Override - public void resetValueVector(VarCharVector vector) { - this.vector = vector; - this.vector.allocateNewSafe(); - this.currentIndex = 0; - } - - /** Nullable consumer for clob data. */ - static class NullableClobConsumer extends ClobConsumer { - - /** Instantiate a ClobConsumer. */ - public NullableClobConsumer(VarCharVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Clob clob = resultSet.getClob(columnIndexInResultSet); - if (!resultSet.wasNull()) { - if (clob != null) { - long length = clob.length(); - - int read = 1; - int readSize = length < BUFFER_SIZE ? 
(int) length : BUFFER_SIZE; - int totalBytes = 0; - - ArrowBuf dataBuffer = vector.getDataBuffer(); - ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - int startIndex = offsetBuffer.getInt(currentIndex * 4L); - while (read <= length) { - String str = clob.getSubString(read, readSize); - byte[] bytes = str.getBytes(StandardCharsets.UTF_8); - - while ((dataBuffer.writerIndex() + bytes.length) > dataBuffer.capacity()) { - vector.reallocDataBuffer(); - } - MemoryUtil.copyToMemory( - bytes, 0, dataBuffer.memoryAddress() + startIndex + totalBytes, bytes.length); - - totalBytes += bytes.length; - read += readSize; - } - offsetBuffer.setInt((currentIndex + 1) * 4L, startIndex + totalBytes); - BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); - vector.setLastSet(currentIndex); - } - } - currentIndex++; - } - } - - /** Non-nullable consumer for clob data. */ - static class NonNullableClobConsumer extends ClobConsumer { - - /** Instantiate a ClobConsumer. */ - public NonNullableClobConsumer(VarCharVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Clob clob = resultSet.getClob(columnIndexInResultSet); - if (clob != null) { - long length = clob.length(); - - int read = 1; - int readSize = length < BUFFER_SIZE ? 
(int) length : BUFFER_SIZE; - int totalBytes = 0; - - ArrowBuf dataBuffer = vector.getDataBuffer(); - ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - int startIndex = offsetBuffer.getInt(currentIndex * 4L); - while (read <= length) { - String str = clob.getSubString(read, readSize); - byte[] bytes = str.getBytes(StandardCharsets.UTF_8); - - while ((dataBuffer.writerIndex() + bytes.length) > dataBuffer.capacity()) { - vector.reallocDataBuffer(); - } - MemoryUtil.copyToMemory( - bytes, 0, dataBuffer.memoryAddress() + startIndex + totalBytes, bytes.length); - - totalBytes += bytes.length; - read += readSize; - } - offsetBuffer.setInt((currentIndex + 1) * 4L, startIndex + totalBytes); - BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); - vector.setLastSet(currentIndex); - } - - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java deleted file mode 100644 index 2366116fd0d18..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.io.IOException; -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; -import org.apache.arrow.adapter.jdbc.consumer.exceptions.JdbcConsumerException; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** Composite consumer which hold all consumers. It manages the consume and cleanup process. */ -public class CompositeJdbcConsumer implements JdbcConsumer { - - private final JdbcConsumer[] consumers; - - /** Construct an instance. */ - public CompositeJdbcConsumer(JdbcConsumer[] consumers) { - this.consumers = consumers; - } - - @Override - public void consume(ResultSet rs) throws SQLException, IOException { - for (int i = 0; i < consumers.length; i++) { - try { - consumers[i].consume(rs); - } catch (Exception e) { - if (consumers[i] instanceof BaseConsumer) { - BaseConsumer consumer = (BaseConsumer) consumers[i]; - JdbcFieldInfo fieldInfo = - new JdbcFieldInfo(rs.getMetaData(), consumer.columnIndexInResultSet); - ArrowType arrowType = consumer.vector.getMinorType().getType(); - throw new JdbcConsumerException( - "Exception while consuming JDBC value", e, fieldInfo, arrowType); - } else { - throw e; - } - } - } - } - - @Override - public void close() { - - try { - // clean up - AutoCloseables.close(consumers); - } catch (Exception e) { - throw new RuntimeException("Error occurred while releasing resources.", e); - } - } - - @Override - public void resetValueVector(ValueVector vector) {} - - /** Reset inner consumers through vectors in the vector schema root. 
*/ - public void resetVectorSchemaRoot(VectorSchemaRoot root) { - assert root.getFieldVectors().size() == consumers.length; - for (int i = 0; i < consumers.length; i++) { - consumers[i].resetValueVector(root.getVector(i)); - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java deleted file mode 100644 index c271b900682a1..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.sql.Date; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.Calendar; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; - -/** - * Consumer which consume date type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.DateDayVector}. - */ -public class DateConsumer { - - /** Creates a consumer for {@link DateMilliVector}. 
*/ - public static JdbcConsumer createConsumer( - DateDayVector vector, int index, boolean nullable, Calendar calendar) { - if (nullable) { - return new NullableDateConsumer(vector, index, calendar); - } else { - return new NonNullableDateConsumer(vector, index, calendar); - } - } - - /** Nullable consumer for date. */ - static class NullableDateConsumer extends BaseConsumer { - - protected final Calendar calendar; - - /** Instantiate a DateConsumer. */ - public NullableDateConsumer(DateDayVector vector, int index) { - this(vector, index, /* calendar */ null); - } - - /** Instantiate a DateConsumer. */ - public NullableDateConsumer(DateDayVector vector, int index, Calendar calendar) { - super(vector, index); - this.calendar = calendar; - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Date date = - calendar == null - ? resultSet.getDate(columnIndexInResultSet) - : resultSet.getDate(columnIndexInResultSet, calendar); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, Math.toIntExact(TimeUnit.MILLISECONDS.toDays(date.getTime()))); - } - currentIndex++; - } - } - - /** Non-nullable consumer for date. */ - static class NonNullableDateConsumer extends BaseConsumer { - - protected final Calendar calendar; - - /** Instantiate a DateConsumer. */ - public NonNullableDateConsumer(DateDayVector vector, int index) { - this(vector, index, /* calendar */ null); - } - - /** Instantiate a DateConsumer. */ - public NonNullableDateConsumer(DateDayVector vector, int index, Calendar calendar) { - super(vector, index); - this.calendar = calendar; - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Date date = - calendar == null - ? 
resultSet.getDate(columnIndexInResultSet) - : resultSet.getDate(columnIndexInResultSet, calendar); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, Math.toIntExact(TimeUnit.MILLISECONDS.toDays(date.getTime()))); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java deleted file mode 100644 index eb33ea5038b98..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.Decimal256Vector; - -/** - * Consumer which consume decimal type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.Decimal256Vector}. 
- */ -public abstract class Decimal256Consumer extends BaseConsumer { - private final RoundingMode bigDecimalRoundingMode; - private final int scale; - - /** - * Constructs a new consumer. - * - * @param vector the underlying vector for the consumer. - * @param index the column id for the consumer. - */ - public Decimal256Consumer(Decimal256Vector vector, int index) { - this(vector, index, null); - } - - /** - * Constructs a new consumer, with optional coercibility. - * - * @param vector the underlying vector for the consumer. - * @param index the column index for the consumer. - * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does - * not match that of the target vector. Set to null to retain strict matching behavior (scale - * of source and target vector must match exactly). - */ - public Decimal256Consumer( - Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { - super(vector, index); - this.bigDecimalRoundingMode = bigDecimalRoundingMode; - this.scale = vector.getScale(); - } - - /** Creates a consumer for {@link Decimal256Vector}. */ - public static JdbcConsumer createConsumer( - Decimal256Vector vector, int index, boolean nullable, RoundingMode bigDecimalRoundingMode) { - if (nullable) { - return new NullableDecimal256Consumer(vector, index, bigDecimalRoundingMode); - } else { - return new NonNullableDecimal256Consumer(vector, index, bigDecimalRoundingMode); - } - } - - protected void set(BigDecimal value) { - if (bigDecimalRoundingMode != null && value.scale() != scale) { - value = value.setScale(scale, bigDecimalRoundingMode); - } - vector.set(currentIndex, value); - } - - /** Consumer for nullable decimal. */ - static class NullableDecimal256Consumer extends Decimal256Consumer { - - /** Instantiate a Decimal256Consumer. 
*/ - public NullableDecimal256Consumer( - Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { - super(vector, index, bigDecimalRoundingMode); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - BigDecimal value = resultSet.getBigDecimal(columnIndexInResultSet); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - set(value); - } - currentIndex++; - } - } - - /** Consumer for non-nullable decimal. */ - static class NonNullableDecimal256Consumer extends Decimal256Consumer { - - /** Instantiate a Decimal256Consumer. */ - public NonNullableDecimal256Consumer( - Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { - super(vector, index, bigDecimalRoundingMode); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - BigDecimal value = resultSet.getBigDecimal(columnIndexInResultSet); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - set(value); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java deleted file mode 100644 index 05b4d27de1022..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.DecimalVector; - -/** - * Consumer which consume decimal type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.DecimalVector}. - */ -public abstract class DecimalConsumer extends BaseConsumer { - private final RoundingMode bigDecimalRoundingMode; - private final int scale; - - /** - * Constructs a new consumer. - * - * @param vector the underlying vector for the consumer. - * @param index the column id for the consumer. - */ - public DecimalConsumer(DecimalVector vector, int index) { - this(vector, index, null); - } - - /** - * Constructs a new consumer, with optional coercibility. - * - * @param vector the underlying vector for the consumer. - * @param index the column index for the consumer. - * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does - * not match that of the target vector. Set to null to retain strict matching behavior (scale - * of source and target vector must match exactly). - */ - public DecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { - super(vector, index); - this.bigDecimalRoundingMode = bigDecimalRoundingMode; - this.scale = vector.getScale(); - } - - /** Creates a consumer for {@link DecimalVector}. 
*/ - public static JdbcConsumer createConsumer( - DecimalVector vector, int index, boolean nullable, RoundingMode bigDecimalRoundingMode) { - if (nullable) { - return new NullableDecimalConsumer(vector, index, bigDecimalRoundingMode); - } else { - return new NonNullableDecimalConsumer(vector, index, bigDecimalRoundingMode); - } - } - - protected void set(BigDecimal value) { - if (bigDecimalRoundingMode != null && value.scale() != scale) { - value = value.setScale(scale, bigDecimalRoundingMode); - } - vector.set(currentIndex, value); - } - - /** Consumer for nullable decimal. */ - static class NullableDecimalConsumer extends DecimalConsumer { - - /** Instantiate a DecimalConsumer. */ - public NullableDecimalConsumer( - DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { - super(vector, index, bigDecimalRoundingMode); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - BigDecimal value = resultSet.getBigDecimal(columnIndexInResultSet); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - set(value); - } - currentIndex++; - } - } - - /** Consumer for non-nullable decimal. */ - static class NonNullableDecimalConsumer extends DecimalConsumer { - - /** Instantiate a DecimalConsumer. */ - public NonNullableDecimalConsumer( - DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { - super(vector, index, bigDecimalRoundingMode); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - BigDecimal value = resultSet.getBigDecimal(columnIndexInResultSet); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. 
- set(value); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java deleted file mode 100644 index 9cd31e9245472..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.Float8Vector; - -/** - * Consumer which consume double type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.Float8Vector}. - */ -public class DoubleConsumer { - - /** Creates a consumer for {@link Float8Vector}. */ - public static JdbcConsumer createConsumer( - Float8Vector vector, int index, boolean nullable) { - if (nullable) { - return new NullableDoubleConsumer(vector, index); - } else { - return new NonNullableDoubleConsumer(vector, index); - } - } - - /** Nullable double consumer. 
*/ - static class NullableDoubleConsumer extends BaseConsumer { - - /** Instantiate a DoubleConsumer. */ - public NullableDoubleConsumer(Float8Vector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - double value = resultSet.getDouble(columnIndexInResultSet); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - } - currentIndex++; - } - } - - /** Non-nullable double consumer. */ - static class NonNullableDoubleConsumer extends BaseConsumer { - - /** Instantiate a DoubleConsumer. */ - public NonNullableDoubleConsumer(Float8Vector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - double value = resultSet.getDouble(columnIndexInResultSet); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java deleted file mode 100644 index 0f16a68da883e..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.Float4Vector; - -/** - * Consumer which consume float type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.Float4Vector}. - */ -public class FloatConsumer { - - /** Creates a consumer for {@link Float4Vector}. */ - public static JdbcConsumer createConsumer( - Float4Vector vector, int index, boolean nullable) { - if (nullable) { - return new NullableFloatConsumer(vector, index); - } else { - return new NonNullableFloatConsumer(vector, index); - } - } - - /** Nullable float consumer. */ - static class NullableFloatConsumer extends BaseConsumer { - - /** Instantiate a FloatConsumer. */ - public NullableFloatConsumer(Float4Vector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - float value = resultSet.getFloat(columnIndexInResultSet); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - } - currentIndex++; - } - } - - /** Non-nullable float consumer. */ - static class NonNullableFloatConsumer extends BaseConsumer { - - /** Instantiate a FloatConsumer. 
*/ - public NonNullableFloatConsumer(Float4Vector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - float value = resultSet.getFloat(columnIndexInResultSet); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java deleted file mode 100644 index 302be697fbf07..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.IntVector; - -/** - * Consumer which consume int type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.IntVector}. - */ -public class IntConsumer { - - /** Creates a consumer for {@link IntVector}. 
*/ - public static JdbcConsumer createConsumer( - IntVector vector, int index, boolean nullable) { - if (nullable) { - return new NullableIntConsumer(vector, index); - } else { - return new NonNullableIntConsumer(vector, index); - } - } - - /** Nullable consumer for int. */ - static class NullableIntConsumer extends BaseConsumer { - - /** Instantiate a IntConsumer. */ - public NullableIntConsumer(IntVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - int value = resultSet.getInt(columnIndexInResultSet); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - } - currentIndex++; - } - } - - /** Non-nullable consumer for int. */ - static class NonNullableIntConsumer extends BaseConsumer { - - /** Instantiate a IntConsumer. */ - public NonNullableIntConsumer(IntVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - int value = resultSet.getInt(columnIndexInResultSet); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java deleted file mode 100644 index 1ec6ad7eb9266..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.io.IOException; -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.ValueVector; - -/** - * An abstraction that is used to consume values from {@link ResultSet}. - * - * @param The vector within consumer or its delegate, used for partially consume purpose. - */ -public interface JdbcConsumer extends AutoCloseable { - - /** Consume a specific type value from {@link ResultSet} and write it to vector. */ - void consume(ResultSet resultSet) throws SQLException, IOException; - - /** Close this consumer, do some clean work such as clear reuse ArrowBuf. */ - @Override - void close() throws Exception; - - /** Reset the vector within consumer for partial read purpose. */ - void resetValueVector(T vector); -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java deleted file mode 100644 index 6223650ff2c04..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.Map; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.impl.UnionMapWriter; -import org.apache.arrow.vector.util.ObjectMapperFactory; - -/** - * Consumer which consume map type values from {@link ResultSet}. Write the data into {@link - * org.apache.arrow.vector.complex.MapVector}. - */ -public class MapConsumer extends BaseConsumer { - - private final UnionMapWriter writer; - private final ObjectMapper objectMapper = ObjectMapperFactory.newObjectMapper(); - private final TypeReference> typeReference = - new TypeReference>() {}; - private int currentRow; - - /** Creates a consumer for {@link MapVector}. */ - public static MapConsumer createConsumer(MapVector mapVector, int index, boolean nullable) { - return new MapConsumer(mapVector, index); - } - - /** Instantiate a MapConsumer. 
*/ - public MapConsumer(MapVector vector, int index) { - super(vector, index); - writer = vector.getWriter(); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException, IOException { - Object map = resultSet.getObject(columnIndexInResultSet); - writer.setPosition(currentRow++); - if (map != null) { - if (map instanceof String) { - writeJavaMapIntoVector(objectMapper.readValue((String) map, typeReference)); - } else if (map instanceof Map) { - writeJavaMapIntoVector((Map) map); - } else { - throw new IllegalArgumentException( - "Unknown type of map type column from JDBC " + map.getClass().getName()); - } - } else { - writer.writeNull(); - } - } - - private void writeJavaMapIntoVector(Map map) { - BufferAllocator allocator = vector.getAllocator(); - writer.startMap(); - map.forEach( - (key, value) -> { - byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8); - byte[] valueBytes = value != null ? value.getBytes(StandardCharsets.UTF_8) : null; - try (ArrowBuf keyBuf = allocator.buffer(keyBytes.length); - ArrowBuf valueBuf = - valueBytes != null ? allocator.buffer(valueBytes.length) : null; ) { - writer.startEntry(); - keyBuf.writeBytes(keyBytes); - writer.key().varChar().writeVarChar(0, keyBytes.length, keyBuf); - if (valueBytes != null) { - valueBuf.writeBytes(valueBytes); - writer.value().varChar().writeVarChar(0, valueBytes.length, valueBuf); - } else { - writer.value().varChar().writeNull(); - } - writer.endEntry(); - } - }); - writer.endMap(); - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java deleted file mode 100644 index 9d7a760f697a7..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.NullVector; - -/** - * Consumer which consume null type values from ResultSet. Corresponding to {@link - * org.apache.arrow.vector.NullVector}. - */ -public class NullConsumer extends BaseConsumer { - - public NullConsumer(NullVector vector) { - super(vector, 0); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException {} -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java deleted file mode 100644 index 9f45c077ed0a8..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.SmallIntVector; - -/** - * Consumer which consume smallInt type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.SmallIntVector}. - */ -public class SmallIntConsumer { - - /** Creates a consumer for {@link SmallIntVector}. */ - public static BaseConsumer createConsumer( - SmallIntVector vector, int index, boolean nullable) { - if (nullable) { - return new NullableSmallIntConsumer(vector, index); - } else { - return new NonNullableSmallIntConsumer(vector, index); - } - } - - /** Nullable consumer for small int. */ - static class NullableSmallIntConsumer extends BaseConsumer { - - /** Instantiate a SmallIntConsumer. */ - public NullableSmallIntConsumer(SmallIntVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - short value = resultSet.getShort(columnIndexInResultSet); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - } - currentIndex++; - } - } - - /** Non-nullable consumer for small int. */ - static class NonNullableSmallIntConsumer extends BaseConsumer { - - /** Instantiate a SmallIntConsumer. 
*/ - public NonNullableSmallIntConsumer(SmallIntVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - short value = resultSet.getShort(columnIndexInResultSet); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java deleted file mode 100644 index bee19d0e4deab..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Time; -import java.util.Calendar; -import org.apache.arrow.vector.TimeMilliVector; - -/** - * Consumer which consume time type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.TimeMilliVector}. 
- */ -public abstract class TimeConsumer { - - /** Creates a consumer for {@link TimeMilliVector}. */ - public static JdbcConsumer createConsumer( - TimeMilliVector vector, int index, boolean nullable, Calendar calendar) { - if (nullable) { - return new NullableTimeConsumer(vector, index, calendar); - } else { - return new NonNullableTimeConsumer(vector, index, calendar); - } - } - - /** Nullable consumer for {@link TimeMilliVector}. */ - static class NullableTimeConsumer extends BaseConsumer { - - protected final Calendar calendar; - - /** Instantiate a TimeConsumer. */ - public NullableTimeConsumer(TimeMilliVector vector, int index) { - this(vector, index, /* calendar */ null); - } - - /** Instantiate a TimeConsumer. */ - public NullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) { - super(vector, index); - this.calendar = calendar; - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Time time = - calendar == null - ? resultSet.getTime(columnIndexInResultSet) - : resultSet.getTime(columnIndexInResultSet, calendar); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, (int) time.getTime()); - } - currentIndex++; - } - } - - /** Non-nullable consumer for {@link TimeMilliVector}. */ - static class NonNullableTimeConsumer extends BaseConsumer { - - protected final Calendar calendar; - - /** Instantiate a TimeConsumer. */ - public NonNullableTimeConsumer(TimeMilliVector vector, int index) { - this(vector, index, /* calendar */ null); - } - - /** Instantiate a TimeConsumer. */ - public NonNullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) { - super(vector, index); - this.calendar = calendar; - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Time time = - calendar == null - ? 
resultSet.getTime(columnIndexInResultSet) - : resultSet.getTime(columnIndexInResultSet, calendar); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, (int) time.getTime()); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java deleted file mode 100644 index cc6269c21f04a..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Timestamp; -import org.apache.arrow.vector.TimeStampMilliVector; - -/** - * Consumer which consume timestamp type values from {@link ResultSet}. Write the data to {@link - * TimeStampMilliVector}. - */ -public abstract class TimestampConsumer { - - /** Creates a consumer for {@link TimeStampMilliVector}. 
*/ - public static JdbcConsumer createConsumer( - TimeStampMilliVector vector, int index, boolean nullable) { - if (nullable) { - return new NullableTimestampConsumer(vector, index); - } else { - return new NonNullableTimestampConsumer(vector, index); - } - } - - /** Nullable consumer for timestamp. */ - static class NullableTimestampConsumer extends BaseConsumer { - - /** Instantiate a TimestampConsumer. */ - public NullableTimestampConsumer(TimeStampMilliVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, timestamp.getTime()); - } - currentIndex++; - } - } - - /** Non-nullable consumer for timestamp. */ - static class NonNullableTimestampConsumer extends BaseConsumer { - - /** Instantiate a TimestampConsumer. */ - public NonNullableTimestampConsumer(TimeStampMilliVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. 
- vector.set(currentIndex, timestamp.getTime()); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java deleted file mode 100644 index 3e4911ac1a161..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Timestamp; -import java.util.Calendar; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.TimeStampMilliTZVector; - -/** - * Consumer which consume timestamp (with time zone) type values from {@link ResultSet}. Write the - * data to {@link TimeStampMilliTZVector}. - */ -public class TimestampTZConsumer { - /** Creates a consumer for {@link TimeStampMilliTZVector}. 
*/ - public static JdbcConsumer createConsumer( - TimeStampMilliTZVector vector, int index, boolean nullable, Calendar calendar) { - Preconditions.checkArgument(calendar != null, "Calendar cannot be null"); - if (nullable) { - return new TimestampTZConsumer.NullableTimestampTZConsumer(vector, index, calendar); - } else { - return new TimestampTZConsumer.NonNullableTimestampConsumer(vector, index, calendar); - } - } - - /** Nullable consumer for timestamp (with time zone). */ - static class NullableTimestampTZConsumer extends BaseConsumer { - - protected final Calendar calendar; - - /** Instantiate a TimestampConsumer. */ - public NullableTimestampTZConsumer( - TimeStampMilliTZVector vector, int index, Calendar calendar) { - super(vector, index); - this.calendar = calendar; - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet, calendar); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, timestamp.getTime()); - } - currentIndex++; - } - } - - /** Non-nullable consumer for timestamp (with time zone). */ - static class NonNullableTimestampConsumer extends BaseConsumer { - - protected final Calendar calendar; - - /** Instantiate a TimestampConsumer. */ - public NonNullableTimestampConsumer( - TimeStampMilliTZVector vector, int index, Calendar calendar) { - super(vector, index); - this.calendar = calendar; - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet, calendar); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. 
- vector.set(currentIndex, timestamp.getTime()); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java deleted file mode 100644 index b75b87dd81cc4..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.TinyIntVector; - -/** - * Consumer which consume tinyInt type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.TinyIntVector}. - */ -public abstract class TinyIntConsumer { - - /** Creates a consumer for {@link TinyIntVector}. */ - public static JdbcConsumer createConsumer( - TinyIntVector vector, int index, boolean nullable) { - if (nullable) { - return new NullableTinyIntConsumer(vector, index); - } else { - return new NonNullableTinyIntConsumer(vector, index); - } - } - - /** Nullable consumer for tiny int. 
*/ - static class NullableTinyIntConsumer extends BaseConsumer { - - /** Instantiate a TinyIntConsumer. */ - public NullableTinyIntConsumer(TinyIntVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - byte value = resultSet.getByte(columnIndexInResultSet); - if (!resultSet.wasNull()) { - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - } - currentIndex++; - } - } - - /** Non-nullable consumer for tiny int. */ - static class NonNullableTinyIntConsumer extends BaseConsumer { - - /** Instantiate a TinyIntConsumer. */ - public NonNullableTinyIntConsumer(TinyIntVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - byte value = resultSet.getByte(columnIndexInResultSet); - // for fixed width vectors, we have allocated enough memory proactively, - // so there is no need to call the setSafe method here. - vector.set(currentIndex, value); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java deleted file mode 100644 index c81c4f0db124b..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer; - -import java.nio.charset.StandardCharsets; -import java.sql.ResultSet; -import java.sql.SQLException; -import org.apache.arrow.vector.VarCharVector; - -/** - * Consumer which consume varchar type values from {@link ResultSet}. Write the data to {@link - * org.apache.arrow.vector.VarCharVector}. - */ -public abstract class VarCharConsumer { - - /** Creates a consumer for {@link VarCharVector}. */ - public static JdbcConsumer createConsumer( - VarCharVector vector, int index, boolean nullable) { - if (nullable) { - return new NullableVarCharConsumer(vector, index); - } else { - return new NonNullableVarCharConsumer(vector, index); - } - } - - /** Nullable consumer for var char. */ - static class NullableVarCharConsumer extends BaseConsumer { - - /** Instantiate a VarCharConsumer. */ - public NullableVarCharConsumer(VarCharVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - String value = resultSet.getString(columnIndexInResultSet); - if (!resultSet.wasNull()) { - byte[] bytes = value.getBytes(StandardCharsets.UTF_8); - vector.setSafe(currentIndex, bytes); - } - currentIndex++; - } - } - - /** Non-nullable consumer for var char. */ - static class NonNullableVarCharConsumer extends BaseConsumer { - - /** Instantiate a VarCharConsumer. 
*/ - public NonNullableVarCharConsumer(VarCharVector vector, int index) { - super(vector, index); - } - - @Override - public void consume(ResultSet resultSet) throws SQLException { - String value = resultSet.getString(columnIndexInResultSet); - byte[] bytes = value.getBytes(StandardCharsets.UTF_8); - vector.setSafe(currentIndex, bytes); - currentIndex++; - } - } -} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java deleted file mode 100644 index 04e26d640c04d..0000000000000 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.consumer.exceptions; - -import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** - * Exception while consuming JDBC data. This exception stores the JdbcFieldInfo for the column and - * the ArrowType for the corresponding vector for easier debugging. 
- */ -public class JdbcConsumerException extends RuntimeException { - final JdbcFieldInfo fieldInfo; - final ArrowType arrowType; - - /** - * Construct JdbcConsumerException with all fields. - * - * @param message error message - * @param cause original exception - * @param fieldInfo JdbcFieldInfo for the column - * @param arrowType ArrowType for the corresponding vector - */ - public JdbcConsumerException( - String message, Throwable cause, JdbcFieldInfo fieldInfo, ArrowType arrowType) { - super(message, cause); - this.fieldInfo = fieldInfo; - this.arrowType = arrowType; - } - - public ArrowType getArrowType() { - return this.arrowType; - } - - public JdbcFieldInfo getFieldInfo() { - return this.fieldInfo; - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java deleted file mode 100644 index a37a8ca2f5990..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; -import java.io.IOException; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Statement; -import java.sql.Types; -import java.util.Calendar; -import java.util.HashMap; -import java.util.Map; -import java.util.TimeZone; -import java.util.function.Function; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.util.ValueVectorUtility; -import org.junit.jupiter.api.AfterEach; - -/** Class to abstract out some common test functionality for testing JDBC to Arrow. */ -public abstract class AbstractJdbcToArrowTest { - - protected static final String BIGINT = "BIGINT_FIELD5"; - protected static final String BINARY = "BINARY_FIELD12"; - protected static final String BIT = "BIT_FIELD17"; - protected static final String BLOB = "BLOB_FIELD14"; - protected static final String BOOL = "BOOL_FIELD2"; - protected static final String CHAR = "CHAR_FIELD16"; - protected static final String CLOB = "CLOB_FIELD15"; - protected static final String DATE = "DATE_FIELD10"; - protected static final String DECIMAL = "DECIMAL_FIELD6"; - protected static final String DOUBLE = "DOUBLE_FIELD7"; - protected static final String INT = "INT_FIELD1"; - protected static final String LIST = "LIST_FIELD19"; - protected static final String MAP = "MAP_FIELD20"; - protected static final String REAL = "REAL_FIELD8"; - protected static final String SMALLINT = "SMALLINT_FIELD4"; - protected static final String TIME = "TIME_FIELD9"; - protected static final String TIMESTAMP = "TIMESTAMP_FIELD11"; - protected static 
final String TINYINT = "TINYINT_FIELD3"; - protected static final String VARCHAR = "VARCHAR_FIELD13"; - protected static final String NULL = "NULL_FIELD18"; - protected static final Map ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP = - new HashMap<>(); - - static { - ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP.put(LIST, new JdbcFieldInfo(Types.INTEGER)); - } - - protected Connection conn = null; - protected Table table; - protected boolean reuseVectorSchemaRoot; - - /** - * This method creates Table object after reading YAML file. - * - * @param ymlFilePath path to file - * @return Table object - * @throws IOException on error - */ - protected static Table getTable(String ymlFilePath, @SuppressWarnings("rawtypes") Class clss) - throws IOException { - return new ObjectMapper(new YAMLFactory()) - .readValue(clss.getClassLoader().getResourceAsStream(ymlFilePath), Table.class); - } - - /** - * This method creates Connection object and DB table and also populate data into table for test. - * - * @throws SQLException on error - * @throws ClassNotFoundException on error - */ - protected void initializeDatabase(Table table) throws SQLException, ClassNotFoundException { - this.table = table; - - TimeZone.setDefault(TimeZone.getTimeZone("UTC")); - String url = "jdbc:h2:mem:JdbcToArrowTest"; - String driver = "org.h2.Driver"; - Class.forName(driver); - conn = DriverManager.getConnection(url); - try (Statement stmt = conn.createStatement(); ) { - stmt.executeUpdate(table.getCreate()); - for (String insert : table.getData()) { - stmt.executeUpdate(insert); - } - } - } - - /** - * Clean up method to close connection after test completes. - * - * @throws SQLException on error - */ - @AfterEach - public void destroy() throws SQLException { - if (conn != null) { - conn.close(); - conn = null; - } - } - - /** - * Prepares test data and returns collection of Table object for each test iteration. 
- * - * @param testFiles files for test - * @param clss Class type - * @return Collection of Table objects - * @throws SQLException on error - * @throws ClassNotFoundException on error - * @throws IOException on error - */ - public static Object[][] prepareTestData( - String[] testFiles, @SuppressWarnings("rawtypes") Class clss) - throws SQLException, ClassNotFoundException, IOException { - Object[][] tableArr = new Object[testFiles.length][]; - int i = 0; - for (String testFile : testFiles) { - tableArr[i++] = new Object[] {getTable(testFile, clss)}; - } - return tableArr; - } - - /** - * Abstract method to implement test Functionality to test JdbcToArrow methods. - * - * @param table Table object - * @throws SQLException on error - * @throws IOException on error - */ - public abstract void testJdbcToArrowValues(Table table) - throws SQLException, IOException, ClassNotFoundException; - - /** - * Abstract method to implement logic to assert test various datatype values. - * - * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' - * mapping declared in configuration only manually - */ - public abstract void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector); - - /** - * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow - * objects. This method uses the default Calendar instance with default TimeZone and Locale as - * returned by the JVM. If you wish to use specific TimeZone or Locale for any Date, Time and - * Timestamp datasets, you may want use overloaded API that taken Calendar object instance. - * - *

This method is for test only. - * - * @param connection Database connection to be used. This method will not close the passed - * connection object. Since the caller has passed the connection object it's the - * responsibility of the caller to close or return the connection to the pool. - * @param query The DB Query to fetch the data. - * @param allocator Memory allocator - * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources - * opened such as ResultSet and Statement objects. - */ - public VectorSchemaRoot sqlToArrow(Connection connection, String query, BufferAllocator allocator) - throws SQLException, IOException { - Preconditions.checkNotNull(allocator, "Memory allocator object cannot be null"); - - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - return sqlToArrow(connection, query, config); - } - - /** - * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow - * objects. - * - *

This method is for test only. - * - * @param connection Database connection to be used. This method will not close the passed - * connection object. Since the caller has passed the connection object it's the - * responsibility of the caller to close or return the connection to the pool. - * @param query The DB Query to fetch the data. - * @param allocator Memory allocator - * @param calendar Calendar object to use to handle Date, Time and Timestamp datasets. - * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources - * opened such as ResultSet and Statement objects. - */ - public VectorSchemaRoot sqlToArrow( - Connection connection, String query, BufferAllocator allocator, Calendar calendar) - throws SQLException, IOException { - - Preconditions.checkNotNull(allocator, "Memory allocator object cannot be null"); - Preconditions.checkNotNull(calendar, "Calendar object cannot be null"); - - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(allocator, calendar) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - return sqlToArrow(connection, query, config); - } - - /** - * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow - * objects. - * - *

This method is for test only. - * - * @param connection Database connection to be used. This method will not close the passed - * connection object. Since the caller has passed the connection object it's the - * responsibility of the caller to close or return the connection to the pool. - * @param query The DB Query to fetch the data. - * @param config Configuration - * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources - * opened such as ResultSet and Statement objects. - */ - public static VectorSchemaRoot sqlToArrow( - Connection connection, String query, JdbcToArrowConfig config) - throws SQLException, IOException { - Preconditions.checkNotNull(connection, "JDBC connection object cannot be null"); - Preconditions.checkArgument( - query != null && query.length() > 0, "SQL query cannot be null or empty"); - - try (Statement stmt = connection.createStatement()) { - return sqlToArrow(stmt.executeQuery(query), config); - } - } - - /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow - * objects. This method uses the default RootAllocator and Calendar object. - * - *

This method is for test only. - * - * @param resultSet ResultSet to use to fetch the data from underlying database - * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException on error - */ - public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLException, IOException { - Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); - - return sqlToArrow(resultSet, JdbcToArrowUtils.getUtcCalendar()); - } - - /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow - * objects. - * - *

This method is for test only. - * - * @param resultSet ResultSet to use to fetch the data from underlying database - * @param allocator Memory allocator - * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException on error - */ - public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BufferAllocator allocator) - throws SQLException, IOException { - Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null"); - - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - return sqlToArrow(resultSet, config); - } - - /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow - * objects. - * - *

This method is for test only. - * - * @param resultSet ResultSet to use to fetch the data from underlying database - * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null - * if none. - * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException on error - */ - public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) - throws SQLException, IOException { - Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); - - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - return sqlToArrow(resultSet, config); - } - - /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow - * objects. - * - *

This method is for test only. - * - * @param resultSet ResultSet to use to fetch the data from underlying database - * @param allocator Memory allocator to use. - * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null - * if none. - * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException on error - */ - public static VectorSchemaRoot sqlToArrow( - ResultSet resultSet, BufferAllocator allocator, Calendar calendar) - throws SQLException, IOException { - Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null"); - - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(allocator, calendar) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - return sqlToArrow(resultSet, config); - } - - /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow - * objects. - * - *

This method is for test only. - * - * @param resultSet ResultSet to use to fetch the data from underlying database - * @param config Configuration of the conversion from JDBC to Arrow. - * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException on error - */ - public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig config) - throws SQLException, IOException { - Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); - Preconditions.checkNotNull(config, "The configuration cannot be null"); - - VectorSchemaRoot root = - VectorSchemaRoot.create( - JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), - config.getAllocator()); - if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) { - ValueVectorUtility.preAllocate(root, config.getTargetBatchSize()); - } - JdbcToArrowUtils.jdbcToArrowVectors(resultSet, root, config); - return root; - } - - /** - * Register MAP_FIELD20 as ArrowType.Map - * - * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null - * if none. 
- * @param rsmd ResultSetMetaData to lookup column name from result set metadata - * @return typeConverter instance with mapping column to Map type - */ - protected Function jdbcToArrowTypeConverter( - Calendar calendar, ResultSetMetaData rsmd) { - return (jdbcFieldInfo) -> { - String columnLabel = null; - try { - int columnIndex = jdbcFieldInfo.getColumn(); - if (columnIndex != 0) { - columnLabel = rsmd.getColumnLabel(columnIndex); - } - } catch (SQLException e) { - throw new RuntimeException(e); - } - if (MAP.equals(columnLabel)) { - return new ArrowType.Map(false); - } else { - return JdbcToArrowUtils.getArrowTypeFromJdbcType(jdbcFieldInfo, calendar); - } - }; - } - - protected ResultSetMetaData getQueryMetaData(String query) throws SQLException { - return conn.createStatement().executeQuery(query).getMetaData(); - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java deleted file mode 100644 index 152b88c45a865..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.sql.Types; -import org.junit.jupiter.api.Test; - -public class JdbcFieldInfoTest { - - @Test - public void testCreateJdbcFieldInfoWithJdbcType() { - JdbcFieldInfo fieldInfo = new JdbcFieldInfo(Types.BLOB); - - assertEquals(Types.BLOB, fieldInfo.getJdbcType()); - assertEquals(0, fieldInfo.getPrecision()); - assertEquals(0, fieldInfo.getScale()); - } - - @Test - public void testCreateJdbcFieldInfoWithJdbcTypePrecisionAndScale() { - JdbcFieldInfo fieldInfo = new JdbcFieldInfo(Types.BLOB, 1, 2); - - assertEquals(Types.BLOB, fieldInfo.getJdbcType()); - assertEquals(1, fieldInfo.getPrecision()); - assertEquals(2, fieldInfo.getScale()); - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java deleted file mode 100644 index a05130f18e4ac..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java +++ /dev/null @@ -1,1019 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; -import java.sql.Date; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.sql.Time; -import java.sql.Timestamp; -import java.sql.Types; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.function.BiConsumer; -import org.apache.arrow.adapter.jdbc.binder.ColumnBinder; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BaseLargeVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; 
-import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.JsonStringHashMap; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class JdbcParameterBinderTest { - private static final long MILLIS_PER_DAY = 86_400_000; - BufferAllocator allocator; - - @BeforeEach - void beforeEach() { - allocator = new RootAllocator(); - } - - @AfterEach - void afterEach() { - allocator.close(); - } - - @Test - void bindOrder() throws SQLException { - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("ints0", new ArrowType.Int(32, true)), - Field.nullable("ints1", new ArrowType.Int(32, true)), - Field.nullable("ints2", new ArrowType.Int(32, true)))); - try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final JdbcParameterBinder binder = - JdbcParameterBinder.builder(statement, root) - .bind(/*parameterIndex=*/ 1, /*columnIndex=*/ 2) - .bind(/*parameterIndex=*/ 2, /*columnIndex=*/ 0) - .build(); - assertThat(binder.next()).isFalse(); - - final 
IntVector ints0 = (IntVector) root.getVector(0); - final IntVector ints1 = (IntVector) root.getVector(1); - final IntVector ints2 = (IntVector) root.getVector(2); - ints0.setSafe(0, 4); - ints0.setNull(1); - ints1.setNull(0); - ints1.setSafe(1, -8); - ints2.setNull(0); - ints2.setSafe(1, 12); - root.setRowCount(2); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isNull(); - assertThat(statement.getParamType(1)).isEqualTo(Types.INTEGER); - assertThat(statement.getParamValue(2)).isEqualTo(4); - assertThat(statement.getParam(3)).isNull(); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(12); - assertThat(statement.getParamValue(2)).isNull(); - assertThat(statement.getParamType(2)).isEqualTo(Types.INTEGER); - assertThat(statement.getParam(3)).isNull(); - assertThat(binder.next()).isFalse(); - - binder.reset(); - - ints0.setNull(0); - ints0.setSafe(1, -2); - ints2.setNull(0); - ints2.setSafe(1, 6); - root.setRowCount(2); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isNull(); - assertThat(statement.getParamType(1)).isEqualTo(Types.INTEGER); - assertThat(statement.getParamValue(2)).isNull(); - assertThat(statement.getParamType(2)).isEqualTo(Types.INTEGER); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(6); - assertThat(statement.getParamValue(2)).isEqualTo(-2); - assertThat(statement.getParam(3)).isNull(); - assertThat(binder.next()).isFalse(); - } - } - - @Test - void customBinder() throws SQLException { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("ints0", new ArrowType.Int(32, true)))); - - try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final JdbcParameterBinder binder = - JdbcParameterBinder.builder(statement, root) - .bind( - /*parameterIndex=*/ 1, - new ColumnBinder() { - private 
final IntVector vector = (IntVector) root.getVector(0); - - @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) - throws SQLException { - Integer value = vector.getObject(rowIndex); - if (value == null) { - statement.setString(parameterIndex, "null"); - } else { - statement.setString(parameterIndex, Integer.toString(value)); - } - } - - @Override - public int getJdbcType() { - return Types.INTEGER; - } - - @Override - public FieldVector getVector() { - return vector; - } - }) - .build(); - assertThat(binder.next()).isFalse(); - - final IntVector ints = (IntVector) root.getVector(0); - ints.setSafe(0, 4); - ints.setNull(1); - - root.setRowCount(2); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo("4"); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo("null"); - assertThat(binder.next()).isFalse(); - } - } - - @Test - void bool() throws SQLException { - testSimpleType( - ArrowType.Bool.INSTANCE, - Types.BOOLEAN, - (BitVector vector, Integer index, Boolean value) -> vector.setSafe(index, value ? 
1 : 0), - BitVector::setNull, - Arrays.asList(true, false, true)); - } - - @Test - void int8() throws SQLException { - testSimpleType( - new ArrowType.Int(8, true), - Types.TINYINT, - TinyIntVector::setSafe, - TinyIntVector::setNull, - Arrays.asList(Byte.MAX_VALUE, Byte.MIN_VALUE, (byte) 42)); - } - - @Test - void int16() throws SQLException { - testSimpleType( - new ArrowType.Int(16, true), - Types.SMALLINT, - SmallIntVector::setSafe, - SmallIntVector::setNull, - Arrays.asList(Short.MAX_VALUE, Short.MIN_VALUE, (short) 42)); - } - - @Test - void int32() throws SQLException { - testSimpleType( - new ArrowType.Int(32, true), - Types.INTEGER, - IntVector::setSafe, - IntVector::setNull, - Arrays.asList(Integer.MAX_VALUE, Integer.MIN_VALUE, 42)); - } - - @Test - void int64() throws SQLException { - testSimpleType( - new ArrowType.Int(64, true), - Types.BIGINT, - BigIntVector::setSafe, - BigIntVector::setNull, - Arrays.asList(Long.MAX_VALUE, Long.MIN_VALUE, 42L)); - } - - @Test - void float32() throws SQLException { - testSimpleType( - new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), - Types.REAL, - Float4Vector::setSafe, - Float4Vector::setNull, - Arrays.asList(Float.MIN_VALUE, Float.MAX_VALUE, Float.POSITIVE_INFINITY)); - } - - @Test - void float64() throws SQLException { - testSimpleType( - new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), - Types.DOUBLE, - Float8Vector::setSafe, - Float8Vector::setNull, - Arrays.asList(Double.MIN_VALUE, Double.MAX_VALUE, Double.POSITIVE_INFINITY)); - } - - @Test - void time32() throws SQLException { - testSimpleType( - new ArrowType.Time(TimeUnit.SECOND, 32), - Types.TIME, - (valueVectors, index, value) -> - valueVectors.setSafe(index, (int) (value.getTime() / 1_000)), - TimeSecVector::setNull, - Arrays.asList(new Time(-128_000), new Time(104_000), new Time(-42_000))); - testSimpleType( - new ArrowType.Time(TimeUnit.MILLISECOND, 32), - Types.TIME, - (valueVectors, index, value) -> valueVectors.setSafe(index, 
(int) value.getTime()), - TimeMilliVector::setNull, - Arrays.asList(new Time(-128_000), new Time(104_000), new Time(-42_000))); - } - - @Test - void time64() throws SQLException { - testSimpleType( - new ArrowType.Time(TimeUnit.MICROSECOND, 64), - Types.TIME, - (valueVectors, index, value) -> valueVectors.setSafe(index, (value.getTime() * 1_000)), - TimeMicroVector::setNull, - Arrays.asList(new Time(-128_000), new Time(104_000), new Time(-42_000))); - testSimpleType( - new ArrowType.Time(TimeUnit.NANOSECOND, 64), - Types.TIME, - (valueVectors, index, value) -> valueVectors.setSafe(index, (value.getTime() * 1_000_000)), - TimeNanoVector::setNull, - Arrays.asList(new Time(-128), new Time(104), new Time(-42))); - } - - @Test - void date32() throws SQLException { - testSimpleType( - new ArrowType.Date(DateUnit.DAY), - Types.DATE, - (valueVectors, index, value) -> - valueVectors.setSafe(index, (int) (value.getTime() / MILLIS_PER_DAY)), - DateDayVector::setNull, - Arrays.asList( - new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY))); - } - - @Test - void date64() throws SQLException { - testSimpleType( - new ArrowType.Date(DateUnit.MILLISECOND), - Types.DATE, - (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime()), - DateMilliVector::setNull, - Arrays.asList( - new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY))); - } - - @Test - void timestamp() throws SQLException { - List values = - Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000)); - testSimpleType( - new ArrowType.Timestamp(TimeUnit.SECOND, null), - Types.TIMESTAMP, - (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() / 1_000), - TimeStampSecVector::setNull, - values); - testSimpleType( - new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), - Types.TIMESTAMP, - (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime()), - 
TimeStampMilliVector::setNull, - values); - testSimpleType( - new ArrowType.Timestamp(TimeUnit.MICROSECOND, null), - Types.TIMESTAMP, - (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000), - TimeStampMicroVector::setNull, - values); - testSimpleType( - new ArrowType.Timestamp(TimeUnit.NANOSECOND, null), - Types.TIMESTAMP, - (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000_000), - TimeStampNanoVector::setNull, - values); - } - - @Test - void timestampTz() throws SQLException { - List values = - Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000)); - testSimpleType( - new ArrowType.Timestamp(TimeUnit.SECOND, "UTC"), - Types.TIMESTAMP_WITH_TIMEZONE, - (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() / 1_000), - TimeStampSecTZVector::setNull, - values); - testSimpleType( - new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"), - Types.TIMESTAMP_WITH_TIMEZONE, - (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime()), - TimeStampMilliTZVector::setNull, - values); - testSimpleType( - new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"), - Types.TIMESTAMP_WITH_TIMEZONE, - (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000), - TimeStampMicroTZVector::setNull, - values); - testSimpleType( - new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC"), - Types.TIMESTAMP_WITH_TIMEZONE, - (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000_000), - TimeStampNanoTZVector::setNull, - values); - } - - @Test - void utf8() throws SQLException { - testSimpleType( - ArrowType.Utf8.INSTANCE, - Types.VARCHAR, - (VarCharVector vector, Integer index, String value) -> - vector.setSafe(index, value.getBytes(StandardCharsets.UTF_8)), - BaseVariableWidthVector::setNull, - Arrays.asList("", "foobar", "abc")); - } - - @Test - void largeUtf8() throws SQLException { - testSimpleType( - 
ArrowType.LargeUtf8.INSTANCE, - Types.LONGVARCHAR, - (LargeVarCharVector vector, Integer index, String value) -> - vector.setSafe(index, value.getBytes(StandardCharsets.UTF_8)), - BaseLargeVariableWidthVector::setNull, - Arrays.asList("", "foobar", "abc")); - } - - @Test - void binary() throws SQLException { - testSimpleType( - ArrowType.Binary.INSTANCE, - Types.VARBINARY, - (VarBinaryVector vector, Integer index, byte[] value) -> vector.setSafe(index, value), - BaseVariableWidthVector::setNull, - Arrays.asList(new byte[0], new byte[] {2, -4}, new byte[] {0, -1, 127, -128})); - } - - @Test - void largeBinary() throws SQLException { - testSimpleType( - ArrowType.LargeBinary.INSTANCE, - Types.LONGVARBINARY, - (LargeVarBinaryVector vector, Integer index, byte[] value) -> vector.setSafe(index, value), - BaseLargeVariableWidthVector::setNull, - Arrays.asList(new byte[0], new byte[] {2, -4}, new byte[] {0, -1, 127, -128})); - } - - @Test - void fixedSizeBinary() throws SQLException { - testSimpleType( - new ArrowType.FixedSizeBinary(3), - Types.BINARY, - FixedSizeBinaryVector::setSafe, - FixedSizeBinaryVector::setNull, - Arrays.asList(new byte[3], new byte[] {1, 2, -4}, new byte[] {-1, 127, -128})); - } - - @Test - void decimal128() throws SQLException { - testSimpleType( - new ArrowType.Decimal(/*precision*/ 12, /*scale*/ 3, 128), - Types.DECIMAL, - DecimalVector::setSafe, - DecimalVector::setNull, - Arrays.asList( - new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000"))); - } - - @Test - void decimal256() throws SQLException { - testSimpleType( - new ArrowType.Decimal(/*precision*/ 12, /*scale*/ 3, 256), - Types.DECIMAL, - Decimal256Vector::setSafe, - Decimal256Vector::setNull, - Arrays.asList( - new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000"))); - } - - @Test - void listOfDouble() throws SQLException { - TriConsumer setValue = - (listVector, index, values) -> { - 
org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values).forEach(doubleValue -> writer.float8().writeFloat8(doubleValue)); - writer.endList(); - listVector.setLastSet(index); - }; - List values = - Arrays.asList( - new Double[] {0.0, Math.PI}, new Double[] {1.1, -352346.2, 2355.6}, - new Double[] {-1024.3}, new Double[] {}); - testListType( - new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), - setValue, - ListVector::setNull, - values); - } - - @Test - void listOfInt64() throws SQLException { - TriConsumer setValue = - (listVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values).forEach(longValue -> writer.bigInt().writeBigInt(longValue)); - writer.endList(); - listVector.setLastSet(index); - }; - List values = - Arrays.asList( - new Long[] {1L, 2L, 3L}, - new Long[] {4L, 5L}, - new Long[] {512L, 1024L, 2048L, 4096L}, - new Long[] {}); - testListType((ArrowType) new ArrowType.Int(64, true), setValue, ListVector::setNull, values); - } - - @Test - void listOfInt32() throws SQLException { - TriConsumer setValue = - (listVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values).forEach(integerValue -> writer.integer().writeInt(integerValue)); - writer.endList(); - listVector.setLastSet(index); - }; - List values = - Arrays.asList( - new Integer[] {1, 2, 3}, - new Integer[] {4, 5}, - new Integer[] {512, 1024, 2048, 4096}, - new Integer[] {}); - testListType((ArrowType) new ArrowType.Int(32, true), setValue, ListVector::setNull, values); - } - - @Test - void listOfBoolean() throws SQLException { - TriConsumer setValue = - (listVector, index, values) -> { - 
org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values) - .forEach(booleanValue -> writer.bit().writeBit(booleanValue ? 1 : 0)); - writer.endList(); - listVector.setLastSet(index); - }; - List values = - Arrays.asList( - new Boolean[] {true, false}, - new Boolean[] {false, false}, - new Boolean[] {true, true, false, true}, - new Boolean[] {}); - testListType((ArrowType) new ArrowType.Bool(), setValue, ListVector::setNull, values); - } - - @Test - void listOfString() throws SQLException { - TriConsumer setValue = - (listVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values) - .forEach( - stringValue -> { - if (stringValue != null) { - byte[] stringValueBytes = stringValue.getBytes(StandardCharsets.UTF_8); - try (ArrowBuf stringBuffer = allocator.buffer(stringValueBytes.length)) { - stringBuffer.writeBytes(stringValueBytes); - writer.varChar().writeVarChar(0, stringValueBytes.length, stringBuffer); - } - } else { - writer.varChar().writeNull(); - } - }); - writer.endList(); - listVector.setLastSet(index); - }; - List values = - Arrays.asList( - new String[] {"aaaa", "b1"}, - new String[] {"c", null, "d"}, - new String[] {"e", "f", "g", "h"}, - new String[] {}); - testListType((ArrowType) new ArrowType.Utf8(), setValue, ListVector::setNull, values); - } - - @Test - void mapOfString() throws SQLException { - TriConsumer> setValue = - (mapVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter(); - mapWriter.setPosition(index); - mapWriter.startMap(); - values - .entrySet() - .forEach( - mapValue -> { - if (mapValue != null) { - byte[] keyBytes = mapValue.getKey().getBytes(StandardCharsets.UTF_8); - byte[] valueBytes = mapValue.getValue().getBytes(StandardCharsets.UTF_8); - 
try (ArrowBuf keyBuf = allocator.buffer(keyBytes.length); - ArrowBuf valueBuf = allocator.buffer(valueBytes.length); ) { - mapWriter.startEntry(); - keyBuf.writeBytes(keyBytes); - valueBuf.writeBytes(valueBytes); - mapWriter.key().varChar().writeVarChar(0, keyBytes.length, keyBuf); - mapWriter.value().varChar().writeVarChar(0, valueBytes.length, valueBuf); - mapWriter.endEntry(); - } - } else { - mapWriter.writeNull(); - } - }); - mapWriter.endMap(); - }; - - JsonStringHashMap value1 = new JsonStringHashMap(); - value1.put("a", "b"); - value1.put("c", "d"); - JsonStringHashMap value2 = new JsonStringHashMap(); - value2.put("d", "e"); - value2.put("f", "g"); - value2.put("k", "l"); - JsonStringHashMap value3 = new JsonStringHashMap(); - value3.put("y", "z"); - value3.put("arrow", "cool"); - List> values = - Arrays.asList(value1, value2, value3, Collections.emptyMap()); - testMapType( - new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Utf8()); - } - - @Test - void mapOfInteger() throws SQLException { - TriConsumer> setValue = - (mapVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter(); - mapWriter.setPosition(index); - mapWriter.startMap(); - values - .entrySet() - .forEach( - mapValue -> { - if (mapValue != null) { - mapWriter.startEntry(); - mapWriter.key().integer().writeInt(mapValue.getKey()); - mapWriter.value().integer().writeInt(mapValue.getValue()); - mapWriter.endEntry(); - } else { - mapWriter.writeNull(); - } - }); - mapWriter.endMap(); - }; - - JsonStringHashMap value1 = new JsonStringHashMap(); - value1.put(1, 2); - value1.put(3, 4); - JsonStringHashMap value2 = new JsonStringHashMap(); - value2.put(5, 6); - value2.put(7, 8); - value2.put(9, 1024); - JsonStringHashMap value3 = new JsonStringHashMap(); - value3.put(Integer.MIN_VALUE, Integer.MAX_VALUE); - value3.put(0, 4096); - List> values = - Arrays.asList(value1, value2, value3, Collections.emptyMap()); - 
testMapType( - new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Int(32, true)); - } - - @FunctionalInterface - interface TriConsumer { - void accept(T value1, U value2, V value3); - } - - void testSimpleType( - ArrowType arrowType, - int jdbcType, - TriConsumer setValue, - BiConsumer setNull, - List values) - throws SQLException { - Schema schema = new Schema(Collections.singletonList(Field.nullable("field", arrowType))); - try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final JdbcParameterBinder binder = - JdbcParameterBinder.builder(statement, root).bindAll().build(); - assertThat(binder.next()).isFalse(); - - @SuppressWarnings("unchecked") - final V vector = (V) root.getVector(0); - final ColumnBinder columnBinder = ColumnBinder.forVector(vector); - assertThat(columnBinder.getJdbcType()).isEqualTo(jdbcType); - - setValue.accept(vector, 0, values.get(0)); - setValue.accept(vector, 1, values.get(1)); - setNull.accept(vector, 2); - root.setRowCount(3); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isNull(); - assertThat(statement.getParamType(1)).isEqualTo(jdbcType); - assertThat(binder.next()).isFalse(); - - binder.reset(); - - setNull.accept(vector, 0); - setValue.accept(vector, 1, values.get(2)); - setValue.accept(vector, 2, values.get(0)); - setValue.accept(vector, 3, values.get(2)); - setValue.accept(vector, 4, values.get(1)); - root.setRowCount(5); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isNull(); - assertThat(statement.getParamType(1)).isEqualTo(jdbcType); - assertThat(binder.next()).isTrue(); - 
assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); - assertThat(binder.next()).isFalse(); - } - - // Non-nullable (since some types have a specialized binder) - schema = new Schema(Collections.singletonList(Field.notNullable("field", arrowType))); - try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final JdbcParameterBinder binder = - JdbcParameterBinder.builder(statement, root).bindAll().build(); - assertThat(binder.next()).isFalse(); - - @SuppressWarnings("unchecked") - final V vector = (V) root.getVector(0); - setValue.accept(vector, 0, values.get(0)); - setValue.accept(vector, 1, values.get(1)); - root.setRowCount(2); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); - assertThat(binder.next()).isFalse(); - - binder.reset(); - - setValue.accept(vector, 0, values.get(0)); - setValue.accept(vector, 1, values.get(2)); - setValue.accept(vector, 2, values.get(0)); - setValue.accept(vector, 3, values.get(2)); - setValue.accept(vector, 4, values.get(1)); - root.setRowCount(5); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); - assertThat(binder.next()).isTrue(); - 
assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); - assertThat(binder.next()).isFalse(); - } - } - - void testListType( - ArrowType arrowType, - TriConsumer setValue, - BiConsumer setNull, - List values) - throws SQLException { - int jdbcType = Types.ARRAY; - Schema schema = - new Schema( - Collections.singletonList( - new Field( - "field", - FieldType.nullable(new ArrowType.List()), - Collections.singletonList( - new Field("element", FieldType.notNullable(arrowType), null))))); - try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final JdbcParameterBinder binder = - JdbcParameterBinder.builder(statement, root).bindAll().build(); - assertThat(binder.next()).isFalse(); - - @SuppressWarnings("unchecked") - final V vector = (V) root.getVector(0); - final ColumnBinder columnBinder = ColumnBinder.forVector(vector); - assertThat(columnBinder.getJdbcType()).isEqualTo(jdbcType); - - setValue.accept(vector, 0, values.get(0)); - setValue.accept(vector, 1, values.get(1)); - setNull.accept(vector, 2); - root.setRowCount(3); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isNull(); - assertThat(statement.getParamType(1)).isEqualTo(jdbcType); - assertThat(binder.next()).isFalse(); - - binder.reset(); - - setNull.accept(vector, 0); - setValue.accept(vector, 1, values.get(3)); - setValue.accept(vector, 2, values.get(0)); - setValue.accept(vector, 3, values.get(2)); - setValue.accept(vector, 4, values.get(1)); - root.setRowCount(5); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isNull(); - 
assertThat(statement.getParamType(1)).isEqualTo(jdbcType); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(3)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); - assertThat(binder.next()).isFalse(); - } - - // Non-nullable (since some types have a specialized binder) - schema = - new Schema( - Collections.singletonList( - new Field( - "field", - FieldType.notNullable(new ArrowType.List()), - Collections.singletonList( - new Field("element", FieldType.notNullable(arrowType), null))))); - try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final JdbcParameterBinder binder = - JdbcParameterBinder.builder(statement, root).bindAll().build(); - assertThat(binder.next()).isFalse(); - - @SuppressWarnings("unchecked") - final V vector = (V) root.getVector(0); - setValue.accept(vector, 0, values.get(0)); - setValue.accept(vector, 1, values.get(1)); - root.setRowCount(2); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); - assertThat(binder.next()).isFalse(); - - binder.reset(); - - setValue.accept(vector, 0, values.get(0)); - setValue.accept(vector, 1, values.get(2)); - setValue.accept(vector, 2, values.get(0)); - setValue.accept(vector, 3, values.get(2)); - setValue.accept(vector, 4, values.get(1)); - root.setRowCount(5); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); - assertThat(binder.next()).isTrue(); - 
assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); - assertThat(binder.next()).isFalse(); - } - } - - void testMapType( - ArrowType arrowType, - TriConsumer setValue, - BiConsumer setNull, - List values, - ArrowType elementType) - throws SQLException { - int jdbcType = Types.VARCHAR; - FieldType keyType = new FieldType(false, elementType, null, null); - FieldType mapType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - Schema schema = - new Schema( - Collections.singletonList( - new Field( - "field", - FieldType.nullable(arrowType), - Collections.singletonList( - new Field( - MapVector.KEY_NAME, - mapType, - Arrays.asList( - new Field(MapVector.KEY_NAME, keyType, null), - new Field(MapVector.VALUE_NAME, keyType, null))))))); - try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final JdbcParameterBinder binder = - JdbcParameterBinder.builder(statement, root).bindAll().build(); - assertThat(binder.next()).isFalse(); - - @SuppressWarnings("unchecked") - final V vector = (V) root.getVector(0); - final ColumnBinder columnBinder = ColumnBinder.forVector(vector); - assertThat(columnBinder.getJdbcType()).isEqualTo(jdbcType); - - setValue.accept(vector, 0, values.get(0)); - setValue.accept(vector, 1, values.get(1)); - setNull.accept(vector, 2); - root.setRowCount(3); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0).toString()); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(1).toString()); - assertThat(binder.next()).isTrue(); 
- assertThat(statement.getParamValue(1)).isNull(); - assertThat(statement.getParamType(1)).isEqualTo(jdbcType); - assertThat(binder.next()).isFalse(); - - binder.reset(); - - setNull.accept(vector, 0); - setValue.accept(vector, 1, values.get(3)); - setValue.accept(vector, 2, values.get(0)); - setValue.accept(vector, 3, values.get(2)); - setValue.accept(vector, 4, values.get(1)); - root.setRowCount(5); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isNull(); - assertThat(statement.getParamType(1)).isEqualTo(jdbcType); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(3).toString()); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0).toString()); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(2).toString()); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(1).toString()); - assertThat(binder.next()).isFalse(); - } - - // Non-nullable (since some types have a specialized binder) - schema = - new Schema( - Collections.singletonList( - new Field( - "field", - FieldType.notNullable(arrowType), - Collections.singletonList( - new Field( - MapVector.KEY_NAME, - mapType, - Arrays.asList( - new Field(MapVector.KEY_NAME, keyType, null), - new Field(MapVector.VALUE_NAME, keyType, null))))))); - try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - @SuppressWarnings("unchecked") - final V vector = (V) root.getVector(0); - - final JdbcParameterBinder binder = - JdbcParameterBinder.builder(statement, root) - .bind( - 1, - new org.apache.arrow.adapter.jdbc.binder.MapBinder( - (MapVector) vector, Types.OTHER)) - .build(); - assertThat(binder.next()).isFalse(); - - setValue.accept(vector, 0, values.get(0)); - setValue.accept(vector, 1, 
values.get(1)); - root.setRowCount(2); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); - assertThat(binder.next()).isFalse(); - - binder.reset(); - - setValue.accept(vector, 0, values.get(0)); - setValue.accept(vector, 1, values.get(2)); - setValue.accept(vector, 2, values.get(0)); - setValue.accept(vector, 3, values.get(2)); - setValue.accept(vector, 4, values.get(1)); - root.setRowCount(5); - - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(0)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(2)); - assertThat(binder.next()).isTrue(); - assertThat(statement.getParamValue(1)).isEqualTo(values.get(1)); - assertThat(binder.next()).isFalse(); - } - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowCommentMetadataTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowCommentMetadataTest.java deleted file mode 100644 index e7e324c7847f4..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowCommentMetadataTest.java +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.sql.Connection; -import java.sql.DatabaseMetaData; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class JdbcToArrowCommentMetadataTest { - - private static final String COMMENT = - "comment"; // use this metadata key for interoperability with Spark StructType - private Connection conn = null; - - /** - * This method creates Connection object and DB table and also populate data into table for test. 
- * - * @throws SQLException on error - * @throws ClassNotFoundException on error - */ - @BeforeEach - public void setUp() throws SQLException, ClassNotFoundException { - String url = - "jdbc:h2:mem:JdbcToArrowTest?characterEncoding=UTF-8;INIT=runscript from 'classpath:/h2/comment.sql'"; - String driver = "org.h2.Driver"; - Class.forName(driver); - conn = DriverManager.getConnection(url); - } - - @AfterEach - public void tearDown() throws SQLException { - if (conn != null) { - conn.close(); - conn = null; - } - } - - private static Field field( - String name, boolean nullable, ArrowType type, Map metadata) { - return new Field(name, new FieldType(nullable, type, null, metadata), Collections.emptyList()); - } - - private static Map metadata(String... entries) { - if (entries.length % 2 != 0) { - throw new IllegalArgumentException("Map must have equal number of keys and values"); - } - - final Map result = new HashMap<>(); - for (int i = 0; i < entries.length; i += 2) { - result.put(entries[i], entries[i + 1]); - } - return result; - } - - @Test - public void schemaComment() throws Exception { - boolean includeMetadata = false; - Schema schema = getSchemaWithCommentFromQuery(includeMetadata); - Schema expectedSchema = - new Schema( - Arrays.asList( - field( - "ID", - false, - Types.MinorType.BIGINT.getType(), - metadata("comment", "Record identifier")), - field( - "NAME", - true, - Types.MinorType.VARCHAR.getType(), - metadata("comment", "Name of record")), - field("COLUMN1", true, Types.MinorType.BIT.getType(), metadata()), - field( - "COLUMNN", - true, - Types.MinorType.INT.getType(), - metadata("comment", "Informative description of columnN"))), - metadata("comment", "This is super special table with valuable data")); - assertThat(schema).isEqualTo(expectedSchema); - } - - @Test - public void schemaCommentWithDatabaseMetadata() throws Exception { - boolean includeMetadata = true; - Schema schema = getSchemaWithCommentFromQuery(includeMetadata); - Schema 
expectedSchema = - new Schema( - Arrays.asList( - field( - "ID", - false, - Types.MinorType.BIGINT.getType(), - metadata( - "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", - "SQL_SCHEMA_NAME", "PUBLIC", - "SQL_TABLE_NAME", "TABLE1", - "SQL_COLUMN_NAME", "ID", - "SQL_TYPE", "BIGINT", - "comment", "Record identifier")), - field( - "NAME", - true, - Types.MinorType.VARCHAR.getType(), - metadata( - "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", - "SQL_SCHEMA_NAME", "PUBLIC", - "SQL_TABLE_NAME", "TABLE1", - "SQL_COLUMN_NAME", "NAME", - "SQL_TYPE", "CHARACTER VARYING", - "comment", "Name of record")), - field( - "COLUMN1", - true, - Types.MinorType.BIT.getType(), - metadata( - "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", - "SQL_SCHEMA_NAME", "PUBLIC", - "SQL_TABLE_NAME", "TABLE1", - "SQL_COLUMN_NAME", "COLUMN1", - "SQL_TYPE", "BOOLEAN")), - field( - "COLUMNN", - true, - Types.MinorType.INT.getType(), - metadata( - "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", - "SQL_SCHEMA_NAME", "PUBLIC", - "SQL_TABLE_NAME", "TABLE1", - "SQL_COLUMN_NAME", "COLUMNN", - "SQL_TYPE", "INTEGER", - "comment", "Informative description of columnN"))), - metadata("comment", "This is super special table with valuable data")); - assertThat(schema).isEqualTo(expectedSchema); - /* corresponding Apache Spark DDL after conversion: - ID BIGINT NOT NULL COMMENT 'Record identifier', - NAME STRING COMMENT 'Name of record', - COLUMN1 BOOLEAN, - COLUMNN INT COMMENT 'Informative description of columnN' - */ - assertThat(schema).isEqualTo(expectedSchema); - } - - private Schema getSchemaWithCommentFromQuery(boolean includeMetadata) throws SQLException { - DatabaseMetaData metaData = conn.getMetaData(); - try (Statement statement = conn.createStatement()) { - try (ResultSet resultSet = statement.executeQuery("select * from table1")) { - ResultSetMetaData resultSetMetaData = resultSet.getMetaData(); - Map> columnCommentByColumnIndex = - 
getColumnComments(metaData, resultSetMetaData); - - String tableName = getTableNameFromResultSetMetaData(resultSetMetaData); - String tableComment = getTableComment(metaData, tableName); - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder() - .setAllocator(new RootAllocator()) - .setSchemaMetadata(Collections.singletonMap(COMMENT, tableComment)) - .setColumnMetadataByColumnIndex(columnCommentByColumnIndex) - .setIncludeMetadata(includeMetadata) - .build(); - return JdbcToArrowUtils.jdbcToArrowSchema(resultSetMetaData, config); - } - } - } - - private String getTableNameFromResultSetMetaData(ResultSetMetaData resultSetMetaData) - throws SQLException { - Set tablesFromQuery = new HashSet<>(); - for (int idx = 1, columnCount = resultSetMetaData.getColumnCount(); idx <= columnCount; idx++) { - String tableName = resultSetMetaData.getTableName(idx); - if (tableName != null && !tableName.isEmpty()) { - tablesFromQuery.add(tableName); - } - } - if (tablesFromQuery.size() == 1) { - return tablesFromQuery.iterator().next(); - } - throw new RuntimeException("Table metadata is absent or ambiguous"); - } - - private Map> getColumnComments( - DatabaseMetaData metaData, ResultSetMetaData resultSetMetaData) throws SQLException { - Map> columnCommentByColumnIndex = new HashMap<>(); - for (int columnIdx = 1, columnCount = resultSetMetaData.getColumnCount(); - columnIdx <= columnCount; - columnIdx++) { - String columnComment = - getColumnComment( - metaData, - resultSetMetaData.getTableName(columnIdx), - resultSetMetaData.getColumnName(columnIdx)); - if (columnComment != null && !columnComment.isEmpty()) { - columnCommentByColumnIndex.put(columnIdx, Collections.singletonMap(COMMENT, columnComment)); - } - } - return columnCommentByColumnIndex; - } - - private String getTableComment(DatabaseMetaData metaData, String tableName) throws SQLException { - if (tableName == null || tableName.isEmpty()) { - return null; - } - String comment = null; - int rowCount = 0; - try 
(ResultSet tableMetadata = metaData.getTables(null, null, tableName, null)) { - if (tableMetadata.next()) { - comment = tableMetadata.getString("REMARKS"); - rowCount++; - } - } - if (rowCount == 1) { - return comment; - } - if (rowCount > 1) { - throw new RuntimeException("Multiple tables found for table name"); - } - throw new RuntimeException("Table comment not found"); - } - - private String getColumnComment(DatabaseMetaData metaData, String tableName, String columnName) - throws SQLException { - try (ResultSet tableMetadata = metaData.getColumns(null, null, tableName, columnName)) { - if (tableMetadata.next()) { - return tableMetadata.getString("REMARKS"); - } - } - return null; - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java deleted file mode 100644 index 70c753cf2f8ba..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.sql.Types; -import java.util.Calendar; -import java.util.HashMap; -import java.util.Locale; -import java.util.TimeZone; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Test; - -public class JdbcToArrowConfigTest { - - private static final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - private static final Calendar calendar = - Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); - - @Test - public void testConfigNullArguments() { - assertThrows( - NullPointerException.class, - () -> { - new JdbcToArrowConfig(null, null); - }); - } - - @Test - public void testBuilderNullArguments() { - assertThrows( - NullPointerException.class, - () -> { - new JdbcToArrowConfigBuilder(null, null); - }); - } - - @Test - public void testConfigNullCalendar() { - JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, null); - assertNull(config.getCalendar()); - } - - @Test - public void testBuilderNullCalendar() { - JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, null); - JdbcToArrowConfig config = builder.build(); - assertNull(config.getCalendar()); - } - - @Test - public void testConfigNullAllocator() { - assertThrows( - NullPointerException.class, - () -> { - new JdbcToArrowConfig(null, calendar); - }); - } - - @Test - public void testBuilderNullAllocator() { - assertThrows( - NullPointerException.class, - () -> { - new JdbcToArrowConfigBuilder(null, calendar); - }); - } - - @Test - public void testSetNullAllocator() { - 
assertThrows( - NullPointerException.class, - () -> { - JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar); - builder.setAllocator(null); - }); - } - - @Test - public void testSetNullCalendar() { - JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar); - JdbcToArrowConfig config = builder.setCalendar(null).build(); - assertNull(config.getCalendar()); - } - - @Test - public void testConfig() { - JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar); - JdbcToArrowConfig config = builder.build(); - - assertEquals(allocator, config.getAllocator()); - assertEquals(calendar, config.getCalendar()); - - Calendar newCalendar = Calendar.getInstance(); - BufferAllocator newAllocator = new RootAllocator(Integer.SIZE); - - builder.setAllocator(newAllocator).setCalendar(newCalendar); - config = builder.build(); - - assertEquals(newAllocator, config.getAllocator()); - assertEquals(newCalendar, config.getCalendar()); - } - - @Test - public void testIncludeMetadata() { - JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar, false); - - JdbcToArrowConfig config = builder.build(); - assertFalse(config.shouldIncludeMetadata()); - - builder.setIncludeMetadata(true); - config = builder.build(); - assertTrue(config.shouldIncludeMetadata()); - - config = new JdbcToArrowConfigBuilder(allocator, calendar, true).build(); - assertTrue(config.shouldIncludeMetadata()); - - config = - new JdbcToArrowConfig( - allocator, - calendar, /* include metadata */ - true, - /* reuse vector schema root */ true, - null, - null, - JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, - null); - assertTrue(config.shouldIncludeMetadata()); - assertTrue(config.isReuseVectorSchemaRoot()); - - config = - new JdbcToArrowConfig( - allocator, - calendar, /* include metadata */ - false, - /* reuse vector schema root */ false, - null, - null, - JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, - null); - 
assertFalse(config.shouldIncludeMetadata()); - assertFalse(config.isReuseVectorSchemaRoot()); - } - - @Test - public void testArraySubTypes() { - JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar, false); - JdbcToArrowConfig config = builder.build(); - - final int columnIndex = 1; - final String columnName = "COLUMN"; - - assertNull(config.getArraySubTypeByColumnIndex(columnIndex)); - assertNull(config.getArraySubTypeByColumnName(columnName)); - - final HashMap indexMapping = new HashMap(); - indexMapping.put(2, new JdbcFieldInfo(Types.BIGINT)); - - final HashMap fieldMapping = new HashMap(); - fieldMapping.put("NEW_COLUMN", new JdbcFieldInfo(Types.BINARY)); - - builder.setArraySubTypeByColumnIndexMap(indexMapping); - builder.setArraySubTypeByColumnNameMap(fieldMapping); - config = builder.build(); - - assertNull(config.getArraySubTypeByColumnIndex(columnIndex)); - assertNull(config.getArraySubTypeByColumnName(columnName)); - - indexMapping.put(columnIndex, new JdbcFieldInfo(Types.BIT)); - fieldMapping.put(columnName, new JdbcFieldInfo(Types.BLOB)); - - assertNotNull(config.getArraySubTypeByColumnIndex(columnIndex)); - assertEquals(Types.BIT, config.getArraySubTypeByColumnIndex(columnIndex).getJdbcType()); - assertEquals(Types.BLOB, config.getArraySubTypeByColumnName(columnName).getJdbcType()); - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java deleted file mode 100644 index 74b1ca34d73c6..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java +++ /dev/null @@ -1,487 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.math.BigDecimal; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.AbstractMap; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.TinyIntVector; -import 
org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.apache.arrow.vector.util.JsonStringHashMap; -import org.apache.arrow.vector.util.ObjectMapperFactory; -import org.apache.arrow.vector.util.Text; - -/** - * This is a Helper class which has functionalities to read and assert the values from the given - * FieldVector object. - */ -public class JdbcToArrowTestHelper { - - public static void assertIntVectorValues(IntVector intVector, int rowCount, Integer[] values) { - assertEquals(rowCount, intVector.getValueCount()); - - for (int j = 0; j < intVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(intVector.isNull(j)); - } else { - assertEquals(values[j].intValue(), intVector.get(j)); - } - } - } - - public static void assertBooleanVectorValues( - BitVector bitVector, int rowCount, Boolean[] values) { - assertEquals(rowCount, bitVector.getValueCount()); - - for (int j = 0; j < bitVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(bitVector.isNull(j)); - } else { - assertEquals(values[j].booleanValue(), bitVector.get(j) == 1); - } - } - } - - public static void assertBitVectorValues(BitVector bitVector, int rowCount, Integer[] values) { - assertEquals(rowCount, bitVector.getValueCount()); - - for (int j = 0; j < bitVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(bitVector.isNull(j)); - } else { - assertEquals(values[j].intValue(), bitVector.get(j)); - } - } - } - - public static void assertTinyIntVectorValues( - TinyIntVector tinyIntVector, int rowCount, Integer[] values) { - assertEquals(rowCount, tinyIntVector.getValueCount()); - - for (int j = 0; j < 
tinyIntVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(tinyIntVector.isNull(j)); - } else { - assertEquals(values[j].intValue(), tinyIntVector.get(j)); - } - } - } - - public static void assertSmallIntVectorValues( - SmallIntVector smallIntVector, int rowCount, Integer[] values) { - assertEquals(rowCount, smallIntVector.getValueCount()); - - for (int j = 0; j < smallIntVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(smallIntVector.isNull(j)); - } else { - assertEquals(values[j].intValue(), smallIntVector.get(j)); - } - } - } - - public static void assertBigIntVectorValues( - BigIntVector bigIntVector, int rowCount, Long[] values) { - assertEquals(rowCount, bigIntVector.getValueCount()); - - for (int j = 0; j < bigIntVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(bigIntVector.isNull(j)); - } else { - assertEquals(values[j].longValue(), bigIntVector.get(j)); - } - } - } - - public static void assertDecimalVectorValues( - DecimalVector decimalVector, int rowCount, BigDecimal[] values) { - assertEquals(rowCount, decimalVector.getValueCount()); - - for (int j = 0; j < decimalVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(decimalVector.isNull(j)); - } else { - assertEquals(values[j].doubleValue(), decimalVector.getObject(j).doubleValue(), 0); - } - } - } - - public static void assertFloat8VectorValues( - Float8Vector float8Vector, int rowCount, Double[] values) { - assertEquals(rowCount, float8Vector.getValueCount()); - - for (int j = 0; j < float8Vector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(float8Vector.isNull(j)); - } else { - assertEquals(values[j], float8Vector.get(j), 0.01); - } - } - } - - public static void assertFloat4VectorValues( - Float4Vector float4Vector, int rowCount, Float[] values) { - assertEquals(rowCount, float4Vector.getValueCount()); - - for (int j = 0; j < float4Vector.getValueCount(); j++) { - if (values[j] == null) { - 
assertTrue(float4Vector.isNull(j)); - } else { - assertEquals(values[j], float4Vector.get(j), 0.01); - } - } - } - - public static void assertTimeVectorValues( - TimeMilliVector timeMilliVector, int rowCount, Long[] values) { - assertEquals(rowCount, timeMilliVector.getValueCount()); - - for (int j = 0; j < timeMilliVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(timeMilliVector.isNull(j)); - } else { - assertEquals(values[j].longValue(), timeMilliVector.get(j)); - } - } - } - - public static void assertDateVectorValues( - DateDayVector dateDayVector, int rowCount, Integer[] values) { - assertEquals(rowCount, dateDayVector.getValueCount()); - - for (int j = 0; j < dateDayVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(dateDayVector.isNull(j)); - } else { - assertEquals(values[j].longValue(), dateDayVector.get(j)); - } - } - } - - public static void assertTimeStampVectorValues( - TimeStampVector timeStampVector, int rowCount, Long[] values) { - assertEquals(rowCount, timeStampVector.getValueCount()); - - for (int j = 0; j < timeStampVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(timeStampVector.isNull(j)); - } else { - assertEquals(values[j].longValue(), timeStampVector.get(j)); - } - } - } - - public static void assertVarBinaryVectorValues( - VarBinaryVector varBinaryVector, int rowCount, byte[][] values) { - assertEquals(rowCount, varBinaryVector.getValueCount()); - - for (int j = 0; j < varBinaryVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(varBinaryVector.isNull(j)); - } else { - assertArrayEquals(values[j], varBinaryVector.get(j)); - } - } - } - - public static void assertVarcharVectorValues( - VarCharVector varCharVector, int rowCount, byte[][] values) { - assertEquals(rowCount, varCharVector.getValueCount()); - - for (int j = 0; j < varCharVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(varCharVector.isNull(j)); - } else { - 
assertArrayEquals(values[j], varCharVector.get(j)); - } - } - } - - public static void assertNullVectorValues(NullVector vector, int rowCount) { - assertEquals(rowCount, vector.getValueCount()); - } - - public static void assertListVectorValues( - ListVector listVector, int rowCount, Integer[][] values) { - assertEquals(rowCount, listVector.getValueCount()); - - for (int j = 0; j < listVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(listVector.isNull(j)); - } else { - List list = (List) listVector.getObject(j); - assertEquals(Arrays.asList(values[j]), list); - } - } - } - - public static void assertMapVectorValues( - MapVector mapVector, int rowCount, Map[] values) { - assertEquals(rowCount, mapVector.getValueCount()); - - for (int j = 0; j < mapVector.getValueCount(); j++) { - if (values[j] == null) { - assertTrue(mapVector.isNull(j)); - } else { - JsonStringArrayList> actualSource = - (JsonStringArrayList>) mapVector.getObject(j); - Map actualMap = null; - if (actualSource != null && !actualSource.isEmpty()) { - actualMap = - actualSource.stream() - .map( - entry -> - new AbstractMap.SimpleEntry<>( - entry.get("key").toString(), - entry.get("value") != null ? 
entry.get("value").toString() : null)) - .collect( - HashMap::new, - (collector, val) -> collector.put(val.getKey(), val.getValue()), - HashMap::putAll); - } - assertEquals(values[j], actualMap); - } - } - } - - public static Map[] getMapValues(String[] values, String dataType) { - String[] dataArr = getValues(values, dataType); - Map[] maps = new Map[dataArr.length]; - ObjectMapper objectMapper = ObjectMapperFactory.newObjectMapper(); - TypeReference> typeReference = new TypeReference>() {}; - for (int idx = 0; idx < dataArr.length; idx++) { - String jsonString = dataArr[idx].replace("|", ","); - if (!jsonString.isEmpty()) { - try { - maps[idx] = objectMapper.readValue(jsonString, typeReference); - } catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - } - } - return maps; - } - - public static void assertNullValues(BaseValueVector vector, int rowCount) { - assertEquals(rowCount, vector.getValueCount()); - - for (int j = 0; j < vector.getValueCount(); j++) { - assertTrue(vector.isNull(j)); - } - } - - public static void assertFieldMetadataIsEmpty(VectorSchemaRoot schema) { - assertNotNull(schema); - assertNotNull(schema.getSchema()); - assertNotNull(schema.getSchema().getFields()); - - for (Field field : schema.getSchema().getFields()) { - assertNotNull(field.getMetadata()); - assertEquals(0, field.getMetadata().size()); - } - } - - public static void assertFieldMetadataMatchesResultSetMetadata( - ResultSetMetaData rsmd, Schema schema) throws SQLException { - assertNotNull(schema); - assertNotNull(schema.getFields()); - assertNotNull(rsmd); - - List fields = schema.getFields(); - - assertEquals(rsmd.getColumnCount(), fields.size()); - - // Vector columns are created in the same order as ResultSet columns. 
- for (int i = 1; i <= rsmd.getColumnCount(); ++i) { - Map metadata = fields.get(i - 1).getMetadata(); - - assertNotNull(metadata); - assertEquals(5, metadata.size()); - - assertEquals(rsmd.getCatalogName(i), metadata.get(Constants.SQL_CATALOG_NAME_KEY)); - assertEquals(rsmd.getSchemaName(i), metadata.get(Constants.SQL_SCHEMA_NAME_KEY)); - assertEquals(rsmd.getTableName(i), metadata.get(Constants.SQL_TABLE_NAME_KEY)); - assertEquals(rsmd.getColumnLabel(i), metadata.get(Constants.SQL_COLUMN_NAME_KEY)); - assertEquals(rsmd.getColumnTypeName(i), metadata.get(Constants.SQL_TYPE_KEY)); - } - } - - public static Integer[] getIntValues(String[] values, String dataType) { - String[] dataArr = getValues(values, dataType); - Integer[] valueArr = new Integer[dataArr.length]; - int i = 0; - for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : Integer.parseInt(data); - } - return valueArr; - } - - public static Boolean[] getBooleanValues(String[] values, String dataType) { - String[] dataArr = getValues(values, dataType); - Boolean[] valueArr = new Boolean[dataArr.length]; - int i = 0; - for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().equals("1"); - } - return valueArr; - } - - public static BigDecimal[] getDecimalValues(String[] values, String dataType) { - String[] dataArr = getValues(values, dataType); - BigDecimal[] valueArr = new BigDecimal[dataArr.length]; - int i = 0; - for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : new BigDecimal(data); - } - return valueArr; - } - - public static Double[] getDoubleValues(String[] values, String dataType) { - String[] dataArr = getValues(values, dataType); - Double[] valueArr = new Double[dataArr.length]; - int i = 0; - for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? 
null : Double.parseDouble(data); - } - return valueArr; - } - - public static Float[] getFloatValues(String[] values, String dataType) { - String[] dataArr = getValues(values, dataType); - Float[] valueArr = new Float[dataArr.length]; - int i = 0; - for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : Float.parseFloat(data); - } - return valueArr; - } - - public static Long[] getLongValues(String[] values, String dataType) { - String[] dataArr = getValues(values, dataType); - Long[] valueArr = new Long[dataArr.length]; - int i = 0; - for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : Long.parseLong(data); - } - return valueArr; - } - - public static byte[][] getCharArray(String[] values, String dataType) { - String[] dataArr = getValues(values, dataType); - byte[][] valueArr = new byte[dataArr.length][]; - int i = 0; - for (String data : dataArr) { - valueArr[i++] = - "null".equals(data.trim()) ? null : data.trim().getBytes(StandardCharsets.UTF_8); - } - return valueArr; - } - - public static byte[][] getCharArrayWithCharSet( - String[] values, String dataType, Charset charSet) { - String[] dataArr = getValues(values, dataType); - byte[][] valueArr = new byte[dataArr.length][]; - int i = 0; - for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().getBytes(charSet); - } - return valueArr; - } - - public static byte[][] getBinaryValues(String[] values, String dataType) { - String[] dataArr = getValues(values, dataType); - byte[][] valueArr = new byte[dataArr.length][]; - int i = 0; - for (String data : dataArr) { - valueArr[i++] = - "null".equals(data.trim()) ? 
null : data.trim().getBytes(StandardCharsets.UTF_8); - } - return valueArr; - } - - @SuppressWarnings("StringSplitter") - public static String[] getValues(String[] values, String dataType) { - String value = ""; - for (String val : values) { - if (val.startsWith(dataType)) { - value = val.split("=")[1]; - break; - } - } - return value.split(","); - } - - public static Integer[][] getListValues(String[] values, String dataType) { - String[] dataArr = getValues(values, dataType); - return getListValues(dataArr); - } - - @SuppressWarnings("StringSplitter") - public static Integer[][] getListValues(String[] dataArr) { - Integer[][] valueArr = new Integer[dataArr.length][]; - int i = 0; - for (String data : dataArr) { - if ("null".equals(data.trim())) { - valueArr[i++] = null; - } else if ("()".equals(data.trim())) { - valueArr[i++] = new Integer[0]; - } else { - String[] row = data.replace("(", "").replace(")", "").split(";"); - Integer[] arr = new Integer[row.length]; - for (int j = 0; j < arr.length; j++) { - arr[j] = "null".equals(row[j]) ? null : Integer.parseInt(row[j]); - } - valueArr[i++] = arr; - } - } - return valueArr; - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java deleted file mode 100644 index 8dfc684e22f24..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java +++ /dev/null @@ -1,535 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -import java.io.InputStream; -import java.io.Reader; -import java.math.BigDecimal; -import java.net.URL; -import java.sql.Array; -import java.sql.Blob; -import java.sql.Clob; -import java.sql.Connection; -import java.sql.Date; -import java.sql.NClob; -import java.sql.ParameterMetaData; -import java.sql.PreparedStatement; -import java.sql.Ref; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.RowId; -import java.sql.SQLException; -import java.sql.SQLWarning; -import java.sql.SQLXML; -import java.sql.Time; -import java.sql.Timestamp; -import java.util.Calendar; -import java.util.HashMap; -import java.util.Map; - -/** A PreparedStatement that just stores parameters set on it. 
*/ -public final class MockPreparedStatement implements PreparedStatement { - static class ParameterHolder { - final Object value; - final Integer sqlType; - public Calendar calendar; - - ParameterHolder(Object value, Integer sqlType) { - this.value = value; - this.sqlType = sqlType; - } - } - - private final Map parameters; - - MockPreparedStatement() { - parameters = new HashMap<>(); - } - - ParameterHolder getParam(int index) { - return parameters.get(index); - } - - Object getParamValue(int index) { - return parameters.get(index).value; - } - - Integer getParamType(int index) { - return parameters.get(index).sqlType; - } - - @Override - public ResultSet executeQuery() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int executeUpdate() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void setNull(int parameterIndex, int sqlType) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(null, sqlType)); - } - - @Override - public void setBoolean(int parameterIndex, boolean x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setByte(int parameterIndex, byte x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setShort(int parameterIndex, short x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setInt(int parameterIndex, int x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setLong(int parameterIndex, long x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setFloat(int parameterIndex, float x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void 
setDouble(int parameterIndex, double x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setBigDecimal(int parameterIndex, BigDecimal x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setString(int parameterIndex, String x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setBytes(int parameterIndex, byte[] x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setDate(int parameterIndex, Date x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setTime(int parameterIndex, Time x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setTimestamp(int parameterIndex, Timestamp x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setAsciiStream(int parameterIndex, InputStream x, int length) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - @Deprecated - public void setUnicodeStream(int parameterIndex, InputStream x, int length) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setBinaryStream(int parameterIndex, InputStream x, int length) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void clearParameters() throws SQLException { - parameters.clear(); - } - - @Override - public void setObject(int parameterIndex, Object x, int targetSqlType) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, targetSqlType)); - } - - @Override - public void setObject(int parameterIndex, Object x) throws 
SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public boolean execute() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void addBatch() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void setCharacterStream(int parameterIndex, Reader reader, int length) - throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(reader, null)); - } - - @Override - public void setRef(int parameterIndex, Ref x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setBlob(int parameterIndex, Blob x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setClob(int parameterIndex, Clob x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setArray(int parameterIndex, Array x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public ResultSetMetaData getMetaData() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void setDate(int parameterIndex, Date x, Calendar cal) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setTime(int parameterIndex, Time x, Calendar cal) throws SQLException {} - - @Override - public void setTimestamp(int parameterIndex, Timestamp x, Calendar cal) throws SQLException { - ParameterHolder value = new ParameterHolder(x, null); - value.calendar = cal; - parameters.put(parameterIndex, value); - } - - @Override - public void setNull(int parameterIndex, int sqlType, String typeName) throws SQLException {} - - @Override - public void setURL(int parameterIndex, URL x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); 
- } - - @Override - public ParameterMetaData getParameterMetaData() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void setRowId(int parameterIndex, RowId x) throws SQLException { - parameters.put(parameterIndex, new ParameterHolder(x, null)); - } - - @Override - public void setNString(int parameterIndex, String value) throws SQLException {} - - @Override - public void setNCharacterStream(int parameterIndex, Reader value, long length) - throws SQLException {} - - @Override - public void setNClob(int parameterIndex, NClob value) throws SQLException {} - - @Override - public void setClob(int parameterIndex, Reader reader, long length) throws SQLException {} - - @Override - public void setBlob(int parameterIndex, InputStream inputStream, long length) - throws SQLException {} - - @Override - public void setNClob(int parameterIndex, Reader reader, long length) throws SQLException {} - - @Override - public void setSQLXML(int parameterIndex, SQLXML xmlObject) throws SQLException {} - - @Override - public void setObject(int parameterIndex, Object x, int targetSqlType, int scaleOrLength) - throws SQLException {} - - @Override - public void setAsciiStream(int parameterIndex, InputStream x, long length) throws SQLException {} - - @Override - public void setBinaryStream(int parameterIndex, InputStream x, long length) throws SQLException {} - - @Override - public void setCharacterStream(int parameterIndex, Reader reader, long length) - throws SQLException {} - - @Override - public void setAsciiStream(int parameterIndex, InputStream x) throws SQLException {} - - @Override - public void setBinaryStream(int parameterIndex, InputStream x) throws SQLException {} - - @Override - public void setCharacterStream(int parameterIndex, Reader reader) throws SQLException {} - - @Override - public void setNCharacterStream(int parameterIndex, Reader value) throws SQLException {} - - @Override - public void setClob(int parameterIndex, Reader 
reader) throws SQLException {} - - @Override - public void setBlob(int parameterIndex, InputStream inputStream) throws SQLException {} - - @Override - public void setNClob(int parameterIndex, Reader reader) throws SQLException {} - - @Override - public ResultSet executeQuery(String sql) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int executeUpdate(String sql) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void close() throws SQLException {} - - @Override - public int getMaxFieldSize() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void setMaxFieldSize(int max) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int getMaxRows() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void setMaxRows(int max) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void setEscapeProcessing(boolean enable) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int getQueryTimeout() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void setQueryTimeout(int seconds) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void cancel() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public SQLWarning getWarnings() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void clearWarnings() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void setCursorName(String name) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public boolean execute(String sql) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - 
public ResultSet getResultSet() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int getUpdateCount() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public boolean getMoreResults() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void setFetchDirection(int direction) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int getFetchDirection() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void setFetchSize(int rows) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int getFetchSize() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int getResultSetConcurrency() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int getResultSetType() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void addBatch(String sql) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void clearBatch() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int[] executeBatch() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public Connection getConnection() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public boolean getMoreResults(int current) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public ResultSet getGeneratedKeys() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int executeUpdate(String sql, int autoGeneratedKeys) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int executeUpdate(String sql, int[] 
columnIndexes) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int executeUpdate(String sql, String[] columnNames) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public boolean execute(String sql, int autoGeneratedKeys) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public boolean execute(String sql, int[] columnIndexes) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public boolean execute(String sql, String[] columnNames) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public int getResultSetHoldability() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public boolean isClosed() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void setPoolable(boolean poolable) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public boolean isPoolable() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public void closeOnCompletion() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public boolean isCloseOnCompletion() throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public T unwrap(Class iface) throws SQLException { - throw new UnsupportedOperationException(); - } - - @Override - public boolean isWrapperFor(Class iface) throws SQLException { - throw new UnsupportedOperationException(); - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java deleted file mode 100644 index 5f5f6dcb98d43..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java +++ /dev/null @@ -1,1751 +0,0 
@@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -import java.io.InputStream; -import java.io.Reader; -import java.math.BigDecimal; -import java.net.URL; -import java.sql.Array; -import java.sql.Blob; -import java.sql.Clob; -import java.sql.Date; -import java.sql.NClob; -import java.sql.Ref; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.RowId; -import java.sql.SQLException; -import java.sql.SQLType; -import java.sql.SQLWarning; -import java.sql.SQLXML; -import java.sql.Statement; -import java.sql.Time; -import java.sql.Timestamp; -import java.sql.Types; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.List; -import java.util.Map; - -public class ResultSetUtility { - - public static ResultSet generateEmptyResultSet() throws SQLException { - MockDataElement element = new MockDataElement("string_example"); - MockResultSetMetaData.MockColumnMetaData columnMetaData = - MockResultSetMetaData.MockColumnMetaData.fromDataElement(element, 1); - ArrayList cols = new ArrayList<>(); - cols.add(columnMetaData); - ResultSetMetaData metadata = new MockResultSetMetaData(cols); - return 
MockResultSet.builder().setMetaData(metadata).build(); - } - - public static MockResultSet generateBasicResultSet(int rows) throws SQLException { - MockResultSet.Builder builder = MockResultSet.builder(); - for (int i = 0; i < rows; i++) { - builder.addDataElement("row number: " + (i + 1)).addDataElement("data").finishRow(); - } - return builder.build(); - } - - public static class MockResultSet extends ThrowingResultSet { - private final List rows; - private int index = 0; - private boolean isClosed = false; - private ResultSetMetaData metadata; - private boolean wasNull; - - public MockResultSet(List rows) throws SQLException { - this(rows, MockResultSetMetaData.fromRows(rows)); - } - - public MockResultSet(List rows, ResultSetMetaData metadata) { - this.rows = rows; - this.metadata = metadata; - this.wasNull = false; - } - - public static Builder builder() { - return new Builder(); - } - - private void throwIfClosed() throws SQLException { - if (isClosed) { - throw new SQLException("ResultSet is already closed!"); - } - } - - private void setWasNull(MockDataElement element) { - wasNull = element.isNull(); - } - - @Override - public boolean next() throws SQLException { - throwIfClosed(); - index++; - return index <= rows.size(); - } - - @Override - public void close() throws SQLException { - throwIfClosed(); - isClosed = true; - } - - @Override - public boolean isBeforeFirst() throws SQLException { - throwIfClosed(); - return index == 0; - } - - @Override - public boolean isAfterLast() throws SQLException { - return index > rows.size(); - } - - @Override - public boolean isFirst() throws SQLException { - return index == 1; - } - - @Override - public boolean isLast() throws SQLException { - return index == rows.size(); - } - - @Override - public void beforeFirst() throws SQLException { - index = 0; - } - - @Override - public void afterLast() throws SQLException { - index = rows.size(); - } - - private MockRow getCurrentRow() throws SQLException { - 
throwIfClosed(); - if (index == 0) { - throw new SQLException("Index is before first element!"); - } - if (index <= rows.size()) { - return rows.get(index - 1); - } - throw new SQLException("Unable to fetch row at index: " + index); - } - - private MockDataElement getDataElementAtCol(int idx) throws SQLException { - MockRow row = getCurrentRow(); - MockDataElement element = row.getDataElementAtIndex(idx - 1); - setWasNull(element); - return element; - } - - @Override - public String getString(int idx) throws SQLException { - return getDataElementAtCol(idx).getString(); - } - - @Override - public boolean getBoolean(int idx) throws SQLException { - return getDataElementAtCol(idx).getBoolean(); - } - - @Override - public short getShort(int idx) throws SQLException { - return getDataElementAtCol(idx).getShort(); - } - - @Override - public int getInt(int idx) throws SQLException { - return getDataElementAtCol(idx).getInt(); - } - - @Override - public long getLong(int idx) throws SQLException { - return getDataElementAtCol(idx).getLong(); - } - - @Override - public float getFloat(int idx) throws SQLException { - return getDataElementAtCol(idx).getFloat(); - } - - @Override - public double getDouble(int idx) throws SQLException { - return getDataElementAtCol(idx).getDouble(); - } - - @Override - public BigDecimal getBigDecimal(int idx) throws SQLException { - return getDataElementAtCol(idx).getBigDecimal(); - } - - @Override - public Date getDate(int idx) throws SQLException { - return getDataElementAtCol(idx).getDate(); - } - - @Override - public Time getTime(int idx) throws SQLException { - return getDataElementAtCol(idx).getTime(); - } - - @Override - public Timestamp getTimestamp(int idx) throws SQLException { - return getDataElementAtCol(idx).getTimestamp(); - } - - @Override - public ResultSetMetaData getMetaData() throws SQLException { - return metadata; - } - - @Override - public boolean wasNull() throws SQLException { - return wasNull; - } - - public static class 
Builder { - private final ArrayList rows; - private ArrayList bufferedElements; - private ResultSetMetaData metadata; - - Builder() { - this.rows = new ArrayList<>(); - this.bufferedElements = new ArrayList<>(); - } - - public Builder finishRow() { - rows.add(new MockRow(this.bufferedElements)); - this.bufferedElements = new ArrayList<>(); - return this; - } - - public Builder addDataElement(MockDataElement element) { - this.bufferedElements.add(element); - return this; - } - - public Builder addDataElement(String str) { - return this.addDataElement(new MockDataElement(str)); - } - - public Builder addDataElement(Object val, int sqlType) { - return this.addDataElement(new MockDataElement(val, sqlType)); - } - - public Builder setMetaData(ResultSetMetaData metadata) { - this.metadata = metadata; - return this; - } - - public MockResultSet build() throws SQLException { - if (this.metadata == null) { - return new MockResultSet(this.rows); - } - return new MockResultSet(this.rows, this.metadata); - } - } - } - - public static class MockResultSetMetaData extends ThrowingResultSetMetaData { - private final List columns; - - public MockResultSetMetaData(List columns) { - this.columns = columns; - } - - @Override - public int getColumnCount() throws SQLException { - return columns.size(); - } - - @Override - public String getColumnLabel(int column) throws SQLException { - return columns.get(column - 1).getLabel(); - } - - @Override - public String getColumnName(int column) throws SQLException { - return columns.get(column - 1).getName(); - } - - @Override - public int getColumnType(int column) throws SQLException { - return columns.get(column - 1).getType(); - } - - @Override - public int getPrecision(int column) throws SQLException { - return columns.get(column - 1).getPrecision(); - } - - @Override - public int getScale(int column) throws SQLException { - return columns.get(column - 1).getScale(); - } - - @Override - public int isNullable(int column) throws SQLException 
{ - return columns.get(column - 1).isNullable(); - } - - @Override - public int getColumnDisplaySize(int column) throws SQLException { - return columns.get(column - 1).getDisplaySize(); - } - - @Override - public String getColumnTypeName(int column) throws SQLException { - return columns.get(column - 1).getTypeName(); - } - - public static MockResultSetMetaData fromRows(List rows) throws SQLException { - // Note: This attempts to dynamically construct ResultSetMetaData from the first row in a - // given result set. - // If there are now rows, or the result set contains no columns, this cannot be dynamically - // generated and - // an exception will be thrown. - if (rows.size() == 0) { - throw new SQLException( - "Unable to dynamically generate ResultSetMetaData because row count is zero!"); - } - MockRow firstRow = rows.get(0); - if (firstRow.dataElements.size() == 0) { - throw new SQLException( - "Unable to dynamically generate ResultSetMetaData because column count is zero!"); - } - ArrayList columns = new ArrayList<>(); - for (int i = 0; i < firstRow.dataElements.size(); i++) { - MockDataElement element = firstRow.getDataElementAtIndex(i); - columns.add(MockColumnMetaData.fromDataElement(element, i)); - } - return new MockResultSetMetaData(columns); - } - - public static class MockColumnMetaData { - private int sqlType; - private int precision; - private int scale; - private int nullable; - private String label; - private String typeName; - private int displaySize; - - private MockColumnMetaData() {} - - private String getLabel() { - return label; - } - - private String getName() { - return getLabel(); - } - - private int getType() { - return sqlType; - } - - private int getPrecision() { - return precision; - } - - private int getScale() { - return scale; - } - - private int isNullable() { - return nullable; - } - - private String getTypeName() { - return typeName; - } - - private int getDisplaySize() { - return displaySize; - } - - public static 
MockColumnMetaData fromDataElement(MockDataElement element, int i) - throws SQLException { - return MockColumnMetaData.builder() - .sqlType(element.getSqlType()) - .precision(element.getPrecision()) - .scale(element.getScale()) - .nullable(element.isNullable()) - .setTypeName("TYPE") - .setDisplaySize(420) - .label("col_" + i) - .build(); - } - - public static Builder builder() { - return new Builder(); - } - - public static class Builder { - private MockColumnMetaData columnMetaData = new MockColumnMetaData(); - - public Builder label(String label) { - this.columnMetaData.label = label; - return this; - } - - public Builder sqlType(int sqlType) { - this.columnMetaData.sqlType = sqlType; - return this; - } - - public Builder precision(int precision) { - this.columnMetaData.precision = precision; - return this; - } - - public Builder scale(int scale) { - this.columnMetaData.scale = scale; - return this; - } - - public Builder nullable(int nullable) { - this.columnMetaData.nullable = nullable; - return this; - } - - public Builder setTypeName(String typeName) { - this.columnMetaData.typeName = typeName; - return this; - } - - public Builder setDisplaySize(int displaySize) { - this.columnMetaData.displaySize = displaySize; - return this; - } - - public MockColumnMetaData build() { - return this.columnMetaData; - } - } - } - } - - public static class MockRow { - private final List dataElements; - - public MockRow(List elements) { - this.dataElements = elements; - } - - public MockDataElement getDataElementAtIndex(int idx) throws SQLException { - if (idx > dataElements.size()) { - throw new SQLException("Unable to find data element at position: " + idx); - } - return dataElements.get(idx); - } - } - - public static class MockDataElement { - private final Object value; - private final int sqlType; - - public MockDataElement(String val) { - this(val, Types.VARCHAR); - } - - public MockDataElement(Object val, int sqlType) { - this.value = val; - this.sqlType = sqlType; - } 
- - private boolean isNull() { - return value == null; - } - - private String getValueAsString() { - return value.toString(); - } - - private int getPrecision() throws SQLException { - switch (this.sqlType) { - case Types.VARCHAR: - return getValueAsString().length(); - case Types.DECIMAL: - return getBigDecimal().precision(); - default: - throw getExceptionToThrow("Unable to determine precision for data type: " + sqlType); - } - } - - private int getScale() throws SQLException { - switch (this.sqlType) { - case Types.VARCHAR: - return 0; - case Types.DECIMAL: - return getBigDecimal().scale(); - default: - throw getExceptionToThrow("Unable to determine scale for data type!"); - } - } - - private int isNullable() throws SQLException { - switch (this.sqlType) { - case Types.VARCHAR: - case Types.DECIMAL: - return ResultSetMetaData.columnNullable; - default: - return ResultSetMetaData.columnNullableUnknown; - } - } - - private int getSqlType() throws SQLException { - return this.sqlType; - } - - public BigDecimal getBigDecimal() throws SQLException { - if (value == null) { - return null; - } - try { - return new BigDecimal(getValueAsString()); - } catch (Exception ex) { - throw new SQLException(ex); - } - } - - public String getString() throws SQLException { - if (value == null) { - return null; - } - return getValueAsString(); - } - - public boolean getBoolean() throws SQLException { - if (value == null) { - return false; - } - try { - return (boolean) value; - } catch (Exception ex) { - throw new SQLException(ex); - } - } - - public int getInt() throws SQLException { - if (value == null) { - return 0; - } - try { - return Integer.parseInt(getValueAsString()); - } catch (Exception ex) { - throw new SQLException(ex); - } - } - - public long getLong() throws SQLException { - if (value == null) { - return 0L; - } - try { - return Long.parseLong(getValueAsString()); - } catch (Exception ex) { - throw new SQLException(ex); - } - } - - public double getDouble() throws 
SQLException { - if (value == null) { - return 0.0; - } - try { - return Double.parseDouble(getValueAsString()); - } catch (Exception ex) { - throw new SQLException(ex); - } - } - - public Date getDate() throws SQLException { - if (value == null) { - return null; - } - try { - return Date.valueOf(getValueAsString()); - } catch (Exception ex) { - throw new SQLException(ex); - } - } - - public Time getTime() throws SQLException { - if (value == null) { - return null; - } - try { - return Time.valueOf(getValueAsString()); - } catch (Exception ex) { - throw new SQLException(ex); - } - } - - public Timestamp getTimestamp() throws SQLException { - if (value == null) { - return null; - } - try { - return Timestamp.valueOf(getValueAsString()); - } catch (Exception ex) { - throw new SQLException(ex); - } - } - - public float getFloat() throws SQLException { - if (value == null) { - return 0.0f; - } - try { - return Float.parseFloat(getValueAsString()); - } catch (Exception ex) { - throw new SQLException(ex); - } - } - - public short getShort() throws SQLException { - if (value == null) { - return 0; - } - try { - return Short.parseShort(getValueAsString()); - } catch (Exception ex) { - throw new SQLException(ex); - } - } - } - - public static class ThrowingResultSet implements ResultSet { - - @Override - public boolean next() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void close() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean wasNull() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public String getString(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean getBoolean(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public byte getByte(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public short getShort(int columnIndex) throws 
SQLException { - throw getExceptionToThrow(); - } - - @Override - public int getInt(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public long getLong(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public float getFloat(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public double getDouble(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public BigDecimal getBigDecimal(int columnIndex, int scale) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public byte[] getBytes(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Date getDate(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Time getTime(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Timestamp getTimestamp(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public InputStream getAsciiStream(int columnIndex) throws SQLException { - return null; - } - - @Override - public InputStream getUnicodeStream(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public InputStream getBinaryStream(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public String getString(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean getBoolean(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public byte getByte(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public short getShort(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public int getInt(String columnLabel) throws SQLException { - throw 
getExceptionToThrow(); - } - - @Override - public long getLong(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public float getFloat(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public double getDouble(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public BigDecimal getBigDecimal(String columnLabel, int scale) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public byte[] getBytes(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Date getDate(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Time getTime(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Timestamp getTimestamp(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public InputStream getAsciiStream(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public InputStream getUnicodeStream(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public InputStream getBinaryStream(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public SQLWarning getWarnings() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void clearWarnings() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public String getCursorName() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public ResultSetMetaData getMetaData() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Object getObject(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Object getObject(String columnLabel) throws SQLException { - throw 
getExceptionToThrow(); - } - - @Override - public int findColumn(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Reader getCharacterStream(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Reader getCharacterStream(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public BigDecimal getBigDecimal(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public BigDecimal getBigDecimal(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean isBeforeFirst() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean isAfterLast() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean isFirst() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean isLast() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void beforeFirst() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void afterLast() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean first() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean last() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public int getRow() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean absolute(int row) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean relative(int rows) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean previous() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public int getFetchDirection() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void setFetchDirection(int direction) 
throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public int getFetchSize() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void setFetchSize(int rows) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public int getType() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public int getConcurrency() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean rowUpdated() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean rowInserted() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean rowDeleted() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateNull(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBoolean(int columnIndex, boolean x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateByte(int columnIndex, byte x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateShort(int columnIndex, short x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateInt(int columnIndex, int x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateLong(int columnIndex, long x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateFloat(int columnIndex, float x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateDouble(int columnIndex, double x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBigDecimal(int columnIndex, BigDecimal x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateString(int columnIndex, String x) throws SQLException { - throw getExceptionToThrow(); - } - - 
@Override - public void updateBytes(int columnIndex, byte[] x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateDate(int columnIndex, Date x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateTime(int columnIndex, Time x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateTimestamp(int columnIndex, Timestamp x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateAsciiStream(int columnIndex, InputStream x, int length) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBinaryStream(int columnIndex, InputStream x, int length) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateCharacterStream(int columnIndex, Reader x, int length) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateObject(int columnIndex, Object x, int scaleOrLength) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateObject(int columnIndex, Object x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateNull(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBoolean(String columnLabel, boolean x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateByte(String columnLabel, byte x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateShort(String columnLabel, short x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateInt(String columnLabel, int x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateLong(String columnLabel, long x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateFloat(String 
columnLabel, float x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateDouble(String columnLabel, double x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBigDecimal(String columnLabel, BigDecimal x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateString(String columnLabel, String x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBytes(String columnLabel, byte[] x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateDate(String columnLabel, Date x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateTime(String columnLabel, Time x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateTimestamp(String columnLabel, Timestamp x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateAsciiStream(String columnLabel, InputStream x, int length) - throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBinaryStream(String columnLabel, InputStream x, int length) - throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateCharacterStream(String columnLabel, Reader reader, int length) - throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateObject(String columnLabel, Object x, int scaleOrLength) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateObject(String columnLabel, Object x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void insertRow() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateRow() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void deleteRow() throws SQLException { - throw 
getExceptionToThrow(); - } - - @Override - public void refreshRow() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void cancelRowUpdates() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void moveToInsertRow() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void moveToCurrentRow() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Statement getStatement() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Object getObject(int columnIndex, Map> map) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Ref getRef(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Blob getBlob(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Clob getClob(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Array getArray(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Object getObject(String columnLabel, Map> map) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Ref getRef(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Blob getBlob(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Clob getClob(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Array getArray(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Date getDate(int columnIndex, Calendar cal) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Date getDate(String columnLabel, Calendar cal) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Time getTime(int columnIndex, Calendar 
cal) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Time getTime(String columnLabel, Calendar cal) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Timestamp getTimestamp(int columnIndex, Calendar cal) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Timestamp getTimestamp(String columnLabel, Calendar cal) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public URL getURL(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public URL getURL(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateRef(int columnIndex, Ref x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateRef(String columnLabel, Ref x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBlob(int columnIndex, Blob x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBlob(String columnLabel, Blob x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateClob(int columnIndex, Clob x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateClob(String columnLabel, Clob x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateArray(int columnIndex, Array x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateArray(String columnLabel, Array x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public RowId getRowId(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public RowId getRowId(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateRowId(int columnIndex, RowId x) throws SQLException { - throw 
getExceptionToThrow(); - } - - @Override - public void updateRowId(String columnLabel, RowId x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public int getHoldability() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean isClosed() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateNString(int columnIndex, String nString) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateNString(String columnLabel, String nString) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateNClob(int columnIndex, NClob nClob) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateNClob(String columnLabel, NClob nClob) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public NClob getNClob(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public NClob getNClob(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public SQLXML getSQLXML(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public SQLXML getSQLXML(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateSQLXML(int columnIndex, SQLXML xmlObject) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateSQLXML(String columnLabel, SQLXML xmlObject) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public String getNString(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public String getNString(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Reader getNCharacterStream(int columnIndex) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public Reader 
getNCharacterStream(String columnLabel) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateNCharacterStream(int columnIndex, Reader x, long length) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateNCharacterStream(String columnLabel, Reader reader, long length) - throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateAsciiStream(int columnIndex, InputStream x, long length) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBinaryStream(int columnIndex, InputStream x, long length) - throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateCharacterStream(int columnIndex, Reader x, long length) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateAsciiStream(String columnLabel, InputStream x, long length) - throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBinaryStream(String columnLabel, InputStream x, long length) - throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateCharacterStream(String columnLabel, Reader reader, long length) - throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBlob(int columnIndex, InputStream inputStream, long length) - throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBlob(String columnLabel, InputStream inputStream, long length) - throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateClob(int columnIndex, Reader reader, long length) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateClob(String columnLabel, Reader reader, long length) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateNClob(int columnIndex, Reader reader, long length) throws 
SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateNClob(String columnLabel, Reader reader, long length) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateNCharacterStream(int columnIndex, Reader x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateNCharacterStream(String columnLabel, Reader reader) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateAsciiStream(int columnIndex, InputStream x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBinaryStream(int columnIndex, InputStream x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateCharacterStream(int columnIndex, Reader x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateAsciiStream(String columnLabel, InputStream x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBinaryStream(String columnLabel, InputStream x) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateCharacterStream(String columnLabel, Reader reader) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBlob(int columnIndex, InputStream inputStream) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateBlob(String columnLabel, InputStream inputStream) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateClob(int columnIndex, Reader reader) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateClob(String columnLabel, Reader reader) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateNClob(int columnIndex, Reader reader) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void 
updateNClob(String columnLabel, Reader reader) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public T getObject(int columnIndex, Class type) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public T getObject(String columnLabel, Class type) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateObject(int columnIndex, Object x, SQLType targetSqlType, int scaleOrLength) - throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateObject(String columnLabel, Object x, SQLType targetSqlType, int scaleOrLength) - throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateObject(int columnIndex, Object x, SQLType targetSqlType) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public void updateObject(String columnLabel, Object x, SQLType targetSqlType) - throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public T unwrap(Class iface) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean isWrapperFor(Class iface) throws SQLException { - throw getExceptionToThrow(); - } - } - - private static SQLException getExceptionToThrow() { - return getExceptionToThrow("Method is not implemented!"); - } - - private static SQLException getExceptionToThrow(String message) { - return new SQLException(message); - } - - public static class ThrowingResultSetMetaData implements ResultSetMetaData { - @Override - public int getColumnCount() throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean isAutoIncrement(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean isCaseSensitive(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean isSearchable(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean 
isCurrency(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public int isNullable(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean isSigned(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public int getColumnDisplaySize(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public String getColumnLabel(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public String getColumnName(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public String getSchemaName(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public int getPrecision(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public int getScale(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public String getTableName(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public String getCatalogName(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public int getColumnType(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public String getColumnTypeName(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean isReadOnly(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean isWritable(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public boolean isDefinitelyWritable(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public String getColumnClassName(int column) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - public T unwrap(Class iface) throws SQLException { - throw getExceptionToThrow(); - } - - @Override - 
public boolean isWrapperFor(Class iface) throws SQLException { - throw getExceptionToThrow(); - } - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java deleted file mode 100644 index c7dc9b2791ce0..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Types; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.junit.jupiter.api.Test; - -/** Tests of the ResultSetUtility. 
*/ -public class ResultSetUtilityTest { - @Test - public void testZeroRowResultSet() throws Exception { - for (boolean reuseVectorSchemaRoot : new boolean[] {false, true}) { - try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) { - ResultSet rs = ResultSetUtility.generateEmptyResultSet(); - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .build(); - - ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config); - assertTrue(iter.hasNext(), "Iterator on zero row ResultSet should haveNext() before use"); - VectorSchemaRoot root = iter.next(); - assertNotNull(root, "VectorSchemaRoot from first next() result should never be null"); - assertEquals( - 0, root.getRowCount(), "VectorSchemaRoot from empty ResultSet should have zero rows"); - assertFalse( - iter.hasNext(), - "hasNext() should return false on empty ResultSets after initial next() call"); - } - } - } - - @Test - public void testBasicResultSet() throws Exception { - try (ResultSetUtility.MockResultSet resultSet = ResultSetUtility.generateBasicResultSet(3)) { - // Before row 1: - assertTrue(resultSet.isBeforeFirst()); - assertFalse(resultSet.isFirst()); - assertFalse(resultSet.isLast()); - assertFalse(resultSet.isAfterLast()); - assertThrows(SQLException.class, () -> resultSet.getString(1)); - - // Row 1: - assertTrue(resultSet.next()); - assertFalse(resultSet.isBeforeFirst()); - assertTrue(resultSet.isFirst()); - assertFalse(resultSet.isLast()); - assertFalse(resultSet.isAfterLast()); - assertEquals("row number: 1", resultSet.getString(1)); - - // Row 2: - assertTrue(resultSet.next()); - assertFalse(resultSet.isBeforeFirst()); - assertFalse(resultSet.isFirst()); - assertFalse(resultSet.isLast()); - assertFalse(resultSet.isAfterLast()); - assertEquals("row number: 2", resultSet.getString(1)); - - // Row 3: - assertTrue(resultSet.next()); 
- assertFalse(resultSet.isBeforeFirst()); - assertFalse(resultSet.isFirst()); - assertTrue(resultSet.isLast()); - assertFalse(resultSet.isAfterLast()); - assertEquals("row number: 3", resultSet.getString(1)); - - // After row 3: - assertFalse(resultSet.next()); - assertFalse(resultSet.isBeforeFirst()); - assertFalse(resultSet.isFirst()); - assertFalse(resultSet.isLast()); - assertTrue(resultSet.isAfterLast()); - } - } - - @Test - public void testMockDataTypes() throws SQLException { - ResultSetUtility.MockDataElement element = - new ResultSetUtility.MockDataElement(1L, Types.NUMERIC); - assertEquals(1L, element.getLong()); - assertEquals(1, element.getInt()); - assertEquals("1", element.getString()); - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java deleted file mode 100644 index 7fa8188a99158..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; - -/** POJO to handle the YAML data from the test YAML file. */ -@JsonIgnoreProperties(ignoreUnknown = true) -public class Table { - private String name; - private String type; - private String vector; - private String timezone; - private String create; - private String[] data; - private String query; - private String drop; - private String[] values; - private String[] vectors; - private int rowCount; - - public Table() {} - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - - public String getVector() { - return vector; - } - - public void setVector(String vector) { - this.vector = vector; - } - - public String[] getValues() { - return values; - } - - public void setValues(String[] values) { - this.values = values; - } - - public Long[] getLongValues() { - Long[] arr = new Long[values.length]; - int i = 0; - for (String str : values) { - arr[i++] = Long.parseLong(str); - } - return arr; - } - - public Integer[] getIntValues() { - Integer[] arr = new Integer[values.length]; - int i = 0; - for (String str : values) { - arr[i++] = Integer.parseInt(str); - } - return arr; - } - - public Boolean[] getBoolValues() { - Boolean[] arr = new Boolean[values.length]; - int i = 0; - for (String str : values) { - arr[i++] = Boolean.parseBoolean(str); - } - return arr; - } - - public BigDecimal[] getBigDecimalValues() { - BigDecimal[] arr = new BigDecimal[values.length]; - int i = 0; - for (String str : values) { - arr[i++] = new BigDecimal(str); - } - return arr; - } - - public Double[] getDoubleValues() { - Double[] arr = new Double[values.length]; - int i = 0; - for (String str : values) { - arr[i++] = Double.parseDouble(str); 
- } - return arr; - } - - public Float[] getFloatValues() { - Float[] arr = new Float[values.length]; - int i = 0; - for (String str : values) { - arr[i++] = Float.parseFloat(str); - } - return arr; - } - - public byte[][] getBinaryValues() { - return getByteArray(values); - } - - public byte[][] getVarCharValues() { - return getByteArray(values); - } - - public byte[][] getBlobValues() { - return getByteArray(values); - } - - public byte[][] getClobValues() { - return getByteArray(values); - } - - public byte[][] getCharValues() { - return getByteArray(values); - } - - public Integer[][] getListValues() { - return JdbcToArrowTestHelper.getListValues(values); - } - - public String getCreate() { - return create; - } - - public void setCreate(String create) { - this.create = create; - } - - public String[] getData() { - return data; - } - - public void setData(String[] data) { - this.data = data; - } - - public String getQuery() { - return query; - } - - public void setQuery(String query) { - this.query = query; - } - - public String getDrop() { - return drop; - } - - public void setDrop(String drop) { - this.drop = drop; - } - - public String getTimezone() { - return timezone; - } - - public void setTimezone(String timezone) { - this.timezone = timezone; - } - - public String[] getVectors() { - return vectors; - } - - public void setVectors(String[] vectors) { - this.vectors = vectors; - } - - public int getRowCount() { - return rowCount; - } - - public void setRowCount(int rowCount) { - this.rowCount = rowCount; - } - - @Override - public String toString() { - return "Table{name='" + name + "', type='" + type + "'}"; - } - - static byte[][] getByteArray(String[] data) { - byte[][] byteArr = new byte[data.length][]; - - for (int i = 0; i < data.length; i++) { - byteArr[i] = data[i].getBytes(StandardCharsets.UTF_8); - } - return byteArr; - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java 
b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java deleted file mode 100644 index 4993a8b1aeec5..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java +++ /dev/null @@ -1,308 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Types; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.stream.Stream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** Test options for dealing with unreliable ResultSetMetaData from JDBC drivers. 
*/ -public class UnreliableMetaDataTest { - private BufferAllocator allocator; - - @BeforeEach - public void beforeEach() { - allocator = new RootAllocator(); - } - - @AfterEach - public void afterEach() { - allocator.close(); - } - - public static Stream getTestData() { - return Arrays.stream(new Object[][] {{false}, {true}}).map(Arguments::of); - } - - @ParameterizedTest - @MethodSource("getTestData") - public void testUnreliableMetaDataPrecisionAndScale(boolean reuseVectorSchemaRoot) - throws Exception { - ResultSet rs = buildIncorrectPrecisionAndScaleMetaDataResultSet(); - ResultSetMetaData rsmd = rs.getMetaData(); - assertEquals(Types.DECIMAL, rsmd.getColumnType(1), "Column type should be Types.DECIMAL"); - assertEquals(0, rsmd.getScale(1), "Column scale should be zero"); - assertEquals(0, rsmd.getPrecision(1), "Column precision should be zero"); - rs.next(); - BigDecimal bd1 = rs.getBigDecimal(1); - assertEquals(new BigDecimal("1000000000000000.01"), bd1, "Value should be 1000000000000000.01"); - assertEquals(2, bd1.scale(), "Value scale should be 2"); - assertEquals(18, bd1.precision(), "Value precision should be 18"); - assertFalse(rs.next(), "No more rows!"); - - // reset the ResultSet: - rs.beforeFirst(); - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .build(); - try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { - assertTrue(iter.hasNext()); - assertThrows( - RuntimeException.class, iter::next, "Expected to fail due to mismatched metadata!"); - } - - // reset the ResultSet: - rs.beforeFirst(); - JdbcFieldInfo explicitMappingField = new JdbcFieldInfo(Types.DECIMAL, 18, 2); - Map explicitMapping = new HashMap<>(); - explicitMapping.put(1, explicitMappingField); - config = - new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - 
.setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(explicitMapping) - .build(); - - try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { - while (iter.hasNext()) { - VectorSchemaRoot root = iter.next(); - root.close(); - } - } - } - - @ParameterizedTest - @MethodSource("getTestData") - public void testInconsistentPrecisionAndScale(boolean reuseVectorSchemaRoot) throws Exception { - ResultSet rs = buildVaryingPrecisionAndScaleResultSet(); - ResultSetMetaData rsmd = rs.getMetaData(); - assertEquals(Types.DECIMAL, rsmd.getColumnType(1), "Column type should be Types.DECIMAL"); - assertEquals(0, rsmd.getScale(1), "Column scale should be zero"); - assertEquals(0, rsmd.getPrecision(1), "Column precision should be zero"); - rs.next(); - BigDecimal bd1 = rs.getBigDecimal(1); - assertEquals(new BigDecimal("1000000000000000.01"), bd1, "Value should be 1000000000000000.01"); - assertEquals(2, bd1.scale(), "Value scale should be 2"); - assertEquals(18, bd1.precision(), "Value precision should be 18"); - rs.next(); - BigDecimal bd2 = rs.getBigDecimal(1); - assertEquals( - new BigDecimal("1000000000300.0000001"), bd2, "Value should be 1000000000300.0000001"); - assertEquals(7, bd2.scale(), "Value scale should be 7"); - assertEquals(20, bd2.precision(), "Value precision should be 20"); - rs.beforeFirst(); - JdbcFieldInfo explicitMappingField = new JdbcFieldInfo(Types.DECIMAL, 20, 7); - Map explicitMapping = new HashMap<>(); - explicitMapping.put(1, explicitMappingField); - - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(explicitMapping) - .build(); - try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { - assertTrue(iter.hasNext()); - assertThrows( - RuntimeException.class, - iter::next, - "This is expected to fail due 
to inconsistent BigDecimal scales, while strict matching is enabled."); - } - // Reuse same ResultSet, with RoundingMode.UNNECESSARY set to coerce BigDecimal scale as needed: - config = - new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(explicitMapping) - .setBigDecimalRoundingMode(RoundingMode.UNNECESSARY) - .build(); - try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { - while (iter.hasNext()) { - VectorSchemaRoot root = iter.next(); - root.close(); - } - } - } - - @ParameterizedTest - @MethodSource("getTestData") - public void testIncorrectNullability(boolean reuseVectorSchemaRoot) throws Exception { - // ARROW-17005: ResultSetMetaData may indicate a field is non-nullable even when there are nulls - ResultSetUtility.MockResultSetMetaData.MockColumnMetaData columnMetaData = - ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.builder() - .sqlType(Types.INTEGER) - .nullable(ResultSetMetaData.columnNoNulls) - .build(); - ResultSetMetaData metadata = - new ResultSetUtility.MockResultSetMetaData(Collections.singletonList(columnMetaData)); - final ResultSetUtility.MockResultSet.Builder resultSetBuilder = - ResultSetUtility.MockResultSet.builder() - .setMetaData(metadata) - .addDataElement(new ResultSetUtility.MockDataElement(1024, Types.INTEGER)) - .finishRow() - .addDataElement(new ResultSetUtility.MockDataElement(null, Types.INTEGER)) - .finishRow(); - final Schema notNullSchema = - new Schema( - Collections.singletonList( - Field.notNullable(/*name=*/ null, new ArrowType.Int(32, true)))); - final Schema nullSchema = - new Schema( - Collections.singletonList(Field.nullable(/*name=*/ null, new ArrowType.Int(32, true)))); - - try (final ResultSet rs = resultSetBuilder.build()) { - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* 
include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .build(); - try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { - assertTrue(iter.hasNext()); - final VectorSchemaRoot root = iter.next(); - // The wrong data is returned here - assertEquals(notNullSchema, root.getSchema()); - assertEquals(2, root.getRowCount()); - final IntVector ints = (IntVector) root.getVector(0); - assertEquals(1024, ints.get(0)); - assertFalse(ints.isNull(1)); - assertFalse(iter.hasNext()); - root.close(); - } - - rs.beforeFirst(); - - // Override the nullability to get the correct result - final Map typeMapping = new HashMap<>(); - JdbcFieldInfo realFieldInfo = - new JdbcFieldInfo( - Types.INTEGER, ResultSetMetaData.columnNullable, /*precision*/ 0, /*scale*/ 0); - typeMapping.put(1, realFieldInfo); - config = - new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(typeMapping) - .build(); - try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { - assertTrue(iter.hasNext()); - final VectorSchemaRoot root = iter.next(); - assertEquals(nullSchema, root.getSchema()); - assertEquals(2, root.getRowCount()); - final IntVector ints = (IntVector) root.getVector(0); - assertEquals(1024, ints.get(0)); - assertTrue(ints.isNull(1)); - assertFalse(iter.hasNext()); - root.close(); - } - - rs.beforeFirst(); - - // columnNullableUnknown won't override the metadata - realFieldInfo = - new JdbcFieldInfo( - Types.INTEGER, ResultSetMetaData.columnNullableUnknown, /*precision*/ 0, /*scale*/ 0); - typeMapping.put(1, realFieldInfo); - config = - new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(typeMapping) - .build(); - try (ArrowVectorIterator iter = 
JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { - assertTrue(iter.hasNext()); - final VectorSchemaRoot root = iter.next(); - assertEquals(notNullSchema, root.getSchema()); - assertEquals(2, root.getRowCount()); - final IntVector ints = (IntVector) root.getVector(0); - assertEquals(1024, ints.get(0)); - assertFalse(ints.isNull(1)); - assertFalse(iter.hasNext()); - root.close(); - } - } - } - - private ResultSet buildIncorrectPrecisionAndScaleMetaDataResultSet() throws SQLException { - ResultSetUtility.MockResultSetMetaData.MockColumnMetaData columnMetaData = - ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.builder() - .sqlType(Types.DECIMAL) - .precision(0) - .scale(0) - .build(); - ArrayList cols = new ArrayList<>(); - cols.add(columnMetaData); - ResultSetMetaData metadata = new ResultSetUtility.MockResultSetMetaData(cols); - return ResultSetUtility.MockResultSet.builder() - .setMetaData(metadata) - .addDataElement( - new ResultSetUtility.MockDataElement( - new BigDecimal("1000000000000000.01"), Types.DECIMAL)) - .finishRow() - .build(); - } - - private ResultSet buildVaryingPrecisionAndScaleResultSet() throws SQLException { - ResultSetUtility.MockResultSetMetaData.MockColumnMetaData columnMetaData = - ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.builder() - .sqlType(Types.DECIMAL) - .precision(0) - .scale(0) - .build(); - ArrayList cols = new ArrayList<>(); - cols.add(columnMetaData); - ResultSetMetaData metadata = new ResultSetUtility.MockResultSetMetaData(cols); - return ResultSetUtility.MockResultSet.builder() - .setMetaData(metadata) - .addDataElement( - new ResultSetUtility.MockDataElement( - new BigDecimal("1000000000000000.01"), Types.DECIMAL)) - .finishRow() - .addDataElement( - new ResultSetUtility.MockDataElement( - new BigDecimal("1000000000300.0000001"), Types.DECIMAL)) - .finishRow() - .build(); - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java 
b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java deleted file mode 100644 index 157bf5bdec24b..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc.consumer; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; - -public abstract class AbstractConsumerTest { - - protected BufferAllocator allocator; - - @BeforeEach - public void setUp() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void tearDown() { - allocator.close(); - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java deleted file mode 100644 index b1e253794d0d4..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc.consumer; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.junit.jupiter.api.Test; - -public class BinaryConsumerTest extends AbstractConsumerTest { - - private static final int INITIAL_VALUE_ALLOCATION = BaseValueVector.INITIAL_VALUE_ALLOCATION; - private static final int DEFAULT_RECORD_BYTE_COUNT = 8; - - interface InputStreamConsumer { - void consume(BinaryConsumer consumer) throws IOException; - } - - protected void assertConsume(boolean nullable, InputStreamConsumer dataConsumer, byte[][] expect) - throws IOException { - try (final VarBinaryVector vector = new VarBinaryVector("binary", allocator)) { - BinaryConsumer consumer = BinaryConsumer.createConsumer(vector, 0, nullable); - dataConsumer.consume(consumer); - assertEquals(expect.length - 1, vector.getLastSet()); - for (int i = 0; i < expect.length; i++) { - byte[] value = expect[i]; - if (value == null) { - assertTrue(vector.isNull(i)); - } else { - assertArrayEquals(expect[i], vector.get(i)); - } - } - } - } - - private byte[] createBytes(int length) { - byte[] bytes = new byte[length]; - for (int i = 0; i < length; i++) { - bytes[i] = (byte) (i % 1024); - } - return bytes; - } - - public void testConsumeInputStream(byte[][] values, boolean nullable) throws IOException { - assertConsume( - nullable, - binaryConsumer -> { - for (byte[] value : values) { - binaryConsumer.consume(new ByteArrayInputStream(value)); - binaryConsumer.moveWriterPosition(); - } - }, - values); - } - - @Test - public void testConsumeInputStream() throws IOException { - testConsumeInputStream(new byte[][] {createBytes(DEFAULT_RECORD_BYTE_COUNT)}, false); - - 
testConsumeInputStream( - new byte[][] { - createBytes(DEFAULT_RECORD_BYTE_COUNT), createBytes(DEFAULT_RECORD_BYTE_COUNT) - }, - false); - - testConsumeInputStream( - new byte[][] { - createBytes(DEFAULT_RECORD_BYTE_COUNT * 2), - createBytes(DEFAULT_RECORD_BYTE_COUNT), - createBytes(DEFAULT_RECORD_BYTE_COUNT) - }, - false); - - testConsumeInputStream( - new byte[][] {createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT)}, false); - - testConsumeInputStream( - new byte[][] { - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT * 10), - }, - false); - - testConsumeInputStream( - new byte[][] { - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) - }, - false); - - testConsumeInputStream( - new byte[][] { - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), - createBytes(DEFAULT_RECORD_BYTE_COUNT), - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) - }, - false); - - byte[][] testRecords = new byte[INITIAL_VALUE_ALLOCATION * 2][]; - for (int i = 0; i < testRecords.length; i++) { - testRecords[i] = createBytes(DEFAULT_RECORD_BYTE_COUNT); - } - testConsumeInputStream(testRecords, false); - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java deleted file mode 100644 index e6ca3ad34c7d9..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.h2; - -import static org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest.sqlToArrow; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.List; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class JdbcAliasToArrowTest { - private Connection conn = null; - - private static final String CREATE_STATEMENT = "CREATE TABLE example_table (id INTEGER);"; - private static final String INSERT_STATEMENT = "INSERT INTO example_table (id) VALUES (?);"; - private static final String QUERY = "SELECT id as a, id as b FROM example_table;"; - private static final String DROP_STATEMENT = "DROP TABLE example_table;"; - private static final String ORIGINAL_COLUMN_NAME = "ID"; - private static final String COLUMN_A = "A"; - private static final String COLUMN_B = "B"; - - @BeforeEach - public void setUp() throws Exception { - String url = 
"jdbc:h2:mem:JdbcAliasToArrowTest"; - String driver = "org.h2.Driver"; - Class.forName(driver); - conn = DriverManager.getConnection(url); - try (Statement stmt = conn.createStatement()) { - stmt.executeUpdate(CREATE_STATEMENT); - } - } - - /** - * Test h2 database query with alias for column name and column label. To verify reading field - * alias from an H2 database works as expected. If this test fails, something is either wrong with - * the setup, or the H2 SQL behavior changed. - */ - @Test - public void testReadH2Alias() throws Exception { - // insert rows - int rowCount = 4; - insertRows(rowCount); - - try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { - ResultSetMetaData rsmd = resultSet.getMetaData(); - assertEquals(2, rsmd.getColumnCount()); - - // check column name and column label - assertEquals(ORIGINAL_COLUMN_NAME, rsmd.getColumnName(1)); - assertEquals(COLUMN_A, rsmd.getColumnLabel(1)); - assertEquals(ORIGINAL_COLUMN_NAME, rsmd.getColumnName(2)); - assertEquals(COLUMN_B, rsmd.getColumnLabel(2)); - - int rowNum = 0; - - while (resultSet.next()) { - assertEquals(rowNum, resultSet.getInt(COLUMN_A)); - assertEquals(rowNum, resultSet.getInt(COLUMN_B)); - ++rowNum; - } - - assertEquals(rowCount, rowNum); - } - } - - /** - * Test jdbc query results with alias to arrow works expected. Arrow result schema name should be - * field alias name. 
- */ - @Test - public void testJdbcAliasToArrow() throws Exception { - int rowCount = 4; - insertRows(rowCount); - - try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { - final VectorSchemaRoot vector = sqlToArrow(resultSet, new RootAllocator(Integer.MAX_VALUE)); - - assertEquals(rowCount, vector.getRowCount()); - Schema vectorSchema = vector.getSchema(); - List vectorFields = vectorSchema.getFields(); - assertEquals(COLUMN_A, vectorFields.get(0).getName()); - assertEquals(COLUMN_B, vectorFields.get(1).getName()); - } - } - - @AfterEach - public void tearDown() throws SQLException { - try (Statement stmt = conn.createStatement()) { - stmt.executeUpdate(DROP_STATEMENT); - } finally { - if (conn != null) { - conn.close(); - conn = null; - } - } - } - - private void insertRows(int numRows) throws SQLException { - // Insert [numRows] Rows - try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) { - for (int i = 0; i < numRows; ++i) { - stmt.setInt(1, i); - stmt.executeUpdate(); - } - } - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java deleted file mode 100644 index 23a91fe0e567a..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java +++ /dev/null @@ -1,384 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.h2; - -import static org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest.sqlToArrow; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; - -import java.nio.charset.StandardCharsets; -import java.sql.Array; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Statement; -import java.sql.Types; -import java.util.HashMap; -import java.util.Map; -import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; -import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class JdbcToArrowArrayTest { - private Connection conn = null; - - private static final String CREATE_STATEMENT = - "CREATE TABLE array_table (id INTEGER, int_array INTEGER ARRAY, float_array REAL ARRAY, " - 
+ "string_array VARCHAR ARRAY);"; - private static final String INSERT_STATEMENT = - "INSERT INTO array_table (id, int_array, float_array, string_array) VALUES (?, ?, ?, ?);"; - private static final String QUERY = - "SELECT int_array, float_array, string_array FROM array_table ORDER BY id;"; - private static final String DROP_STATEMENT = "DROP TABLE array_table;"; - - private static Map arrayFieldMapping; - - private static final String INT_ARRAY_FIELD_NAME = "INT_ARRAY"; - private static final String FLOAT_ARRAY_FIELD_NAME = "FLOAT_ARRAY"; - private static final String STRING_ARRAY_FIELD_NAME = "STRING_ARRAY"; - - @BeforeEach - public void setUp() throws Exception { - String url = "jdbc:h2:mem:JdbcToArrowTest"; - String driver = "org.h2.Driver"; - Class.forName(driver); - conn = DriverManager.getConnection(url); - try (Statement stmt = conn.createStatement()) { - stmt.executeUpdate(CREATE_STATEMENT); - } - - arrayFieldMapping = new HashMap(); - arrayFieldMapping.put(INT_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.INTEGER)); - arrayFieldMapping.put(FLOAT_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.REAL)); - arrayFieldMapping.put(STRING_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.VARCHAR)); - } - - // This test verifies reading an array field from an H2 database - // works as expected. If this test fails, something is either wrong - // with the setup, or the H2 SQL behavior changed. 
- @Test - public void testReadH2Array() throws Exception { - int rowCount = 4; - - Integer[][] intArrays = generateIntegerArrayField(rowCount); - Float[][] floatArrays = generateFloatArrayField(rowCount); - String[][] strArrays = generateStringArrayField(rowCount); - - insertRows(rowCount, intArrays, floatArrays, strArrays); - - try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { - ResultSetMetaData rsmd = resultSet.getMetaData(); - assertEquals(3, rsmd.getColumnCount()); - - for (int i = 1; i <= rsmd.getColumnCount(); ++i) { - assertEquals(Types.ARRAY, rsmd.getColumnType(i)); - } - - int rowNum = 0; - - while (resultSet.next()) { - Array intArray = resultSet.getArray(INT_ARRAY_FIELD_NAME); - assertFalse(resultSet.wasNull()); - try (ResultSet rs = intArray.getResultSet()) { - int arrayIndex = 0; - while (rs.next()) { - assertEquals(intArrays[rowNum][arrayIndex].intValue(), rs.getInt(2)); - ++arrayIndex; - } - assertEquals(intArrays[rowNum].length, arrayIndex); - } - - Array floatArray = resultSet.getArray(FLOAT_ARRAY_FIELD_NAME); - assertFalse(resultSet.wasNull()); - try (ResultSet rs = floatArray.getResultSet()) { - int arrayIndex = 0; - while (rs.next()) { - assertEquals(floatArrays[rowNum][arrayIndex].floatValue(), rs.getFloat(2), 0.001); - ++arrayIndex; - } - assertEquals(floatArrays[rowNum].length, arrayIndex); - } - - Array strArray = resultSet.getArray(STRING_ARRAY_FIELD_NAME); - assertFalse(resultSet.wasNull()); - try (ResultSet rs = strArray.getResultSet()) { - int arrayIndex = 0; - while (rs.next()) { - assertEquals(strArrays[rowNum][arrayIndex], rs.getString(2)); - ++arrayIndex; - } - assertEquals(strArrays[rowNum].length, arrayIndex); - } - - ++rowNum; - } - - assertEquals(rowCount, rowNum); - } - } - - @Test - public void testJdbcToArrow() throws Exception { - int rowCount = 4; - - Integer[][] intArrays = generateIntegerArrayField(rowCount); - Float[][] floatArrays = generateFloatArrayField(rowCount); - String[][] strArrays = 
generateStringArrayField(rowCount); - - insertRows(rowCount, intArrays, floatArrays, strArrays); - - final JdbcToArrowConfigBuilder builder = - new JdbcToArrowConfigBuilder( - new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); - builder.setArraySubTypeByColumnNameMap(arrayFieldMapping); - - final JdbcToArrowConfig config = builder.build(); - - try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { - final VectorSchemaRoot vector = sqlToArrow(resultSet, config); - - assertEquals(rowCount, vector.getRowCount()); - - assertIntegerVectorEquals( - (ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); - assertFloatVectorEquals( - (ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); - assertStringVectorEquals( - (ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, strArrays); - } - } - - @Test - public void testJdbcToArrowWithNulls() throws Exception { - int rowCount = 4; - - Integer[][] intArrays = { - null, {0}, {1}, {}, - }; - - Float[][] floatArrays = { - {2.0f}, null, {3.0f}, {}, - }; - - String[][] stringArrays = { - {"4"}, null, {"5"}, {}, - }; - - insertRows(rowCount, intArrays, floatArrays, stringArrays); - - final JdbcToArrowConfigBuilder builder = - new JdbcToArrowConfigBuilder( - new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); - builder.setArraySubTypeByColumnNameMap(arrayFieldMapping); - - final JdbcToArrowConfig config = builder.build(); - - try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { - final VectorSchemaRoot vector = sqlToArrow(resultSet, config); - - assertEquals(rowCount, vector.getRowCount()); - - assertIntegerVectorEquals( - (ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); - assertFloatVectorEquals( - (ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); - assertStringVectorEquals( - (ListVector) 
vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, stringArrays); - } - } - - private void assertIntegerVectorEquals( - ListVector listVector, int rowCount, Integer[][] expectedValues) { - IntVector vector = (IntVector) listVector.getDataVector(); - ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); - - int prevOffset = 0; - for (int row = 0; row < rowCount; ++row) { - int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH); - - if (expectedValues[row] == null) { - assertEquals(0, listVector.isSet(row)); - assertEquals(0, offset - prevOffset); - continue; - } - - assertEquals(1, listVector.isSet(row)); - assertEquals(expectedValues[row].length, offset - prevOffset); - - for (int i = prevOffset; i < offset; ++i) { - assertEquals(expectedValues[row][i - prevOffset].intValue(), vector.get(i)); - } - - prevOffset = offset; - } - } - - private void assertFloatVectorEquals( - ListVector listVector, int rowCount, Float[][] expectedValues) { - Float4Vector vector = (Float4Vector) listVector.getDataVector(); - ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); - - int prevOffset = 0; - for (int row = 0; row < rowCount; ++row) { - int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH); - - if (expectedValues[row] == null) { - assertEquals(0, listVector.isSet(row)); - assertEquals(0, offset - prevOffset); - continue; - } - - assertEquals(1, listVector.isSet(row)); - assertEquals(expectedValues[row].length, offset - prevOffset); - - for (int i = prevOffset; i < offset; ++i) { - assertEquals(expectedValues[row][i - prevOffset].floatValue(), vector.get(i), 0); - } - - prevOffset = offset; - } - } - - private void assertStringVectorEquals( - ListVector listVector, int rowCount, String[][] expectedValues) { - VarCharVector vector = (VarCharVector) listVector.getDataVector(); - ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); - - int prevOffset = 0; - for (int row = 0; row < rowCount; ++row) { - int offset = offsetBuffer.getInt((row + 1) * 
ListVector.OFFSET_WIDTH); - - if (expectedValues[row] == null) { - assertEquals(0, listVector.isSet(row)); - assertEquals(0, offset - prevOffset); - continue; - } - - assertEquals(1, listVector.isSet(row)); - assertEquals(expectedValues[row].length, offset - prevOffset); - for (int i = prevOffset; i < offset; ++i) { - assertArrayEquals( - expectedValues[row][i - prevOffset].getBytes(StandardCharsets.UTF_8), vector.get(i)); - } - - prevOffset = offset; - } - } - - @AfterEach - public void tearDown() throws SQLException { - try (Statement stmt = conn.createStatement()) { - stmt.executeUpdate(DROP_STATEMENT); - } finally { - if (conn != null) { - conn.close(); - conn = null; - } - } - } - - private Integer[][] generateIntegerArrayField(int numRows) { - Integer[][] result = new Integer[numRows][]; - - for (int i = 0; i < numRows; ++i) { - int val = i * 4; - result[i] = new Integer[] {val, val + 1, val + 2, val + 3}; - } - - return result; - } - - private Float[][] generateFloatArrayField(int numRows) { - Float[][] result = new Float[numRows][]; - - for (int i = 0; i < numRows; ++i) { - int val = i * 4; - result[i] = new Float[] {(float) val, (float) val + 1, (float) val + 2, (float) val + 3}; - } - - return result; - } - - private String[][] generateStringArrayField(int numRows) { - String[][] result = new String[numRows][]; - - for (int i = 0; i < numRows; ++i) { - int val = i * 4; - result[i] = - new String[] { - String.valueOf(val), - String.valueOf(val + 1), - String.valueOf(val + 2), - String.valueOf(val + 3) - }; - } - - return result; - } - - private void insertRows( - int numRows, Integer[][] integerArrays, Float[][] floatArrays, String[][] strArrays) - throws SQLException { - - // Insert 4 Rows - try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) { - - for (int i = 0; i < numRows; ++i) { - Integer[] integerArray = integerArrays[i]; - Float[] floatArray = floatArrays[i]; - String[] strArray = strArrays[i]; - - Array intArray = integerArray 
!= null ? conn.createArrayOf("INT", integerArray) : null; - Array realArray = floatArray != null ? conn.createArrayOf("REAL", floatArray) : null; - Array varcharArray = strArray != null ? conn.createArrayOf("VARCHAR", strArray) : null; - - // Insert Arrays of 4 Values in Each Row - stmt.setInt(1, i); - stmt.setArray(2, intArray); - stmt.setArray(3, realArray); - stmt.setArray(4, varcharArray); - - stmt.executeUpdate(); - - if (intArray != null) { - intArray.free(); - } - if (realArray != null) { - realArray.free(); - } - if (varcharArray != null) { - varcharArray.free(); - } - } - } - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java deleted file mode 100644 index 39c0085603f17..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc.h2; - -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getCharArrayWithCharSet; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.sql.DriverManager; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.Arrays; -import java.util.Calendar; -import java.util.stream.Stream; -import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; -import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; -import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; -import org.apache.arrow.adapter.jdbc.Table; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with - * UTF-8 Charset, including the multi-byte CJK characters for H2 database. 
- */ -public class JdbcToArrowCharSetTest extends AbstractJdbcToArrowTest { - - private static final String[] testFiles = { - "h2/test1_charset_h2.yml", - "h2/test1_charset_ch_h2.yml", - "h2/test1_charset_jp_h2.yml", - "h2/test1_charset_kr_h2.yml" - }; - - @Override - public void initializeDatabase(Table table) throws SQLException, ClassNotFoundException { - this.table = table; - - String url = "jdbc:h2:mem:JdbcToArrowTest?characterEncoding=UTF-8"; - String driver = "org.h2.Driver"; - Class.forName(driver); - conn = DriverManager.getConnection(url); - try (Statement stmt = conn.createStatement(); ) { - stmt.executeUpdate(table.getCreate()); - for (String insert : table.getData()) { - stmt.executeUpdate(insert); - } - } - } - - /** - * Get the test data as a collection of Table objects for each test iteration. - * - * @return Collection of Table objects - * @throws SQLException on error - * @throws ClassNotFoundException on error - * @throws IOException on error - */ - public static Stream getTestData() - throws SQLException, ClassNotFoundException, IOException { - return Arrays.stream(prepareTestData(testFiles, JdbcToArrowCharSetTest.class)) - .map(Arguments::of); - } - - /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with UTF-8 - * Charset, including the multi-byte CJK characters. 
- */ - @ParameterizedTest - @MethodSource("getTestData") - @Override - public void testJdbcToArrowValues(Table table) - throws SQLException, IOException, ClassNotFoundException { - this.initializeDatabase(table); - - testDataSets( - sqlToArrow( - conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), - false); - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), - false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), - false); - testDataSets( - sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), - false); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder( - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .build()), - false); - testDataSets( - sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder( - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .build()), - false); - } - - @ParameterizedTest - @MethodSource("getTestData") - public void testJdbcSchemaMetadata(Table table) throws SQLException, ClassNotFoundException { - this.initializeDatabase(table); - - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build(); - ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); - Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); - JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); - } - - /** - * This method calls the assert methods for various DataSets. 
- * - * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' - * mapping declared in configuration only manually - */ - @Override - public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { - JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - - assertVarcharVectorValues( - (VarCharVector) root.getVector(CLOB), - table.getRowCount(), - getCharArrayWithCharSet(table.getValues(), CLOB, StandardCharsets.UTF_8)); - - assertVarcharVectorValues( - (VarCharVector) root.getVector(VARCHAR), - table.getRowCount(), - getCharArrayWithCharSet(table.getValues(), VARCHAR, StandardCharsets.UTF_8)); - - assertVarcharVectorValues( - (VarCharVector) root.getVector(CHAR), - table.getRowCount(), - getCharArrayWithCharSet(table.getValues(), CHAR, StandardCharsets.UTF_8)); - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java deleted file mode 100644 index 2274f51745973..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.h2; - -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBitVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBooleanVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDecimalVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat4VectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat8VectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertIntVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertListVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertNullVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertSmallIntVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeStampVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTinyIntVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarBinaryVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues; -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.IOException; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.Arrays; -import 
java.util.Calendar; -import java.util.function.Function; -import java.util.stream.Stream; -import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; -import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; -import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; -import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; -import org.apache.arrow.adapter.jdbc.Table; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.extension.OpaqueType; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with - * various data types for H2 database using multiple test data files. 
- */ -public class JdbcToArrowDataTypesTest extends AbstractJdbcToArrowTest { - - private static final String BIGINT = "big_int"; - private static final String BINARY = "binary"; - private static final String BIT = "bit"; - private static final String BLOB = "blob"; - private static final String BOOL = "bool"; - private static final String CHAR = "char"; - private static final String CLOB = "clob"; - private static final String DATE = "date"; - private static final String DECIMAL = "decimal"; - private static final String DOUBLE = "double"; - private static final String INT = "int"; - private static final String LIST = "list"; - private static final String REAL = "real"; - private static final String SMALLINT = "small_int"; - private static final String TIME = "time"; - private static final String TIMESTAMP = "timestamp"; - private static final String TINYINT = "tiny_int"; - private static final String VARCHAR = "varchar"; - private static final String NULL = "null"; - - private static final String[] testFiles = { - "h2/test1_bigint_h2.yml", - "h2/test1_binary_h2.yml", - "h2/test1_bit_h2.yml", - "h2/test1_blob_h2.yml", - "h2/test1_bool_h2.yml", - "h2/test1_char_h2.yml", - "h2/test1_clob_h2.yml", - "h2/test1_date_h2.yml", - "h2/test1_decimal_h2.yml", - "h2/test1_double_h2.yml", - "h2/test1_int_h2.yml", - "h2/test1_list_h2.yml", - "h2/test1_real_h2.yml", - "h2/test1_smallint_h2.yml", - "h2/test1_time_h2.yml", - "h2/test1_timestamp_h2.yml", - "h2/test1_tinyint_h2.yml", - "h2/test1_varchar_h2.yml", - "h2/test1_null_h2.yml" - }; - - /** - * Get the test data as a collection of Table objects for each test iteration. 
- * - * @return Collection of Table objects - * @throws SQLException on error - * @throws ClassNotFoundException on error - * @throws IOException on error - */ - public static Stream getTestData() - throws SQLException, ClassNotFoundException, IOException { - return Arrays.stream(prepareTestData(testFiles, JdbcToArrowMapDataTypeTest.class)) - .map(Arguments::of); - } - - /** Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes. */ - @ParameterizedTest - @MethodSource("getTestData") - @Override - public void testJdbcToArrowValues(Table table) - throws SQLException, IOException, ClassNotFoundException { - this.initializeDatabase(table); - - testDataSets( - sqlToArrow( - conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), - false); - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), - false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), - false); - testDataSets( - sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), - false); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder( - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build()), - false); - testDataSets( - sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder( - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build()), - false); - } - - @ParameterizedTest - @MethodSource("getTestData") - public 
void testJdbcSchemaMetadata(Table table) throws SQLException, ClassNotFoundException { - this.initializeDatabase(table); - - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); - Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); - JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); - } - - @Test - void testOpaqueType() throws SQLException, ClassNotFoundException { - try (BufferAllocator allocator = new RootAllocator()) { - String url = "jdbc:h2:mem:JdbcToArrowTest"; - String driver = "org.h2.Driver"; - Class.forName(driver); - conn = DriverManager.getConnection(url); - try (Statement stmt = conn.createStatement()) { - stmt.executeUpdate("CREATE TABLE unknowntype (a GEOMETRY, b INT)"); - } - - String query = "SELECT * FROM unknowntype"; - Calendar calendar = Calendar.getInstance(); - Function typeConverter = - (field) -> JdbcToArrowUtils.getArrowTypeFromJdbcType(field, calendar); - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder() - .setAllocator(allocator) - .setJdbcToArrowTypeConverter( - JdbcToArrowUtils.reportUnsupportedTypesAsOpaque(typeConverter, "H2")) - .build(); - Schema schema; - try (Statement stmt = conn.createStatement(); - ResultSet rs = stmt.executeQuery(query)) { - schema = - assertDoesNotThrow(() -> JdbcToArrowUtils.jdbcToArrowSchema(rs.getMetaData(), config)); - } - - Schema expected = - new Schema( - Arrays.asList( - Field.nullable( - "A", new OpaqueType(Types.MinorType.NULL.getType(), "GEOMETRY", "H2")), - Field.nullable("B", Types.MinorType.INT.getType()))); - assertEquals(expected, schema); - } - } - - /** - * This method calls the assert methods for various DataSets. 
- * - * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' - * mapping declared in configuration only manually - */ - @Override - public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { - JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - - switch (table.getType()) { - case BIGINT: - assertBigIntVectorValues( - (BigIntVector) root.getVector(table.getVector()), - table.getValues().length, - table.getLongValues()); - break; - case BINARY: - case BLOB: - assertVarBinaryVectorValues( - (VarBinaryVector) root.getVector(table.getVector()), - table.getValues().length, - table.getBinaryValues()); - break; - case BIT: - assertBitVectorValues( - (BitVector) root.getVector(table.getVector()), - table.getValues().length, - table.getIntValues()); - break; - case BOOL: - assertBooleanVectorValues( - (BitVector) root.getVector(table.getVector()), - table.getValues().length, - table.getBoolValues()); - break; - case CHAR: - case VARCHAR: - case CLOB: - assertVarcharVectorValues( - (VarCharVector) root.getVector(table.getVector()), - table.getValues().length, - table.getCharValues()); - break; - case DATE: - assertDateVectorValues( - (DateDayVector) root.getVector(table.getVector()), - table.getValues().length, - table.getIntValues()); - break; - case TIME: - assertTimeVectorValues( - (TimeMilliVector) root.getVector(table.getVector()), - table.getValues().length, - table.getLongValues()); - break; - case TIMESTAMP: - assertTimeStampVectorValues( - (TimeStampVector) root.getVector(table.getVector()), - table.getValues().length, - table.getLongValues()); - break; - case DECIMAL: - assertDecimalVectorValues( - (DecimalVector) root.getVector(table.getVector()), - table.getValues().length, - table.getBigDecimalValues()); - break; - case DOUBLE: - assertFloat8VectorValues( - (Float8Vector) root.getVector(table.getVector()), - table.getValues().length, - table.getDoubleValues()); 
- break; - case INT: - assertIntVectorValues( - (IntVector) root.getVector(table.getVector()), - table.getValues().length, - table.getIntValues()); - break; - case SMALLINT: - assertSmallIntVectorValues( - (SmallIntVector) root.getVector(table.getVector()), - table.getValues().length, - table.getIntValues()); - break; - case TINYINT: - assertTinyIntVectorValues( - (TinyIntVector) root.getVector(table.getVector()), - table.getValues().length, - table.getIntValues()); - break; - case REAL: - assertFloat4VectorValues( - (Float4Vector) root.getVector(table.getVector()), - table.getValues().length, - table.getFloatValues()); - break; - case NULL: - assertNullVectorValues((NullVector) root.getVector(table.getVector()), table.getRowCount()); - break; - case LIST: - assertListVectorValues( - (ListVector) root.getVector(table.getVector()), - table.getValues().length, - table.getListValues()); - break; - default: - // do nothing - break; - } - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java deleted file mode 100644 index 456d338f6bd75..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.h2; - -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertMapVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getMapValues; - -import java.io.IOException; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.Calendar; -import java.util.stream.Stream; -import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; -import org.apache.arrow.adapter.jdbc.Table; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.MapVector; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** Test MapConsumer with OTHER jdbc type. */ -public class JdbcToArrowMapDataTypeTest extends AbstractJdbcToArrowTest { - - public static Stream getTestData() throws IOException { - return Stream.of( - Arguments.of(getTable("h2/test1_map_h2.yml", JdbcToArrowMapDataTypeTest.class))); - } - - /** Test Method to test JdbcToArrow Functionality for Map form Types.OTHER column. 
*/ - @ParameterizedTest - @MethodSource("getTestData") - @Override - public void testJdbcToArrowValues(Table table) - throws SQLException, IOException, ClassNotFoundException { - this.initializeDatabase(table); - - Calendar calendar = Calendar.getInstance(); - ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder( - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), - true); - testDataSets( - sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder( - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), - true); - } - - /** - * This method calls the assert methods for various DataSets. - * - * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' - * mapping declared in configuration only manually - */ - @Override - public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { - assertMapVectorValues( - (MapVector) root.getVector(MAP), table.getRowCount(), getMapValues(table.getValues(), MAP)); - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java deleted file mode 100644 index 2009268980afe..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.h2; - -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBitVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBooleanVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDecimalVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat4VectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat8VectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertIntVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertListVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertMapVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertNullValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertSmallIntVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeStampVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVectorValues; -import static 
org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTinyIntVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarBinaryVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBinaryValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBooleanValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getCharArray; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDecimalValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDoubleValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getFloatValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getIntValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getListValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getLongValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getMapValues; - -import java.io.IOException; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.Calendar; -import java.util.stream.Stream; -import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; -import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; -import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; -import org.apache.arrow.adapter.jdbc.Table; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; 
-import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with - * null values for H2 database. - */ -public class JdbcToArrowNullTest extends AbstractJdbcToArrowTest { - - private static final String NULL = "null"; - private static final String SELECTED_NULL_ROW = "selected_null_row"; - private static final String SELECTED_NULL_COLUMN = "selected_null_column"; - - private static final String[] testFiles = { - "h2/test1_all_datatypes_null_h2.yml", - "h2/test1_selected_datatypes_null_h2.yml", - "h2/test1_all_datatypes_selected_null_rows_h2.yml" - }; - - /** - * Get the test data as a collection of Table objects for each test iteration. - * - * @return Collection of Table objects - * @throws SQLException on error - * @throws ClassNotFoundException on error - * @throws IOException on error - */ - public static Stream getTestData() - throws SQLException, ClassNotFoundException, IOException { - return Arrays.stream(prepareTestData(testFiles, JdbcToArrowNullTest.class)).map(Arguments::of); - } - - /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with null - * values. 
- */ - @ParameterizedTest - @MethodSource("getTestData") - @Override - public void testJdbcToArrowValues(Table table) - throws SQLException, IOException, ClassNotFoundException { - this.initializeDatabase(table); - - testDataSets( - sqlToArrow( - conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), - false); - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), - false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), - false); - testDataSets( - sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), - false); - Calendar calendar = Calendar.getInstance(); - ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder( - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), - true); - testDataSets( - sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder( - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), - true); - } - - @ParameterizedTest - @MethodSource("getTestData") - public void testJdbcSchemaMetadata(Table table) throws SQLException, ClassNotFoundException { - this.initializeDatabase(table); - - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(new RootAllocator(0), 
Calendar.getInstance(), true) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); - Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); - JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); - } - - /** - * This method calls the assert methods for various DataSets. - * - * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' - * mapping declared in configuration only manually - */ - @Override - public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { - JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - - switch (table.getType()) { - case NULL: - sqlToArrowTestNullValues(table.getVectors(), root, table.getRowCount()); - break; - case SELECTED_NULL_COLUMN: - sqlToArrowTestSelectedNullColumnsValues( - table.getVectors(), root, table.getRowCount(), isIncludeMapVector); - break; - case SELECTED_NULL_ROW: - testAllVectorValues(root, isIncludeMapVector); - break; - default: - // do nothing - break; - } - } - - private void testAllVectorValues(VectorSchemaRoot root, boolean isIncludeMapVector) { - JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - - assertBigIntVectorValues( - (BigIntVector) root.getVector(BIGINT), - table.getRowCount(), - getLongValues(table.getValues(), BIGINT)); - - assertTinyIntVectorValues( - (TinyIntVector) root.getVector(TINYINT), - table.getRowCount(), - getIntValues(table.getValues(), TINYINT)); - - assertSmallIntVectorValues( - (SmallIntVector) root.getVector(SMALLINT), - table.getRowCount(), - getIntValues(table.getValues(), SMALLINT)); - - assertVarBinaryVectorValues( - (VarBinaryVector) root.getVector(BINARY), - table.getRowCount(), - getBinaryValues(table.getValues(), BINARY)); - - assertVarBinaryVectorValues( - (VarBinaryVector) root.getVector(BLOB), - 
table.getRowCount(), - getBinaryValues(table.getValues(), BLOB)); - - assertVarcharVectorValues( - (VarCharVector) root.getVector(CLOB), - table.getRowCount(), - getCharArray(table.getValues(), CLOB)); - - assertVarcharVectorValues( - (VarCharVector) root.getVector(VARCHAR), - table.getRowCount(), - getCharArray(table.getValues(), VARCHAR)); - - assertVarcharVectorValues( - (VarCharVector) root.getVector(CHAR), - table.getRowCount(), - getCharArray(table.getValues(), CHAR)); - - assertIntVectorValues( - (IntVector) root.getVector(INT), table.getRowCount(), getIntValues(table.getValues(), INT)); - - assertBitVectorValues( - (BitVector) root.getVector(BIT), table.getRowCount(), getIntValues(table.getValues(), BIT)); - - assertBooleanVectorValues( - (BitVector) root.getVector(BOOL), - table.getRowCount(), - getBooleanValues(table.getValues(), BOOL)); - - assertDateVectorValues( - (DateDayVector) root.getVector(DATE), - table.getRowCount(), - getIntValues(table.getValues(), DATE)); - - assertTimeVectorValues( - (TimeMilliVector) root.getVector(TIME), - table.getRowCount(), - getLongValues(table.getValues(), TIME)); - - assertTimeStampVectorValues( - (TimeStampVector) root.getVector(TIMESTAMP), - table.getRowCount(), - getLongValues(table.getValues(), TIMESTAMP)); - - assertDecimalVectorValues( - (DecimalVector) root.getVector(DECIMAL), - table.getRowCount(), - getDecimalValues(table.getValues(), DECIMAL)); - - assertFloat8VectorValues( - (Float8Vector) root.getVector(DOUBLE), - table.getRowCount(), - getDoubleValues(table.getValues(), DOUBLE)); - - assertFloat4VectorValues( - (Float4Vector) root.getVector(REAL), - table.getRowCount(), - getFloatValues(table.getValues(), REAL)); - - assertListVectorValues( - (ListVector) root.getVector(LIST), - table.getRowCount(), - getListValues(table.getValues(), LIST)); - if (isIncludeMapVector) { - assertMapVectorValues( - (MapVector) root.getVector(MAP), - table.getRowCount(), - getMapValues(table.getValues(), MAP)); - } - } - - 
/** - * This method assert tests null values in vectors for all the datatypes. - * - * @param vectors Vectors to test - * @param root VectorSchemaRoot for test - * @param rowCount number of rows - */ - public void sqlToArrowTestNullValues(String[] vectors, VectorSchemaRoot root, int rowCount) { - assertNullValues((IntVector) root.getVector(vectors[0]), rowCount); - assertNullValues((BitVector) root.getVector(vectors[1]), rowCount); - assertNullValues((TinyIntVector) root.getVector(vectors[2]), rowCount); - assertNullValues((SmallIntVector) root.getVector(vectors[3]), rowCount); - assertNullValues((BigIntVector) root.getVector(vectors[4]), rowCount); - assertNullValues((DecimalVector) root.getVector(vectors[5]), rowCount); - assertNullValues((Float8Vector) root.getVector(vectors[6]), rowCount); - assertNullValues((Float4Vector) root.getVector(vectors[7]), rowCount); - assertNullValues((TimeMilliVector) root.getVector(vectors[8]), rowCount); - assertNullValues((DateDayVector) root.getVector(vectors[9]), rowCount); - assertNullValues((TimeStampVector) root.getVector(vectors[10]), rowCount); - assertNullValues((VarBinaryVector) root.getVector(vectors[11]), rowCount); - assertNullValues((VarCharVector) root.getVector(vectors[12]), rowCount); - assertNullValues((VarBinaryVector) root.getVector(vectors[13]), rowCount); - assertNullValues((VarCharVector) root.getVector(vectors[14]), rowCount); - assertNullValues((VarCharVector) root.getVector(vectors[15]), rowCount); - assertNullValues((BitVector) root.getVector(vectors[16]), rowCount); - assertNullValues((ListVector) root.getVector(vectors[17]), rowCount); - } - - /** - * This method assert tests null values in vectors for some selected datatypes. - * - * @param vectors Vectors to test - * @param root VectorSchemaRoot for test - * @param rowCount number of rows - * @param isIncludeMapVector is this dataset checks includes map column. 
Jdbc type to 'map' - * mapping declared in configuration only manually - */ - public void sqlToArrowTestSelectedNullColumnsValues( - String[] vectors, VectorSchemaRoot root, int rowCount, boolean isIncludeMapVector) { - assertNullValues((BigIntVector) root.getVector(vectors[0]), rowCount); - assertNullValues((DecimalVector) root.getVector(vectors[1]), rowCount); - assertNullValues((Float8Vector) root.getVector(vectors[2]), rowCount); - assertNullValues((Float4Vector) root.getVector(vectors[3]), rowCount); - assertNullValues((TimeMilliVector) root.getVector(vectors[4]), rowCount); - assertNullValues((DateDayVector) root.getVector(vectors[5]), rowCount); - assertNullValues((TimeStampVector) root.getVector(vectors[6]), rowCount); - assertNullValues((VarBinaryVector) root.getVector(vectors[7]), rowCount); - assertNullValues((VarCharVector) root.getVector(vectors[8]), rowCount); - assertNullValues((VarBinaryVector) root.getVector(vectors[9]), rowCount); - assertNullValues((VarCharVector) root.getVector(vectors[10]), rowCount); - assertNullValues((VarCharVector) root.getVector(vectors[11]), rowCount); - assertNullValues((BitVector) root.getVector(vectors[12]), rowCount); - assertNullValues((ListVector) root.getVector(vectors[13]), rowCount); - if (isIncludeMapVector) { - assertNullValues((MapVector) root.getVector(vectors[14]), rowCount); - } - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java deleted file mode 100644 index 2108afec4c945..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.h2; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.IOException; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.stream.Stream; -import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; -import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; -import org.apache.arrow.adapter.jdbc.Table; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality for - * (non-)optional columns, in particular with regard to the ensuing VectorSchemaRoot's schema. - */ -public class JdbcToArrowOptionalColumnsTest extends AbstractJdbcToArrowTest { - private static final String[] testFiles = {"h2/test1_null_and_notnull.yml"}; - - /** - * Get the test data as a collection of Table objects for each test iteration. 
- * - * @return Collection of Table objects - * @throws SQLException on error - * @throws ClassNotFoundException on error - * @throws IOException on error - */ - public static Stream getTestData() - throws SQLException, ClassNotFoundException, IOException { - return Arrays.stream(prepareTestData(testFiles, JdbcToArrowOptionalColumnsTest.class)) - .map(Arguments::of); - } - - /** - * Test Method to test JdbcToArrow Functionality for dealing with nullable and non-nullable - * columns. - */ - @ParameterizedTest - @MethodSource("getTestData") - @Override - public void testJdbcToArrowValues(Table table) - throws SQLException, IOException, ClassNotFoundException { - this.initializeDatabase(table); - - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - } - - /** - * This method calls the assert methods for various DataSets. We verify that a SQL `NULL` column - * becomes nullable in the VectorSchemaRoot, and that a SQL `NOT NULL` column becomes - * non-nullable. - * - * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' - * mapping declared in configuration only manually - */ - @Override - public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { - JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - - assertTrue(root.getSchema().getFields().get(0).isNullable()); - assertFalse(root.getSchema().getFields().get(1).isNullable()); - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java deleted file mode 100644 index bea7d4d37c50e..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.h2; - -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.*; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.IOException; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.Calendar; -import java.util.stream.Stream; -import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; -import org.apache.arrow.adapter.jdbc.ArrowVectorIterator; -import org.apache.arrow.adapter.jdbc.JdbcToArrow; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; -import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; -import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; -import org.apache.arrow.adapter.jdbc.ResultSetUtility; -import org.apache.arrow.adapter.jdbc.Table; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import 
org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with - * various data types for H2 database using single test data file. - */ -public class JdbcToArrowTest extends AbstractJdbcToArrowTest { - - private static final String[] testFiles = {"h2/test1_all_datatypes_h2.yml"}; - - /** - * Get the test data as a collection of Table objects for each test iteration. - * - * @return Collection of Table objects - * @throws SQLException on error - * @throws ClassNotFoundException on error - * @throws IOException on error - */ - public static Stream getTestData() - throws SQLException, ClassNotFoundException, IOException { - return Arrays.stream(prepareTestData(testFiles, JdbcToArrowTest.class)) - .flatMap(row -> Stream.of(Arguments.of(row[0], true), Arguments.of(row[0], false))); - } - - /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with only one - * test data file. 
- */ - @ParameterizedTest - @MethodSource("getTestData") - @Override - public void testJdbcToArrowValues(Table table) - throws SQLException, IOException, ClassNotFoundException { - this.initializeDatabase(table); - - testDataSets( - sqlToArrow( - conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), - false); - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), - false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), - false); - testDataSets( - sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), - false); - Calendar calendar = Calendar.getInstance(); - ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder( - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), - true); - testDataSets( - sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), - true); - } - - @ParameterizedTest - @MethodSource("getTestData") - public void testJdbcSchemaMetadata(Table table, boolean reuseVectorSchemaRoot) - throws SQLException, ClassNotFoundException { - this.initializeDatabase(table); - - Calendar calendar = Calendar.getInstance(); - ResultSetMetaData 
rsmd = getQueryMetaData(table.getQuery()); - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); - JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); - } - - /** - * This method calls the assert methods for various DataSets. - * - * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' - * mapping declared in configuration only manually - */ - @Override - public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { - JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - assertBigIntVectorValues( - (BigIntVector) root.getVector(BIGINT), - table.getRowCount(), - getLongValues(table.getValues(), BIGINT)); - - assertTinyIntVectorValues( - (TinyIntVector) root.getVector(TINYINT), - table.getRowCount(), - getIntValues(table.getValues(), TINYINT)); - - assertSmallIntVectorValues( - (SmallIntVector) root.getVector(SMALLINT), - table.getRowCount(), - getIntValues(table.getValues(), SMALLINT)); - - assertVarBinaryVectorValues( - (VarBinaryVector) root.getVector(BINARY), - table.getRowCount(), - getBinaryValues(table.getValues(), BINARY)); - - assertVarBinaryVectorValues( - (VarBinaryVector) root.getVector(BLOB), - table.getRowCount(), - getBinaryValues(table.getValues(), BLOB)); - - assertVarcharVectorValues( - (VarCharVector) root.getVector(CLOB), - table.getRowCount(), - getCharArray(table.getValues(), CLOB)); - - assertVarcharVectorValues( - (VarCharVector) root.getVector(VARCHAR), - table.getRowCount(), - getCharArray(table.getValues(), VARCHAR)); - - assertVarcharVectorValues( - (VarCharVector) root.getVector(CHAR), - 
table.getRowCount(), - getCharArray(table.getValues(), CHAR)); - - assertIntVectorValues( - (IntVector) root.getVector(INT), table.getRowCount(), getIntValues(table.getValues(), INT)); - - assertBitVectorValues( - (BitVector) root.getVector(BIT), table.getRowCount(), getIntValues(table.getValues(), BIT)); - - assertBooleanVectorValues( - (BitVector) root.getVector(BOOL), - table.getRowCount(), - getBooleanValues(table.getValues(), BOOL)); - - assertDateVectorValues( - (DateDayVector) root.getVector(DATE), - table.getRowCount(), - getIntValues(table.getValues(), DATE)); - - assertTimeVectorValues( - (TimeMilliVector) root.getVector(TIME), - table.getRowCount(), - getLongValues(table.getValues(), TIME)); - - assertTimeStampVectorValues( - (TimeStampVector) root.getVector(TIMESTAMP), - table.getRowCount(), - getLongValues(table.getValues(), TIMESTAMP)); - - assertDecimalVectorValues( - (DecimalVector) root.getVector(DECIMAL), - table.getRowCount(), - getDecimalValues(table.getValues(), DECIMAL)); - - assertFloat8VectorValues( - (Float8Vector) root.getVector(DOUBLE), - table.getRowCount(), - getDoubleValues(table.getValues(), DOUBLE)); - - assertFloat4VectorValues( - (Float4Vector) root.getVector(REAL), - table.getRowCount(), - getFloatValues(table.getValues(), REAL)); - - assertNullVectorValues((NullVector) root.getVector(NULL), table.getRowCount()); - - assertListVectorValues( - (ListVector) root.getVector(LIST), - table.getRowCount(), - getListValues(table.getValues(), LIST)); - - if (isIncludeMapVector) { - assertMapVectorValues( - (MapVector) root.getVector(MAP), - table.getRowCount(), - getMapValues(table.getValues(), MAP)); - } - } - - @ParameterizedTest - @MethodSource("getTestData") - public void runLargeNumberOfRows(Table table, boolean reuseVectorSchemaRoot) - throws IOException, SQLException, ClassNotFoundException { - this.initializeDatabase(table); - - BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - int x = 0; - final int targetRows = 
600000; - ResultSet rs = ResultSetUtility.generateBasicResultSet(targetRows); - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - - try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { - while (iter.hasNext()) { - VectorSchemaRoot root = iter.next(); - x += root.getRowCount(); - if (!reuseVectorSchemaRoot) { - root.close(); - } - } - } finally { - allocator.close(); - } - - assertEquals(targetRows, x); - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java deleted file mode 100644 index 14396997d2863..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc.h2; - -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeStampVectorValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVectorValues; - -import java.io.IOException; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.Calendar; -import java.util.TimeZone; -import java.util.stream.Stream; -import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; -import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; -import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; -import org.apache.arrow.adapter.jdbc.Table; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with - * TimeZone based Date, Time and Timestamp datatypes for H2 database. 
- */ -public class JdbcToArrowTimeZoneTest extends AbstractJdbcToArrowTest { - - private static final String EST_DATE = "est_date"; - private static final String EST_TIME = "est_time"; - private static final String EST_TIMESTAMP = "est_timestamp"; - private static final String GMT_DATE = "gmt_date"; - private static final String GMT_TIME = "gmt_time"; - private static final String GMT_TIMESTAMP = "gmt_timestamp"; - private static final String PST_DATE = "pst_date"; - private static final String PST_TIME = "pst_time"; - private static final String PST_TIMESTAMP = "pst_timestamp"; - - private static final String[] testFiles = { - "h2/test1_est_date_h2.yml", - "h2/test1_est_time_h2.yml", - "h2/test1_est_timestamp_h2.yml", - "h2/test1_gmt_date_h2.yml", - "h2/test1_gmt_time_h2.yml", - "h2/test1_gmt_timestamp_h2.yml", - "h2/test1_pst_date_h2.yml", - "h2/test1_pst_time_h2.yml", - "h2/test1_pst_timestamp_h2.yml" - }; - - /** - * Get the test data as a collection of Table objects for each test iteration. - * - * @return Collection of Table objects - * @throws SQLException on error - * @throws ClassNotFoundException on error - * @throws IOException on error - */ - public static Stream getTestData() - throws SQLException, ClassNotFoundException, IOException { - return Arrays.stream(prepareTestData(testFiles, JdbcToArrowTimeZoneTest.class)) - .map(Arguments::of); - } - - /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with TimeZone - * based Date, Time and Timestamp datatype. 
- */ - @ParameterizedTest - @MethodSource("getTestData") - @Override - public void testJdbcToArrowValues(Table table) - throws SQLException, IOException, ClassNotFoundException { - this.initializeDatabase(table); - - testDataSets( - sqlToArrow( - conn, - table.getQuery(), - new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), - false); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), - false); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), - false); - testDataSets( - sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder( - new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))) - .build()), - false); - testDataSets( - sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder( - new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))) - .build()), - false); - } - - @ParameterizedTest - @MethodSource("getTestData") - public void testJdbcSchemaMetadata(Table table) throws SQLException, ClassNotFoundException { - this.initializeDatabase(table); - - Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone())); - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true).build(); - ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); - Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); - JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); - } - - /** - * This method calls the assert methods for various DataSets. 
- * - * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' - * mapping declared in configuration only manually - */ - @Override - public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { - JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - - switch (table.getType()) { - case EST_DATE: - case GMT_DATE: - case PST_DATE: - assertDateVectorValues( - (DateDayVector) root.getVector(table.getVector()), - table.getValues().length, - table.getIntValues()); - break; - case EST_TIME: - case GMT_TIME: - case PST_TIME: - assertTimeVectorValues( - (TimeMilliVector) root.getVector(table.getVector()), - table.getValues().length, - table.getLongValues()); - break; - case EST_TIMESTAMP: - case GMT_TIMESTAMP: - case PST_TIMESTAMP: - assertTimeStampVectorValues( - (TimeStampVector) root.getVector(table.getVector()), - table.getValues().length, - table.getLongValues()); - break; - default: - // do nothing - break; - } - } -} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java deleted file mode 100644 index de9eff327ef6f..0000000000000 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java +++ /dev/null @@ -1,579 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.jdbc.h2; - -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBinaryValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBooleanValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getCharArray; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDecimalValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDoubleValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getFloatValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getIntValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getListValues; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getLongValues; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.IOException; -import java.math.BigDecimal; -import java.sql.SQLException; -import java.sql.Types; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Calendar; -import java.util.List; -import org.apache.arrow.adapter.jdbc.ArrowVectorIterator; -import org.apache.arrow.adapter.jdbc.JdbcToArrow; -import 
org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; -import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; -import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; -import org.apache.arrow.adapter.jdbc.Table; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.MethodSource; - -public class JdbcToArrowVectorIteratorTest extends JdbcToArrowTest { - - @ParameterizedTest - @MethodSource("getTestData") - @Override - public void testJdbcToArrowValues(Table table) - throws SQLException, IOException, ClassNotFoundException { - this.initializeDatabase(table); - - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setTargetBatchSize(3) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - - ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator( - 
conn.createStatement().executeQuery(table.getQuery()), config); - - validate(iterator); - } - - @ParameterizedTest - @MethodSource("getTestData") - public void testVectorSchemaRootReuse(Table table, boolean reuseVectorSchemaRoot) - throws SQLException, IOException, ClassNotFoundException { - this.initializeDatabase(table); - - Integer[][] intValues = { - {101, 102, 103}, - {104, null, null}, - {107, 108, 109}, - {110} - }; - Integer[][][] listValues = { - {{1, 2, 3}, {1, 2}, {1}}, - {{2, 3, 4}, {2, 3}, {2}}, - {{3, 4, 5}, {3, 4}, {3}}, - {{}} - }; - - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setTargetBatchSize(3) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - - ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator( - conn.createStatement().executeQuery(table.getQuery()), config); - - int batchCount = 0; - VectorSchemaRoot prev = null; - VectorSchemaRoot cur = null; - while (iterator.hasNext()) { - cur = iterator.next(); - assertNotNull(cur); - - // verify the first column, with may contain nulls. - List intVectors = new ArrayList<>(); - intVectors.add((IntVector) cur.getVector(0)); - assertIntVectorValues(intVectors, intValues[batchCount].length, intValues[batchCount]); - - // verify arrays are handled correctly - List listVectors = new ArrayList<>(); - listVectors.add((ListVector) cur.getVector(18)); - assertListVectorValues(listVectors, listValues[batchCount].length, listValues[batchCount]); - - if (prev != null) { - // skip the first iteration - - if (reuseVectorSchemaRoot) { - // when reuse is enabled, different iterations are based on the same vector schema root. - assertTrue(prev == cur); - } else { - // when reuse is enabled, a new vector schema root is created in each iteration. 
- assertFalse(prev == cur); - if (batchCount < 3) { - cur.close(); - } - } - } - - prev = cur; - batchCount += 1; - } - - iterator.close(); - if (!reuseVectorSchemaRoot) { - assertNotNull(cur); - // test that closing the iterator does not close the vectors held by the consumers - assertNotEquals(cur.getVector(0).getValueCount(), 0); - cur.close(); - } - // make sure we have at least two batches, so the above test paths are actually covered - assertTrue(batchCount > 1); - } - - @ParameterizedTest - @MethodSource("getTestData") - public void testJdbcToArrowValuesNoLimit(Table table) - throws SQLException, IOException, ClassNotFoundException { - this.initializeDatabase(table); - - JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - - ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator( - conn.createStatement().executeQuery(table.getQuery()), config); - - validate(iterator); - } - - @ParameterizedTest - @MethodSource("getTestData") - public void testTimeStampConsumer(Table table, boolean reuseVectorSchemaRoot) - throws SQLException, IOException, ClassNotFoundException { - this.initializeDatabase(table); - - final String sql = "select timestamp_field11 from table1"; - - // first experiment, with calendar and time zone. 
- JdbcToArrowConfig config = - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setTargetBatchSize(3) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - assertNotNull(config.getCalendar()); - - try (ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(sql), config)) { - VectorSchemaRoot root = iterator.next(); - assertEquals(1, root.getFieldVectors().size()); - - // vector with time zone info. - assertTrue(root.getVector(0) instanceof TimeStampMilliTZVector); - } - - // second experiment, without calendar and time zone. - config = - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), null) - .setTargetBatchSize(3) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); - assertNull(config.getCalendar()); - - try (ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(sql), config)) { - VectorSchemaRoot root = iterator.next(); - assertEquals(1, root.getFieldVectors().size()); - - // vector without time zone info. 
- assertTrue(root.getVector(0) instanceof TimeStampMilliVector); - } - } - - private void validate(ArrowVectorIterator iterator) throws SQLException, IOException { - - List bigIntVectors = new ArrayList<>(); - List tinyIntVectors = new ArrayList<>(); - List intVectors = new ArrayList<>(); - List smallIntVectors = new ArrayList<>(); - List vectorsForBinary = new ArrayList<>(); - List vectorsForBlob = new ArrayList<>(); - List vectorsForClob = new ArrayList<>(); - List vectorsForVarChar = new ArrayList<>(); - List vectorsForChar = new ArrayList<>(); - List vectorsForBit = new ArrayList<>(); - List vectorsForBool = new ArrayList<>(); - List dateDayVectors = new ArrayList<>(); - List timeMilliVectors = new ArrayList<>(); - List timeStampVectors = new ArrayList<>(); - List decimalVectors = new ArrayList<>(); - List float4Vectors = new ArrayList<>(); - List float8Vectors = new ArrayList<>(); - List listVectors = new ArrayList<>(); - - List roots = new ArrayList<>(); - while (iterator.hasNext()) { - VectorSchemaRoot root = iterator.next(); - roots.add(root); - - JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - - bigIntVectors.add((BigIntVector) root.getVector(BIGINT)); - tinyIntVectors.add((TinyIntVector) root.getVector(TINYINT)); - intVectors.add((IntVector) root.getVector(INT)); - smallIntVectors.add((SmallIntVector) root.getVector(SMALLINT)); - vectorsForBinary.add((VarBinaryVector) root.getVector(BINARY)); - vectorsForBlob.add((VarBinaryVector) root.getVector(BLOB)); - vectorsForClob.add((VarCharVector) root.getVector(CLOB)); - vectorsForVarChar.add((VarCharVector) root.getVector(VARCHAR)); - vectorsForChar.add((VarCharVector) root.getVector(CHAR)); - vectorsForBit.add((BitVector) root.getVector(BIT)); - vectorsForBool.add((BitVector) root.getVector(BOOL)); - dateDayVectors.add((DateDayVector) root.getVector(DATE)); - timeMilliVectors.add((TimeMilliVector) root.getVector(TIME)); - timeStampVectors.add((TimeStampVector) root.getVector(TIMESTAMP)); - 
decimalVectors.add((DecimalVector) root.getVector(DECIMAL)); - float4Vectors.add((Float4Vector) root.getVector(REAL)); - float8Vectors.add((Float8Vector) root.getVector(DOUBLE)); - listVectors.add((ListVector) root.getVector(LIST)); - } - assertBigIntVectorValues( - bigIntVectors, table.getRowCount(), getLongValues(table.getValues(), BIGINT)); - assertTinyIntVectorValues( - tinyIntVectors, table.getRowCount(), getIntValues(table.getValues(), TINYINT)); - assertIntVectorValues(intVectors, table.getRowCount(), getIntValues(table.getValues(), INT)); - assertSmallIntVectorValues( - smallIntVectors, table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); - assertBinaryVectorValues( - vectorsForBinary, table.getRowCount(), getBinaryValues(table.getValues(), BINARY)); - assertBinaryVectorValues( - vectorsForBlob, table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); - assertVarCharVectorValues( - vectorsForClob, table.getRowCount(), getCharArray(table.getValues(), CLOB)); - assertVarCharVectorValues( - vectorsForVarChar, table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); - assertVarCharVectorValues( - vectorsForChar, table.getRowCount(), getCharArray(table.getValues(), CHAR)); - assertBitVectorValues(vectorsForBit, table.getRowCount(), getIntValues(table.getValues(), BIT)); - assertBooleanVectorValues( - vectorsForBool, table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); - assertDateDayVectorValues( - dateDayVectors, table.getRowCount(), getLongValues(table.getValues(), DATE)); - assertTimeMilliVectorValues( - timeMilliVectors, table.getRowCount(), getLongValues(table.getValues(), TIME)); - assertTimeStampVectorValues( - timeStampVectors, table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); - assertDecimalVectorValues( - decimalVectors, table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL)); - assertFloat4VectorValues( - float4Vectors, table.getRowCount(), getFloatValues(table.getValues(), REAL)); - 
assertFloat8VectorValues( - float8Vectors, table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); - assertListVectorValues( - listVectors, table.getRowCount(), getListValues(table.getValues(), LIST)); - - roots.forEach(root -> root.close()); - } - - private void assertFloat8VectorValues(List vectors, int rowCount, Double[] values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (Float8Vector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(values[index++].doubleValue(), vector.get(i), 0.01); - } - } - } - - private void assertFloat4VectorValues(List vectors, int rowCount, Float[] values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (Float4Vector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(values[index++].floatValue(), vector.get(i), 0.01); - } - } - } - - private void assertDecimalVectorValues( - List vectors, int rowCount, BigDecimal[] values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (DecimalVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - assertNotNull(vector.getObject(i)); - assertEquals(values[index++].doubleValue(), vector.getObject(i).doubleValue(), 0); - } - } - } - - private void assertTimeStampVectorValues( - List vectors, int rowCount, Long[] values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (TimeStampVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(values[index++].longValue(), vector.get(i)); - } - } - } - - private void assertTimeMilliVectorValues( - List vectors, int rowCount, Long[] 
values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (TimeMilliVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(values[index++].longValue(), vector.get(i)); - } - } - } - - private void assertDateDayVectorValues(List vectors, int rowCount, Long[] values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (DateDayVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(values[index++].longValue(), vector.get(i)); - } - } - } - - private void assertBitVectorValues(List vectors, int rowCount, Integer[] values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (BitVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(values[index++].intValue(), vector.get(i)); - } - } - } - - private void assertBooleanVectorValues(List vectors, int rowCount, Boolean[] values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (BitVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(values[index++], vector.get(i) == 1); - } - } - } - - private void assertVarCharVectorValues( - List vectors, int rowCount, byte[][] values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (VarCharVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - assertArrayEquals(values[index++], vector.get(i)); - } - } - } - - private void assertBinaryVectorValues( - List vectors, int rowCount, byte[][] values) { - int valueCount = 
vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (VarBinaryVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - assertArrayEquals(values[index++], vector.get(i)); - } - } - } - - private void assertSmallIntVectorValues( - List vectors, int rowCount, Integer[] values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (SmallIntVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(values[index++].intValue(), vector.get(i)); - } - } - } - - private void assertTinyIntVectorValues( - List vectors, int rowCount, Integer[] values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (TinyIntVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(values[index++].intValue(), vector.get(i)); - } - } - } - - private void assertBigIntVectorValues(List vectors, int rowCount, Long[] values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (BigIntVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(values[index++].longValue(), vector.get(i)); - } - } - } - - private void assertIntVectorValues(List vectors, int rowCount, Integer[] values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (IntVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - if (values[index] == null) { - assertTrue(vector.isNull(i)); - } else { - assertEquals(values[index].longValue(), vector.get(i)); - } - index++; - } - } - } - - public static void assertListVectorValues( - List vectors, 
int rowCount, Integer[][] values) { - int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); - assertEquals(rowCount, valueCount); - - int index = 0; - for (ListVector vector : vectors) { - for (int i = 0; i < vector.getValueCount(); i++) { - if (values[index] == null) { - assertTrue(vector.isNull(i)); - } else { - List list = (List) vector.getObject(i); - assertEquals(Arrays.asList(values[index]), list); - } - index++; - } - } - } - - /** Runs a simple query, and encapsulates the result into a field vector. */ - private FieldVector getQueryResult(JdbcToArrowConfig config) throws SQLException, IOException { - ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator( - conn.createStatement().executeQuery("select real_field8 from table1"), config); - - VectorSchemaRoot root = iterator.next(); - - // only one vector, since there is one column in the select statement. - assertEquals(1, root.getFieldVectors().size()); - FieldVector result = root.getVector(0); - - // make sure some data is actually read - assertTrue(result.getValueCount() > 0); - - return result; - } - - @ParameterizedTest - @MethodSource("getTestData") - public void testJdbcToArrowCustomTypeConversion(Table table, boolean reuseVectorSchemaRoot) - throws SQLException, IOException, ClassNotFoundException { - this.initializeDatabase(table); - - JdbcToArrowConfigBuilder builder = - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP); - - // first experiment, using default type converter - JdbcToArrowConfig config = builder.build(); - - try (FieldVector vector = getQueryResult(config)) { - // the default converter translates real to float4 - assertTrue(vector instanceof Float4Vector); - } - - // second experiment, using customized type converter - 
builder.setJdbcToArrowTypeConverter( - (fieldInfo) -> { - switch (fieldInfo.getJdbcType()) { - case Types.REAL: - // this is different from the default type converter - return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); - default: - return null; - } - }); - config = builder.build(); - - try (FieldVector vector = getQueryResult(config)) { - // the customized converter translates real to float8 - assertTrue(vector instanceof Float8Vector); - } - } -} diff --git a/java/adapter/jdbc/src/test/resources/h2/comment.sql b/java/adapter/jdbc/src/test/resources/h2/comment.sql deleted file mode 100644 index db8964fe1d4ac..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/comment.sql +++ /dev/null @@ -1,21 +0,0 @@ ---Licensed to the Apache Software Foundation (ASF) under one or more contributor ---license agreements. See the NOTICE file distributed with this work for additional ---information regarding copyright ownership. The ASF licenses this file to ---You under the Apache License, Version 2.0 (the "License"); you may not use ---this file except in compliance with the License. You may obtain a copy of ---the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required ---by applicable law or agreed to in writing, software distributed under the ---License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS ---OF ANY KIND, either express or implied. See the License for the specific ---language governing permissions and limitations under the License. 
-create table table1( - id bigint primary key, - name varchar(255), - column1 boolean, - columnN int - ); - -COMMENT ON TABLE table1 IS 'This is super special table with valuable data'; -COMMENT ON COLUMN table1.id IS 'Record identifier'; -COMMENT ON COLUMN table1.name IS 'Name of record'; -COMMENT ON COLUMN table1.columnN IS 'Informative description of columnN'; \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml deleted file mode 100644 index c4f0017095df0..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml +++ /dev/null @@ -1,123 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_all_datatypes_h2' - -create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT, - decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP, - binary_field12 VARBINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(14), bit_field17 BIT, - null_field18 NULL, list_field19 INT ARRAY, map_field20 VARCHAR(256));' - -data: - - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), - PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[1, 2, 3], ''{"a":"b","key":"12345"}'');' - - - 'INSERT INTO table1 VALUES (102, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), - PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[1, 2],''{"c":"d"}'');' - - - 'INSERT INTO table1 VALUES (103, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', 
''HH:mm:ss z''), - PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[1],''{"e":"f"}'');' - - - 'INSERT INTO table1 VALUES (104, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), - PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[2, 3, 4],''{"g":"h"}'');' - - - 'INSERT INTO table1 VALUES (null, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), - PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[2, 3],''{"i":"j"}'');' - - - 'INSERT INTO table1 VALUES (null, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), - PARSEDATETIME(''2018-02-12 
GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[2],''{"k":"l"}'');' - - - 'INSERT INTO table1 VALUES (107, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), - PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[3, 4, 5],''{"m":"n"}'');' - - - 'INSERT INTO table1 VALUES (108, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), - PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[3, 4],''{"o":"p"}'');' - - - 'INSERT INTO table1 VALUES (109, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), - PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 
12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[3],''{"q":"r"}'');' - - - 'INSERT INTO table1 VALUES (110, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), - PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[],''{"s":"t"}'');' - -query: 'select int_field1, bool_field2, tinyint_field3, smallint_field4, bigint_field5, decimal_field6, double_field7, real_field8, - time_field9, date_field10, timestamp_field11, binary_field12, varchar_field13, blob_field14, clob_field15, char_field16, bit_field17, null_field18, list_field19, map_field20 from table1' - -drop: 'DROP table table1;' - -rowCount: '10' - -values: - - 'INT_FIELD1=101,102,103,104,null,null,107,108,109,110' - - 'BOOL_FIELD2=1,1,1,1,1,1,1,1,1,1' - - 'BIT_FIELD17=1,1,1,1,1,1,1,1,1,1' - - 'TINYINT_FIELD3=45,45,45,45,45,45,45,45,45,45' - - 'SMALLINT_FIELD4=12000,12000,12000,12000,12000,12000,12000,12000,12000,12000' - - 'BIGINT_FIELD5=92233720,92233720,92233720,92233720,92233720,92233720,92233720,92233720,92233720,92233720' - - 'REAL_FIELD8=56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f' - - 
'DECIMAL_FIELD6=17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23' - - 'DOUBLE_FIELD7=56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345' - - 'TIME_FIELD9=45935000,45935000,45935000,45935000,45935000,45935000,45935000,45935000,45935000,45935000' - - 'DATE_FIELD10=17574,17574,17574,17574,17574,17574,17574,17574,17574,17574' - - 'TIMESTAMP_FIELD11=1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000' - - 'CHAR_FIELD16=some char text,some char text,some char text,some char text,some char text, - some char text,some char text,some char text,some char text,some char text' - - 'VARCHAR_FIELD13=some text that needs to be converted to varchar,some text that needs to be converted to varchar, - some text that needs to be converted to varchar,some text that needs to be converted to varchar, - some text that needs to be converted to varchar,some text that needs to be converted to varchar, - some text that needs to be converted to varchar,some text that needs to be converted to varchar, - some text that needs to be converted to varchar,some text that needs to be converted to varchar' - - 'BINARY_FIELD12=736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 
736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - 'BLOB_FIELD14=736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - 'CLOB_FIELD15=some text that needs to be converted to clob,some text that needs to be converted to clob, - some text that needs to be converted to clob,some text that needs to be converted to clob, - some text that needs to be converted to clob,some text that needs to be converted to clob, - some text that needs to be converted to clob,some text that needs to be converted to clob, - some text that needs to be converted to clob,some text that needs to be converted to clob' - - 'LIST_FIELD19=(1;2;3),(1;2),(1),(2;3;4),(2;3),(2),(3;4;5),(3;4),(3),()' - - 
'MAP_FIELD20={"a":"b"|"key":"12345"},{"c":"d"},{"e":"f"},{"g":"h"},{"i":"j"},{"k":"l"},{"m":"n"},{"o":"p"},{"q":"r"},{"s":"t"}' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml deleted file mode 100644 index 9be76229dab82..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml +++ /dev/null @@ -1,55 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_all_datatypes_null_h2' - -type: 'null' - -vectors: - - 'INT_FIELD1' - - 'BOOL_FIELD2' - - 'TINYINT_FIELD3' - - 'SMALLINT_FIELD4' - - 'BIGINT_FIELD5' - - 'DECIMAL_FIELD6' - - 'DOUBLE_FIELD7' - - 'REAL_FIELD8' - - 'TIME_FIELD9' - - 'DATE_FIELD10' - - 'TIMESTAMP_FIELD11' - - 'BINARY_FIELD12' - - 'VARCHAR_FIELD13' - - 'BLOB_FIELD14' - - 'CLOB_FIELD15' - - 'CHAR_FIELD16' - - 'BIT_FIELD17' - - 'LIST_FIELD19' - - 'MAP_FIELD20' - -rowCount: '5' - -create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT, - decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP, - binary_field12 VARBINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(14), bit_field17 BIT, - list_field19 INT ARRAY, map_field20 VARCHAR(256));' - -data: - - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' - - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' - - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' - - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' - - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' - -query: 'select int_field1, bool_field2, tinyint_field3, smallint_field4, bigint_field5, decimal_field6, double_field7, real_field8, - time_field9, date_field10, timestamp_field11, binary_field12, varchar_field13, blob_field14, clob_field15, char_field16, bit_field17, - list_field19 from table1' - -drop: 'DROP 
table table1;' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml deleted file mode 100644 index fda31da150775..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml +++ /dev/null @@ -1,91 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_all_datatypes_selected_null_rows_h2' - -type: 'selected_null_row' - -vectors: - - 'INT_FIELD1' - - 'BOOL_FIELD2' - - 'TINYINT_FIELD3' - - 'SMALLINT_FIELD4' - - 'BIGINT_FIELD5' - - 'DECIMAL_FIELD6' - - 'DOUBLE_FIELD7' - - 'REAL_FIELD8' - - 'TIME_FIELD9' - - 'DATE_FIELD10' - - 'TIMESTAMP_FIELD11' - - 'BINARY_FIELD12' - - 'VARCHAR_FIELD13' - - 'BLOB_FIELD14' - - 'CLOB_FIELD15' - - 'CHAR_FIELD16' - - 'BIT_FIELD17' - - 'LIST_FIELD19' - - 'MAP_FIELD20' - -create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT, - decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP, - binary_field12 VARBINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(14), bit_field17 BIT, - list_field19 INT ARRAY, map_field20 VARCHAR(256));' - -data: - - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' - - - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), - PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', - 1, ARRAY[1, 2, 3],''{"a":"b"}'');' - - - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' - - - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 
56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), - PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', - 1, ARRAY[1, 2, 3],''{"c":"d"}'');' - - - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' - -query: 'select int_field1, bool_field2, tinyint_field3, smallint_field4, bigint_field5, decimal_field6, double_field7, real_field8, - time_field9, date_field10, timestamp_field11, binary_field12, varchar_field13, blob_field14, clob_field15, char_field16, bit_field17, - list_field19, map_field20 from table1' - -drop: 'DROP table table1;' - -rowCount: '5' - -values: - - 'INT_FIELD1=null,101,null,101,null' - - 'BOOL_FIELD2=null,1,null,1,null' - - 'BIT_FIELD17=null,1,null,1,null' - - 'TINYINT_FIELD3=null,45,null,45,null' - - 'SMALLINT_FIELD4=null,12000,null,12000,null' - - 'BIGINT_FIELD5=null,92233720,null,92233720,null' - - 'REAL_FIELD8=null,56478356785.345f,null,56478356785.345f,null' - - 'DECIMAL_FIELD6=null,17345667789.23,null,17345667789.23,null' - - 'DOUBLE_FIELD7=null,56478356785.345,null,56478356785.345,null' - - 'TIME_FIELD9=null,45935000,null,45935000,null' - - 'DATE_FIELD10=null,17574,null,17574,null' - - 'TIMESTAMP_FIELD11=null,1518439535000,null,1518439535000,null' - - 'CHAR_FIELD16=null,some char text,null,some char text,null' - - 'VARCHAR_FIELD13=null,some text that needs to be converted to varchar,null, - some text that needs to be converted to varchar,null' - - 
'BINARY_FIELD12=null,736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - null,736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,null' - - 'BLOB_FIELD14=null,736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, - null,736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,null' - - 'CLOB_FIELD15=null,some text that needs to be converted to clob,null,some text that needs to be converted to clob,null' - - 'LIST_FIELD19=null,(1;2;3),null,(1;2;3),null' - - 'MAP_FIELD20=null,{"a":"b"},null,{"c":"d"},null' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_bigint_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_bigint_h2.yml deleted file mode 100644 index d9b39dfa204ac..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_bigint_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_bigint_h2' - -type: 'big_int' - -vector: 'BIGINT_FIELD5' - -create: 'CREATE TABLE table1 (bigint_field5 BIGINT);' - -data: - - 'INSERT INTO table1 VALUES (92233720);' - - 'INSERT INTO table1 VALUES (92233720);' - - 'INSERT INTO table1 VALUES (92233720);' - - 'INSERT INTO table1 VALUES (92233720);' - - 'INSERT INTO table1 VALUES (92233720);' - - 'INSERT INTO table1 VALUES (92233720);' - - 'INSERT INTO table1 VALUES (92233720);' - - 'INSERT INTO table1 VALUES (92233720);' - - 'INSERT INTO table1 VALUES (92233720);' - - 'INSERT INTO table1 VALUES (92233720);' - -query: 'select bigint_field5 from table1;' - -drop: 'DROP table table1;' - -values: - - '92233720' - - '92233720' - - '92233720' - - '92233720' - - '92233720' - - '92233720' - - '92233720' - - '92233720' - - '92233720' - - '92233720' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml deleted file mode 100644 index 3d7b1ec658ef7..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_binary_h2' - -type: 'binary' - -vector: 'BINARY_FIELD12' - -create: 'CREATE TABLE table1 (binary_field12 VARBINARY(100));' - -data: - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - -query: 'select binary_field12 from table1;' - -drop: 'DROP table table1;' - -values: - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - 
'736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_bit_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_bit_h2.yml deleted file mode 100644 index 7e6e07cab3d9c..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_bit_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_bit_h2' - -type: 'bit' - -vector: 'BIT_FIELD17' - -create: 'CREATE TABLE table1 (bit_field17 BIT);' - -data: - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - -query: 'select bit_field17 from table1;' - -drop: 'DROP table table1;' - -values: - - '1' - - '1' - - '1' - - '1' - - '1' - - '1' - - '1' - - '1' - - '1' - - '1' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_blob_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_blob_h2.yml deleted file mode 100644 index df445120f6752..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_blob_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_blob_h2' - -type: 'blob' - -vector: 'BLOB_FIELD14' - -create: 'CREATE TABLE table1 (blob_field14 BLOB);' - -data: - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' - -query: 'select blob_field14 from table1;' - -drop: 'DROP table table1;' - -values: - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - 
'736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' - - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_bool_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_bool_h2.yml deleted file mode 100644 index 2284b9fae4a46..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_bool_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_bool_h2' - -type: 'bool' - -vector: 'BOOL_FIELD2' - -create: 'CREATE TABLE table1 (bool_field2 BOOLEAN);' - -data: - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - - 'INSERT INTO table1 VALUES (1);' - -query: 'select bool_field2 from table1;' - -drop: 'DROP table table1;' - -values: - - 'true' - - 'true' - - 'true' - - 'true' - - 'true' - - 'true' - - 'true' - - 'true' - - 'true' - - 'true' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml deleted file mode 100644 index 588df7bff4df6..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_char_h2' - -type: 'char' - -vector: 'CHAR_FIELD16' - -create: 'CREATE TABLE table1 (char_field16 CHAR(14));' - -data: - - 'INSERT INTO table1 VALUES (''some char text'');' - - 'INSERT INTO table1 VALUES (''some char text'');' - - 'INSERT INTO table1 VALUES (''some char text'');' - - 'INSERT INTO table1 VALUES (''some char text'');' - - 'INSERT INTO table1 VALUES (''some char text'');' - - 'INSERT INTO table1 VALUES (''some char text'');' - - 'INSERT INTO table1 VALUES (''some char text'');' - - 'INSERT INTO table1 VALUES (''some char text'');' - - 'INSERT INTO table1 VALUES (''some char text'');' - - 'INSERT INTO table1 VALUES (''some char text'');' - -query: 'select char_field16 from table1;' - -drop: 'DROP table table1;' - -values: - - 'some char text' - - 'some char text' - - 'some char text' - - 'some char text' - - 'some char text' - - 'some char text' - - 'some char text' - - 'some char text' - - 'some char text' - - 'some char text' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml deleted file mode 100644 index 2e60a4af5a970..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml +++ /dev/null @@ -1,43 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. 
See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_charset_ch_h2' - -type: 'charset' - -vectors: - - 'VARCHAR_FIELD13' - - 'CHAR_FIELD16' - - 'CLOB_FIELD15' - -rowCount: '5' - -charSet: 'GBK' - -create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(13));' - -data: - - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');' - - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');' - - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');' - - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');' - - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');' - -query: 'select varchar_field13, clob_field15, char_field16 from table1' - -drop: 'DROP table table1;' - -values: - - 'VARCHAR_FIELD13=一些帶有char編碼的文本需要轉換為varchar,一些帶有char編碼的文本需要轉換為varchar,一些帶有char編碼的文本需要轉換為varchar, - 一些帶有char編碼的文本需要轉換為varchar,一些帶有char編碼的文本需要轉換為varchar' - - 'CLOB_FIELD15=一些带有char编码的文本需要转换为clob,一些带有char编码的文本需要转换为clob,一些带有char编码的文本需要转换为clob, - 一些带有char编码的文本需要转换为clob,一些带有char编码的文本需要转换为clob' - - 'CHAR_FIELD16=一些char编码的字符文本,一些char编码的字符文本,一些char编码的字符文本,一些char编码的字符文本,一些char编码的字符文本' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml deleted file mode 100644 index 383681e5b3b41..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml +++ /dev/null @@ -1,53 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. 
The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_charset_h2' - -type: 'charset' - -vectors: - - 'VARCHAR_FIELD13' - - 'CHAR_FIELD16' - - 'CLOB_FIELD15' - -rowCount: '10' - -create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(33));' - -data: - - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' - - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' - - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' - - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' - - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' - - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be 
converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' - - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' - - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' - - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' - - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' - -query: 'select varchar_field13, clob_field15, char_field16 from table1' - -drop: 'DROP table table1;' - -values: - - 'VARCHAR_FIELD13=some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar, - some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar, - some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar, - some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar, - some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar' - - 'CLOB_FIELD15=some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob, - some text with char 
encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob, - some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob, - some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob, - some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob' - - 'CHAR_FIELD16=some char text with char encoding,some char text with char encoding,some char text with char encoding,some char text with char encoding,some char text with char encoding, - some char text with char encoding,some char text with char encoding,some char text with char encoding,some char text with char encoding,some char text with char encoding' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml deleted file mode 100644 index 9b3cf9a18fe01..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml +++ /dev/null @@ -1,43 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_charset_jp_h2' - -type: 'charset' - -vectors: - - 'VARCHAR_FIELD13' - - 'CHAR_FIELD16' - - 'CLOB_FIELD15' - -rowCount: '5' - -charSet: 'SJIS' - -create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(23));' - -data: - - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');' - - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');' - - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');' - - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');' - - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');' - -query: 'select varchar_field13, clob_field15, char_field16 from table1' - -drop: 'DROP table table1;' - -values: - - 'VARCHAR_FIELD13=varcharに変換する必要があるcharエンコーディングのテキスト,varcharに変換する必要があるcharエンコーディングのテキスト,varcharに変換する必要があるcharエンコーディングのテキスト, - varcharに変換する必要があるcharエンコーディングのテキスト,varcharに変換する必要があるcharエンコーディングのテキスト' - - 'CLOB_FIELD15=charエンコーディングのあるテキストをclobに変換する必要がある,charエンコーディングのあるテキストをclobに変換する必要がある,charエンコーディングのあるテキストをclobに変換する必要がある, - charエンコーディングのあるテキストをclobに変換する必要がある,charエンコーディングのあるテキストをclobに変換する必要がある' - - 'CHAR_FIELD16=charエンコーディングのあるcharテキスト,charエンコーディングのあるcharテキスト,charエンコーディングのあるcharテキスト,charエンコーディングのあるcharテキスト,charエンコーディングのあるcharテキスト' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml deleted file mode 100644 index d6e051c094fbe..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml +++ /dev/null @@ 
-1,43 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_charset_kr_h2' - -type: 'charset' - -vectors: - - 'VARCHAR_FIELD13' - - 'CHAR_FIELD16' - - 'CLOB_FIELD15' - -rowCount: '5' - -charSet: 'EUC-KR' - -create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(22));' - -data: - - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');' - - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');' - - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');' - - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');' - - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');' - -query: 'select varchar_field13, clob_field15, char_field16 from table1' - -drop: 'DROP table table1;' - -values: - - 'VARCHAR_FIELD13=char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.,char 인코딩을 
사용하는 일부 텍스트를 varchar로 변환해야합니다.,char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다., - char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.,char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.' - - 'CLOB_FIELD15=clob로 변환해야하는 char 인코딩을 가진 텍스트,clob로 변환해야하는 char 인코딩을 가진 텍스트,clob로 변환해야하는 char 인코딩을 가진 텍스트, - clob로 변환해야하는 char 인코딩을 가진 텍스트,clob로 변환해야하는 char 인코딩을 가진 텍스트' - - 'CHAR_FIELD16=char 인코딩을 사용한 char 텍스트,char 인코딩을 사용한 char 텍스트,char 인코딩을 사용한 char 텍스트,char 인코딩을 사용한 char 텍스트,char 인코딩을 사용한 char 텍스트' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_clob_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_clob_h2.yml deleted file mode 100644 index ca9f2a41973c2..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_clob_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_clob_h2' - -type: 'clob' - -vector: 'CLOB_FIELD15' - -create: 'CREATE TABLE table1 (clob_field15 CLOB);' - -data: - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' - -query: 'select CLOB_FIELD15 from table1;' - -drop: 'DROP table table1;' - -values: - - 'some text that needs to be converted to clob' - - 'some text that needs to be converted to clob' - - 'some text that needs to be converted to clob' - - 'some text that needs to be converted to clob' - - 'some text that needs to be converted to clob' - - 'some text that needs to be converted to clob' - - 'some text that needs to be converted to clob' - - 'some text that needs to be converted to clob' - - 'some text that needs to be converted to clob' - - 'some text that needs to be converted to clob' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_date_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_date_h2.yml deleted file mode 100644 index de4e66b522c02..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_date_h2.yml +++ /dev/null @@ -1,48 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. 
See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_date_h2' - -type: 'date' - -vector: 'DATE_FIELD10' - -create: 'CREATE TABLE table1 (date_field10 DATE);' - -data: - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''1969-01-01'');' - -query: 'select date_field10 from table1;' - -drop: 'DROP table table1;' - -values: - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '-365' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_decimal_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_decimal_h2.yml deleted file mode 100644 index ff2edd4fe0342..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_decimal_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. 
See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_decimal_h2' - -type: 'decimal' - -vector: 'DECIMAL_FIELD6' - -create: 'CREATE TABLE table1 (decimal_field6 DECIMAL(20,2));' - -data: - - 'INSERT INTO table1 VALUES (17345667789.23);' - - 'INSERT INTO table1 VALUES (17345667789.23);' - - 'INSERT INTO table1 VALUES (17345667789.23);' - - 'INSERT INTO table1 VALUES (17345667789.23);' - - 'INSERT INTO table1 VALUES (17345667789.23);' - - 'INSERT INTO table1 VALUES (17345667789.23);' - - 'INSERT INTO table1 VALUES (17345667789.23);' - - 'INSERT INTO table1 VALUES (17345667789.23);' - - 'INSERT INTO table1 VALUES (17345667789.23);' - - 'INSERT INTO table1 VALUES (17345667789.23);' - -query: 'select decimal_field6 from table1;' - -drop: 'DROP table table1;' - -values: - - '17345667789.23' - - '17345667789.23' - - '17345667789.23' - - '17345667789.23' - - '17345667789.23' - - '17345667789.23' - - '17345667789.23' - - '17345667789.23' - - '17345667789.23' - - '17345667789.23' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_double_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_double_h2.yml deleted file mode 100644 index e7fedd88daf0c..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_double_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license 
agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_double_h2' - -type: 'double' - -vector: 'DOUBLE_FIELD7' - -create: 'CREATE TABLE table1 (double_field7 DOUBLE);' - -data: - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - -query: 'select double_field7 from table1;' - -drop: 'DROP table table1;' - -values: - - '56478356785.345' - - '56478356785.345' - - '56478356785.345' - - '56478356785.345' - - '56478356785.345' - - '56478356785.345' - - '56478356785.345' - - '56478356785.345' - - '56478356785.345' - - '56478356785.345' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_est_date_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_est_date_h2.yml deleted file mode 100644 index a6fa0fdf202bc..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_est_date_h2.yml +++ /dev/null @@ -1,48 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more 
contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_est_date_h2' - -type: 'est_date' - -timezone: 'EST' - -vector: 'DATE_FIELD10' - -create: 'CREATE TABLE table1 (date_field10 DATE);' - -data: - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - -query: 'select date_field10 from table1;' - -drop: 'DROP table table1;' - -values: - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_est_time_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_est_time_h2.yml deleted file mode 100644 index 31a1137c64a58..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_est_time_h2.yml +++ /dev/null @@ -1,48 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. 
See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_est_time_h2' - -type: 'est_time' - -timezone: 'EST' - -vector: 'TIME_FIELD9' - -create: 'CREATE TABLE table1 (time_field9 TIME);' - -data: - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - -query: 'select time_field9 from table1;' - -drop: 'DROP table table1;' - -values: - - '63935000' - - '63935000' - - '63935000' - - '63935000' - - '63935000' - - '63935000' - - '63935000' - - '63935000' - - '63935000' - - '63935000' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_est_timestamp_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_est_timestamp_h2.yml deleted file mode 100644 index 318a2c5851cc6..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_est_timestamp_h2.yml +++ /dev/null @@ -1,49 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. 
See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_est_timestamp_h2' - -type: 'est_timestamp' - -timezone: 'EST' - -vector: 'TIMESTAMP_FIELD11' - -create: 'CREATE TABLE table1 (timestamp_field11 TIMESTAMP);' - -data: - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - -query: 'select timestamp_field11 from table1;' - -drop: 'DROP table table1;' - -values: - - '1518457535000' - - '1518457535000' - - '1518457535000' - - '1518457535000' - - '1518457535000' - - '1518457535000' - - '1518457535000' - - '1518457535000' - - '1518457535000' - - '1518457535000' - \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_gmt_date_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_gmt_date_h2.yml deleted file mode 100644 index 66bfc6b44f710..0000000000000 --- 
a/java/adapter/jdbc/src/test/resources/h2/test1_gmt_date_h2.yml +++ /dev/null @@ -1,48 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_gmt_date_h2' - -type: 'gmt_date' - -timezone: 'GMT' - -vector: 'DATE_FIELD10' - -create: 'CREATE TABLE table1 (date_field10 DATE);' - -data: - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - -query: 'select date_field10 from table1;' - -drop: 'DROP table table1;' - -values: - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_gmt_time_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_gmt_time_h2.yml deleted file mode 100644 index 1a6902d45d947..0000000000000 --- 
a/java/adapter/jdbc/src/test/resources/h2/test1_gmt_time_h2.yml +++ /dev/null @@ -1,48 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_gmt_time_h2' - -type: 'gmt_time' - -timezone: 'GMT' - -vector: 'TIME_FIELD9' - -create: 'CREATE TABLE table1 (time_field9 TIME);' - -data: - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - -query: 'select time_field9 from table1;' - -drop: 'DROP table table1;' - -values: - - '45935000' - - '45935000' - - '45935000' - - '45935000' - - '45935000' - - '45935000' - - '45935000' - - '45935000' - - '45935000' - - '45935000' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_gmt_timestamp_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_gmt_timestamp_h2.yml deleted file mode 100644 index b3059a07218ca..0000000000000 --- 
a/java/adapter/jdbc/src/test/resources/h2/test1_gmt_timestamp_h2.yml +++ /dev/null @@ -1,48 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_gmt_timestamp_h2' - -type: 'gmt_timestamp' - -timezone: 'GMT' - -vector: 'TIMESTAMP_FIELD11' - -create: 'CREATE TABLE table1 (timestamp_field11 TIMESTAMP);' - -data: - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - -query: 'select timestamp_field11 from table1;' - -drop: 'DROP table table1;' - -values: - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' \ No newline at end of file diff --git 
a/java/adapter/jdbc/src/test/resources/h2/test1_int_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_int_h2.yml deleted file mode 100644 index 8e8a8c4931ec9..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_int_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_int_h2' - -type: 'int' - -vector: 'INT_FIELD1' - -create: 'CREATE TABLE table1 (int_field1 INT);' - -data: - - 'INSERT INTO table1 VALUES (101);' - - 'INSERT INTO table1 VALUES (101);' - - 'INSERT INTO table1 VALUES (101);' - - 'INSERT INTO table1 VALUES (101);' - - 'INSERT INTO table1 VALUES (101);' - - 'INSERT INTO table1 VALUES (101);' - - 'INSERT INTO table1 VALUES (101);' - - 'INSERT INTO table1 VALUES (101);' - - 'INSERT INTO table1 VALUES (101);' - - 'INSERT INTO table1 VALUES (101);' - -values: - - '101' - - '101' - - '101' - - '101' - - '101' - - '101' - - '101' - - '101' - - '101' - - '101' - -query: 'select int_field1 from table1;' - -drop: 'DROP table table1;' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_list_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_list_h2.yml deleted file mode 100644 index 044c22182af58..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_list_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_list_h2' - -type: 'list' - -vector: 'LIST_FIELD19' - -create: 'CREATE TABLE table1 (list_field19 INT ARRAY);' - -data: - - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' - - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' - - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' - - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' - - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' - - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' - - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' - - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' - - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' - - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' - -query: 'select list_field19 from table1;' - -drop: 'DROP table table1;' - -values: - - '(1;2;3)' - - '(1;2;3)' - - '(1;2;3)' - - '(1;2;3)' - - '(1;2;3)' - - '(1;2;3)' - - '(1;2;3)' - - '(1;2;3)' - - '(1;2;3)' - - '(1;2;3)' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_map_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_map_h2.yml deleted file mode 100644 index a1800d20af601..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_map_h2.yml +++ /dev/null @@ -1,33 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_map_h2' - -type: 'map' - -vector: 'MAP_FIELD20' - -create: 'CREATE TABLE table1 (map_field20 OTHER);' - -rowCount: '4' - -data: - - 'INSERT INTO table1 VALUES (X''aced00057372002e6f72672e6170616368652e6172726f772e766563746f722e7574696c2e4a736f6e537472696e67486173684d61709819d7169e7a2ecf020000787200176a6176612e7574696c2e4c696e6b6564486173684d617034c04e5c106cc0fb0200015a000b6163636573734f72646572787200116a6176612e7574696c2e486173684d61700507dac1c31660d103000246000a6c6f6164466163746f724900097468726573686f6c6478703f4000000000000c7708000000100000000374000161740001627400033132337400067177657274797400057a78637662740001217800'');' - - 'INSERT INTO table1 VALUES (X''aced00057372002e6f72672e6170616368652e6172726f772e766563746f722e7574696c2e4a736f6e537472696e67486173684d61709819d7169e7a2ecf020000787200176a6176612e7574696c2e4c696e6b6564486173684d617034c04e5c106cc0fb0200015a000b6163636573734f72646572787200116a6176612e7574696c2e486173684d61700507dac1c31660d103000246000a6c6f6164466163746f724900097468726573686f6c6478703f4000000000000c77080000001000000003740001617400016274000163740001647400033132337400067177657274797800'');' - - 'INSERT INTO table1 VALUES (X''aced00057372002e6f72672e6170616368652e6172726f772e766563746f722e7574696c2e4a736f6e537472696e67486173684d61709819d7169e7a2ecf020000787200176a6176612e7574696c2e4c696e6b6564486173684d617034c04e5c106cc0fb0200015a000b6163636573734f72646572787200116a6176612e7574696c2e486173684d61700507dac1c31660d103000246000a6c6f6164466163746f724900097468726573686f6c6478703f4000000000000c7708000000100000000174000074000576616c75657800'');' - - 'INSERT INTO table1 VALUES 
(X''aced00057372002e6f72672e6170616368652e6172726f772e766563746f722e7574696c2e4a736f6e537472696e67486173684d61709819d7169e7a2ecf020000787200176a6176612e7574696c2e4c696e6b6564486173684d617034c04e5c106cc0fb0200015a000b6163636573734f72646572787200116a6176612e7574696c2e486173684d61700507dac1c31660d103000246000a6c6f6164466163746f724900097468726573686f6c6478703f4000000000000c7708000000100000000274000b6e6f6e456d7074794b65797074000c736f6d654f746865724b65797400007800'');' - -query: 'select map_field20 from table1;' - -drop: 'DROP table table1;' - -values: - - 'MAP_FIELD20={"a":"b"|"123":"qwerty"|"zxcvb":"!"},{"a":"b"|"123":"qwerty"|"c":"d"},{"":"value"},{"nonEmptyKey":null|"someOtherKey":""}' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml b/java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml deleted file mode 100644 index 904b27783a38a..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml +++ /dev/null @@ -1,26 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_null_and_notnull' - -type: 'nullnotnull' - -create: 'CREATE TABLE table1 (int_field_null INT, int_field_notnull INT NOT NULL);' - -data: - - 'INSERT INTO table1 VALUES (0, 0);' - - 'INSERT INTO table1 VALUES (1, 1);' - -rowCount: '2' - -query: 'select int_field_null, int_field_notnull from table1;' - -drop: 'DROP table table1;' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_null_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_null_h2.yml deleted file mode 100644 index a33b18105a88d..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_null_h2.yml +++ /dev/null @@ -1,36 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_null_h2' - -type: 'null' - -vector: 'NULL_FIELD18' - -create: 'CREATE TABLE table1 (null_field18 NULL);' - -rowCount: '10' - -data: - - 'INSERT INTO table1 VALUES (null);' - - 'INSERT INTO table1 VALUES (null);' - - 'INSERT INTO table1 VALUES (null);' - - 'INSERT INTO table1 VALUES (null);' - - 'INSERT INTO table1 VALUES (null);' - - 'INSERT INTO table1 VALUES (null);' - - 'INSERT INTO table1 VALUES (null);' - - 'INSERT INTO table1 VALUES (null);' - - 'INSERT INTO table1 VALUES (null);' - - 'INSERT INTO table1 VALUES (null);' - -query: 'select null_field18 from table1;' - -drop: 'DROP table table1;' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_pst_date_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_pst_date_h2.yml deleted file mode 100644 index 6923537c6cee8..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_pst_date_h2.yml +++ /dev/null @@ -1,48 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_pst_date_h2' - -type: 'pst_date' - -timezone: 'PST' - -vector: 'DATE_FIELD10' - -create: 'CREATE TABLE table1 (date_field10 DATE);' - -data: - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - - 'INSERT INTO table1 VALUES (''2018-02-12'');' - -query: 'select date_field10 from table1;' - -drop: 'DROP table table1;' - -values: - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' - - '17574' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_pst_time_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_pst_time_h2.yml deleted file mode 100644 index f8faaead34c5e..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_pst_time_h2.yml +++ /dev/null @@ -1,48 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_pst_time_h2' - -type: 'pst_time' - -timezone: 'PST' - -vector: 'TIME_FIELD9' - -create: 'CREATE TABLE table1 (time_field9 TIME);' - -data: - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - -query: 'select time_field9 from table1;' - -drop: 'DROP table table1;' - -values: - - '74735000' - - '74735000' - - '74735000' - - '74735000' - - '74735000' - - '74735000' - - '74735000' - - '74735000' - - '74735000' - - '74735000' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_pst_timestamp_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_pst_timestamp_h2.yml deleted file mode 100644 index 632479fce2e90..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_pst_timestamp_h2.yml +++ /dev/null @@ -1,48 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_pst_timestamp_h2' - -type: 'pst_timestamp' - -timezone: 'PST' - -vector: 'TIMESTAMP_FIELD11' - -create: 'CREATE TABLE table1 (timestamp_field11 TIMESTAMP);' - -data: - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - -query: 'select timestamp_field11 from table1;' - -drop: 'DROP table table1;' - -values: - - '1518468335000' - - '1518468335000' - - '1518468335000' - - '1518468335000' - - '1518468335000' - - '1518468335000' - - '1518468335000' - - '1518468335000' - - '1518468335000' - - '1518468335000' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_real_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_real_h2.yml deleted file mode 100644 index 8ca58c126b1ba..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_real_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. 
You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_real_h2' - -type: 'real' - -vector: 'REAL_FIELD8' - -create: 'CREATE TABLE table1 (real_field8 REAL);' - -data: - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - - 'INSERT INTO table1 VALUES (56478356785.345);' - -query: 'select real_field8 from table1;' - -drop: 'DROP table table1;' - -values: - - '56478356785.345f' - - '56478356785.345f' - - '56478356785.345f' - - '56478356785.345f' - - '56478356785.345f' - - '56478356785.345f' - - '56478356785.345f' - - '56478356785.345f' - - '56478356785.345f' - - '56478356785.345f' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml deleted file mode 100644 index 60a4462272c7f..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml +++ /dev/null @@ -1,49 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. 
The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_selected_datatypes_null_h2' - -type: 'selected_null_column' - -vectors: - - 'BIGINT_FIELD5' - - 'DECIMAL_FIELD6' - - 'DOUBLE_FIELD7' - - 'REAL_FIELD8' - - 'TIME_FIELD9' - - 'DATE_FIELD10' - - 'TIMESTAMP_FIELD11' - - 'BINARY_FIELD12' - - 'VARCHAR_FIELD13' - - 'BLOB_FIELD14' - - 'CLOB_FIELD15' - - 'CHAR_FIELD16' - - 'BIT_FIELD17' - - 'LIST_FIELD19' - - 'MAP_FIELD20' - -rowCount: '5' - -create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT, - decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP, - binary_field12 VARBINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(14), bit_field17 BIT, - list_field19 INT ARRAY, map_field20 VARCHAR(256));' - -data: - - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);' - - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);' - - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);' - - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);' - - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 
12001);' - -query: 'select bigint_field5, decimal_field6, double_field7, real_field8, time_field9, date_field10, timestamp_field11, binary_field12, varchar_field13, blob_field14, clob_field15, char_field16, bit_field17, list_field19, map_field20 from table1' - -drop: 'DROP table table1;' \ No newline at end of file diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_smallint_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_smallint_h2.yml deleted file mode 100644 index 2be83f8884ab9..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_smallint_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_smallint_h2' - -type: 'small_int' - -vector: 'SMALLINT_FIELD4' - -create: 'CREATE TABLE table1 (smallint_field4 SMALLINT);' - -data: - - 'INSERT INTO table1 VALUES (12000);' - - 'INSERT INTO table1 VALUES (12000);' - - 'INSERT INTO table1 VALUES (12000);' - - 'INSERT INTO table1 VALUES (12000);' - - 'INSERT INTO table1 VALUES (12000);' - - 'INSERT INTO table1 VALUES (12000);' - - 'INSERT INTO table1 VALUES (12000);' - - 'INSERT INTO table1 VALUES (12000);' - - 'INSERT INTO table1 VALUES (12000);' - - 'INSERT INTO table1 VALUES (12000);' - -query: 'select smallint_field4 from table1;' - -drop: 'DROP table table1;' - -values: - - '12000' - - '12000' - - '12000' - - '12000' - - '12000' - - '12000' - - '12000' - - '12000' - - '12000' - - '12000' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_time_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_time_h2.yml deleted file mode 100644 index 323c971b9c114..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_time_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_time_h2' - -type: 'time' - -vector: 'TIME_FIELD9' - -create: 'CREATE TABLE table1 (time_field9 TIME);' - -data: - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - - 'INSERT INTO table1 VALUES (''12:45:35'');' - -query: 'select time_field9 from table1;' - -drop: 'DROP table table1;' - -values: - - '45935000' - - '45935000' - - '45935000' - - '45935000' - - '45935000' - - '45935000' - - '45935000' - - '45935000' - - '45935000' - - '45935000' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_timestamp_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_timestamp_h2.yml deleted file mode 100644 index 380bed20c0918..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_timestamp_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_timestamp_h2' - -type: 'timestamp' - -vector: 'TIMESTAMP_FIELD11' - -create: 'CREATE TABLE table1 (timestamp_field11 TIMESTAMP);' - -data: - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' - -query: 'select timestamp_field11 from table1;' - -drop: 'DROP table table1;' - -values: - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' - - '1518439535000' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_tinyint_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_tinyint_h2.yml deleted file mode 100644 index 04dad785539fb..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_tinyint_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. 
See the License for the specific -#language governing permissions and limitations under the License. - -name: 'test1_tinyint_h2' - -type: 'tinyint' - -vector: 'TINYINT_FIELD3' - -create: 'CREATE TABLE table1 (tinyint_field3 TINYINT);' - -data: - - 'INSERT INTO table1 VALUES (45);' - - 'INSERT INTO table1 VALUES (45);' - - 'INSERT INTO table1 VALUES (45);' - - 'INSERT INTO table1 VALUES (45);' - - 'INSERT INTO table1 VALUES (45);' - - 'INSERT INTO table1 VALUES (45);' - - 'INSERT INTO table1 VALUES (45);' - - 'INSERT INTO table1 VALUES (45);' - - 'INSERT INTO table1 VALUES (45);' - - 'INSERT INTO table1 VALUES (45);' - -query: 'select tinyint_field3 from table1;' - -drop: 'DROP table table1;' - -values: - - '45' - - '45' - - '45' - - '45' - - '45' - - '45' - - '45' - - '45' - - '45' - - '45' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_varchar_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_varchar_h2.yml deleted file mode 100644 index 6e6768002cfae..0000000000000 --- a/java/adapter/jdbc/src/test/resources/h2/test1_varchar_h2.yml +++ /dev/null @@ -1,46 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one or more contributor -#license agreements. See the NOTICE file distributed with this work for additional -#information regarding copyright ownership. The ASF licenses this file to -#You under the Apache License, Version 2.0 (the "License"); you may not use -#this file except in compliance with the License. You may obtain a copy of -#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required -#by applicable law or agreed to in writing, software distributed under the -#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS -#OF ANY KIND, either express or implied. See the License for the specific -#language governing permissions and limitations under the License. 
- -name: 'test1_varchar_h2' - -type: 'varchar' - -vector: 'VARCHAR_FIELD13' - -create: 'CREATE TABLE table1 (varchar_field13 VARCHAR(256));' - -data: - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' - - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' - -query: 'select varchar_field13 from table1;' - -drop: 'DROP table table1;' - -values: - - 'some text that needs to be converted to varchar' - - 'some text that needs to be converted to varchar' - - 'some text that needs to be converted to varchar' - - 'some text that needs to be converted to varchar' - - 'some text that needs to be converted to varchar' - - 'some text that needs to be converted to varchar' - - 'some text that needs to be converted to varchar' - - 'some text that needs to be converted to varchar' - - 'some text that needs to be converted to varchar' - - 'some text that needs to be converted to varchar' diff --git a/java/adapter/jdbc/src/test/resources/logback.xml b/java/adapter/jdbc/src/test/resources/logback.xml deleted file mode 100644 index 4c54d18a210ff..0000000000000 --- a/java/adapter/jdbc/src/test/resources/logback.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - 
- diff --git a/java/adapter/orc/CMakeLists.txt b/java/adapter/orc/CMakeLists.txt deleted file mode 100644 index d29856ff8cd5e..0000000000000 --- a/java/adapter/orc/CMakeLists.txt +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -find_package(Arrow REQUIRED) - -include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} - ${JNI_INCLUDE_DIRS} ${JNI_HEADERS_DIR}) - -add_jar(arrow_java_jni_orc_jar - src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java - src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java - src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java - src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java - src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java - src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java - GENERATE_NATIVE_HEADERS - arrow_java_jni_orc_headers - DESTINATION - ${JNI_HEADERS_DIR}) - -add_library(arrow_java_jni_orc SHARED src/main/cpp/jni_wrapper.cpp) -set_property(TARGET arrow_java_jni_orc PROPERTY OUTPUT_NAME "arrow_orc_jni") -target_link_libraries(arrow_java_jni_orc arrow_java_jni_orc_headers jni - Arrow::arrow_static) - -set(ARROW_JAVA_JNI_ORC_LIBDIR - "${CMAKE_INSTALL_PREFIX}/lib/arrow_orc_jni/${ARROW_JAVA_JNI_ARCH_DIR}") -set(ARROW_JAVA_JNI_ORC_BINDIR - "${CMAKE_INSTALL_PREFIX}/bin/arrow_orc_jni/${ARROW_JAVA_JNI_ARCH_DIR}") - -install(TARGETS arrow_java_jni_orc - LIBRARY DESTINATION ${ARROW_JAVA_JNI_ORC_LIBDIR} - RUNTIME DESTINATION ${ARROW_JAVA_JNI_ORC_BINDIR}) diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml deleted file mode 100644 index 6061feb4ad202..0000000000000 --- a/java/adapter/orc/pom.xml +++ /dev/null @@ -1,174 +0,0 @@ - - - - 4.0.0 - - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - ../../pom.xml - - - org.apache.arrow.orc - arrow-orc - jar - Arrow Orc Adapter - (Experimental/Contrib)A JNI wrapper for the C++ ORC reader implementation. 
- - - ../../../cpp/release-build/ - - - - - org.apache.arrow - arrow-memory-core - compile - - - org.apache.arrow - arrow-memory-netty - runtime - - - org.apache.arrow - arrow-vector - ${arrow.vector.classifier} - compile - - - org.immutables - value-annotations - - - org.apache.orc - orc-core - 1.9.5 - test - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - - commons-logging - commons-logging - - - - - org.apache.hadoop - hadoop-client-runtime - ${dep.hadoop.version} - test - - - commons-logging - commons-logging - - - - - org.apache.hadoop - hadoop-client-api - ${dep.hadoop.version} - - - org.apache.hadoop - hadoop-common - ${dep.hadoop.version} - test - - - commons-logging - commons-logging - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - - javax.servlet - servlet-api - - - org.slf4j - slf4j-reload4j - - - ch.qos.reload4j - reload4j - - - - - org.apache.hive - hive-storage-api - 4.0.1 - test - - - - - - - ${arrow.cpp.build.dir} - - **/*arrow_orc_jni.* - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - - analyze - - - - org.apache.arrow:arrow-format - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - -Werror - - - - - - diff --git a/java/adapter/orc/src/main/cpp/concurrent_map.h b/java/adapter/orc/src/main/cpp/concurrent_map.h deleted file mode 100644 index b560886628bb2..0000000000000 --- a/java/adapter/orc/src/main/cpp/concurrent_map.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - */ - -#pragma once - -#include -#include -#include -#include - -#include "arrow/util/macros.h" - -namespace arrow { -namespace jni { - -/** - * An utility class that map module id to module pointers. - * @tparam Holder class of the object to hold. - */ -template -class ConcurrentMap { - public: - ConcurrentMap() : module_id_(init_module_id_) {} - - jlong Insert(Holder holder) { - std::lock_guard lock(mtx_); - jlong result = module_id_++; - map_.insert(std::pair(result, holder)); - return result; - } - - void Erase(jlong module_id) { - std::lock_guard lock(mtx_); - map_.erase(module_id); - } - - Holder Lookup(jlong module_id) { - std::lock_guard lock(mtx_); - auto it = map_.find(module_id); - if (it != map_.end()) { - return it->second; - } - return NULLPTR; - } - - void Clear() { - std::lock_guard lock(mtx_); - map_.clear(); - } - - private: - // Initialize the module id starting value to a number greater than zero - // to allow for easier debugging of uninitialized java variables. - static constexpr int init_module_id_ = 4; - - int64_t module_id_; - std::mutex mtx_; - // map from module ids returned to Java and module pointers - std::unordered_map map_; -}; - -} // namespace jni -} // namespace arrow diff --git a/java/adapter/orc/src/main/cpp/jni_wrapper.cpp b/java/adapter/orc/src/main/cpp/jni_wrapper.cpp deleted file mode 100644 index 6acf1084c3337..0000000000000 --- a/java/adapter/orc/src/main/cpp/jni_wrapper.cpp +++ /dev/null @@ -1,306 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "org_apache_arrow_adapter_orc_OrcMemoryJniWrapper.h" -#include "org_apache_arrow_adapter_orc_OrcReaderJniWrapper.h" -#include "org_apache_arrow_adapter_orc_OrcStripeReaderJniWrapper.h" - -#include "./concurrent_map.h" - -using ORCFileReader = arrow::adapters::orc::ORCFileReader; -using RecordBatchReader = arrow::RecordBatchReader; - -static jclass io_exception_class; -static jclass illegal_access_exception_class; -static jclass illegal_argument_exception_class; - -static jclass orc_field_node_class; -static jmethodID orc_field_node_constructor; - -static jclass orc_memory_class; -static jmethodID orc_memory_constructor; - -static jclass record_batch_class; -static jmethodID record_batch_constructor; - -static jint JNI_VERSION = JNI_VERSION_10; - -using arrow::internal::checked_cast; -using arrow::jni::ConcurrentMap; - -static ConcurrentMap> buffer_holder_; -static ConcurrentMap> orc_stripe_reader_holder_; -static ConcurrentMap> orc_reader_holder_; - -jclass CreateGlobalClassReference(JNIEnv* env, const char* class_name) { - jclass local_class = env->FindClass(class_name); - jclass global_class = (jclass)env->NewGlobalRef(local_class); - 
env->DeleteLocalRef(local_class); - return global_class; -} - -jmethodID GetMethodID(JNIEnv* env, jclass this_class, const char* name, const char* sig) { - jmethodID ret = env->GetMethodID(this_class, name, sig); - if (ret == nullptr) { - std::string error_message = "Unable to find method " + std::string(name) + - " within signature" + std::string(sig); - env->ThrowNew(illegal_access_exception_class, error_message.c_str()); - } - - return ret; -} - -std::string JStringToCString(JNIEnv* env, jstring string) { - int32_t jlen, clen; - clen = env->GetStringUTFLength(string); - jlen = env->GetStringLength(string); - std::vector buffer(clen); - env->GetStringUTFRegion(string, 0, jlen, buffer.data()); - return std::string(buffer.data(), clen); -} - -std::shared_ptr GetFileReader(JNIEnv* env, jlong id) { - auto reader = orc_reader_holder_.Lookup(id); - if (!reader) { - std::string error_message = "invalid reader id " + std::to_string(id); - env->ThrowNew(illegal_argument_exception_class, error_message.c_str()); - } - - return reader; -} - -std::shared_ptr GetStripeReader(JNIEnv* env, jlong id) { - auto reader = orc_stripe_reader_holder_.Lookup(id); - if (!reader) { - std::string error_message = "invalid stripe reader id " + std::to_string(id); - env->ThrowNew(illegal_argument_exception_class, error_message.c_str()); - } - - return reader; -} - -#ifdef __cplusplus -extern "C" { -#endif - -jint JNI_OnLoad(JavaVM* vm, void* reserved) { - JNIEnv* env; - if (vm->GetEnv(reinterpret_cast(&env), JNI_VERSION) != JNI_OK) { - return JNI_ERR; - } - - io_exception_class = CreateGlobalClassReference(env, "Ljava/io/IOException;"); - illegal_access_exception_class = - CreateGlobalClassReference(env, "Ljava/lang/IllegalAccessException;"); - illegal_argument_exception_class = - CreateGlobalClassReference(env, "Ljava/lang/IllegalArgumentException;"); - - orc_field_node_class = - CreateGlobalClassReference(env, "Lorg/apache/arrow/adapter/orc/OrcFieldNode;"); - orc_field_node_constructor = 
GetMethodID(env, orc_field_node_class, "", "(II)V"); - - orc_memory_class = CreateGlobalClassReference( - env, "Lorg/apache/arrow/adapter/orc/OrcMemoryJniWrapper;"); - orc_memory_constructor = GetMethodID(env, orc_memory_class, "", "(JJJJ)V"); - - record_batch_class = - CreateGlobalClassReference(env, "Lorg/apache/arrow/adapter/orc/OrcRecordBatch;"); - record_batch_constructor = - GetMethodID(env, record_batch_class, "", - "(I[Lorg/apache/arrow/adapter/orc/OrcFieldNode;" - "[Lorg/apache/arrow/adapter/orc/OrcMemoryJniWrapper;)V"); - - env->ExceptionDescribe(); - - return JNI_VERSION; -} - -void JNI_OnUnload(JavaVM* vm, void* reserved) { - JNIEnv* env; - vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); - env->DeleteGlobalRef(io_exception_class); - env->DeleteGlobalRef(illegal_access_exception_class); - env->DeleteGlobalRef(illegal_argument_exception_class); - env->DeleteGlobalRef(orc_field_node_class); - env->DeleteGlobalRef(orc_memory_class); - env->DeleteGlobalRef(record_batch_class); - - buffer_holder_.Clear(); - orc_stripe_reader_holder_.Clear(); - orc_reader_holder_.Clear(); -} - -JNIEXPORT jlong JNICALL Java_org_apache_arrow_adapter_orc_OrcReaderJniWrapper_open( - JNIEnv* env, jobject this_obj, jstring file_path) { - std::string path = JStringToCString(env, file_path); - - if (path.find("hdfs://") == 0) { - env->ThrowNew(io_exception_class, "hdfs path not supported yet."); - } - auto maybe_file = arrow::io::ReadableFile::Open(path); - - if (!maybe_file.ok()) { - return -static_cast(maybe_file.status().code()); - } - auto maybe_reader = ORCFileReader::Open(*maybe_file, arrow::default_memory_pool()); - if (!maybe_reader.ok()) { - env->ThrowNew(io_exception_class, std::string("Failed open file" + path).c_str()); - } - return orc_reader_holder_.Insert( - std::shared_ptr(*std::move(maybe_reader))); -} - -JNIEXPORT void JNICALL Java_org_apache_arrow_adapter_orc_OrcReaderJniWrapper_close( - JNIEnv* env, jobject this_obj, jlong id) { - orc_reader_holder_.Erase(id); -} 
- -JNIEXPORT jboolean JNICALL Java_org_apache_arrow_adapter_orc_OrcReaderJniWrapper_seek( - JNIEnv* env, jobject this_obj, jlong id, jint row_number) { - auto reader = GetFileReader(env, id); - return reader->Seek(row_number).ok(); -} - -JNIEXPORT jint JNICALL -Java_org_apache_arrow_adapter_orc_OrcReaderJniWrapper_getNumberOfStripes(JNIEnv* env, - jobject this_obj, - jlong id) { - auto reader = GetFileReader(env, id); - return reader->NumberOfStripes(); -} - -JNIEXPORT jlong JNICALL -Java_org_apache_arrow_adapter_orc_OrcReaderJniWrapper_nextStripeReader(JNIEnv* env, - jobject this_obj, - jlong id, - jlong batch_size) { - auto reader = GetFileReader(env, id); - - auto maybe_stripe_reader = reader->NextStripeReader(batch_size); - if (!maybe_stripe_reader.ok()) { - return static_cast(maybe_stripe_reader.status().code()) * -1; - } - if (*maybe_stripe_reader == nullptr) { - return static_cast(arrow::StatusCode::Invalid) * -1; - } - - return orc_stripe_reader_holder_.Insert(*maybe_stripe_reader); -} - -JNIEXPORT jbyteArray JNICALL -Java_org_apache_arrow_adapter_orc_OrcStripeReaderJniWrapper_getSchema(JNIEnv* env, - jclass this_cls, - jlong id) { - auto stripe_reader = GetStripeReader(env, id); - - auto schema = stripe_reader->schema(); - - auto maybe_buffer = arrow::ipc::SerializeSchema(*schema, arrow::default_memory_pool()); - if (!maybe_buffer.ok()) { - return nullptr; - } - auto buffer = *std::move(maybe_buffer); - - jbyteArray ret = env->NewByteArray(buffer->size()); - auto src = reinterpret_cast(buffer->data()); - env->SetByteArrayRegion(ret, 0, buffer->size(), src); - return ret; -} - -JNIEXPORT jobject JNICALL -Java_org_apache_arrow_adapter_orc_OrcStripeReaderJniWrapper_next(JNIEnv* env, - jclass this_cls, - jlong id) { - auto stripe_reader = GetStripeReader(env, id); - - std::shared_ptr record_batch; - auto status = stripe_reader->ReadNext(&record_batch); - if (!status.ok() || !record_batch) { - return nullptr; - } - - auto schema = stripe_reader->schema(); - - 
// TODO: ARROW-4714 Ensure JVM has sufficient capacity to create local references - // create OrcFieldNode[] - jobjectArray field_array = - env->NewObjectArray(schema->num_fields(), orc_field_node_class, nullptr); - - std::vector> buffers; - for (int i = 0; i < schema->num_fields(); ++i) { - auto column = record_batch->column(i); - auto dataArray = column->data(); - jobject field = env->NewObject(orc_field_node_class, orc_field_node_constructor, - column->length(), column->null_count()); - env->SetObjectArrayElement(field_array, i, field); - - for (auto& buffer : dataArray->buffers) { - buffers.push_back(buffer); - } - } - - // create OrcMemoryJniWrapper[] - jobjectArray memory_array = - env->NewObjectArray(buffers.size(), orc_memory_class, nullptr); - - for (size_t j = 0; j < buffers.size(); ++j) { - auto buffer = buffers[j]; - uint8_t* data = nullptr; - int size = 0; - int64_t capacity = 0; - if (buffer != nullptr) { - data = (uint8_t*)buffer->data(); - size = (int)buffer->size(); - capacity = buffer->capacity(); - } - jobject memory = env->NewObject(orc_memory_class, orc_memory_constructor, - buffer_holder_.Insert(buffer), data, size, capacity); - env->SetObjectArrayElement(memory_array, j, memory); - } - - // create OrcRecordBatch - jobject ret = env->NewObject(record_batch_class, record_batch_constructor, - record_batch->num_rows(), field_array, memory_array); - - return ret; -} - -JNIEXPORT void JNICALL Java_org_apache_arrow_adapter_orc_OrcStripeReaderJniWrapper_close( - JNIEnv* env, jclass this_cls, jlong id) { - orc_stripe_reader_holder_.Erase(id); -} - -JNIEXPORT void JNICALL Java_org_apache_arrow_adapter_orc_OrcMemoryJniWrapper_release( - JNIEnv* env, jobject this_obj, jlong id) { - buffer_holder_.Erase(id); -} - -#ifdef __cplusplus -} -#endif diff --git a/java/adapter/orc/src/main/java/module-info.java b/java/adapter/orc/src/main/java/module-info.java deleted file mode 100644 index d18a978e93fa8..0000000000000 --- 
a/java/adapter/orc/src/main/java/module-info.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -open module org.apache.arrow.adapter.orc { - exports org.apache.arrow.adapter.orc; - - requires hadoop.client.api; - requires org.apache.arrow.memory.core; - requires org.apache.arrow.vector; -} diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java deleted file mode 100644 index faf48e19445ae..0000000000000 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.orc; - -/** Metadata about Vectors/Arrays that is passed via JNI interface. */ -class OrcFieldNode { - - private final int length; - private final int nullCount; - - /** - * Construct a new instance. - * - * @param length the number of values written. - * @param nullCount the number of null values. - */ - public OrcFieldNode(int length, int nullCount) { - this.length = length; - this.nullCount = nullCount; - } - - int getLength() { - return length; - } - - int getNullCount() { - return nullCount; - } -} diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java deleted file mode 100644 index 692b0c061839c..0000000000000 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.orc; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.StandardCopyOption; -import java.util.Locale; - -/** Helper class for JNI related operations. */ -class OrcJniUtils { - private static final String LIBRARY_NAME = "arrow_orc_jni"; - private static boolean isLoaded = false; - - private OrcJniUtils() {} - - static void loadOrcAdapterLibraryFromJar() throws IOException, IllegalAccessException { - synchronized (OrcJniUtils.class) { - if (!isLoaded) { - final String libraryToLoad = - LIBRARY_NAME + "/" + getNormalizedArch() + "/" + System.mapLibraryName(LIBRARY_NAME); - final File libraryFile = - moveFileFromJarToTemp( - System.getProperty("java.io.tmpdir"), libraryToLoad, LIBRARY_NAME); - System.load(libraryFile.getAbsolutePath()); - isLoaded = true; - } - } - } - - private static String getNormalizedArch() { - String arch = System.getProperty("os.arch").toLowerCase(Locale.US); - switch (arch) { - case "amd64": - arch = "x86_64"; - break; - case "aarch64": - arch = "aarch_64"; - break; - default: - break; - } - return arch; - } - - private static File moveFileFromJarToTemp( - final String tmpDir, String libraryToLoad, String libraryName) throws IOException { - final File temp = File.createTempFile(tmpDir, libraryName); - try (final InputStream is = - OrcReaderJniWrapper.class.getClassLoader().getResourceAsStream(libraryToLoad)) { - if (is == null) { - throw new 
FileNotFoundException(libraryToLoad); - } else { - Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); - } - } - return temp; - } -} diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java deleted file mode 100644 index 70f2a655654c6..0000000000000 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.orc; - -/** Wrapper for orc memory allocated by native code. */ -class OrcMemoryJniWrapper implements AutoCloseable { - - private final long nativeInstanceId; - - private final long memoryAddress; - - private final long size; - - private final long capacity; - - /** - * Construct a new instance. - * - * @param nativeInstanceId unique id of the underlying memory. - * @param memoryAddress starting memory address of the underlying memory. - * @param size size of the valid data. - * @param capacity allocated memory size. 
- */ - OrcMemoryJniWrapper(long nativeInstanceId, long memoryAddress, long size, long capacity) { - this.nativeInstanceId = nativeInstanceId; - this.memoryAddress = memoryAddress; - this.size = size; - this.capacity = capacity; - } - - /** - * Return the size of underlying chunk of memory that has valid data. - * - * @return valid data size - */ - long getSize() { - return size; - } - - /** - * Return the size of underlying chunk of memory managed by this OrcMemoryJniWrapper. - * - * @return underlying memory size - */ - long getCapacity() { - return capacity; - } - - /** - * Return the memory address of underlying chunk of memory. - * - * @return memory address - */ - long getMemoryAddress() { - return memoryAddress; - } - - @Override - public void close() { - release(nativeInstanceId); - } - - private native void release(long id); -} diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java deleted file mode 100644 index ca9b44e7e8123..0000000000000 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.orc; - -import java.io.IOException; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.ipc.ArrowReader; - -/** - * Orc Reader that allow accessing orc stripes in Orc file. This orc reader basically acts like an - * ArrowReader iterator that iterate over orc stripes. Each stripe will be accessed via an - * ArrowReader. - */ -public class OrcReader implements AutoCloseable { - private final OrcReaderJniWrapper jniWrapper; - private BufferAllocator allocator; - - /** reference to native reader instance. */ - private final long nativeInstanceId; - - /** - * Create an OrcReader that iterate over orc stripes. - * - * @param filePath file path to target file, currently only support local file. - * @param allocator allocator provided to ArrowReader. - * @throws IOException throws exception in case of file not found - */ - public OrcReader(String filePath, BufferAllocator allocator) - throws IOException, IllegalAccessException { - this.allocator = allocator; - this.jniWrapper = OrcReaderJniWrapper.getInstance(); - this.nativeInstanceId = jniWrapper.open(filePath); - } - - /** - * Seek to designated row. Invoke NextStripeReader() after seek will return stripe reader starting - * from designated row. - * - * @param rowNumber the rows number to seek - * @return true if seek operation is succeeded - */ - public boolean seek(int rowNumber) throws IllegalArgumentException { - return jniWrapper.seek(nativeInstanceId, rowNumber); - } - - /** - * Get a stripe level ArrowReader with specified batchSize in each record batch. 
- * - * @param batchSize the number of rows loaded on each iteration - * @return ArrowReader that iterate over current stripes - */ - public ArrowReader nextStripeReader(long batchSize) throws IllegalArgumentException { - long stripeReaderId = jniWrapper.nextStripeReader(nativeInstanceId, batchSize); - if (stripeReaderId < 0) { - return null; - } - - return new OrcStripeReader(stripeReaderId, allocator); - } - - /** - * The number of stripes in the file. - * - * @return number of stripes - */ - public int getNumberOfStripes() throws IllegalArgumentException { - return jniWrapper.getNumberOfStripes(nativeInstanceId); - } - - @Override - public void close() { - jniWrapper.close(nativeInstanceId); - } -} diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java deleted file mode 100644 index be57485005fbf..0000000000000 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.orc; - -import java.io.IOException; - -/** JNI wrapper for Orc reader. 
*/ -class OrcReaderJniWrapper { - - private static volatile OrcReaderJniWrapper INSTANCE; - - static OrcReaderJniWrapper getInstance() throws IOException, IllegalAccessException { - if (INSTANCE == null) { - synchronized (OrcReaderJniWrapper.class) { - if (INSTANCE == null) { - OrcJniUtils.loadOrcAdapterLibraryFromJar(); - INSTANCE = new OrcReaderJniWrapper(); - } - } - } - - return INSTANCE; - } - - /** - * Construct a orc file reader over the target file. - * - * @param fileName absolute file path of target file - * @return id of the orc reader instance if file opened successfully, otherwise return error code - * * -1. - */ - native long open(String fileName); - - /** - * Release resources associated with designated reader instance. - * - * @param readerId id of the reader instance. - */ - native void close(long readerId); - - /** - * Seek to designated row. Invoke nextStripeReader() after seek will return id of stripe reader - * starting from designated row. - * - * @param readerId id of the reader instance - * @param rowNumber the rows number to seek - * @return true if seek operation is succeeded - */ - native boolean seek(long readerId, int rowNumber); - - /** - * The number of stripes in the file. - * - * @param readerId id of the reader instance - * @return number of stripes - */ - native int getNumberOfStripes(long readerId); - - /** - * Get a stripe level ArrowReader with specified batchSize in each record batch. - * - * @param readerId id of the reader instance - * @param batchSize the number of rows loaded on each iteration - * @return id of the stripe reader instance. 
- */ - native long nextStripeReader(long readerId, long batchSize); -} diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java deleted file mode 100644 index f78898df2205d..0000000000000 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.orc; - -import java.util.Arrays; -import java.util.List; - -/** Wrapper for record batch meta and native memory. */ -class OrcRecordBatch { - final int length; - - /** Nodes correspond to the pre-ordered flattened logical schema. */ - final List nodes; - - final List buffers; - - /** - * Construct a new instance. 
- * - * @param length number of records included in current batch - * @param nodes meta data for each fields - * @param buffers buffers for underlying data - */ - OrcRecordBatch(int length, OrcFieldNode[] nodes, OrcMemoryJniWrapper[] buffers) { - this.length = length; - this.nodes = Arrays.asList(nodes); - this.buffers = Arrays.asList(buffers); - } -} diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java deleted file mode 100644 index 38233a0493bef..0000000000000 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.orc; - -import java.util.concurrent.atomic.AtomicInteger; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OwnershipTransferResult; -import org.apache.arrow.memory.ReferenceManager; -import org.apache.arrow.util.Preconditions; - -/** - * A simple reference manager implementation for memory allocated by native code. 
The underlying - * memory will be released when reference count reach zero. - */ -public class OrcReferenceManager implements ReferenceManager { - private final AtomicInteger bufRefCnt = new AtomicInteger(0); - - private OrcMemoryJniWrapper memory; - - OrcReferenceManager(OrcMemoryJniWrapper memory) { - this.memory = memory; - } - - @Override - public int getRefCount() { - return bufRefCnt.get(); - } - - @Override - public boolean release() { - return release(1); - } - - @Override - public boolean release(int decrement) { - Preconditions.checkState( - decrement >= 1, "ref count decrement should be greater than or equal to 1"); - // decrement the ref count - final int refCnt; - synchronized (this) { - refCnt = bufRefCnt.addAndGet(-decrement); - if (refCnt == 0) { - // refcount of this reference manager has dropped to 0 - // release the underlying memory - memory.close(); - } - } - // the new ref count should be >= 0 - Preconditions.checkState(refCnt >= 0, "RefCnt has gone negative"); - return refCnt == 0; - } - - @Override - public void retain() { - retain(1); - } - - @Override - public void retain(int increment) { - Preconditions.checkArgument(increment > 0, "retain(%s) argument is not positive", increment); - bufRefCnt.addAndGet(increment); - } - - @Override - public ArrowBuf retain(ArrowBuf srcBuffer, BufferAllocator targetAllocator) { - retain(); - return srcBuffer; - } - - @Override - public ArrowBuf deriveBuffer(ArrowBuf sourceBuffer, long index, long length) { - final long derivedBufferAddress = sourceBuffer.memoryAddress() + index; - - // create new ArrowBuf - final ArrowBuf derivedBuf = - new ArrowBuf( - this, - null, - length, // length (in bytes) in the underlying memory chunk for this new ArrowBuf - derivedBufferAddress // starting byte address in the underlying memory for this new - // ArrowBuf, - ); - - return derivedBuf; - } - - @Override - public OwnershipTransferResult transferOwnership( - ArrowBuf sourceBuffer, BufferAllocator targetAllocator) { - 
throw new UnsupportedOperationException(); - } - - @Override - public BufferAllocator getAllocator() { - return null; - } - - @Override - public long getSize() { - return memory.getSize(); - } - - @Override - public long getAccountedSize() { - return 0; - } -} diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java deleted file mode 100644 index 52f5cf429a48d..0000000000000 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.orc; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.stream.Collectors; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.ipc.ReadChannel; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.ipc.message.MessageChannelReader; -import org.apache.arrow.vector.ipc.message.MessageResult; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; - -/** Orc stripe that load data into ArrowRecordBatch. */ -public class OrcStripeReader extends ArrowReader { - /** reference to native stripe reader instance. */ - private final long nativeInstanceId; - - /** - * Construct a new instance. - * - * @param nativeInstanceId nativeInstanceId of the stripe reader instance, obtained by calling - * nextStripeReader from OrcReaderJniWrapper - * @param allocator memory allocator for accounting. 
- */ - OrcStripeReader(long nativeInstanceId, BufferAllocator allocator) { - super(allocator); - this.nativeInstanceId = nativeInstanceId; - } - - @Override - public boolean loadNextBatch() throws IOException { - OrcRecordBatch recordBatch = OrcStripeReaderJniWrapper.next(nativeInstanceId); - if (recordBatch == null) { - return false; - } - - ArrayList buffers = new ArrayList<>(); - for (OrcMemoryJniWrapper buffer : recordBatch.buffers) { - buffers.add( - new ArrowBuf( - new OrcReferenceManager(buffer), - null, - (int) buffer.getSize(), - buffer.getMemoryAddress())); - } - - loadRecordBatch( - new ArrowRecordBatch( - recordBatch.length, - recordBatch.nodes.stream() - .map(buf -> new ArrowFieldNode(buf.getLength(), buf.getNullCount())) - .collect(Collectors.toList()), - buffers)); - return true; - } - - @Override - public long bytesRead() { - return 0; - } - - @Override - protected void closeReadSource() throws IOException { - OrcStripeReaderJniWrapper.close(nativeInstanceId); - } - - @Override - protected Schema readSchema() throws IOException { - byte[] schemaBytes = OrcStripeReaderJniWrapper.getSchema(nativeInstanceId); - - try (MessageChannelReader schemaReader = - new MessageChannelReader( - new ReadChannel(new ByteArrayReadableSeekableByteChannel(schemaBytes)), allocator)) { - - MessageResult result = schemaReader.readNext(); - if (result == null) { - throw new IOException("Unexpected end of input. 
Missing schema."); - } - - return MessageSerializer.deserializeSchema(result.getMessage()); - } - } -} diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java deleted file mode 100644 index e7b691087fb96..0000000000000 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter.orc; - -/** JNI wrapper for orc stripe reader. */ -class OrcStripeReaderJniWrapper { - - /** - * Get the schema of current stripe. - * - * @param readerId id of the stripe reader instance. - * @return serialized schema. - */ - static native byte[] getSchema(long readerId); - - /** - * Load next record batch. - * - * @param readerId id of the stripe reader instance. - * @return loaded record batch, return null when reached the end of current stripe. - */ - static native OrcRecordBatch next(long readerId); - - /** - * Release resources of underlying reader. - * - * @param readerId id of the stripe reader instance. 
- */ - static native void close(long readerId); -} diff --git a/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java b/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java deleted file mode 100644 index f8eb91a1cc758..0000000000000 --- a/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.orc; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; - -import java.io.File; -import java.nio.charset.StandardCharsets; -import java.util.List; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.orc.OrcFile; -import org.apache.orc.TypeDescription; -import org.apache.orc.Writer; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -public class OrcReaderTest { - - @TempDir public File testFolder; - - private static final int MAX_ALLOCATION = 8 * 1024; - private static RootAllocator allocator; - - @BeforeAll - public static void beforeClass() { - allocator = new RootAllocator(MAX_ALLOCATION); - } - - @Test - public void testOrcJniReader() throws Exception { - TypeDescription schema = TypeDescription.fromString("struct"); - File testFile = new File(testFolder, "test-orc"); - - Writer writer = - OrcFile.createWriter( - new Path(testFile.getAbsolutePath()), - OrcFile.writerOptions(new Configuration()).setSchema(schema)); - VectorizedRowBatch batch = schema.createRowBatch(); - LongColumnVector longColumnVector = (LongColumnVector) batch.cols[0]; - BytesColumnVector bytesColumnVector = (BytesColumnVector) batch.cols[1]; - for (int r = 0; r < 1024; ++r) { - int row = batch.size++; - longColumnVector.vector[row] = r; - 
byte[] buffer = ("Last-" + (r * 3)).getBytes(StandardCharsets.UTF_8); - bytesColumnVector.setRef(row, buffer, 0, buffer.length); - } - writer.addRowBatch(batch); - writer.close(); - - OrcReader reader = new OrcReader(testFile.getAbsolutePath(), allocator); - assertEquals(1, reader.getNumberOfStripes()); - - ArrowReader stripeReader = reader.nextStripeReader(1024); - VectorSchemaRoot schemaRoot = stripeReader.getVectorSchemaRoot(); - stripeReader.loadNextBatch(); - - List fields = schemaRoot.getFieldVectors(); - assertEquals(2, fields.size()); - - IntVector intVector = (IntVector) fields.get(0); - VarCharVector varCharVector = (VarCharVector) fields.get(1); - for (int i = 0; i < 1024; ++i) { - assertEquals(i, intVector.get(i)); - assertEquals("Last-" + (i * 3), new String(varCharVector.get(i), StandardCharsets.UTF_8)); - } - - assertFalse(stripeReader.loadNextBatch()); - assertNull(reader.nextStripeReader(1024)); - - stripeReader.close(); - reader.close(); - } -} diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml deleted file mode 100644 index 898c2605b6281..0000000000000 --- a/java/algorithm/pom.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - - arrow-algorithm - Arrow Algorithms - (Experimental/Contrib) A collection of algorithms for working with ValueVectors. - - - - org.apache.arrow - arrow-vector - ${arrow.vector.classifier} - - - org.apache.arrow - arrow-vector - ${project.version} - test-jar - test - - - org.apache.arrow - arrow-memory-core - - - org.apache.arrow - arrow-memory-netty - test - - - org.immutables - value-annotations - - - diff --git a/java/algorithm/src/main/java/module-info.java b/java/algorithm/src/main/java/module-info.java deleted file mode 100644 index b347f55aa4d00..0000000000000 --- a/java/algorithm/src/main/java/module-info.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -module org.apache.arrow.algorithm { - exports org.apache.arrow.algorithm.search; - exports org.apache.arrow.algorithm.deduplicate; - exports org.apache.arrow.algorithm.dictionary; - exports org.apache.arrow.algorithm.rank; - exports org.apache.arrow.algorithm.misc; - exports org.apache.arrow.algorithm.sort; - - requires jdk.unsupported; - requires org.apache.arrow.memory.core; - requires org.apache.arrow.vector; -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java deleted file mode 100644 index e9364b2a85b7b..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.deduplicate; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.compare.Range; -import org.apache.arrow.vector.compare.RangeEqualsVisitor; -import org.apache.arrow.vector.util.DataSizeRoundingUtil; - -/** Utilities for vector deduplication. */ -class DeduplicationUtils { - - /** - * Gets the start positions of the first distinct values in a vector. - * - * @param vector the target vector. - * @param runStarts the bit set to hold the start positions. - * @param vector type. - */ - public static void populateRunStartIndicators( - V vector, ArrowBuf runStarts) { - int bufSize = DataSizeRoundingUtil.divideBy8Ceil(vector.getValueCount()); - Preconditions.checkArgument(runStarts.capacity() >= bufSize); - runStarts.setZero(0, bufSize); - - BitVectorHelper.setBit(runStarts, 0); - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, vector, null); - Range range = new Range(0, 0, 1); - for (int i = 1; i < vector.getValueCount(); i++) { - range.setLeftStart(i).setRightStart(i - 1); - if (!visitor.rangeEquals(range)) { - BitVectorHelper.setBit(runStarts, i); - } - } - } - - /** - * Gets the run lengths, given the start positions. - * - * @param runStarts the bit set for start positions. - * @param runLengths the run length vector to populate. - * @param valueCount the number of values in the bit set. 
- */ - public static void populateRunLengths(ArrowBuf runStarts, IntVector runLengths, int valueCount) { - int curStart = 0; - int lengthIndex = 0; - for (int i = 1; i < valueCount; i++) { - if (BitVectorHelper.get(runStarts, i) != 0) { - // we get a new distinct value - runLengths.setSafe(lengthIndex++, i - curStart); - curStart = i; - } - } - - // process the last value - runLengths.setSafe(lengthIndex++, valueCount - curStart); - runLengths.setValueCount(lengthIndex); - } - - /** - * Gets distinct values from the input vector by removing adjacent duplicated values. - * - * @param indicators the bit set containing the start positions of distinct values. - * @param inputVector the input vector. - * @param outputVector the output vector. - * @param vector type. - */ - public static void populateDeduplicatedValues( - ArrowBuf indicators, V inputVector, V outputVector) { - int dstIdx = 0; - for (int srcIdx = 0; srcIdx < inputVector.getValueCount(); srcIdx++) { - if (BitVectorHelper.get(indicators, srcIdx) != 0) { - outputVector.copyFromSafe(srcIdx, dstIdx++, inputVector); - } - } - outputVector.setValueCount(dstIdx); - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java deleted file mode 100644 index 4e49de14f5956..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.deduplicate; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.util.DataSizeRoundingUtil; - -/** - * Remove adjacent equal elements from a vector. If the vector is sorted, it removes all duplicated - * values in the vector. - * - * @param vector type. - */ -public class VectorRunDeduplicator implements AutoCloseable { - - /** - * Bit set for distinct values. If the value at some index is not equal to the previous value, its - * bit is set to 1, otherwise its bit is set to 0. - */ - private ArrowBuf distinctValueBuffer; - - /** The vector to deduplicate. */ - private final V vector; - - private final BufferAllocator allocator; - - /** - * Constructs a vector run deduplicator for a given vector. - * - * @param vector the vector to deduplicate. Ownership is NOT taken. - * @param allocator the allocator used for allocating buffers for start indices. 
- */ - public VectorRunDeduplicator(V vector, BufferAllocator allocator) { - this.vector = vector; - this.allocator = allocator; - } - - private void createDistinctValueBuffer() { - Preconditions.checkArgument(distinctValueBuffer == null); - int bufSize = DataSizeRoundingUtil.divideBy8Ceil(vector.getValueCount()); - distinctValueBuffer = allocator.buffer(bufSize); - DeduplicationUtils.populateRunStartIndicators(vector, distinctValueBuffer); - } - - /** - * Gets the number of values which are different from their predecessor. - * - * @return the run count. - */ - public int getRunCount() { - if (distinctValueBuffer == null) { - createDistinctValueBuffer(); - } - return vector.getValueCount() - - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount()); - } - - /** - * Gets the vector with deduplicated adjacent values removed. - * - * @param outVector the output vector. - */ - public void populateDeduplicatedValues(V outVector) { - if (distinctValueBuffer == null) { - createDistinctValueBuffer(); - } - - DeduplicationUtils.populateDeduplicatedValues(distinctValueBuffer, vector, outVector); - } - - /** - * Gets the length of each distinct value. - * - * @param lengthVector the vector for holding length values. 
- */ - public void populateRunLengths(IntVector lengthVector) { - if (distinctValueBuffer == null) { - createDistinctValueBuffer(); - } - - DeduplicationUtils.populateRunLengths( - distinctValueBuffer, lengthVector, vector.getValueCount()); - } - - @Override - public void close() { - if (distinctValueBuffer != null) { - distinctValueBuffer.close(); - distinctValueBuffer = null; - } - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java deleted file mode 100644 index 88c4e4dc65450..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.dictionary; - -import org.apache.arrow.vector.ValueVector; - -/** - * A dictionary builder is intended for the scenario frequently encountered in practice: the - * dictionary is not known a priori, so it is generated dynamically. In particular, when a new value - * arrives, it is tested to check if it is already in the dictionary. 
If so, it is simply neglected, - * otherwise, it is added to the dictionary. - * - *

The dictionary builder is intended to build a single dictionary. So it cannot be used for - * different dictionaries. - * - *

Below gives the sample code for using the dictionary builder - * - *

{@code
- * DictionaryBuilder dictionaryBuilder = ...
- * ...
- * dictionaryBuild.addValue(newValue);
- * ...
- * }
- * - *

With the above code, the dictionary vector will be populated, and it can be retrieved by the - * {@link DictionaryBuilder#getDictionary()} method. After that, dictionary encoding can proceed - * with the populated dictionary.. - * - * @param the dictionary vector type. - */ -public interface DictionaryBuilder { - - /** - * Try to add all values from the target vector to the dictionary. - * - * @param targetVector the target vector containing values to probe. - * @return the number of values actually added to the dictionary. - */ - int addValues(V targetVector); - - /** - * Try to add an element from the target vector to the dictionary. - * - * @param targetVector the target vector containing new element. - * @param targetIndex the index of the new element in the target vector. - * @return the index of the new element in the dictionary. - */ - int addValue(V targetVector, int targetIndex); - - /** - * Gets the dictionary built. - * - * @return the dictionary. - */ - V getDictionary(); -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java deleted file mode 100644 index 16e27c3a23e72..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.dictionary; - -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.ValueVector; - -/** - * A dictionary encoder translates one vector into another one based on a dictionary vector. - * According to Arrow specification, the encoded vector must be an integer based vector, which is - * the index of the original vector element in the dictionary. - * - * @param type of the encoded vector. - * @param type of the vector to encode. It is also the type of the dictionary vector. - */ -public interface DictionaryEncoder { - - /** - * Translates an input vector into an output vector. - * - * @param input the input vector. - * @param output the output vector. Note that it must be in a fresh state. At least, all its - * validity bits should be clear. - */ - void encode(D input, E output); -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableBasedDictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableBasedDictionaryBuilder.java deleted file mode 100644 index e090682dbe0e8..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableBasedDictionaryBuilder.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.dictionary; - -import java.util.HashMap; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.memory.util.hash.SimpleHasher; -import org.apache.arrow.vector.ElementAddressableVector; - -/** - * This class builds the dictionary based on a hash table. Each add operation can be finished in - * O(1) time, where n is the current dictionary size. - * - * @param the dictionary vector type. - */ -public class HashTableBasedDictionaryBuilder - implements DictionaryBuilder { - - /** The dictionary to be built. */ - private final V dictionary; - - /** If null should be encoded. */ - private final boolean encodeNull; - - /** - * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element, - * whereas the value is the index in the dictionary. - */ - private HashMap hashMap = new HashMap<>(); - - /** The hasher used for calculating the hash code. */ - private final ArrowBufHasher hasher; - - /** Next pointer to try to add to the hash table. */ - private ArrowBufPointer nextPointer; - - /** - * Constructs a hash table based dictionary builder. - * - * @param dictionary the dictionary to populate. 
- */ - public HashTableBasedDictionaryBuilder(V dictionary) { - this(dictionary, false); - } - - /** - * Constructs a hash table based dictionary builder. - * - * @param dictionary the dictionary to populate. - * @param encodeNull if null values should be added to the dictionary. - */ - public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull) { - this(dictionary, encodeNull, SimpleHasher.INSTANCE); - } - - /** - * Constructs a hash table based dictionary builder. - * - * @param dictionary the dictionary to populate. - * @param encodeNull if null values should be added to the dictionary. - * @param hasher the hasher used to compute the hash code. - */ - public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull, ArrowBufHasher hasher) { - this.dictionary = dictionary; - this.encodeNull = encodeNull; - this.hasher = hasher; - this.nextPointer = new ArrowBufPointer(hasher); - } - - /** - * Gets the dictionary built. - * - * @return the dictionary. - */ - @Override - public V getDictionary() { - return dictionary; - } - - /** - * Try to add all values from the target vector to the dictionary. - * - * @param targetVector the target vector containing values to probe. - * @return the number of values actually added to the dictionary. - */ - @Override - public int addValues(V targetVector) { - int oldDictSize = dictionary.getValueCount(); - for (int i = 0; i < targetVector.getValueCount(); i++) { - if (!encodeNull && targetVector.isNull(i)) { - continue; - } - addValue(targetVector, i); - } - - return dictionary.getValueCount() - oldDictSize; - } - - /** - * Try to add an element from the target vector to the dictionary. - * - * @param targetVector the target vector containing new element. - * @param targetIndex the index of the new element in the target vector. - * @return the index of the new element in the dictionary. 
- */ - @Override - public int addValue(V targetVector, int targetIndex) { - targetVector.getDataPointer(targetIndex, nextPointer); - - Integer index = hashMap.get(nextPointer); - if (index == null) { - // a new dictionary element is found - - // insert it to the dictionary - int dictSize = dictionary.getValueCount(); - dictionary.copyFromSafe(targetIndex, dictSize, targetVector); - dictionary.setValueCount(dictSize + 1); - dictionary.getDataPointer(dictSize, nextPointer); - - // insert it to the hash map - hashMap.put(nextPointer, dictSize); - nextPointer = new ArrowBufPointer(hasher); - - return dictSize; - } - return index; - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java deleted file mode 100644 index ac7a7d32bf597..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.dictionary; - -import java.util.HashMap; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.memory.util.hash.SimpleHasher; -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.ElementAddressableVector; - -/** - * Dictionary encoder based on hash table. - * - * @param encoded vector type. - * @param decoded vector type, which is also the dictionary type. - */ -public class HashTableDictionaryEncoder - implements DictionaryEncoder { - - /** The dictionary for encoding/decoding. It must be sorted. */ - private final D dictionary; - - /** The hasher used to compute the hash code. */ - private final ArrowBufHasher hasher; - - /** A flag indicating if null should be encoded. */ - private final boolean encodeNull; - - /** - * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element, - * whereas the value is the index in the dictionary. - */ - private HashMap hashMap = new HashMap<>(); - - /** The pointer used to probe each element to encode. */ - private ArrowBufPointer reusablePointer; - - /** - * Constructs a dictionary encoder. - * - * @param dictionary the dictionary. - */ - public HashTableDictionaryEncoder(D dictionary) { - this(dictionary, false); - } - - /** - * Constructs a dictionary encoder. - * - * @param dictionary the dictionary. - * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for - * processing null values in the input during encoding/decoding. - *

  • For encoding, when a null is encountered in the input, 1) If the flag is set to true, - * the encoder searches for the value in the dictionary, and outputs the index in the - * dictionary. 2) If the flag is set to false, the encoder simply produces a null in the - * output. - *
  • For decoding, when a null is encountered in the input, 1) If the flag is set to true, - * the decoder should never expect a null in the input. 2) If set to false, the decoder - * simply produces a null in the output. - */ - public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) { - this(dictionary, encodeNull, SimpleHasher.INSTANCE); - } - - /** - * Constructs a dictionary encoder. - * - * @param dictionary the dictionary. - * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for - * processing null values in the input during encoding. When a null is encountered in the - * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply - * produces a null in the output. - * @param hasher the hasher used to calculate the hash code. - */ - public HashTableDictionaryEncoder(D dictionary, boolean encodeNull, ArrowBufHasher hasher) { - this.dictionary = dictionary; - this.hasher = hasher; - this.encodeNull = encodeNull; - - reusablePointer = new ArrowBufPointer(hasher); - - buildHashMap(); - } - - private void buildHashMap() { - for (int i = 0; i < dictionary.getValueCount(); i++) { - ArrowBufPointer pointer = new ArrowBufPointer(hasher); - dictionary.getDataPointer(i, pointer); - hashMap.put(pointer, i); - } - } - - /** - * Encodes an input vector by a hash table. So the algorithm takes O(n) time, where n is the - * length of the input vector. - * - * @param input the input vector. - * @param output the output vector. 
- */ - @Override - public void encode(D input, E output) { - for (int i = 0; i < input.getValueCount(); i++) { - if (!encodeNull && input.isNull(i)) { - continue; - } - - input.getDataPointer(i, reusablePointer); - Integer index = hashMap.get(reusablePointer); - - if (index == null) { - throw new IllegalArgumentException("The data element is not found in the dictionary"); - } - output.setWithPossibleTruncate(i, index); - } - output.setValueCount(input.getValueCount()); - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java deleted file mode 100644 index 9aeff22005751..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.dictionary; - -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.compare.Range; -import org.apache.arrow.vector.compare.RangeEqualsVisitor; - -/** - * Dictionary encoder based on linear search. 
- * - * @param encoded vector type. - * @param decoded vector type, which is also the dictionary type. - */ -public class LinearDictionaryEncoder - implements DictionaryEncoder { - - /** The dictionary for encoding. */ - private final D dictionary; - - /** A flag indicating if null should be encoded. */ - private final boolean encodeNull; - - private RangeEqualsVisitor equalizer; - - private Range range; - - /** - * Constructs a dictionary encoder, with the encode null flag set to false. - * - * @param dictionary the dictionary. Its entries should be sorted in the non-increasing order of - * their frequency. Otherwise, the encoder still produces correct results, but at the expense - * of performance overhead. - */ - public LinearDictionaryEncoder(D dictionary) { - this(dictionary, false); - } - - /** - * Constructs a dictionary encoder. - * - * @param dictionary the dictionary. Its entries should be sorted in the non-increasing order of - * their frequency. Otherwise, the encoder still produces correct results, but at the expense - * of performance overhead. - * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for - * processing null values in the input during encoding. When a null is encountered in the - * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply - * produces a null in the output. - */ - public LinearDictionaryEncoder(D dictionary, boolean encodeNull) { - this.dictionary = dictionary; - this.encodeNull = encodeNull; - - // temporarily set left and right vectors to dictionary - equalizer = new RangeEqualsVisitor(dictionary, dictionary, null); - range = new Range(0, 0, 1); - } - - /** - * Encodes an input vector by linear search. 
When the dictionary is sorted in the non-increasing - * order of the entry frequency, it will have constant time complexity, with no extra memory - * requirement. - * - * @param input the input vector. - * @param output the output vector. Note that it must be in a fresh state. At least, all its - * validity bits should be clear. - */ - @Override - public void encode(D input, E output) { - for (int i = 0; i < input.getValueCount(); i++) { - if (!encodeNull && input.isNull(i)) { - // for this case, we should simply output a null in the output. - // by assuming the output vector is fresh, we do nothing here. - continue; - } - - int index = linearSearch(input, i); - if (index == -1) { - throw new IllegalArgumentException("The data element is not found in the dictionary: " + i); - } - output.setWithPossibleTruncate(i, index); - } - output.setValueCount(input.getValueCount()); - } - - private int linearSearch(D input, int index) { - range.setLeftStart(index); - for (int i = 0; i < dictionary.getValueCount(); i++) { - range.setRightStart(i); - if (input.accept(equalizer, range)) { - return i; - } - } - return -1; - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchDictionaryEncoder.java deleted file mode 100644 index edfe3564f28ff..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchDictionaryEncoder.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.dictionary; - -import org.apache.arrow.algorithm.search.VectorSearcher; -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.ValueVector; - -/** - * Dictionary encoder based on searching. - * - * @param encoded vector type. - * @param decoded vector type, which is also the dictionary type. - */ -public class SearchDictionaryEncoder - implements DictionaryEncoder { - - /** The dictionary for encoding/decoding. It must be sorted. */ - private final D dictionary; - - /** The criteria by which the dictionary is sorted. */ - private final VectorValueComparator comparator; - - /** A flag indicating if null should be encoded. */ - private final boolean encodeNull; - - /** - * Constructs a dictionary encoder. - * - * @param dictionary the dictionary. It must be in sorted order. - * @param comparator the criteria for sorting. - */ - public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator) { - this(dictionary, comparator, false); - } - - /** - * Constructs a dictionary encoder. - * - * @param dictionary the dictionary. It must be in sorted order. - * @param comparator the criteria for sorting. - * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for - * processing null values in the input during encoding. 
When a null is encountered in the - * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply - * produces a null in the output. - */ - public SearchDictionaryEncoder( - D dictionary, VectorValueComparator comparator, boolean encodeNull) { - this.dictionary = dictionary; - this.comparator = comparator; - this.encodeNull = encodeNull; - } - - /** - * Encodes an input vector by binary search. So the algorithm takes O(n * log(m)) time, where n is - * the length of the input vector, and m is the length of the dictionary. - * - * @param input the input vector. - * @param output the output vector. Note that it must be in a fresh state. At least, all its - * validity bits should be clear. - */ - @Override - public void encode(D input, E output) { - for (int i = 0; i < input.getValueCount(); i++) { - if (!encodeNull && input.isNull(i)) { - // for this case, we should simply output a null in the output. - // by assuming the output vector is fresh, we do nothing here. - continue; - } - - int index = VectorSearcher.binarySearch(dictionary, comparator, input, i); - if (index == -1) { - throw new IllegalArgumentException("The data element is not found in the dictionary: " + i); - } - output.setWithPossibleTruncate(i, index); - } - output.setValueCount(input.getValueCount()); - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java deleted file mode 100644 index fca7df067dcff..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.dictionary; - -import java.util.TreeSet; -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.vector.ValueVector; - -/** - * This class builds the dictionary based on a binary search tree. Each add operation can be - * finished in O(log(n)) time, where n is the current dictionary size. - * - * @param the dictionary vector type. - */ -public class SearchTreeBasedDictionaryBuilder - implements DictionaryBuilder { - - /** The dictionary to be built. */ - private final V dictionary; - - /** The criteria for sorting in the search tree. */ - protected final VectorValueComparator comparator; - - /** If null should be encoded. */ - private final boolean encodeNull; - - /** The search tree for storing the value index. */ - private TreeSet searchTree; - - /** - * Construct a search tree-based dictionary builder. - * - * @param dictionary the dictionary vector. - * @param comparator the criteria for value equality. - */ - public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator comparator) { - this(dictionary, comparator, false); - } - - /** - * Construct a search tree-based dictionary builder. - * - * @param dictionary the dictionary vector. - * @param comparator the criteria for value equality. 
- * @param encodeNull if null values should be added to the dictionary. - */ - public SearchTreeBasedDictionaryBuilder( - V dictionary, VectorValueComparator comparator, boolean encodeNull) { - this.dictionary = dictionary; - this.comparator = comparator; - this.encodeNull = encodeNull; - this.comparator.attachVector(dictionary); - - searchTree = new TreeSet<>((index1, index2) -> comparator.compare(index1, index2)); - } - - /** - * Gets the dictionary built. Please note that the dictionary is not in sorted order. Instead, its - * order is determined by the order of element insertion. To get the dictionary in sorted order, - * please use {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}. - * - * @return the dictionary. - */ - @Override - public V getDictionary() { - return dictionary; - } - - /** - * Try to add all values from the target vector to the dictionary. - * - * @param targetVector the target vector containing values to probe. - * @return the number of values actually added to the dictionary. - */ - @Override - public int addValues(V targetVector) { - int oldDictSize = dictionary.getValueCount(); - for (int i = 0; i < targetVector.getValueCount(); i++) { - if (!encodeNull && targetVector.isNull(i)) { - continue; - } - addValue(targetVector, i); - } - return dictionary.getValueCount() - oldDictSize; - } - - /** - * Try to add an element from the target vector to the dictionary. - * - * @param targetVector the target vector containing new element. - * @param targetIndex the index of the new element in the target vector. - * @return the index of the new element in the dictionary. - */ - @Override - public int addValue(V targetVector, int targetIndex) { - // first copy the value to the end of the dictionary - int dictSize = dictionary.getValueCount(); - dictionary.copyFromSafe(targetIndex, dictSize, targetVector); - - // try to add the value to the dictionary, - // if an equal element does not exist. 
- // this operation can be done in O(log(n)) time. - if (searchTree.add(dictSize)) { - // the element is successfully added - dictionary.setValueCount(dictSize + 1); - return dictSize; - } else { - // the element is already in the dictionary - // find its index in O(log(n)) time. - return searchTree.ceiling(dictSize); - } - } - - /** - * Gets the sorted dictionary. Note that given the binary search tree, the sort can finish in - * O(n). - */ - public void populateSortedDictionary(V sortedDictionary) { - int idx = 0; - for (Integer dictIdx : searchTree) { - sortedDictionary.copyFromSafe(dictIdx, idx++, dictionary); - } - - sortedDictionary.setValueCount(dictionary.getValueCount()); - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java deleted file mode 100644 index 5492676af1a2e..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.misc; - -import org.apache.arrow.vector.BaseIntVector; - -/** Partial sum related utilities. 
*/ -public class PartialSumUtils { - - /** - * Converts an input vector to a partial sum vector. This is an inverse operation of {@link - * PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}. Suppose we have input vector a - * and output vector b. Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...). - * - * @param deltaVector the input vector. - * @param partialSumVector the output vector. - * @param sumBase the base of the partial sums. - */ - public static void toPartialSumVector( - BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) { - long sum = sumBase; - partialSumVector.setWithPossibleTruncate(0, sumBase); - - for (int i = 0; i < deltaVector.getValueCount(); i++) { - sum += deltaVector.getValueAsLong(i); - partialSumVector.setWithPossibleTruncate(i + 1, sum); - } - partialSumVector.setValueCount(deltaVector.getValueCount() + 1); - } - - /** - * Converts an input vector to the delta vector. This is an inverse operation of {@link - * PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}. Suppose we have input - * vector a and output vector b. Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...). - * - * @param partialSumVector the input vector. - * @param deltaVector the output vector. - */ - public static void toDeltaVector(BaseIntVector partialSumVector, BaseIntVector deltaVector) { - for (int i = 0; i < partialSumVector.getValueCount() - 1; i++) { - long delta = partialSumVector.getValueAsLong(i + 1) - partialSumVector.getValueAsLong(i); - deltaVector.setWithPossibleTruncate(i, delta); - } - deltaVector.setValueCount(partialSumVector.getValueCount() - 1); - } - - /** - * Given a value and a partial sum vector, finds its position in the partial sum vector. In - * particular, given an integer value a and partial sum vector v, we try to find a position i, so - * that v(i) <= a < v(i + 1). 
The algorithm is based on binary search, so it takes O(log(n)) time, - * where n is the length of the partial sum vector. - * - * @param partialSumVector the input partial sum vector. - * @param value the value to search. - * @return the position in the partial sum vector, if any, or -1, if none is found. - */ - public static int findPositionInPartialSumVector(BaseIntVector partialSumVector, long value) { - if (value < partialSumVector.getValueAsLong(0) - || value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) { - return -1; - } - - int low = 0; - int high = partialSumVector.getValueCount() - 1; - while (low <= high) { - int mid = low + (high - low) / 2; - long midValue = partialSumVector.getValueAsLong(mid); - - if (midValue <= value) { - if (mid == partialSumVector.getValueCount() - 1) { - // the mid is the last element, we have found it - return mid; - } - long nextMidValue = partialSumVector.getValueAsLong(mid + 1); - if (value < nextMidValue) { - // midValue <= value < nextMidValue - // this is exactly what we want. 
- return mid; - } else { - // value >= nextMidValue - // continue to search from the next value on the right - low = mid + 1; - } - } else { - // midValue > value - long prevMidValue = partialSumVector.getValueAsLong(mid - 1); - if (prevMidValue <= value) { - // prevMidValue <= value < midValue - // this is exactly what we want - return mid - 1; - } else { - // prevMidValue > value - // continue to search from the previous value on the left - high = mid - 1; - } - } - } - throw new IllegalStateException("Should never get here"); - } - - private PartialSumUtils() {} -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java deleted file mode 100644 index baa2058ffc51f..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.rank; - -import java.util.stream.IntStream; -import org.apache.arrow.algorithm.sort.IndexSorter; -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.ValueVector; - -/** - * Utility for calculating ranks of vector elements. - * - * @param the vector type - */ -public class VectorRank { - - private VectorValueComparator comparator; - - /** Vector indices. */ - private IntVector indices; - - private final BufferAllocator allocator; - - /** - * Constructs a vector rank utility. - * - * @param allocator the allocator to use. - */ - public VectorRank(BufferAllocator allocator) { - this.allocator = allocator; - } - - /** - * Given a rank r, gets the index of the element that is the rth smallest in the vector. The - * operation is performed without changing the vector, and takes O(n) time, where n is the length - * of the vector. - * - * @param vector the vector from which to get the element index. - * @param comparator the criteria for vector element comparison. - * @param rank the rank to determine. - * @return the element index with the given rank. 
- */ - public int indexAtRank(V vector, VectorValueComparator comparator, int rank) { - Preconditions.checkArgument(rank >= 0 && rank < vector.getValueCount()); - try { - indices = new IntVector("index vector", allocator); - indices.allocateNew(vector.getValueCount()); - IntStream.range(0, vector.getValueCount()).forEach(i -> indices.set(i, i)); - - comparator.attachVector(vector); - this.comparator = comparator; - - int pos = getRank(0, vector.getValueCount() - 1, rank); - return indices.get(pos); - } finally { - indices.close(); - } - } - - private int getRank(int low, int high, int rank) { - int mid = IndexSorter.partition(low, high, indices, comparator); - if (mid < rank) { - return getRank(mid + 1, high, rank); - } else if (mid > rank) { - return getRank(low, mid - 1, rank); - } else { - // mid == rank - return mid; - } - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java deleted file mode 100644 index 6a48019edc3eb..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.search; - -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.compare.Range; -import org.apache.arrow.vector.compare.RangeEqualsVisitor; - -/** - * Search for a value in the vector by multiple threads. This is often used in scenarios where the - * vector is large or low response time is required. - * - * @param the vector type. - */ -public class ParallelSearcher { - - /** The target vector to search. */ - private final V vector; - - /** The thread pool. */ - private final ExecutorService threadPool; - - /** The number of threads to use. */ - private final int numThreads; - - /** The position of the key in the target vector, if any. */ - private volatile int keyPosition = -1; - - /** - * Constructs a parallel searcher. - * - * @param vector the vector to search. - * @param threadPool the thread pool to use. - * @param numThreads the number of threads to use. - */ - public ParallelSearcher(V vector, ExecutorService threadPool, int numThreads) { - this.vector = vector; - this.threadPool = threadPool; - this.numThreads = numThreads; - } - - private CompletableFuture[] initSearch() { - keyPosition = -1; - final CompletableFuture[] futures = new CompletableFuture[numThreads]; - for (int i = 0; i < futures.length; i++) { - futures[i] = new CompletableFuture<>(); - } - return futures; - } - - /** - * Search for the key in the target vector. The element-wise comparison is based on {@link - * RangeEqualsVisitor}, so there are two possible results for each element-wise comparison: equal - * and un-equal. - * - * @param keyVector the vector containing the search key. 
- * @param keyIndex the index of the search key in the key vector. - * @return the position of a matched value in the target vector, or -1 if none is found. Please - * note that if there are multiple matches of the key in the target vector, this method makes - * no guarantees about which instance is returned. For an alternative search implementation - * that always finds the first match of the key, see {@link - * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. - * @throws ExecutionException if an exception occurs in a thread. - * @throws InterruptedException if a thread is interrupted. - */ - public int search(V keyVector, int keyIndex) throws ExecutionException, InterruptedException { - final CompletableFuture[] futures = initSearch(); - final int valueCount = vector.getValueCount(); - for (int i = 0; i < numThreads; i++) { - final int tid = i; - Future unused = - threadPool.submit( - () -> { - // convert to long to avoid overflow - int start = (int) (((long) valueCount) * tid / numThreads); - int end = (int) ((long) valueCount) * (tid + 1) / numThreads; - - if (start >= end) { - // no data assigned to this task. - futures[tid].complete(false); - return; - } - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null); - Range range = new Range(0, 0, 1); - for (int pos = start; pos < end; pos++) { - if (keyPosition != -1) { - // the key has been found by another task - futures[tid].complete(false); - return; - } - range.setLeftStart(pos).setRightStart(keyIndex); - if (visitor.rangeEquals(range)) { - keyPosition = pos; - futures[tid].complete(true); - return; - } - } - - // no match value is found. - futures[tid].complete(false); - }); - } - - CompletableFuture.allOf(futures).get(); - return keyPosition; - } - - /** - * Search for the key in the target vector. 
The element-wise comparison is based on {@link - * VectorValueComparator}, so there are three possible results for each element-wise comparison: - * less than, equal to and greater than. - * - * @param keyVector the vector containing the search key. - * @param keyIndex the index of the search key in the key vector. - * @param comparator the comparator for comparing the key against vector elements. - * @return the position of a matched value in the target vector, or -1 if none is found. Please - * note that if there are multiple matches of the key in the target vector, this method makes - * no guarantees about which instance is returned. For an alternative search implementation - * that always finds the first match of the key, see {@link - * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. - * @throws ExecutionException if an exception occurs in a thread. - * @throws InterruptedException if a thread is interrupted. - */ - public int search(V keyVector, int keyIndex, VectorValueComparator comparator) - throws ExecutionException, InterruptedException { - final CompletableFuture[] futures = initSearch(); - final int valueCount = vector.getValueCount(); - for (int i = 0; i < numThreads; i++) { - final int tid = i; - Future unused = - threadPool.submit( - () -> { - // convert to long to avoid overflow - int start = (int) (((long) valueCount) * tid / numThreads); - int end = (int) ((long) valueCount) * (tid + 1) / numThreads; - - if (start >= end) { - // no data assigned to this task. 
- futures[tid].complete(false); - return; - } - - VectorValueComparator localComparator = comparator.createNew(); - localComparator.attachVectors(vector, keyVector); - for (int pos = start; pos < end; pos++) { - if (keyPosition != -1) { - // the key has been found by another task - futures[tid].complete(false); - return; - } - if (localComparator.compare(pos, keyIndex) == 0) { - keyPosition = pos; - futures[tid].complete(true); - return; - } - } - - // no match value is found. - futures[tid].complete(false); - }); - } - - CompletableFuture.allOf(futures).get(); - return keyPosition; - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java deleted file mode 100644 index c7905dd8956c8..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.search; - -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.vector.ValueVector; - -/** Search for the range of a particular element in the target vector. */ -public class VectorRangeSearcher { - - /** Result returned when a search fails. */ - public static final int SEARCH_FAIL_RESULT = -1; - - /** - * Search for the first occurrence of an element. The search is based on the binary search - * algorithm. So the target vector must be sorted. - * - * @param targetVector the vector from which to perform the search. - * @param comparator the criterion for the comparison. - * @param keyVector the vector containing the element to search. - * @param keyIndex the index of the search key in the key vector. - * @param the vector type. - * @return the index of the first matched element if any, and -1 otherwise. - */ - public static int getFirstMatch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { - comparator.attachVectors(keyVector, targetVector); - - int ret = SEARCH_FAIL_RESULT; - - int low = 0; - int high = targetVector.getValueCount() - 1; - - while (low <= high) { - int mid = low + (high - low) / 2; - int result = comparator.compare(keyIndex, mid); - if (result < 0) { - // the key is smaller - high = mid - 1; - } else if (result > 0) { - // the key is larger - low = mid + 1; - } else { - // an equal element is found - // continue to go left-ward - ret = mid; - high = mid - 1; - } - } - return ret; - } - - /** - * Search for the last occurrence of an element. The search is based on the binary search - * algorithm. So the target vector must be sorted. - * - * @param targetVector the vector from which to perform the search. - * @param comparator the criterion for the comparison. - * @param keyVector the vector containing the element to search. - * @param keyIndex the index of the search key in the key vector. - * @param the vector type. 
- * @return the index of the last matched element if any, and -1 otherwise. - */ - public static int getLastMatch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { - comparator.attachVectors(keyVector, targetVector); - - int ret = SEARCH_FAIL_RESULT; - - int low = 0; - int high = targetVector.getValueCount() - 1; - - while (low <= high) { - int mid = low + (high - low) / 2; - int result = comparator.compare(keyIndex, mid); - if (result < 0) { - // the key is smaller - high = mid - 1; - } else if (result > 0) { - // the key is larger - low = mid + 1; - } else { - // an equal element is found, - // continue to go right-ward - ret = mid; - low = mid + 1; - } - } - return ret; - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java deleted file mode 100644 index dd0b4de5d8677..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.search; - -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.vector.ValueVector; - -/** Search for a particular element in the vector. */ -public final class VectorSearcher { - - /** Result returned when a search fails. */ - public static final int SEARCH_FAIL_RESULT = -1; - - /** - * Search for a particular element from the key vector in the target vector by binary search. The - * target vector must be sorted. - * - * @param targetVector the vector from which to perform the sort. - * @param comparator the criterion for the sort. - * @param keyVector the vector containing the element to search. - * @param keyIndex the index of the search key in the key vector. - * @param the vector type. - * @return the index of a matched element if any, and -1 otherwise. - */ - public static int binarySearch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { - comparator.attachVectors(keyVector, targetVector); - - // perform binary search - int low = 0; - int high = targetVector.getValueCount() - 1; - - while (low <= high) { - int mid = low + (high - low) / 2; - int cmp = comparator.compare(keyIndex, mid); - if (cmp < 0) { - high = mid - 1; - } else if (cmp > 0) { - low = mid + 1; - } else { - return mid; - } - } - return SEARCH_FAIL_RESULT; - } - - /** - * Search for a particular element from the key vector in the target vector by traversing the - * vector in sequence. - * - * @param targetVector the vector from which to perform the search. - * @param comparator the criterion for element equality. - * @param keyVector the vector containing the element to search. - * @param keyIndex the index of the search key in the key vector. - * @param the vector type. - * @return the index of a matched element if any, and -1 otherwise. 
- */ - public static int linearSearch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { - comparator.attachVectors(keyVector, targetVector); - for (int i = 0; i < targetVector.getValueCount(); i++) { - if (comparator.compare(keyIndex, i) == 0) { - return i; - } - } - return SEARCH_FAIL_RESULT; - } - - private VectorSearcher() {} -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java deleted file mode 100644 index 77093d87bc489..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import org.apache.arrow.vector.ValueVector; - -/** - * A composite vector comparator compares a number of vectors by a number of inner comparators. - * - *

    It works by first using the first comparator, if a non-zero value is returned, it simply - * returns it. Otherwise, it uses the second comparator, and so on, until a non-zero value is - * produced, or all inner comparators have been used. - */ -public class CompositeVectorComparator extends VectorValueComparator { - - private final VectorValueComparator[] innerComparators; - - public CompositeVectorComparator(VectorValueComparator[] innerComparators) { - this.innerComparators = innerComparators; - } - - @Override - public int compareNotNull(int index1, int index2) { - // short-cut for scenarios when the caller can be sure that the vectors are non-nullable. - for (int i = 0; i < innerComparators.length; i++) { - int result = innerComparators[i].compareNotNull(index1, index2); - if (result != 0) { - return result; - } - } - return 0; - } - - @Override - public int compare(int index1, int index2) { - for (int i = 0; i < innerComparators.length; i++) { - int result = innerComparators[i].compare(index1, index2); - if (result != 0) { - return result; - } - } - return 0; - } - - @Override - public VectorValueComparator createNew() { - VectorValueComparator[] newInnerComparators = - new VectorValueComparator[innerComparators.length]; - for (int i = 0; i < innerComparators.length; i++) { - newInnerComparators[i] = innerComparators[i].createNew(); - } - return new CompositeVectorComparator(newInnerComparators); - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java deleted file mode 100644 index ec650cd9dc88b..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java +++ /dev/null @@ -1,858 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; - -import java.math.BigDecimal; -import java.time.Duration; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.ByteFunctionHelpers; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import 
org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VariableWidthVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.RepeatedValueVector; -import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder; - -/** Default comparator implementations for different types of vectors. */ -public class DefaultVectorComparators { - - /** - * Create the default comparator for the vector. - * - * @param vector the vector. - * @param the vector type. - * @return the default comparator. - */ - public static VectorValueComparator createDefaultComparator(T vector) { - if (vector instanceof BaseFixedWidthVector) { - if (vector instanceof TinyIntVector) { - return (VectorValueComparator) new ByteComparator(); - } else if (vector instanceof SmallIntVector) { - return (VectorValueComparator) new ShortComparator(); - } else if (vector instanceof IntVector) { - return (VectorValueComparator) new IntComparator(); - } else if (vector instanceof BigIntVector) { - return (VectorValueComparator) new LongComparator(); - } else if (vector instanceof Float4Vector) { - return (VectorValueComparator) new Float4Comparator(); - } else if (vector instanceof Float8Vector) { - return (VectorValueComparator) new Float8Comparator(); - } else if (vector instanceof UInt1Vector) { - return (VectorValueComparator) new UInt1Comparator(); - } else if (vector instanceof UInt2Vector) { - return (VectorValueComparator) new UInt2Comparator(); - } else if (vector instanceof UInt4Vector) { - return (VectorValueComparator) new UInt4Comparator(); - } else if (vector instanceof UInt8Vector) { - return (VectorValueComparator) new UInt8Comparator(); - } else if (vector 
instanceof BitVector) { - return (VectorValueComparator) new BitComparator(); - } else if (vector instanceof DateDayVector) { - return (VectorValueComparator) new DateDayComparator(); - } else if (vector instanceof DateMilliVector) { - return (VectorValueComparator) new DateMilliComparator(); - } else if (vector instanceof Decimal256Vector) { - return (VectorValueComparator) new Decimal256Comparator(); - } else if (vector instanceof DecimalVector) { - return (VectorValueComparator) new DecimalComparator(); - } else if (vector instanceof DurationVector) { - return (VectorValueComparator) new DurationComparator(); - } else if (vector instanceof IntervalDayVector) { - return (VectorValueComparator) new IntervalDayComparator(); - } else if (vector instanceof IntervalMonthDayNanoVector) { - throw new IllegalArgumentException( - "No default comparator for " + vector.getClass().getCanonicalName()); - } else if (vector instanceof TimeMicroVector) { - return (VectorValueComparator) new TimeMicroComparator(); - } else if (vector instanceof TimeMilliVector) { - return (VectorValueComparator) new TimeMilliComparator(); - } else if (vector instanceof TimeNanoVector) { - return (VectorValueComparator) new TimeNanoComparator(); - } else if (vector instanceof TimeSecVector) { - return (VectorValueComparator) new TimeSecComparator(); - } else if (vector instanceof TimeStampVector) { - return (VectorValueComparator) new TimeStampComparator(); - } else if (vector instanceof FixedSizeBinaryVector) { - return (VectorValueComparator) new FixedSizeBinaryComparator(); - } - } else if (vector instanceof VariableWidthVector) { - return (VectorValueComparator) new VariableWidthComparator(); - } else if (vector instanceof RepeatedValueVector) { - VectorValueComparator innerComparator = - createDefaultComparator(((RepeatedValueVector) vector).getDataVector()); - return new RepeatedValueComparator(innerComparator); - } else if (vector instanceof FixedSizeListVector) { - VectorValueComparator 
innerComparator = - createDefaultComparator(((FixedSizeListVector) vector).getDataVector()); - return new FixedSizeListComparator(innerComparator); - } else if (vector instanceof NullVector) { - return (VectorValueComparator) new NullComparator(); - } - - throw new IllegalArgumentException( - "No default comparator for " + vector.getClass().getCanonicalName()); - } - - /** Default comparator for bytes. The comparison is based on values, with null comes first. */ - public static class ByteComparator extends VectorValueComparator { - - public ByteComparator() { - super(Byte.SIZE / 8); - } - - @Override - public int compareNotNull(int index1, int index2) { - byte value1 = vector1.get(index1); - byte value2 = vector2.get(index2); - return value1 - value2; - } - - @Override - public VectorValueComparator createNew() { - return new ByteComparator(); - } - } - - /** - * Default comparator for short integers. The comparison is based on values, with null comes - * first. - */ - public static class ShortComparator extends VectorValueComparator { - - public ShortComparator() { - super(Short.SIZE / 8); - } - - @Override - public int compareNotNull(int index1, int index2) { - short value1 = vector1.get(index1); - short value2 = vector2.get(index2); - return value1 - value2; - } - - @Override - public VectorValueComparator createNew() { - return new ShortComparator(); - } - } - - /** - * Default comparator for 32-bit integers. The comparison is based on int values, with null comes - * first. - */ - public static class IntComparator extends VectorValueComparator { - - public IntComparator() { - super(Integer.SIZE / 8); - } - - @Override - public int compareNotNull(int index1, int index2) { - int value1 = vector1.get(index1); - int value2 = vector2.get(index2); - return Integer.compare(value1, value2); - } - - @Override - public VectorValueComparator createNew() { - return new IntComparator(); - } - } - - /** - * Default comparator for long integers. 
The comparison is based on values, with null comes first. - */ - public static class LongComparator extends VectorValueComparator { - - public LongComparator() { - super(Long.SIZE / 8); - } - - @Override - public int compareNotNull(int index1, int index2) { - long value1 = vector1.get(index1); - long value2 = vector2.get(index2); - - return Long.compare(value1, value2); - } - - @Override - public VectorValueComparator createNew() { - return new LongComparator(); - } - } - - /** - * Default comparator for unsigned bytes. The comparison is based on values, with null comes - * first. - */ - public static class UInt1Comparator extends VectorValueComparator { - - public UInt1Comparator() { - super(1); - } - - @Override - public int compareNotNull(int index1, int index2) { - byte value1 = vector1.get(index1); - byte value2 = vector2.get(index2); - - return (value1 & 0xff) - (value2 & 0xff); - } - - @Override - public VectorValueComparator createNew() { - return new UInt1Comparator(); - } - } - - /** - * Default comparator for unsigned short integer. The comparison is based on values, with null - * comes first. - */ - public static class UInt2Comparator extends VectorValueComparator { - - public UInt2Comparator() { - super(2); - } - - @Override - public int compareNotNull(int index1, int index2) { - char value1 = vector1.get(index1); - char value2 = vector2.get(index2); - - // please note that we should not use the built-in - // Character#compare method here, as that method - // essentially compares char values as signed integers. - return (value1 & 0xffff) - (value2 & 0xffff); - } - - @Override - public VectorValueComparator createNew() { - return new UInt2Comparator(); - } - } - - /** - * Default comparator for unsigned integer. The comparison is based on values, with null comes - * first. 
- */ - public static class UInt4Comparator extends VectorValueComparator { - - public UInt4Comparator() { - super(4); - } - - @Override - public int compareNotNull(int index1, int index2) { - int value1 = vector1.get(index1); - int value2 = vector2.get(index2); - return ByteFunctionHelpers.unsignedIntCompare(value1, value2); - } - - @Override - public VectorValueComparator createNew() { - return new UInt4Comparator(); - } - } - - /** - * Default comparator for unsigned long integer. The comparison is based on values, with null - * comes first. - */ - public static class UInt8Comparator extends VectorValueComparator { - - public UInt8Comparator() { - super(8); - } - - @Override - public int compareNotNull(int index1, int index2) { - long value1 = vector1.get(index1); - long value2 = vector2.get(index2); - return ByteFunctionHelpers.unsignedLongCompare(value1, value2); - } - - @Override - public VectorValueComparator createNew() { - return new UInt8Comparator(); - } - } - - /** - * Default comparator for float type. The comparison is based on values, with null comes first. - */ - public static class Float4Comparator extends VectorValueComparator { - - public Float4Comparator() { - super(Float.SIZE / 8); - } - - @Override - public int compareNotNull(int index1, int index2) { - float value1 = vector1.get(index1); - float value2 = vector2.get(index2); - - boolean isNan1 = Float.isNaN(value1); - boolean isNan2 = Float.isNaN(value2); - if (isNan1 || isNan2) { - if (isNan1 && isNan2) { - return 0; - } else if (isNan1) { - // nan is greater than any normal value - return 1; - } else { - return -1; - } - } - - return (int) Math.signum(value1 - value2); - } - - @Override - public VectorValueComparator createNew() { - return new Float4Comparator(); - } - } - - /** - * Default comparator for double type. The comparison is based on values, with null comes first. 
- */ - public static class Float8Comparator extends VectorValueComparator { - - public Float8Comparator() { - super(Double.SIZE / 8); - } - - @Override - public int compareNotNull(int index1, int index2) { - double value1 = vector1.get(index1); - double value2 = vector2.get(index2); - - boolean isNan1 = Double.isNaN(value1); - boolean isNan2 = Double.isNaN(value2); - if (isNan1 || isNan2) { - if (isNan1 && isNan2) { - return 0; - } else if (isNan1) { - // nan is greater than any normal value - return 1; - } else { - return -1; - } - } - - return (int) Math.signum(value1 - value2); - } - - @Override - public VectorValueComparator createNew() { - return new Float8Comparator(); - } - } - - /** Default comparator for bit type. The comparison is based on values, with null comes first. */ - public static class BitComparator extends VectorValueComparator { - - public BitComparator() { - super(-1); - } - - @Override - public int compareNotNull(int index1, int index2) { - boolean value1 = vector1.get(index1) != 0; - boolean value2 = vector2.get(index2) != 0; - - return Boolean.compare(value1, value2); - } - - @Override - public VectorValueComparator createNew() { - return new BitComparator(); - } - } - - /** - * Default comparator for DateDay type. The comparison is based on values, with null comes first. - */ - public static class DateDayComparator extends VectorValueComparator { - - public DateDayComparator() { - super(DateDayVector.TYPE_WIDTH); - } - - @Override - public int compareNotNull(int index1, int index2) { - int value1 = vector1.get(index1); - int value2 = vector2.get(index2); - return Integer.compare(value1, value2); - } - - @Override - public VectorValueComparator createNew() { - return new DateDayComparator(); - } - } - - /** - * Default comparator for DateMilli type. The comparison is based on values, with null comes - * first. 
- */ - public static class DateMilliComparator extends VectorValueComparator { - - public DateMilliComparator() { - super(DateMilliVector.TYPE_WIDTH); - } - - @Override - public int compareNotNull(int index1, int index2) { - long value1 = vector1.get(index1); - long value2 = vector2.get(index2); - - return Long.compare(value1, value2); - } - - @Override - public VectorValueComparator createNew() { - return new DateMilliComparator(); - } - } - - /** - * Default comparator for Decimal256 type. The comparison is based on values, with null comes - * first. - */ - public static class Decimal256Comparator extends VectorValueComparator { - - public Decimal256Comparator() { - super(Decimal256Vector.TYPE_WIDTH); - } - - @Override - public int compareNotNull(int index1, int index2) { - BigDecimal value1 = vector1.getObjectNotNull(index1); - BigDecimal value2 = vector2.getObjectNotNull(index2); - - return value1.compareTo(value2); - } - - @Override - public VectorValueComparator createNew() { - return new Decimal256Comparator(); - } - } - - /** - * Default comparator for Decimal type. The comparison is based on values, with null comes first. - */ - public static class DecimalComparator extends VectorValueComparator { - - public DecimalComparator() { - super(DecimalVector.TYPE_WIDTH); - } - - @Override - public int compareNotNull(int index1, int index2) { - BigDecimal value1 = vector1.getObjectNotNull(index1); - BigDecimal value2 = vector2.getObjectNotNull(index2); - - return value1.compareTo(value2); - } - - @Override - public VectorValueComparator createNew() { - return new DecimalComparator(); - } - } - - /** - * Default comparator for Duration type. The comparison is based on values, with null comes first. 
- */ - public static class DurationComparator extends VectorValueComparator { - - public DurationComparator() { - super(DurationVector.TYPE_WIDTH); - } - - @Override - public int compareNotNull(int index1, int index2) { - Duration value1 = vector1.getObjectNotNull(index1); - Duration value2 = vector2.getObjectNotNull(index2); - - return value1.compareTo(value2); - } - - @Override - public VectorValueComparator createNew() { - return new DurationComparator(); - } - } - - /** - * Default comparator for IntervalDay type. The comparison is based on values, with null comes - * first. - */ - public static class IntervalDayComparator extends VectorValueComparator { - - public IntervalDayComparator() { - super(IntervalDayVector.TYPE_WIDTH); - } - - @Override - public int compareNotNull(int index1, int index2) { - Duration value1 = vector1.getObjectNotNull(index1); - Duration value2 = vector2.getObjectNotNull(index2); - - return value1.compareTo(value2); - } - - @Override - public VectorValueComparator createNew() { - return new IntervalDayComparator(); - } - } - - /** - * Default comparator for TimeMicro type. The comparison is based on values, with null comes - * first. - */ - public static class TimeMicroComparator extends VectorValueComparator { - - public TimeMicroComparator() { - super(TimeMicroVector.TYPE_WIDTH); - } - - @Override - public int compareNotNull(int index1, int index2) { - long value1 = vector1.get(index1); - long value2 = vector2.get(index2); - - return Long.compare(value1, value2); - } - - @Override - public VectorValueComparator createNew() { - return new TimeMicroComparator(); - } - } - - /** - * Default comparator for TimeMilli type. The comparison is based on values, with null comes - * first. 
- */ - public static class TimeMilliComparator extends VectorValueComparator { - - public TimeMilliComparator() { - super(TimeMilliVector.TYPE_WIDTH); - } - - @Override - public int compareNotNull(int index1, int index2) { - int value1 = vector1.get(index1); - int value2 = vector2.get(index2); - - return Integer.compare(value1, value2); - } - - @Override - public VectorValueComparator createNew() { - return new TimeMilliComparator(); - } - } - - /** - * Default comparator for TimeNano type. The comparison is based on values, with null comes first. - */ - public static class TimeNanoComparator extends VectorValueComparator { - - public TimeNanoComparator() { - super(TimeNanoVector.TYPE_WIDTH); - } - - @Override - public int compareNotNull(int index1, int index2) { - long value1 = vector1.get(index1); - long value2 = vector2.get(index2); - - return Long.compare(value1, value2); - } - - @Override - public VectorValueComparator createNew() { - return new TimeNanoComparator(); - } - } - - /** - * Default comparator for TimeSec type. The comparison is based on values, with null comes first. - */ - public static class TimeSecComparator extends VectorValueComparator { - - public TimeSecComparator() { - super(TimeSecVector.TYPE_WIDTH); - } - - @Override - public int compareNotNull(int index1, int index2) { - int value1 = vector1.get(index1); - int value2 = vector2.get(index2); - - return Integer.compare(value1, value2); - } - - @Override - public VectorValueComparator createNew() { - return new TimeSecComparator(); - } - } - - /** - * Default comparator for TimeSec type. The comparison is based on values, with null comes first. 
- */ - public static class TimeStampComparator extends VectorValueComparator { - - public TimeStampComparator() { - super(TimeStampVector.TYPE_WIDTH); - } - - @Override - public int compareNotNull(int index1, int index2) { - long value1 = vector1.get(index1); - long value2 = vector2.get(index2); - - return Long.compare(value1, value2); - } - - @Override - public VectorValueComparator createNew() { - return new TimeStampComparator(); - } - } - - /** - * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}. The comparison is - * in lexicographic order, with null comes first. - */ - public static class FixedSizeBinaryComparator - extends VectorValueComparator { - - @Override - public int compare(int index1, int index2) { - NullableFixedSizeBinaryHolder holder1 = new NullableFixedSizeBinaryHolder(); - NullableFixedSizeBinaryHolder holder2 = new NullableFixedSizeBinaryHolder(); - vector1.get(index1, holder1); - vector2.get(index2, holder2); - - return ByteFunctionHelpers.compare( - holder1.buffer, 0, holder1.byteWidth, holder2.buffer, 0, holder2.byteWidth); - } - - @Override - public int compareNotNull(int index1, int index2) { - NullableFixedSizeBinaryHolder holder1 = new NullableFixedSizeBinaryHolder(); - NullableFixedSizeBinaryHolder holder2 = new NullableFixedSizeBinaryHolder(); - vector1.get(index1, holder1); - vector2.get(index2, holder2); - - return ByteFunctionHelpers.compare( - holder1.buffer, 0, holder1.byteWidth, holder2.buffer, 0, holder2.byteWidth); - } - - @Override - public VectorValueComparator createNew() { - return new FixedSizeBinaryComparator(); - } - } - - /** Default comparator for {@link org.apache.arrow.vector.NullVector}. */ - public static class NullComparator extends VectorValueComparator { - @Override - public int compare(int index1, int index2) { - // Values are always equal (and are always null). 
- return 0; - } - - @Override - public int compareNotNull(int index1, int index2) { - throw new AssertionError("Cannot compare non-null values in a NullVector."); - } - - @Override - public VectorValueComparator createNew() { - return new NullComparator(); - } - } - - /** - * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}. The comparison is - * in lexicographic order, with null comes first. - */ - public static class VariableWidthComparator extends VectorValueComparator { - - private final ArrowBufPointer reusablePointer1 = new ArrowBufPointer(); - - private final ArrowBufPointer reusablePointer2 = new ArrowBufPointer(); - - @Override - public int compare(int index1, int index2) { - vector1.getDataPointer(index1, reusablePointer1); - vector2.getDataPointer(index2, reusablePointer2); - return reusablePointer1.compareTo(reusablePointer2); - } - - @Override - public int compareNotNull(int index1, int index2) { - vector1.getDataPointer(index1, reusablePointer1); - vector2.getDataPointer(index2, reusablePointer2); - return reusablePointer1.compareTo(reusablePointer2); - } - - @Override - public VectorValueComparator createNew() { - return new VariableWidthComparator(); - } - } - - /** - * Default comparator for {@link RepeatedValueVector}. It works by comparing the underlying vector - * in a lexicographic order. - * - * @param inner vector type. 
- */ - public static class RepeatedValueComparator - extends VectorValueComparator { - - private final VectorValueComparator innerComparator; - - public RepeatedValueComparator(VectorValueComparator innerComparator) { - this.innerComparator = innerComparator; - } - - @Override - public int compareNotNull(int index1, int index2) { - int startIdx1 = vector1.getOffsetBuffer().getInt((long) index1 * OFFSET_WIDTH); - int startIdx2 = vector2.getOffsetBuffer().getInt((long) index2 * OFFSET_WIDTH); - - int endIdx1 = vector1.getOffsetBuffer().getInt((long) (index1 + 1) * OFFSET_WIDTH); - int endIdx2 = vector2.getOffsetBuffer().getInt((long) (index2 + 1) * OFFSET_WIDTH); - - int length1 = endIdx1 - startIdx1; - int length2 = endIdx2 - startIdx2; - - int length = Math.min(length1, length2); - - for (int i = 0; i < length; i++) { - int result = innerComparator.compare(startIdx1 + i, startIdx2 + i); - if (result != 0) { - return result; - } - } - return length1 - length2; - } - - @Override - public VectorValueComparator createNew() { - VectorValueComparator newInnerComparator = innerComparator.createNew(); - return new RepeatedValueComparator<>(newInnerComparator); - } - - @Override - public void attachVectors(RepeatedValueVector vector1, RepeatedValueVector vector2) { - this.vector1 = vector1; - this.vector2 = vector2; - - innerComparator.attachVectors((T) vector1.getDataVector(), (T) vector2.getDataVector()); - } - } - - /** - * Default comparator for {@link RepeatedValueVector}. It works by comparing the underlying vector - * in a lexicographic order. - * - * @param inner vector type. 
- */ - public static class FixedSizeListComparator - extends VectorValueComparator { - - private final VectorValueComparator innerComparator; - - public FixedSizeListComparator(VectorValueComparator innerComparator) { - this.innerComparator = innerComparator; - } - - @Override - public int compareNotNull(int index1, int index2) { - int length1 = vector1.getListSize(); - int length2 = vector2.getListSize(); - - int length = Math.min(length1, length2); - int startIdx1 = vector1.getElementStartIndex(index1); - int startIdx2 = vector2.getElementStartIndex(index2); - - for (int i = 0; i < length; i++) { - int result = innerComparator.compare(startIdx1 + i, startIdx2 + i); - if (result != 0) { - return result; - } - } - return length1 - length2; - } - - @Override - public VectorValueComparator createNew() { - VectorValueComparator newInnerComparator = innerComparator.createNew(); - return new FixedSizeListComparator<>(newInnerComparator); - } - - @Override - public void attachVectors(FixedSizeListVector vector1, FixedSizeListVector vector2) { - this.vector1 = vector1; - this.vector2 = vector2; - - innerComparator.attachVectors((T) vector1.getDataVector(), (T) vector2.getDataVector()); - } - } - - private DefaultVectorComparators() {} -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java deleted file mode 100644 index ea2b344a1eabb..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import org.apache.arrow.vector.BaseFixedWidthVector; - -/** - * Default in-place sorter for fixed-width vectors. It is based on quick-sort, with average time - * complexity O(n*log(n)). - * - * @param vector type. - */ -public class FixedWidthInPlaceVectorSorter - implements InPlaceVectorSorter { - - /** - * If the number of items is smaller than this threshold, we will use another algorithm to sort - * the data. - */ - public static final int CHANGE_ALGORITHM_THRESHOLD = 15; - - static final int STOP_CHOOSING_PIVOT_THRESHOLD = 3; - - VectorValueComparator comparator; - - /** The vector to sort. */ - V vec; - - /** The buffer to hold the pivot. It always has length 1. 
*/ - V pivotBuffer; - - @Override - public void sortInPlace(V vec, VectorValueComparator comparator) { - try { - this.vec = vec; - this.comparator = comparator; - this.pivotBuffer = (V) vec.getField().createVector(vec.getAllocator()); - this.pivotBuffer.allocateNew(1); - this.pivotBuffer.setValueCount(1); - - comparator.attachVectors(vec, pivotBuffer); - quickSort(); - } finally { - this.pivotBuffer.close(); - } - } - - private void quickSort() { - try (OffHeapIntStack rangeStack = new OffHeapIntStack(vec.getAllocator())) { - rangeStack.push(0); - rangeStack.push(vec.getValueCount() - 1); - - while (!rangeStack.isEmpty()) { - int high = rangeStack.pop(); - int low = rangeStack.pop(); - if (low < high) { - if (high - low < CHANGE_ALGORITHM_THRESHOLD) { - // switch to insertion sort - InsertionSorter.insertionSort(vec, low, high, comparator, pivotBuffer); - continue; - } - - int mid = partition(low, high); - - // push the larger part to stack first, - // to reduce the required stack size - if (high - mid < mid - low) { - rangeStack.push(low); - rangeStack.push(mid - 1); - - rangeStack.push(mid + 1); - rangeStack.push(high); - } else { - rangeStack.push(mid + 1); - rangeStack.push(high); - - rangeStack.push(low); - rangeStack.push(mid - 1); - } - } - } - } - } - - /** Select the pivot as the median of 3 samples. 
*/ - void choosePivot(int low, int high) { - // we need at least 3 items - if (high - low + 1 < STOP_CHOOSING_PIVOT_THRESHOLD) { - pivotBuffer.copyFrom(low, 0, vec); - return; - } - - comparator.attachVector(vec); - int mid = low + (high - low) / 2; - - // find the median by at most 3 comparisons - int medianIdx; - if (comparator.compare(low, mid) < 0) { - if (comparator.compare(mid, high) < 0) { - medianIdx = mid; - } else { - if (comparator.compare(low, high) < 0) { - medianIdx = high; - } else { - medianIdx = low; - } - } - } else { - if (comparator.compare(mid, high) > 0) { - medianIdx = mid; - } else { - if (comparator.compare(low, high) < 0) { - medianIdx = low; - } else { - medianIdx = high; - } - } - } - - // move the pivot to the low position, if necessary - if (medianIdx != low) { - pivotBuffer.copyFrom(medianIdx, 0, vec); - vec.copyFrom(low, medianIdx, vec); - vec.copyFrom(0, low, pivotBuffer); - } else { - pivotBuffer.copyFrom(low, 0, vec); - } - - comparator.attachVectors(vec, pivotBuffer); - } - - private int partition(int low, int high) { - choosePivot(low, high); - - while (low < high) { - while (low < high && comparator.compare(high, 0) >= 0) { - high -= 1; - } - vec.copyFrom(high, low, vec); - - while (low < high && comparator.compare(low, 0) <= 0) { - low += 1; - } - vec.copyFrom(low, high, vec); - } - - vec.copyFrom(0, low, pivotBuffer); - return low; - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java deleted file mode 100644 index ac8b5a4be56aa..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.util.MemoryUtil; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.IntVector; - -/** - * Default out-of-place sorter for fixed-width vectors. It is an out-of-place sort, with time - * complexity O(n*log(n)). - * - * @param vector type. 
- */ -public class FixedWidthOutOfPlaceVectorSorter - implements OutOfPlaceVectorSorter { - - protected IndexSorter indexSorter = new IndexSorter<>(); - - @Override - public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) { - if (srcVector instanceof BitVector) { - throw new IllegalArgumentException( - "BitVector is not supported with FixedWidthOutOfPlaceVectorSorter."); - } - comparator.attachVector(srcVector); - - int valueWidth = comparator.getValueWidth(); - - // buffers referenced in the sort - ArrowBuf srcValueBuffer = srcVector.getDataBuffer(); - ArrowBuf dstValidityBuffer = dstVector.getValidityBuffer(); - ArrowBuf dstValueBuffer = dstVector.getDataBuffer(); - - // check buffer size - Preconditions.checkArgument( - dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), - "Not enough capacity for the validity buffer of the dst vector. " - + "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 7) / 8, - dstValidityBuffer.capacity()); - Preconditions.checkArgument( - dstValueBuffer.capacity() >= srcVector.getValueCount() * ((long) srcVector.getTypeWidth()), - "Not enough capacity for the data buffer of the dst vector. 
" - + "Expected capacity %s, actual capacity %s", - srcVector.getValueCount() * srcVector.getTypeWidth(), - dstValueBuffer.capacity()); - - // sort value indices - try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { - sortedIndices.allocateNew(srcVector.getValueCount()); - sortedIndices.setValueCount(srcVector.getValueCount()); - indexSorter.sort(srcVector, sortedIndices, comparator); - - // copy sorted values to the output vector - for (int dstIndex = 0; dstIndex < sortedIndices.getValueCount(); dstIndex++) { - int srcIndex = sortedIndices.get(dstIndex); - if (srcVector.isNull(srcIndex)) { - BitVectorHelper.unsetBit(dstValidityBuffer, dstIndex); - } else { - BitVectorHelper.setBit(dstValidityBuffer, dstIndex); - MemoryUtil.copyMemory( - srcValueBuffer.memoryAddress() + srcIndex * ((long) valueWidth), - dstValueBuffer.memoryAddress() + dstIndex * ((long) valueWidth), - valueWidth); - } - } - } - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java deleted file mode 100644 index 18f5e94314f83..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.ValueVector; - -/** - * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)). Since it - * does not make any assumptions about the memory layout of the vector, its performance can be - * sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}), it - * should be used in preference. - * - * @param vector type. - */ -public class GeneralOutOfPlaceVectorSorter - implements OutOfPlaceVectorSorter { - - @Override - public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) { - comparator.attachVector(srcVector); - - // check vector capacity - Preconditions.checkArgument( - dstVector.getValueCapacity() >= srcVector.getValueCount(), - "Not enough capacity for the target vector. 
" + "Expected capacity %s, actual capacity %s", - srcVector.getValueCount(), - dstVector.getValueCapacity()); - - // sort value indices - try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { - sortedIndices.allocateNew(srcVector.getValueCount()); - sortedIndices.setValueCount(srcVector.getValueCount()); - - IndexSorter indexSorter = new IndexSorter<>(); - indexSorter.sort(srcVector, sortedIndices, comparator); - - // copy sorted values to the output vector - for (int dstIndex = 0; dstIndex < sortedIndices.getValueCount(); dstIndex++) { - int srcIndex = sortedIndices.get(dstIndex); - - dstVector.copyFromSafe(srcIndex, dstIndex, srcVector); - } - } - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java deleted file mode 100644 index ba41bb9e4eac7..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.sort; - -import org.apache.arrow.vector.ValueVector; - -/** - * Basic interface for sorting a vector in-place. That is, the sorting is performed by modifying the - * input vector, without creating a new sorted vector. - * - * @param the vector type. - */ -public interface InPlaceVectorSorter { - - /** - * Sort a vector in-place. - * - * @param vec the vector to sort. - * @param comparator the criteria for sort. - */ - void sortInPlace(V vec, VectorValueComparator comparator); -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java deleted file mode 100644 index b8ce3289d2889..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import java.util.stream.IntStream; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.ValueVector; - -/** - * Sorter for the indices of a vector. - * - * @param vector type. 
- */ -public class IndexSorter { - - /** - * If the number of items is smaller than this threshold, we will use another algorithm to sort - * the data. - */ - public static final int CHANGE_ALGORITHM_THRESHOLD = 15; - - /** Comparator for vector indices. */ - private VectorValueComparator comparator; - - /** Vector indices to sort. */ - private IntVector indices; - - /** - * Sorts indices, by quick-sort. Suppose the vector is denoted by v. After calling this method, - * the following relations hold: v(indices[0]) <= v(indices[1]) <= ... - * - * @param vector the vector whose indices need to be sorted. - * @param indices the vector for storing the sorted indices. - * @param comparator the comparator to sort indices. - */ - public void sort(V vector, IntVector indices, VectorValueComparator comparator) { - comparator.attachVector(vector); - - this.indices = indices; - - IntStream.range(0, vector.getValueCount()).forEach(i -> indices.set(i, i)); - - this.comparator = comparator; - - quickSort(); - } - - private void quickSort() { - try (OffHeapIntStack rangeStack = new OffHeapIntStack(indices.getAllocator())) { - rangeStack.push(0); - rangeStack.push(indices.getValueCount() - 1); - - while (!rangeStack.isEmpty()) { - int high = rangeStack.pop(); - int low = rangeStack.pop(); - - if (low < high) { - if (high - low < CHANGE_ALGORITHM_THRESHOLD) { - InsertionSorter.insertionSort(indices, low, high, comparator); - continue; - } - - int mid = partition(low, high, indices, comparator); - - // push the larger part to stack first, - // to reduce the required stack size - if (high - mid < mid - low) { - rangeStack.push(low); - rangeStack.push(mid - 1); - - rangeStack.push(mid + 1); - rangeStack.push(high); - } else { - rangeStack.push(mid + 1); - rangeStack.push(high); - - rangeStack.push(low); - rangeStack.push(mid - 1); - } - } - } - } - } - - /** Select the pivot as the median of 3 samples. 
*/ - static int choosePivot( - int low, int high, IntVector indices, VectorValueComparator comparator) { - // we need at least 3 items - if (high - low + 1 < FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD) { - return indices.get(low); - } - - int mid = low + (high - low) / 2; - - // find the median by at most 3 comparisons - int medianIdx; - if (comparator.compare(indices.get(low), indices.get(mid)) < 0) { - if (comparator.compare(indices.get(mid), indices.get(high)) < 0) { - medianIdx = mid; - } else { - if (comparator.compare(indices.get(low), indices.get(high)) < 0) { - medianIdx = high; - } else { - medianIdx = low; - } - } - } else { - if (comparator.compare(indices.get(mid), indices.get(high)) > 0) { - medianIdx = mid; - } else { - if (comparator.compare(indices.get(low), indices.get(high)) < 0) { - medianIdx = low; - } else { - medianIdx = high; - } - } - } - - // move the pivot to the low position, if necessary - if (medianIdx != low) { - int tmp = indices.get(medianIdx); - indices.set(medianIdx, indices.get(low)); - indices.set(low, tmp); - return tmp; - } else { - return indices.get(low); - } - } - - /** - * Partition a range of values in a vector into two parts, with elements in one part smaller than - * elements from the other part. The partition is based on the element indices, so it does not - * modify the underlying vector. - * - * @param low the lower bound of the range. - * @param high the upper bound of the range. - * @param indices vector element indices. - * @param comparator criteria for comparison. - * @param the vector type. - * @return the index of the split point. 
- */ - public static int partition( - int low, int high, IntVector indices, VectorValueComparator comparator) { - int pivotIndex = choosePivot(low, high, indices, comparator); - - while (low < high) { - while (low < high && comparator.compare(indices.get(high), pivotIndex) >= 0) { - high -= 1; - } - indices.set(low, indices.get(high)); - - while (low < high && comparator.compare(indices.get(low), pivotIndex) <= 0) { - low += 1; - } - indices.set(high, indices.get(low)); - } - - indices.set(low, pivotIndex); - return low; - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java deleted file mode 100644 index c058636d66d1e..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.ValueVector; - -/** Insertion sorter. 
*/ -class InsertionSorter { - - /** - * Sorts the range of a vector by insertion sort. - * - * @param vector the vector to be sorted. - * @param startIdx the start index of the range (inclusive). - * @param endIdx the end index of the range (inclusive). - * @param buffer an extra buffer with capacity 1 to hold the current key. - * @param comparator the criteria for vector element comparison. - * @param the vector type. - */ - static void insertionSort( - V vector, int startIdx, int endIdx, VectorValueComparator comparator, V buffer) { - comparator.attachVectors(vector, buffer); - for (int i = startIdx; i <= endIdx; i++) { - buffer.copyFrom(i, 0, vector); - int j = i - 1; - while (j >= startIdx && comparator.compare(j, 0) > 0) { - vector.copyFrom(j, j + 1, vector); - j = j - 1; - } - vector.copyFrom(0, j + 1, buffer); - } - } - - /** - * Sorts the range of vector indices by insertion sort. - * - * @param indices the vector indices. - * @param startIdx the start index of the range (inclusive). - * @param endIdx the end index of the range (inclusive). - * @param comparator the criteria for vector element comparison. - * @param the vector type. 
- */ - static void insertionSort( - IntVector indices, int startIdx, int endIdx, VectorValueComparator comparator) { - for (int i = startIdx; i <= endIdx; i++) { - int key = indices.get(i); - int j = i - 1; - while (j >= startIdx && comparator.compare(indices.get(j), key) > 0) { - indices.set(j + 1, indices.get(j)); - j = j - 1; - } - indices.set(j + 1, key); - } - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java deleted file mode 100644 index ccb7bea4e2bd3..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.IntVector; - -/** An off heap implementation of stack with int elements. 
*/ -class OffHeapIntStack implements AutoCloseable { - - private static final int INIT_SIZE = 128; - - private IntVector intVector; - - private int top = 0; - - public OffHeapIntStack(BufferAllocator allocator) { - intVector = new IntVector("int stack inner vector", allocator); - intVector.allocateNew(INIT_SIZE); - intVector.setValueCount(INIT_SIZE); - } - - public void push(int value) { - if (top == intVector.getValueCount()) { - int targetCapacity = intVector.getValueCount() * 2; - while (intVector.getValueCapacity() < targetCapacity) { - intVector.reAlloc(); - } - intVector.setValueCount(targetCapacity); - } - - intVector.set(top++, value); - } - - public int pop() { - return intVector.get(--top); - } - - public int getTop() { - return intVector.get(top - 1); - } - - public boolean isEmpty() { - return top == 0; - } - - public int getCount() { - return top; - } - - @Override - public void close() { - intVector.close(); - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java deleted file mode 100644 index b18e9b35d0895..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import org.apache.arrow.vector.ValueVector; - -/** - * Basic interface for sorting a vector out-of-place. That is, the sorting is performed on a - * newly-created vector, and the original vector is not modified. - * - * @param the vector type. - */ -public interface OutOfPlaceVectorSorter { - - /** - * Sort a vector out-of-place. - * - * @param inVec the input vector. - * @param outVec the output vector, which has the same size as the input vector. - * @param comparator the criteria for sort. - */ - void sortOutOfPlace(V inVec, V outVec, VectorValueComparator comparator); -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java deleted file mode 100644 index 3fcfa5f8f215c..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.ValueVector; - -/** - * Stable sorter. It compares values like ordinary comparators. However, when values are equal, it - * breaks ties by the value indices. Therefore, sort algorithms using this comparator always produce - * stable sort results. - * - * @param type of the vector. - */ -public class StableVectorComparator extends VectorValueComparator { - - private final VectorValueComparator innerComparator; - - /** - * Constructs a stable comparator from a given comparator. - * - * @param innerComparator the comparator to convert to stable comparator.. - */ - public StableVectorComparator(VectorValueComparator innerComparator) { - this.innerComparator = innerComparator; - } - - @Override - public void attachVector(V vector) { - super.attachVector(vector); - innerComparator.attachVector(vector); - } - - @Override - public void attachVectors(V vector1, V vector2) { - Preconditions.checkArgument( - vector1 == vector2, - "Stable comparator only supports comparing values from the same vector"); - super.attachVectors(vector1, vector2); - innerComparator.attachVectors(vector1, vector2); - } - - @Override - public int compareNotNull(int index1, int index2) { - int result = innerComparator.compare(index1, index2); - return result != 0 ? 
result : index1 - index2; - } - - @Override - public VectorValueComparator createNew() { - return new StableVectorComparator(innerComparator.createNew()); - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java deleted file mode 100644 index a3aca83441d2f..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.util.MemoryUtil; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.IntVector; - -/** - * Default sorter for variable-width vectors. It is an out-of-place sort, with time complexity - * O(n*log(n)). - * - * @param vector type. 
- */ -public class VariableWidthOutOfPlaceVectorSorter - implements OutOfPlaceVectorSorter { - - protected IndexSorter indexSorter = new IndexSorter<>(); - - @Override - public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) { - comparator.attachVector(srcVector); - - // buffers referenced in the sort - ArrowBuf srcValueBuffer = srcVector.getDataBuffer(); - ArrowBuf srcOffsetBuffer = srcVector.getOffsetBuffer(); - ArrowBuf dstValidityBuffer = dstVector.getValidityBuffer(); - ArrowBuf dstValueBuffer = dstVector.getDataBuffer(); - ArrowBuf dstOffsetBuffer = dstVector.getOffsetBuffer(); - - // check buffer size - Preconditions.checkArgument( - dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), - "Not enough capacity for the validity buffer of the dst vector. " - + "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 7) / 8, - dstValidityBuffer.capacity()); - Preconditions.checkArgument( - dstOffsetBuffer.capacity() - >= (srcVector.getValueCount() + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), - "Not enough capacity for the offset buffer of the dst vector. " - + "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, - dstOffsetBuffer.capacity()); - long dataSize = - srcVector - .getOffsetBuffer() - .getInt(srcVector.getValueCount() * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); - Preconditions.checkArgument( - dstValueBuffer.capacity() >= dataSize, - "No enough capacity for the data buffer of the dst vector. 
" - + "Expected capacity %s, actual capacity %s", - dataSize, - dstValueBuffer.capacity()); - - // sort value indices - try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { - sortedIndices.allocateNew(srcVector.getValueCount()); - sortedIndices.setValueCount(srcVector.getValueCount()); - indexSorter.sort(srcVector, sortedIndices, comparator); - - int dstOffset = 0; - dstOffsetBuffer.setInt(0, 0); - - // copy sorted values to the output vector - for (int dstIndex = 0; dstIndex < sortedIndices.getValueCount(); dstIndex++) { - int srcIndex = sortedIndices.get(dstIndex); - if (srcVector.isNull(srcIndex)) { - BitVectorHelper.unsetBit(dstValidityBuffer, dstIndex); - } else { - BitVectorHelper.setBit(dstValidityBuffer, dstIndex); - int srcOffset = - srcOffsetBuffer.getInt(srcIndex * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); - int valueLength = - srcOffsetBuffer.getInt((srcIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH)) - - srcOffset; - MemoryUtil.copyMemory( - srcValueBuffer.memoryAddress() + srcOffset, - dstValueBuffer.memoryAddress() + dstOffset, - valueLength); - dstOffset += valueLength; - } - dstOffsetBuffer.setInt( - (dstIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), dstOffset); - } - } - } -} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java deleted file mode 100644 index 0472f04109b1c..0000000000000 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import org.apache.arrow.vector.ValueVector; - -/** - * Compare two values at the given indices in the vectors. This is used for vector sorting. - * - * @param type of the vector. - */ -public abstract class VectorValueComparator { - - /** The first vector to compare. */ - protected V vector1; - - /** The second vector to compare. */ - protected V vector2; - - /** Width of the vector value. For variable-length vectors, this value makes no sense. */ - protected int valueWidth; - - private boolean checkNullsOnCompare = true; - - /** - * This value is true by default and re-computed when vectors are attached to the comparator. If - * both vectors cannot contain nulls then this value is {@code false} and calls to {@code - * compare(i1, i2)} are short-circuited to {@code compareNotNull(i1, i2)} thereby speeding up - * comparisons resulting in faster sorts etc. - */ - public boolean checkNullsOnCompare() { - return this.checkNullsOnCompare; - } - - /** Constructor for variable-width vectors. */ - protected VectorValueComparator() {} - - /** - * Constructor for fixed-width vectors. - * - * @param valueWidth the record width (in bytes). 
- */ - protected VectorValueComparator(int valueWidth) { - this.valueWidth = valueWidth; - } - - public int getValueWidth() { - return valueWidth; - } - - /** - * Attach both vectors to compare to the same input vector. - * - * @param vector the vector to attach. - */ - public void attachVector(V vector) { - attachVectors(vector, vector); - } - - /** - * Attach vectors to compare. - * - * @param vector1 the first vector to compare. - * @param vector2 the second vector to compare. - */ - public void attachVectors(V vector1, V vector2) { - this.vector1 = vector1; - this.vector2 = vector2; - - final boolean v1MayHaveNulls = mayHaveNulls(vector1); - final boolean v2MayHaveNulls = mayHaveNulls(vector2); - - this.checkNullsOnCompare = v1MayHaveNulls || v2MayHaveNulls; - } - - private boolean mayHaveNulls(V v) { - if (v.getValueCount() == 0) { - return true; - } - if (!v.getField().isNullable()) { - return false; - } - return v.getNullCount() > 0; - } - - /** - * Compare two values, given their indices. - * - * @param index1 index of the first value to compare. - * @param index2 index of the second value to compare. - * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if - * the first value is smaller; or 0, if both values are equal. - */ - public int compare(int index1, int index2) { - if (checkNullsOnCompare) { - boolean isNull1 = vector1.isNull(index1); - boolean isNull2 = vector2.isNull(index2); - - if (isNull1 || isNull2) { - if (isNull1 && isNull2) { - return 0; - } else if (isNull1) { - // null is smaller - return -1; - } else { - return 1; - } - } - } - return compareNotNull(index1, index2); - } - - /** - * Compare two values, given their indices. This is a fast path for comparing non-null values, so - * the caller must make sure that values at both indices are not null. - * - * @param index1 index of the first value to compare. - * @param index2 index of the second value to compare. 
- * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if - * the first value is smaller; or 0, if both values are equal. - */ - public abstract int compareNotNull(int index1, int index2); - - /** - * Creates a comparator of the same type. - * - * @return the newly created comparator. - */ - public abstract VectorValueComparator createNew(); -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java deleted file mode 100644 index 49a719bafa237..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.deduplicate; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.nio.charset.StandardCharsets; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.util.DataSizeRoundingUtil; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link DeduplicationUtils}. */ -public class TestDeduplicationUtils { - - private static final int VECTOR_LENGTH = 100; - - private static final int REPETITION_COUNT = 3; - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testDeduplicateFixedWidth() { - try (IntVector origVec = new IntVector("original vec", allocator); - IntVector dedupVec = new IntVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - ArrowBuf distinctBuf = - allocator.buffer( - DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { - origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT); - origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); - lengthVec.allocateNew(); - - // prepare data - for (int i = 0; i < VECTOR_LENGTH; i++) { - for (int j = 0; j < REPETITION_COUNT; j++) { - origVec.set(i * REPETITION_COUNT + j, i); - } - } - - DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf); - assertEquals( - VECTOR_LENGTH, - VECTOR_LENGTH * REPETITION_COUNT - - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); - - 
DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec); - assertEquals(VECTOR_LENGTH, dedupVec.getValueCount()); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertEquals(i, dedupVec.get(i)); - } - - DeduplicationUtils.populateRunLengths( - distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); - assertEquals(VECTOR_LENGTH, lengthVec.getValueCount()); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertEquals(REPETITION_COUNT, lengthVec.get(i)); - } - } - } - - @Test - public void testDeduplicateVariableWidth() { - try (VarCharVector origVec = new VarCharVector("original vec", allocator); - VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - ArrowBuf distinctBuf = - allocator.buffer( - DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { - origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); - origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); - lengthVec.allocateNew(); - - // prepare data - for (int i = 0; i < VECTOR_LENGTH; i++) { - String str = String.valueOf(i * i); - for (int j = 0; j < REPETITION_COUNT; j++) { - origVec.set(i * REPETITION_COUNT + j, str.getBytes(StandardCharsets.UTF_8)); - } - } - - DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf); - assertEquals( - VECTOR_LENGTH, - VECTOR_LENGTH * REPETITION_COUNT - - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); - - DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec); - assertEquals(VECTOR_LENGTH, dedupVec.getValueCount()); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(i * i).getBytes(StandardCharsets.UTF_8), dedupVec.get(i)); - } - - DeduplicationUtils.populateRunLengths( - distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); - assertEquals(VECTOR_LENGTH, lengthVec.getValueCount()); - - for (int i = 0; i < 
VECTOR_LENGTH; i++) { - assertEquals(REPETITION_COUNT, lengthVec.get(i)); - } - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java deleted file mode 100644 index 7fd816ee9f9ed..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.deduplicate; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.nio.charset.StandardCharsets; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link VectorRunDeduplicator}. 
*/ -public class TestVectorRunDeduplicator { - - private static final int VECTOR_LENGTH = 100; - - private static final int REPETITION_COUNT = 3; - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testDeduplicateFixedWidth() { - try (IntVector origVec = new IntVector("original vec", allocator); - IntVector dedupVec = new IntVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - VectorRunDeduplicator deduplicator = - new VectorRunDeduplicator<>(origVec, allocator)) { - origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT); - origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); - lengthVec.allocateNew(); - - // prepare data - for (int i = 0; i < VECTOR_LENGTH; i++) { - for (int j = 0; j < REPETITION_COUNT; j++) { - origVec.set(i * REPETITION_COUNT + j, i); - } - } - - int distinctCount = deduplicator.getRunCount(); - assertEquals(VECTOR_LENGTH, distinctCount); - - dedupVec.allocateNew(distinctCount); - - deduplicator.populateDeduplicatedValues(dedupVec); - assertEquals(VECTOR_LENGTH, dedupVec.getValueCount()); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertEquals(i, dedupVec.get(i)); - } - - deduplicator.populateRunLengths(lengthVec); - assertEquals(VECTOR_LENGTH, lengthVec.getValueCount()); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertEquals(REPETITION_COUNT, lengthVec.get(i)); - } - } - } - - @Test - public void testDeduplicateVariableWidth() { - try (VarCharVector origVec = new VarCharVector("original vec", allocator); - VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - VectorRunDeduplicator deduplicator = - new VectorRunDeduplicator<>(origVec, allocator)) { - origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * 
REPETITION_COUNT); - origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); - lengthVec.allocateNew(); - - // prepare data - for (int i = 0; i < VECTOR_LENGTH; i++) { - String str = String.valueOf(i * i); - for (int j = 0; j < REPETITION_COUNT; j++) { - origVec.set(i * REPETITION_COUNT + j, str.getBytes(StandardCharsets.UTF_8)); - } - } - - int distinctCount = deduplicator.getRunCount(); - assertEquals(VECTOR_LENGTH, distinctCount); - - dedupVec.allocateNew(distinctCount * 10L, distinctCount); - - deduplicator.populateDeduplicatedValues(dedupVec); - assertEquals(VECTOR_LENGTH, dedupVec.getValueCount()); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(i * i).getBytes(StandardCharsets.UTF_8), dedupVec.get(i)); - } - - deduplicator.populateRunLengths(lengthVec); - assertEquals(VECTOR_LENGTH, lengthVec.getValueCount()); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertEquals(REPETITION_COUNT, lengthVec.get(i)); - } - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java deleted file mode 100644 index 6e438c7ee2179..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.dictionary; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Objects; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link HashTableBasedDictionaryBuilder}. 
*/ -public class TestHashTableBasedDictionaryBuilder { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testBuildVariableWidthDictionaryWithNull() { - try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator)) { - - vec.allocateNew(100, 10); - vec.setValueCount(10); - - dictionary.allocateNew(); - - // fill data - vec.set(0, "hello".getBytes(StandardCharsets.UTF_8)); - vec.set(1, "abc".getBytes(StandardCharsets.UTF_8)); - vec.setNull(2); - vec.set(3, "world".getBytes(StandardCharsets.UTF_8)); - vec.set(4, "12".getBytes(StandardCharsets.UTF_8)); - vec.set(5, "dictionary".getBytes(StandardCharsets.UTF_8)); - vec.setNull(6); - vec.set(7, "hello".getBytes(StandardCharsets.UTF_8)); - vec.set(8, "good".getBytes(StandardCharsets.UTF_8)); - vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); - - HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, true); - - int result = dictionaryBuilder.addValues(vec); - - assertEquals(7, result); - assertEquals(7, dictionary.getValueCount()); - - assertEquals( - "hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals( - "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); - assertNull(dictionary.get(2)); - assertEquals( - "world", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals( - "12", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals( - "dictionary", - new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); - assertEquals( - "good", new String(Objects.requireNonNull(dictionary.get(6)), StandardCharsets.UTF_8)); - } - } - - @Test - public void 
testBuildVariableWidthDictionaryWithoutNull() { - try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator)) { - - vec.allocateNew(100, 10); - vec.setValueCount(10); - - dictionary.allocateNew(); - - // fill data - vec.set(0, "hello".getBytes(StandardCharsets.UTF_8)); - vec.set(1, "abc".getBytes(StandardCharsets.UTF_8)); - vec.setNull(2); - vec.set(3, "world".getBytes(StandardCharsets.UTF_8)); - vec.set(4, "12".getBytes(StandardCharsets.UTF_8)); - vec.set(5, "dictionary".getBytes(StandardCharsets.UTF_8)); - vec.setNull(6); - vec.set(7, "hello".getBytes(StandardCharsets.UTF_8)); - vec.set(8, "good".getBytes(StandardCharsets.UTF_8)); - vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); - - HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, false); - - int result = dictionaryBuilder.addValues(vec); - - assertEquals(6, result); - assertEquals(6, dictionary.getValueCount()); - - assertEquals( - "hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals( - "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals( - "world", new String(Objects.requireNonNull(dictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals( - "12", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals( - "dictionary", - new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals( - "good", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); - } - } - - @Test - public void testBuildFixedWidthDictionaryWithNull() { - try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - dictionary.allocateNew(); - - // fill data - vec.set(0, 4); - vec.set(1, 8); - vec.set(2, 32); - vec.set(3, 8); - 
vec.set(4, 16); - vec.set(5, 32); - vec.setNull(6); - vec.set(7, 4); - vec.set(8, 4); - vec.setNull(9); - - HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, true); - - int result = dictionaryBuilder.addValues(vec); - - assertEquals(5, result); - assertEquals(5, dictionary.getValueCount()); - - assertEquals(4, dictionary.get(0)); - assertEquals(8, dictionary.get(1)); - assertEquals(32, dictionary.get(2)); - assertEquals(16, dictionary.get(3)); - assertTrue(dictionary.isNull(4)); - } - } - - @Test - public void testBuildFixedWidthDictionaryWithoutNull() { - try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - dictionary.allocateNew(); - - // fill data - vec.set(0, 4); - vec.set(1, 8); - vec.set(2, 32); - vec.set(3, 8); - vec.set(4, 16); - vec.set(5, 32); - vec.setNull(6); - vec.set(7, 4); - vec.set(8, 4); - vec.setNull(9); - - HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, false); - - int result = dictionaryBuilder.addValues(vec); - - assertEquals(4, result); - assertEquals(4, dictionary.getValueCount()); - - assertEquals(4, dictionary.get(0)); - assertEquals(8, dictionary.get(1)); - assertEquals(32, dictionary.get(2)); - assertEquals(16, dictionary.get(3)); - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java deleted file mode 100644 index 0f5ea463bd178..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.dictionary; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Random; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryEncoder; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link HashTableDictionaryEncoder}. 
*/ -public class TestHashTableDictionaryEncoder { - - private final int VECTOR_LENGTH = 50; - - private final int DICTIONARY_LENGTH = 10; - - private BufferAllocator allocator; - - byte[] zero = "000".getBytes(StandardCharsets.UTF_8); - byte[] one = "111".getBytes(StandardCharsets.UTF_8); - byte[] two = "222".getBytes(StandardCharsets.UTF_8); - - byte[][] data = new byte[][] {zero, one, two}; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testEncodeAndDecode() { - Random random = new Random(); - try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { - - // set up dictionary - dictionary.allocateNew(); - for (int i = 0; i < DICTIONARY_LENGTH; i++) { - // encode "i" as i - dictionary.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); - } - dictionary.setValueCount(DICTIONARY_LENGTH); - - // set up raw vector - rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH); - for (int i = 0; i < VECTOR_LENGTH; i++) { - int val = (random.nextInt() & Integer.MAX_VALUE) % DICTIONARY_LENGTH; - rawVector.set(i, String.valueOf(val).getBytes(StandardCharsets.UTF_8)); - } - rawVector.setValueCount(VECTOR_LENGTH); - - HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, false); - - // perform encoding - encodedVector.allocateNew(); - encoder.encode(rawVector, encodedVector); - - // verify encoding results - assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals( - rawVector.get(i), - String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); - } - - // perform decoding - Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, 
false, null)); - try (VarCharVector decodedVector = - (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { - - // verify decoding results - assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals( - String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), - decodedVector.get(i)); - } - } - } - } - - @Test - public void testEncodeAndDecodeWithNull() { - Random random = new Random(); - try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { - - // set up dictionary - dictionary.allocateNew(); - dictionary.setNull(0); - for (int i = 1; i < DICTIONARY_LENGTH; i++) { - // encode "i" as i - dictionary.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); - } - dictionary.setValueCount(DICTIONARY_LENGTH); - - // set up raw vector - rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH); - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 10 == 0) { - rawVector.setNull(i); - } else { - int val = (random.nextInt() & Integer.MAX_VALUE) % (DICTIONARY_LENGTH - 1) + 1; - rawVector.set(i, String.valueOf(val).getBytes(StandardCharsets.UTF_8)); - } - } - rawVector.setValueCount(VECTOR_LENGTH); - - HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, true); - - // perform encoding - encodedVector.allocateNew(); - encoder.encode(rawVector, encodedVector); - - // verify encoding results - assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 10 == 0) { - assertEquals(0, encodedVector.get(i)); - } else { - assertArrayEquals( - rawVector.get(i), - String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); - } - } - - // perform decoding - Dictionary dict = new Dictionary(dictionary, 
new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = - (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { - // verify decoding results - assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 10 == 0) { - assertTrue(decodedVector.isNull(i)); - } else { - assertArrayEquals( - String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), - decodedVector.get(i)); - } - } - } - } - } - - @Test - public void testEncodeNullWithoutNullInDictionary() { - try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { - - // set up dictionary, with no null in it. - dictionary.allocateNew(); - for (int i = 0; i < DICTIONARY_LENGTH; i++) { - // encode "i" as i - dictionary.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); - } - dictionary.setValueCount(DICTIONARY_LENGTH); - - // the vector to encode has a null inside. - rawVector.allocateNew(1); - rawVector.setNull(0); - rawVector.setValueCount(1); - - encodedVector.allocateNew(); - - HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, true); - - // the encoder should encode null, but no null in the dictionary, - // so an exception should be thrown. 
- assertThrows( - IllegalArgumentException.class, - () -> { - encoder.encode(rawVector, encodedVector); - }); - } - } - - @Test - public void testEncodeStrings() { - // Create a new value vector - try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { - - vector.allocateNew(512, 5); - encoded.allocateNew(); - - // set some values - vector.setSafe(0, zero, 0, zero.length); - vector.setSafe(1, one, 0, one.length); - vector.setSafe(2, one, 0, one.length); - vector.setSafe(3, two, 0, two.length); - vector.setSafe(4, zero, 0, zero.length); - vector.setValueCount(5); - - // set some dictionary values - dictionaryVector.allocateNew(512, 3); - dictionaryVector.setSafe(0, zero, 0, one.length); - dictionaryVector.setSafe(1, one, 0, two.length); - dictionaryVector.setSafe(2, two, 0, zero.length); - dictionaryVector.setValueCount(3); - - HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); - encoder.encode(vector, encoded); - - // verify indices - assertEquals(5, encoded.getValueCount()); - assertEquals(0, encoded.get(0)); - assertEquals(1, encoded.get(1)); - assertEquals(1, encoded.get(2)); - assertEquals(2, encoded.get(3)); - assertEquals(0, encoded.get(4)); - - // now run through the decoder and verify we get the original back - Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) { - - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - - @Test - public void testEncodeLargeVector() { - // Create a new value vector - try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new 
IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { - vector.allocateNew(); - encoded.allocateNew(); - - int count = 10000; - - for (int i = 0; i < 10000; ++i) { - vector.setSafe(i, data[i % 3], 0, data[i % 3].length); - } - vector.setValueCount(count); - - dictionaryVector.allocateNew(512, 3); - dictionaryVector.setSafe(0, zero, 0, one.length); - dictionaryVector.setSafe(1, one, 0, two.length); - dictionaryVector.setSafe(2, two, 0, zero.length); - dictionaryVector.setValueCount(3); - - HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); - encoder.encode(vector, encoded); - - assertEquals(count, encoded.getValueCount()); - for (int i = 0; i < count; ++i) { - assertEquals(i % 3, encoded.get(i)); - } - - // now run through the decoder and verify we get the original back - Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < count; ++i) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - - @Test - public void testEncodeBinaryVector() { - // Create a new value vector - try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { - vector.allocateNew(512, 5); - vector.allocateNew(); - encoded.allocateNew(); - - // set some values - vector.setSafe(0, zero, 0, zero.length); - vector.setSafe(1, one, 0, one.length); - vector.setSafe(2, one, 0, one.length); - vector.setSafe(3, two, 0, two.length); - vector.setSafe(4, zero, 0, zero.length); - vector.setValueCount(5); - - // set some dictionary values - 
dictionaryVector.allocateNew(512, 3); - dictionaryVector.setSafe(0, zero, 0, one.length); - dictionaryVector.setSafe(1, one, 0, two.length); - dictionaryVector.setSafe(2, two, 0, zero.length); - dictionaryVector.setValueCount(3); - - HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); - encoder.encode(vector, encoded); - - assertEquals(5, encoded.getValueCount()); - assertEquals(0, encoded.get(0)); - assertEquals(1, encoded.get(1)); - assertEquals(1, encoded.get(2)); - assertEquals(2, encoded.get(3)); - assertEquals(0, encoded.get(4)); - - // now run through the decoder and verify we get the original back - Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - try (VarBinaryVector decoded = (VarBinaryVector) DictionaryEncoder.decode(encoded, dict)) { - - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertTrue(Arrays.equals(vector.getObject(i), decoded.getObject(i))); - } - } - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java deleted file mode 100644 index 9a8957ddbd089..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.dictionary; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Random; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryEncoder; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link LinearDictionaryEncoder}. 
*/ -public class TestLinearDictionaryEncoder { - - private final int VECTOR_LENGTH = 50; - - private final int DICTIONARY_LENGTH = 10; - - private BufferAllocator allocator; - - byte[] zero = "000".getBytes(StandardCharsets.UTF_8); - byte[] one = "111".getBytes(StandardCharsets.UTF_8); - byte[] two = "222".getBytes(StandardCharsets.UTF_8); - - byte[][] data = new byte[][] {zero, one, two}; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testEncodeAndDecode() { - Random random = new Random(); - try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { - - // set up dictionary - dictionary.allocateNew(); - for (int i = 0; i < DICTIONARY_LENGTH; i++) { - // encode "i" as i - dictionary.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); - } - dictionary.setValueCount(DICTIONARY_LENGTH); - - // set up raw vector - rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH); - for (int i = 0; i < VECTOR_LENGTH; i++) { - int val = (random.nextInt() & Integer.MAX_VALUE) % DICTIONARY_LENGTH; - rawVector.set(i, String.valueOf(val).getBytes(StandardCharsets.UTF_8)); - } - rawVector.setValueCount(VECTOR_LENGTH); - - LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, false); - - // perform encoding - encodedVector.allocateNew(); - encoder.encode(rawVector, encodedVector); - - // verify encoding results - assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals( - rawVector.get(i), - String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); - } - - // perform decoding - Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, 
null)); - try (VarCharVector decodedVector = - (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { - - // verify decoding results - assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals( - String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), - decodedVector.get(i)); - } - } - } - } - - @Test - public void testEncodeAndDecodeWithNull() { - Random random = new Random(); - try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { - - // set up dictionary - dictionary.allocateNew(); - dictionary.setNull(0); - for (int i = 1; i < DICTIONARY_LENGTH; i++) { - // encode "i" as i - dictionary.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); - } - dictionary.setValueCount(DICTIONARY_LENGTH); - - // set up raw vector - rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH); - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 10 == 0) { - rawVector.setNull(i); - } else { - int val = (random.nextInt() & Integer.MAX_VALUE) % (DICTIONARY_LENGTH - 1) + 1; - rawVector.set(i, String.valueOf(val).getBytes(StandardCharsets.UTF_8)); - } - } - rawVector.setValueCount(VECTOR_LENGTH); - - LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, true); - - // perform encoding - encodedVector.allocateNew(); - encoder.encode(rawVector, encodedVector); - - // verify encoding results - assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 10 == 0) { - assertEquals(0, encodedVector.get(i)); - } else { - assertArrayEquals( - rawVector.get(i), - String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); - } - } - - // perform decoding - Dictionary dict = new Dictionary(dictionary, new 
DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = - (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { - - // verify decoding results - assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 10 == 0) { - assertTrue(decodedVector.isNull(i)); - } else { - assertArrayEquals( - String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), - decodedVector.get(i)); - } - } - } - } - } - - @Test - public void testEncodeNullWithoutNullInDictionary() { - try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { - - // set up dictionary, with no null in it. - dictionary.allocateNew(); - for (int i = 0; i < DICTIONARY_LENGTH; i++) { - // encode "i" as i - dictionary.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); - } - dictionary.setValueCount(DICTIONARY_LENGTH); - - // the vector to encode has a null inside. - rawVector.allocateNew(1); - rawVector.setNull(0); - rawVector.setValueCount(1); - - encodedVector.allocateNew(); - - LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, true); - - // the encoder should encode null, but no null in the dictionary, - // so an exception should be thrown. 
- assertThrows( - IllegalArgumentException.class, - () -> { - encoder.encode(rawVector, encodedVector); - }); - } - } - - @Test - public void testEncodeStrings() { - // Create a new value vector - try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { - - vector.allocateNew(512, 5); - encoded.allocateNew(); - - // set some values - vector.setSafe(0, zero, 0, zero.length); - vector.setSafe(1, one, 0, one.length); - vector.setSafe(2, one, 0, one.length); - vector.setSafe(3, two, 0, two.length); - vector.setSafe(4, zero, 0, zero.length); - vector.setValueCount(5); - - // set some dictionary values - dictionaryVector.allocateNew(512, 3); - dictionaryVector.setSafe(0, zero, 0, one.length); - dictionaryVector.setSafe(1, one, 0, two.length); - dictionaryVector.setSafe(2, two, 0, zero.length); - dictionaryVector.setValueCount(3); - - LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); - encoder.encode(vector, encoded); - - // verify indices - assertEquals(5, encoded.getValueCount()); - assertEquals(0, encoded.get(0)); - assertEquals(1, encoded.get(1)); - assertEquals(1, encoded.get(2)); - assertEquals(2, encoded.get(3)); - assertEquals(0, encoded.get(4)); - - // now run through the decoder and verify we get the original back - Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) { - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - - @Test - public void testEncodeLargeVector() { - // Create a new value vector - try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", 
allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { - vector.allocateNew(); - encoded.allocateNew(); - - int count = 10000; - - for (int i = 0; i < 10000; ++i) { - vector.setSafe(i, data[i % 3], 0, data[i % 3].length); - } - vector.setValueCount(count); - - dictionaryVector.allocateNew(512, 3); - dictionaryVector.setSafe(0, zero, 0, one.length); - dictionaryVector.setSafe(1, one, 0, two.length); - dictionaryVector.setSafe(2, two, 0, zero.length); - dictionaryVector.setValueCount(3); - - LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); - encoder.encode(vector, encoded); - - assertEquals(count, encoded.getValueCount()); - for (int i = 0; i < count; ++i) { - assertEquals(i % 3, encoded.get(i)); - } - - // now run through the decoder and verify we get the original back - Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < count; ++i) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - - @Test - public void testEncodeBinaryVector() { - // Create a new value vector - try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { - vector.allocateNew(512, 5); - vector.allocateNew(); - encoded.allocateNew(); - - // set some values - vector.setSafe(0, zero, 0, zero.length); - vector.setSafe(1, one, 0, one.length); - vector.setSafe(2, one, 0, one.length); - vector.setSafe(3, two, 0, two.length); - vector.setSafe(4, zero, 0, zero.length); - vector.setValueCount(5); - - // set some dictionary values - dictionaryVector.allocateNew(512, 3); - 
dictionaryVector.setSafe(0, zero, 0, one.length); - dictionaryVector.setSafe(1, one, 0, two.length); - dictionaryVector.setSafe(2, two, 0, zero.length); - dictionaryVector.setValueCount(3); - - LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); - encoder.encode(vector, encoded); - - assertEquals(5, encoded.getValueCount()); - assertEquals(0, encoded.get(0)); - assertEquals(1, encoded.get(1)); - assertEquals(1, encoded.get(2)); - assertEquals(2, encoded.get(3)); - assertEquals(0, encoded.get(4)); - - // now run through the decoder and verify we get the original back - Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - try (VarBinaryVector decoded = (VarBinaryVector) DictionaryEncoder.decode(encoded, dict)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertTrue(Arrays.equals(vector.getObject(i), decoded.getObject(i))); - } - } - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java deleted file mode 100644 index 063c740a12a87..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java +++ /dev/null @@ -1,364 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.dictionary; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Random; -import org.apache.arrow.algorithm.sort.DefaultVectorComparators; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryEncoder; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link SearchDictionaryEncoder}. 
*/ -public class TestSearchDictionaryEncoder { - - private final int VECTOR_LENGTH = 50; - - private final int DICTIONARY_LENGTH = 10; - - private BufferAllocator allocator; - - byte[] zero = "000".getBytes(StandardCharsets.UTF_8); - byte[] one = "111".getBytes(StandardCharsets.UTF_8); - byte[] two = "222".getBytes(StandardCharsets.UTF_8); - - byte[][] data = new byte[][] {zero, one, two}; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testEncodeAndDecode() { - Random random = new Random(); - try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { - - // set up dictionary - dictionary.allocateNew(); - for (int i = 0; i < DICTIONARY_LENGTH; i++) { - // encode "i" as i - dictionary.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); - } - dictionary.setValueCount(DICTIONARY_LENGTH); - - // set up raw vector - rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH); - for (int i = 0; i < VECTOR_LENGTH; i++) { - int val = (random.nextInt() & Integer.MAX_VALUE) % DICTIONARY_LENGTH; - rawVector.set(i, String.valueOf(val).getBytes(StandardCharsets.UTF_8)); - } - rawVector.setValueCount(VECTOR_LENGTH); - - SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false); - - // perform encoding - encodedVector.allocateNew(); - encoder.encode(rawVector, encodedVector); - - // verify encoding results - assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals( - rawVector.get(i), - String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); - } - - // perform decoding - Dictionary dict = 
new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = - (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { - - // verify decoding results - assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals( - String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), - decodedVector.get(i)); - } - } - } - } - - @Test - public void testEncodeAndDecodeWithNull() { - Random random = new Random(); - try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { - - // set up dictionary - dictionary.allocateNew(); - dictionary.setNull(0); - for (int i = 1; i < DICTIONARY_LENGTH; i++) { - // encode "i" as i - dictionary.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); - } - dictionary.setValueCount(DICTIONARY_LENGTH); - - // set up raw vector - rawVector.allocateNew(10 * VECTOR_LENGTH, VECTOR_LENGTH); - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 10 == 0) { - rawVector.setNull(i); - } else { - int val = (random.nextInt() & Integer.MAX_VALUE) % (DICTIONARY_LENGTH - 1) + 1; - rawVector.set(i, String.valueOf(val).getBytes(StandardCharsets.UTF_8)); - } - } - rawVector.setValueCount(VECTOR_LENGTH); - - SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); - - // perform encoding - encodedVector.allocateNew(); - encoder.encode(rawVector, encodedVector); - - // verify encoding results - assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 10 == 0) { - assertEquals(0, encodedVector.get(i)); - } else { - assertArrayEquals( - rawVector.get(i), - 
String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); - } - } - - // perform decoding - Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = - (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { - - // verify decoding results - assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 10 == 0) { - assertTrue(decodedVector.isNull(i)); - } else { - assertArrayEquals( - String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), - decodedVector.get(i)); - } - } - } - } - } - - @Test - public void testEncodeNullWithoutNullInDictionary() { - try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { - - // set up dictionary, with no null in it. - dictionary.allocateNew(); - for (int i = 0; i < DICTIONARY_LENGTH; i++) { - // encode "i" as i - dictionary.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); - } - dictionary.setValueCount(DICTIONARY_LENGTH); - - // the vector to encode has a null inside. - rawVector.allocateNew(1); - rawVector.setNull(0); - rawVector.setValueCount(1); - - encodedVector.allocateNew(); - - SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); - - // the encoder should encode null, but no null in the dictionary, - // so an exception should be thrown. 
- assertThrows( - IllegalArgumentException.class, - () -> { - encoder.encode(rawVector, encodedVector); - }); - } - } - - @Test - public void testEncodeStrings() { - // Create a new value vector - try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { - - vector.allocateNew(512, 5); - encoded.allocateNew(); - - // set some values - vector.setSafe(0, zero, 0, zero.length); - vector.setSafe(1, one, 0, one.length); - vector.setSafe(2, one, 0, one.length); - vector.setSafe(3, two, 0, two.length); - vector.setSafe(4, zero, 0, zero.length); - vector.setValueCount(5); - - // set some dictionary values - dictionaryVector.allocateNew(512, 3); - dictionaryVector.setSafe(0, zero, 0, one.length); - dictionaryVector.setSafe(1, one, 0, two.length); - dictionaryVector.setSafe(2, two, 0, zero.length); - dictionaryVector.setValueCount(3); - - SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); - encoder.encode(vector, encoded); - - // verify indices - assertEquals(5, encoded.getValueCount()); - assertEquals(0, encoded.get(0)); - assertEquals(1, encoded.get(1)); - assertEquals(1, encoded.get(2)); - assertEquals(2, encoded.get(3)); - assertEquals(0, encoded.get(4)); - - // now run through the decoder and verify we get the original back - Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) { - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - - @Test - public void testEncodeLargeVector() { - // Create a new value vector - try (final VarCharVector vector = new VarCharVector("foo", 
allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { - vector.allocateNew(); - encoded.allocateNew(); - - int count = 10000; - - for (int i = 0; i < 10000; ++i) { - vector.setSafe(i, data[i % 3], 0, data[i % 3].length); - } - vector.setValueCount(count); - - dictionaryVector.allocateNew(512, 3); - dictionaryVector.setSafe(0, zero, 0, one.length); - dictionaryVector.setSafe(1, one, 0, two.length); - dictionaryVector.setSafe(2, two, 0, zero.length); - dictionaryVector.setValueCount(3); - - SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); - encoder.encode(vector, encoded); - - assertEquals(count, encoded.getValueCount()); - for (int i = 0; i < count; ++i) { - assertEquals(i % 3, encoded.get(i)); - } - - // now run through the decoder and verify we get the original back - Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < count; ++i) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - - @Test - public void testEncodeBinaryVector() { - // Create a new value vector - try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { - vector.allocateNew(512, 5); - vector.allocateNew(); - encoded.allocateNew(); - - // set some values - vector.setSafe(0, zero, 0, zero.length); - vector.setSafe(1, one, 0, one.length); - vector.setSafe(2, one, 0, one.length); - vector.setSafe(3, two, 0, two.length); - vector.setSafe(4, zero, 
0, zero.length); - vector.setValueCount(5); - - // set some dictionary values - dictionaryVector.allocateNew(512, 3); - dictionaryVector.setSafe(0, zero, 0, one.length); - dictionaryVector.setSafe(1, one, 0, two.length); - dictionaryVector.setSafe(2, two, 0, zero.length); - dictionaryVector.setValueCount(3); - - SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); - encoder.encode(vector, encoded); - - assertEquals(5, encoded.getValueCount()); - assertEquals(0, encoded.get(0)); - assertEquals(1, encoded.get(1)); - assertEquals(1, encoded.get(2)); - assertEquals(2, encoded.get(3)); - assertEquals(0, encoded.get(4)); - - // now run through the decoder and verify we get the original back - Dictionary dict = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - try (VarBinaryVector decoded = (VarBinaryVector) DictionaryEncoder.decode(encoded, dict)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertTrue(Arrays.equals(vector.getObject(i), decoded.getObject(i))); - } - } - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java deleted file mode 100644 index 57e1de9497eac..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.dictionary; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Objects; -import org.apache.arrow.algorithm.sort.DefaultVectorComparators; -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link SearchTreeBasedDictionaryBuilder}. 
*/ -public class TestSearchTreeBasedDictionaryBuilder { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testBuildVariableWidthDictionaryWithNull() { - try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator); - VarCharVector sortedDictionary = new VarCharVector("", allocator)) { - - vec.allocateNew(100, 10); - vec.setValueCount(10); - - dictionary.allocateNew(); - sortedDictionary.allocateNew(); - - // fill data - vec.set(0, "hello".getBytes(StandardCharsets.UTF_8)); - vec.set(1, "abc".getBytes(StandardCharsets.UTF_8)); - vec.setNull(2); - vec.set(3, "world".getBytes(StandardCharsets.UTF_8)); - vec.set(4, "12".getBytes(StandardCharsets.UTF_8)); - vec.set(5, "dictionary".getBytes(StandardCharsets.UTF_8)); - vec.setNull(6); - vec.set(7, "hello".getBytes(StandardCharsets.UTF_8)); - vec.set(8, "good".getBytes(StandardCharsets.UTF_8)); - vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); - - int result = dictionaryBuilder.addValues(vec); - - assertEquals(7, result); - assertEquals(7, dictionary.getValueCount()); - - dictionaryBuilder.populateSortedDictionary(sortedDictionary); - - assertTrue(sortedDictionary.isNull(0)); - assertEquals( - "12", - new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals( - "abc", - new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals( - "dictionary", - new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals( - "good", - new 
String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals( - "hello", - new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); - assertEquals( - "world", - new String(Objects.requireNonNull(sortedDictionary.get(6)), StandardCharsets.UTF_8)); - } - } - - @Test - public void testBuildVariableWidthDictionaryWithoutNull() { - try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator); - VarCharVector sortedDictionary = new VarCharVector("", allocator)) { - - vec.allocateNew(100, 10); - vec.setValueCount(10); - - dictionary.allocateNew(); - sortedDictionary.allocateNew(); - - // fill data - vec.set(0, "hello".getBytes(StandardCharsets.UTF_8)); - vec.set(1, "abc".getBytes(StandardCharsets.UTF_8)); - vec.setNull(2); - vec.set(3, "world".getBytes(StandardCharsets.UTF_8)); - vec.set(4, "12".getBytes(StandardCharsets.UTF_8)); - vec.set(5, "dictionary".getBytes(StandardCharsets.UTF_8)); - vec.setNull(6); - vec.set(7, "hello".getBytes(StandardCharsets.UTF_8)); - vec.set(8, "good".getBytes(StandardCharsets.UTF_8)); - vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); - - int result = dictionaryBuilder.addValues(vec); - - assertEquals(6, result); - assertEquals(6, dictionary.getValueCount()); - - dictionaryBuilder.populateSortedDictionary(sortedDictionary); - - assertEquals( - "12", - new String(Objects.requireNonNull(sortedDictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals( - "abc", - new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals( - "dictionary", - new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals( - "good", - new 
String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals( - "hello", - new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals( - "world", - new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); - } - } - - @Test - public void testBuildFixedWidthDictionaryWithNull() { - try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator); - IntVector sortedDictionary = new IntVector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - dictionary.allocateNew(); - sortedDictionary.allocateNew(); - - // fill data - vec.set(0, 4); - vec.set(1, 8); - vec.set(2, 32); - vec.set(3, 8); - vec.set(4, 16); - vec.set(5, 32); - vec.setNull(6); - vec.set(7, 4); - vec.set(8, 4); - vec.setNull(9); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); - - int result = dictionaryBuilder.addValues(vec); - - assertEquals(5, result); - assertEquals(5, dictionary.getValueCount()); - - dictionaryBuilder.populateSortedDictionary(sortedDictionary); - - assertTrue(sortedDictionary.isNull(0)); - assertEquals(4, sortedDictionary.get(1)); - assertEquals(8, sortedDictionary.get(2)); - assertEquals(16, sortedDictionary.get(3)); - assertEquals(32, sortedDictionary.get(4)); - } - } - - @Test - public void testBuildFixedWidthDictionaryWithoutNull() { - try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator); - IntVector sortedDictionary = new IntVector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - dictionary.allocateNew(); - sortedDictionary.allocateNew(); - - // fill data - vec.set(0, 4); - vec.set(1, 8); - vec.set(2, 32); - vec.set(3, 8); - vec.set(4, 16); - vec.set(5, 32); - vec.setNull(6); 
- vec.set(7, 4); - vec.set(8, 4); - vec.setNull(9); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); - - int result = dictionaryBuilder.addValues(vec); - - assertEquals(4, result); - assertEquals(4, dictionary.getValueCount()); - - dictionaryBuilder.populateSortedDictionary(sortedDictionary); - - assertEquals(4, sortedDictionary.get(0)); - assertEquals(8, sortedDictionary.get(1)); - assertEquals(16, sortedDictionary.get(2)); - assertEquals(32, sortedDictionary.get(3)); - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java deleted file mode 100644 index e1b255f4f4d81..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.misc; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link PartialSumUtils}. */ -public class TestPartialSumUtils { - - private static final int PARTIAL_SUM_VECTOR_LENGTH = 101; - - private static final int DELTA_VECTOR_LENGTH = 100; - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testToPartialSumVector() { - try (IntVector delta = new IntVector("delta", allocator); - IntVector partialSum = new IntVector("partial sum", allocator)) { - delta.allocateNew(DELTA_VECTOR_LENGTH); - delta.setValueCount(DELTA_VECTOR_LENGTH); - - partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH); - - // populate delta vector - for (int i = 0; i < delta.getValueCount(); i++) { - delta.set(i, 3); - } - - final long sumBase = 10; - PartialSumUtils.toPartialSumVector(delta, partialSum, sumBase); - - // verify results - assertEquals(PARTIAL_SUM_VECTOR_LENGTH, partialSum.getValueCount()); - for (int i = 0; i < partialSum.getValueCount(); i++) { - assertEquals(i * 3L + sumBase, partialSum.get(i)); - } - } - } - - @Test - public void testToDeltaVector() { - try (IntVector partialSum = new IntVector("partial sum", allocator); - IntVector delta = new IntVector("delta", allocator)) { - partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH); - partialSum.setValueCount(PARTIAL_SUM_VECTOR_LENGTH); - - delta.allocateNew(DELTA_VECTOR_LENGTH); - - // populate delta vector - final int sumBase = 10; - for (int i = 0; i < partialSum.getValueCount(); i++) { - partialSum.set(i, sumBase + 3 * i); - } - - 
PartialSumUtils.toDeltaVector(partialSum, delta); - - // verify results - assertEquals(DELTA_VECTOR_LENGTH, delta.getValueCount()); - for (int i = 0; i < delta.getValueCount(); i++) { - assertEquals(3, delta.get(i)); - } - } - } - - @Test - public void testFindPositionInPartialSumVector() { - try (IntVector partialSum = new IntVector("partial sum", allocator)) { - partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH); - partialSum.setValueCount(PARTIAL_SUM_VECTOR_LENGTH); - - // populate delta vector - final int sumBase = 10; - for (int i = 0; i < partialSum.getValueCount(); i++) { - partialSum.set(i, sumBase + 3 * i); - } - - // search and verify results - for (int i = 0; i < PARTIAL_SUM_VECTOR_LENGTH - 1; i++) { - assertEquals( - i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1)); - } - } - } - - @Test - public void testFindPositionInPartialSumVectorNegative() { - try (IntVector partialSum = new IntVector("partial sum", allocator)) { - partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH); - partialSum.setValueCount(PARTIAL_SUM_VECTOR_LENGTH); - - // populate delta vector - final int sumBase = 10; - for (int i = 0; i < partialSum.getValueCount(); i++) { - partialSum.set(i, sumBase + 3 * i); - } - - // search and verify results - assertEquals(0, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase)); - assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase - 1)); - assertEquals( - -1, - PartialSumUtils.findPositionInPartialSumVector( - partialSum, sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1))); - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java deleted file mode 100644 index 0b70cfd297e03..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software 
Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.rank; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.nio.charset.StandardCharsets; -import org.apache.arrow.algorithm.sort.DefaultVectorComparators; -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}. 
*/ -public class TestVectorRank { - - private BufferAllocator allocator; - - private static final int VECTOR_LENGTH = 10; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testFixedWidthRank() { - VectorRank rank = new VectorRank<>(allocator); - try (IntVector vector = new IntVector("int vec", allocator)) { - vector.allocateNew(VECTOR_LENGTH); - vector.setValueCount(VECTOR_LENGTH); - - vector.set(0, 1); - vector.set(1, 5); - vector.set(2, 3); - vector.set(3, 7); - vector.set(4, 9); - vector.set(5, 8); - vector.set(6, 2); - vector.set(7, 0); - vector.set(8, 4); - vector.set(9, 6); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); - assertEquals(7, rank.indexAtRank(vector, comparator, 0)); - assertEquals(0, rank.indexAtRank(vector, comparator, 1)); - assertEquals(6, rank.indexAtRank(vector, comparator, 2)); - assertEquals(2, rank.indexAtRank(vector, comparator, 3)); - assertEquals(8, rank.indexAtRank(vector, comparator, 4)); - assertEquals(1, rank.indexAtRank(vector, comparator, 5)); - assertEquals(9, rank.indexAtRank(vector, comparator, 6)); - assertEquals(3, rank.indexAtRank(vector, comparator, 7)); - assertEquals(5, rank.indexAtRank(vector, comparator, 8)); - assertEquals(4, rank.indexAtRank(vector, comparator, 9)); - } - } - - @Test - public void testVariableWidthRank() { - VectorRank rank = new VectorRank<>(allocator); - try (VarCharVector vector = new VarCharVector("varchar vec", allocator)) { - vector.allocateNew(VECTOR_LENGTH * 5, VECTOR_LENGTH); - vector.setValueCount(VECTOR_LENGTH); - - vector.set(0, String.valueOf(1).getBytes(StandardCharsets.UTF_8)); - vector.set(1, String.valueOf(5).getBytes(StandardCharsets.UTF_8)); - vector.set(2, String.valueOf(3).getBytes(StandardCharsets.UTF_8)); - vector.set(3, String.valueOf(7).getBytes(StandardCharsets.UTF_8)); - vector.set(4, 
String.valueOf(9).getBytes(StandardCharsets.UTF_8)); - vector.set(5, String.valueOf(8).getBytes(StandardCharsets.UTF_8)); - vector.set(6, String.valueOf(2).getBytes(StandardCharsets.UTF_8)); - vector.set(7, String.valueOf(0).getBytes(StandardCharsets.UTF_8)); - vector.set(8, String.valueOf(4).getBytes(StandardCharsets.UTF_8)); - vector.set(9, String.valueOf(6).getBytes(StandardCharsets.UTF_8)); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); - - assertEquals(7, rank.indexAtRank(vector, comparator, 0)); - assertEquals(0, rank.indexAtRank(vector, comparator, 1)); - assertEquals(6, rank.indexAtRank(vector, comparator, 2)); - assertEquals(2, rank.indexAtRank(vector, comparator, 3)); - assertEquals(8, rank.indexAtRank(vector, comparator, 4)); - assertEquals(1, rank.indexAtRank(vector, comparator, 5)); - assertEquals(9, rank.indexAtRank(vector, comparator, 6)); - assertEquals(3, rank.indexAtRank(vector, comparator, 7)); - assertEquals(5, rank.indexAtRank(vector, comparator, 8)); - assertEquals(4, rank.indexAtRank(vector, comparator, 9)); - } - } - - @Test - public void testRankNegative() { - VectorRank rank = new VectorRank<>(allocator); - try (IntVector vector = new IntVector("int vec", allocator)) { - vector.allocateNew(VECTOR_LENGTH); - vector.setValueCount(VECTOR_LENGTH); - - vector.set(0, 1); - vector.set(1, 5); - vector.set(2, 3); - vector.set(3, 7); - vector.set(4, 9); - vector.set(5, 8); - vector.set(6, 2); - vector.set(7, 0); - vector.set(8, 4); - vector.set(9, 6); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); - - assertThrows( - IllegalArgumentException.class, - () -> { - rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1); - }); - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java deleted file mode 100644 
index 24a9a6ed694c7..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.search; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.stream.Stream; -import org.apache.arrow.algorithm.sort.DefaultVectorComparators; -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** Test cases for {@link ParallelSearcher}. 
*/ -public class TestParallelSearcher { - - private enum ComparatorType { - EqualityComparator, - OrderingComparator; - } - - private static final int VECTOR_LENGTH = 10000; - - private BufferAllocator allocator; - - private ExecutorService threadPool; - - public static Stream getComparatorName() { - List params = new ArrayList<>(); - int[] threadCounts = {1, 2, 5, 10, 20, 50}; - for (ComparatorType type : ComparatorType.values()) { - for (int count : threadCounts) { - params.add(Arguments.of(type, count)); - } - } - return params.stream(); - } - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - if (threadPool != null) { - threadPool.shutdown(); - } - } - - @ParameterizedTest - @MethodSource("getComparatorName") - public void testParallelIntSearch(ComparatorType comparatorType, int threadCount) - throws ExecutionException, InterruptedException { - threadPool = Executors.newFixedThreadPool(threadCount); - try (IntVector targetVector = new IntVector("targetVector", allocator); - IntVector keyVector = new IntVector("keyVector", allocator)) { - targetVector.allocateNew(VECTOR_LENGTH); - keyVector.allocateNew(VECTOR_LENGTH); - - // if we are comparing elements using equality semantics, we do not need a comparator here. - VectorValueComparator comparator = - comparatorType == ComparatorType.EqualityComparator - ? null - : DefaultVectorComparators.createDefaultComparator(targetVector); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - targetVector.set(i, i); - keyVector.set(i, i * 2); - } - targetVector.setValueCount(VECTOR_LENGTH); - keyVector.setValueCount(VECTOR_LENGTH); - - ParallelSearcher searcher = - new ParallelSearcher<>(targetVector, threadPool, threadCount); - for (int i = 0; i < VECTOR_LENGTH; i++) { - int pos = - comparator == null - ? 
searcher.search(keyVector, i) - : searcher.search(keyVector, i, comparator); - if (i * 2 < VECTOR_LENGTH) { - assertEquals(i * 2, pos); - } else { - assertEquals(-1, pos); - } - } - } - } - - @ParameterizedTest - @MethodSource("getComparatorName") - public void testParallelStringSearch(ComparatorType comparatorType, int threadCount) - throws ExecutionException, InterruptedException { - threadPool = Executors.newFixedThreadPool(threadCount); - try (VarCharVector targetVector = new VarCharVector("targetVector", allocator); - VarCharVector keyVector = new VarCharVector("keyVector", allocator)) { - targetVector.allocateNew(VECTOR_LENGTH); - keyVector.allocateNew(VECTOR_LENGTH); - - // if we are comparing elements using equality semantics, we do not need a comparator here. - VectorValueComparator comparator = - comparatorType == ComparatorType.EqualityComparator - ? null - : DefaultVectorComparators.createDefaultComparator(targetVector); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - targetVector.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); - keyVector.setSafe(i, String.valueOf(i * 2).getBytes(StandardCharsets.UTF_8)); - } - targetVector.setValueCount(VECTOR_LENGTH); - keyVector.setValueCount(VECTOR_LENGTH); - - ParallelSearcher searcher = - new ParallelSearcher<>(targetVector, threadPool, threadCount); - for (int i = 0; i < VECTOR_LENGTH; i++) { - int pos = - comparator == null - ? 
searcher.search(keyVector, i) - : searcher.search(keyVector, i, comparator); - if (i * 2 < VECTOR_LENGTH) { - assertEquals(i * 2, pos); - } else { - assertEquals(-1, pos); - } - } - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java deleted file mode 100644 index 922ec6cbeeb82..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.search; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.util.stream.Stream; -import org.apache.arrow.algorithm.sort.DefaultVectorComparators; -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** Test cases for {@link VectorRangeSearcher}. */ -public class TestVectorRangeSearcher { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @ParameterizedTest - @MethodSource("getRepeat") - public void testGetLowerBounds(int repeat) { - final int maxValue = 100; - try (IntVector intVector = new IntVector("int vec", allocator)) { - // allocate vector - intVector.allocateNew(maxValue * repeat); - intVector.setValueCount(maxValue * repeat); - - // prepare data in sorted order - // each value is repeated some times - for (int i = 0; i < maxValue; i++) { - for (int j = 0; j < repeat; j++) { - if (i == 0) { - intVector.setNull(i * repeat + j); - } else { - intVector.set(i * repeat + j, i); - } - } - } - - // do search - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(intVector); - for (int i = 0; i < maxValue; i++) { - int result = - VectorRangeSearcher.getFirstMatch(intVector, comparator, intVector, i * repeat); - assertEquals(i * ((long) repeat), result); - } - } - } - - @ParameterizedTest - @MethodSource("getRepeat") - public void testGetLowerBoundsNegative(int repeat) { - final int maxValue = 100; - try (IntVector intVector = new 
IntVector("int vec", allocator); - IntVector negVector = new IntVector("neg vec", allocator)) { - // allocate vector - intVector.allocateNew(maxValue * repeat); - intVector.setValueCount(maxValue * repeat); - - negVector.allocateNew(maxValue); - negVector.setValueCount(maxValue); - - // prepare data in sorted order - // each value is repeated some times - for (int i = 0; i < maxValue; i++) { - for (int j = 0; j < repeat; j++) { - if (i == 0) { - intVector.setNull(i * repeat + j); - } else { - intVector.set(i * repeat + j, i); - } - } - negVector.set(i, maxValue + i); - } - - // do search - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(intVector); - for (int i = 0; i < maxValue; i++) { - int result = VectorRangeSearcher.getFirstMatch(intVector, comparator, negVector, i); - assertEquals(-1, result); - } - } - } - - @ParameterizedTest - @MethodSource("getRepeat") - public void testGetUpperBounds(int repeat) { - final int maxValue = 100; - try (IntVector intVector = new IntVector("int vec", allocator)) { - // allocate vector - intVector.allocateNew(maxValue * repeat); - intVector.setValueCount(maxValue * repeat); - - // prepare data in sorted order - // each value is repeated some times - for (int i = 0; i < maxValue; i++) { - for (int j = 0; j < repeat; j++) { - if (i == 0) { - intVector.setNull(i * repeat + j); - } else { - intVector.set(i * repeat + j, i); - } - } - } - - // do search - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(intVector); - for (int i = 0; i < maxValue; i++) { - int result = VectorRangeSearcher.getLastMatch(intVector, comparator, intVector, i * repeat); - assertEquals((i + 1) * repeat - 1, result); - } - } - } - - @ParameterizedTest - @MethodSource("getRepeat") - public void testGetUpperBoundsNegative(int repeat) { - final int maxValue = 100; - try (IntVector intVector = new IntVector("int vec", allocator); - IntVector negVector = new IntVector("neg vec", 
allocator)) { - // allocate vector - intVector.allocateNew(maxValue * repeat); - intVector.setValueCount(maxValue * repeat); - - negVector.allocateNew(maxValue); - negVector.setValueCount(maxValue); - - // prepare data in sorted order - // each value is repeated some times - for (int i = 0; i < maxValue; i++) { - for (int j = 0; j < repeat; j++) { - if (i == 0) { - intVector.setNull(i * repeat + j); - } else { - intVector.set(i * repeat + j, i); - } - } - negVector.set(i, maxValue + i); - } - - // do search - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(intVector); - for (int i = 0; i < maxValue; i++) { - int result = VectorRangeSearcher.getLastMatch(intVector, comparator, negVector, i); - assertEquals(-1, result); - } - } - } - - public static Stream getRepeat() { - return Stream.of(Arguments.of(1), Arguments.of(2), Arguments.of(5), Arguments.of(10)); - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java deleted file mode 100644 index f08749a8d1ef4..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java +++ /dev/null @@ -1,297 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.search; - -import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.nio.charset.StandardCharsets; -import org.apache.arrow.algorithm.sort.DefaultVectorComparators; -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link org.apache.arrow.algorithm.search.VectorSearcher}. 
*/ -public class TestVectorSearcher { - - private final int VECTOR_LENGTH = 100; - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testBinarySearchInt() { - try (IntVector rawVector = new IntVector("", allocator); - IntVector negVector = new IntVector("", allocator)) { - rawVector.allocateNew(VECTOR_LENGTH); - rawVector.setValueCount(VECTOR_LENGTH); - negVector.allocateNew(1); - negVector.setValueCount(1); - - // prepare data in sorted order - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i == 0) { - rawVector.setNull(i); - } else { - rawVector.set(i, i); - } - } - negVector.set(0, -333); - - // do search - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); - for (int i = 0; i < VECTOR_LENGTH; i++) { - int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); - assertEquals(i, result); - } - - // negative case - assertEquals(-1, VectorSearcher.binarySearch(rawVector, comparator, negVector, 0)); - } - } - - @Test - public void testLinearSearchInt() { - try (IntVector rawVector = new IntVector("", allocator); - IntVector negVector = new IntVector("", allocator)) { - rawVector.allocateNew(VECTOR_LENGTH); - rawVector.setValueCount(VECTOR_LENGTH); - negVector.allocateNew(1); - negVector.setValueCount(1); - - // prepare data in sorted order - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i == 0) { - rawVector.setNull(i); - } else { - rawVector.set(i, i); - } - } - negVector.set(0, -333); - - // do search - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); - for (int i = 0; i < VECTOR_LENGTH; i++) { - int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); - assertEquals(i, result); - } - - // negative case - assertEquals(-1, 
VectorSearcher.linearSearch(rawVector, comparator, negVector, 0)); - } - } - - @Test - public void testBinarySearchVarChar() { - try (VarCharVector rawVector = new VarCharVector("", allocator); - VarCharVector negVector = new VarCharVector("", allocator)) { - rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH); - rawVector.setValueCount(VECTOR_LENGTH); - negVector.allocateNew(VECTOR_LENGTH, 1); - negVector.setValueCount(1); - - byte[] content = new byte[2]; - - // prepare data in sorted order - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i == 0) { - rawVector.setNull(i); - } else { - int q = i / 10; - int r = i % 10; - - content[0] = (byte) ('a' + q); - content[1] = (byte) r; - rawVector.set(i, content); - } - } - negVector.set(0, "abcd".getBytes(StandardCharsets.UTF_8)); - - // do search - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); - for (int i = 0; i < VECTOR_LENGTH; i++) { - int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); - assertEquals(i, result); - } - - // negative case - assertEquals(-1, VectorSearcher.binarySearch(rawVector, comparator, negVector, 0)); - } - } - - @Test - public void testLinearSearchVarChar() { - try (VarCharVector rawVector = new VarCharVector("", allocator); - VarCharVector negVector = new VarCharVector("", allocator)) { - rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH); - rawVector.setValueCount(VECTOR_LENGTH); - negVector.allocateNew(VECTOR_LENGTH, 1); - negVector.setValueCount(1); - - byte[] content = new byte[2]; - - // prepare data in sorted order - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i == 0) { - rawVector.setNull(i); - } else { - int q = i / 10; - int r = i % 10; - - content[0] = (byte) ('a' + q); - content[1] = (byte) r; - rawVector.set(i, content); - } - } - negVector.set(0, "abcd".getBytes(StandardCharsets.UTF_8)); - - // do search - VectorValueComparator comparator = - 
DefaultVectorComparators.createDefaultComparator(rawVector); - for (int i = 0; i < VECTOR_LENGTH; i++) { - int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); - assertEquals(i, result); - } - - // negative case - assertEquals(-1, VectorSearcher.linearSearch(rawVector, comparator, negVector, 0)); - } - } - - private ListVector createListVector() { - final int innerCount = 100; - final int outerCount = 10; - final int listLength = innerCount / outerCount; - - ListVector listVector = ListVector.empty("list vector", allocator); - - Types.MinorType type = Types.MinorType.INT; - listVector.addOrGetVector(FieldType.nullable(type.getType())); - - listVector.allocateNew(); - - IntVector dataVector = (IntVector) listVector.getDataVector(); - - for (int i = 0; i < innerCount; i++) { - dataVector.set(i, i); - } - dataVector.setValueCount(innerCount); - - for (int i = 0; i < outerCount; i++) { - BitVectorHelper.setBit(listVector.getValidityBuffer(), i); - listVector.getOffsetBuffer().setInt(i * OFFSET_WIDTH, i * listLength); - listVector.getOffsetBuffer().setInt((i + 1) * OFFSET_WIDTH, (i + 1) * listLength); - } - listVector.setLastSet(outerCount - 1); - listVector.setValueCount(outerCount); - - return listVector; - } - - private ListVector createNegativeListVector() { - final int innerCount = 100; - final int outerCount = 10; - final int listLength = innerCount / outerCount; - - ListVector listVector = ListVector.empty("list vector", allocator); - - Types.MinorType type = Types.MinorType.INT; - listVector.addOrGetVector(FieldType.nullable(type.getType())); - - listVector.allocateNew(); - - IntVector dataVector = (IntVector) listVector.getDataVector(); - - for (int i = 0; i < innerCount; i++) { - dataVector.set(i, i + 1000); - } - dataVector.setValueCount(innerCount); - - for (int i = 0; i < outerCount; i++) { - BitVectorHelper.setBit(listVector.getValidityBuffer(), i); - listVector.getOffsetBuffer().setInt(i * OFFSET_WIDTH, i * listLength); - 
listVector.getOffsetBuffer().setInt((i + 1) * OFFSET_WIDTH, (i + 1) * listLength); - } - listVector.setValueCount(outerCount); - - return listVector; - } - - @Test - public void testBinarySearchList() { - try (ListVector rawVector = createListVector(); - ListVector negVector = createNegativeListVector()) { - - // do search - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); - for (int i = 0; i < rawVector.getValueCount(); i++) { - int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); - assertEquals(i, result); - } - - // negative case - for (int i = 0; i < rawVector.getValueCount(); i++) { - int result = VectorSearcher.binarySearch(rawVector, comparator, negVector, i); - assertEquals(-1, result); - } - } - } - - @Test - public void testLinearSearchList() { - try (ListVector rawVector = createListVector(); - ListVector negVector = createNegativeListVector()) { - - // do search - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); - for (int i = 0; i < rawVector.getValueCount(); i++) { - int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); - assertEquals(i, result); - } - - // negative case - for (int i = 0; i < rawVector.getValueCount(); i++) { - int result = VectorSearcher.linearSearch(rawVector, comparator, negVector, i); - assertEquals(-1, result); - } - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java deleted file mode 100644 index 35f55f338992e..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link CompositeVectorComparator}. 
*/ -public class TestCompositeVectorComparator { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testCompareVectorSchemaRoot() { - final int vectorLength = 10; - IntVector intVec1 = new IntVector("int1", allocator); - VarCharVector strVec1 = new VarCharVector("str1", allocator); - - IntVector intVec2 = new IntVector("int2", allocator); - VarCharVector strVec2 = new VarCharVector("str2", allocator); - - try (VectorSchemaRoot batch1 = new VectorSchemaRoot(Arrays.asList(intVec1, strVec1)); - VectorSchemaRoot batch2 = new VectorSchemaRoot(Arrays.asList(intVec2, strVec2))) { - - intVec1.allocateNew(vectorLength); - strVec1.allocateNew(vectorLength * 10, vectorLength); - intVec2.allocateNew(vectorLength); - strVec2.allocateNew(vectorLength * 10, vectorLength); - - for (int i = 0; i < vectorLength; i++) { - intVec1.set(i, i); - strVec1.set(i, ("a" + i).getBytes(StandardCharsets.UTF_8)); - intVec2.set(i, i); - strVec2.set(i, "a5".getBytes(StandardCharsets.UTF_8)); - } - - VectorValueComparator innerComparator1 = - DefaultVectorComparators.createDefaultComparator(intVec1); - innerComparator1.attachVectors(intVec1, intVec2); - VectorValueComparator innerComparator2 = - DefaultVectorComparators.createDefaultComparator(strVec1); - innerComparator2.attachVectors(strVec1, strVec2); - - VectorValueComparator comparator = - new CompositeVectorComparator( - new VectorValueComparator[] {innerComparator1, innerComparator2}); - - // verify results - - // both elements are equal, the result is equal - assertEquals(0, comparator.compare(5, 5)); - - // the first element being equal, the second is smaller, and the result is smaller - assertTrue(comparator.compare(1, 1) < 0); - assertTrue(comparator.compare(2, 2) < 0); - assertTrue(comparator.compare(3, 3) < 0); - - // the first element being equal, the second is 
larger, and the result is larger - assertTrue(comparator.compare(7, 7) > 0); - assertTrue(comparator.compare(8, 8) > 0); - assertTrue(comparator.compare(9, 9) > 0); - - // the first element is smaller, the result is always smaller - assertTrue(comparator.compare(1, 2) < 0); - assertTrue(comparator.compare(3, 7) < 0); - assertTrue(comparator.compare(4, 9) < 0); - - // the first element is larger, the result is always larger - assertTrue(comparator.compare(2, 0) > 0); - assertTrue(comparator.compare(8, 7) > 0); - assertTrue(comparator.compare(4, 1) > 0); - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java deleted file mode 100644 index 2a046533e89a2..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java +++ /dev/null @@ -1,1112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.sort; - -import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import 
org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link DefaultVectorComparators}. */ -public class TestDefaultVectorComparator { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - private ListVector createListVector(int count) { - ListVector listVector = ListVector.empty("list vector", allocator); - Types.MinorType type = Types.MinorType.INT; - listVector.addOrGetVector(FieldType.nullable(type.getType())); - listVector.allocateNew(); - - IntVector dataVector = (IntVector) listVector.getDataVector(); - - for (int i = 0; i < count; i++) { - dataVector.set(i, i); - } - dataVector.setValueCount(count); - - listVector.setNotNull(0); - - listVector.getOffsetBuffer().setInt(0, 0); - listVector.getOffsetBuffer().setInt(OFFSET_WIDTH, count); - - listVector.setLastSet(0); - listVector.setValueCount(1); - - return listVector; - } - - @Test - public void testCompareLists() { - try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(11)) { - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); - comparator.attachVectors(listVector1, listVector2); - - // prefix is smaller - assertTrue(comparator.compare(0, 0) < 0); - } - - try (ListVector listVector1 = createListVector(11); - ListVector listVector2 = createListVector(11)) { - ((IntVector) listVector2.getDataVector()).set(10, 110); - - VectorValueComparator comparator = - 
DefaultVectorComparators.createDefaultComparator(listVector1); - comparator.attachVectors(listVector1, listVector2); - - // breaking tie by the last element - assertTrue(comparator.compare(0, 0) < 0); - } - - try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(10)) { - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); - comparator.attachVectors(listVector1, listVector2); - - // list vector elements equal - assertTrue(comparator.compare(0, 0) == 0); - } - } - - @Test - public void testCopiedComparatorForLists() { - for (int i = 1; i < 10; i++) { - for (int j = 1; j < 10; j++) { - try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(11)) { - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); - comparator.attachVectors(listVector1, listVector2); - - VectorValueComparator copyComparator = comparator.createNew(); - copyComparator.attachVectors(listVector1, listVector2); - - assertEquals(comparator.compare(0, 0), copyComparator.compare(0, 0)); - } - } - } - } - - private FixedSizeListVector createFixedSizeListVector(int count) { - FixedSizeListVector listVector = FixedSizeListVector.empty("list vector", count, allocator); - Types.MinorType type = Types.MinorType.INT; - listVector.addOrGetVector(FieldType.nullable(type.getType())); - listVector.allocateNew(); - - IntVector dataVector = (IntVector) listVector.getDataVector(); - - for (int i = 0; i < count; i++) { - dataVector.set(i, i); - } - dataVector.setValueCount(count); - - listVector.setNotNull(0); - listVector.setValueCount(1); - - return listVector; - } - - @Test - public void testCompareFixedSizeLists() { - try (FixedSizeListVector listVector1 = createFixedSizeListVector(10); - FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { - VectorValueComparator comparator = - 
DefaultVectorComparators.createDefaultComparator(listVector1); - comparator.attachVectors(listVector1, listVector2); - - // prefix is smaller - assertTrue(comparator.compare(0, 0) < 0); - } - - try (FixedSizeListVector listVector1 = createFixedSizeListVector(11); - FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { - ((IntVector) listVector2.getDataVector()).set(10, 110); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); - comparator.attachVectors(listVector1, listVector2); - - // breaking tie by the last element - assertTrue(comparator.compare(0, 0) < 0); - } - - try (FixedSizeListVector listVector1 = createFixedSizeListVector(10); - FixedSizeListVector listVector2 = createFixedSizeListVector(10)) { - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); - comparator.attachVectors(listVector1, listVector2); - - // list vector elements equal - assertTrue(comparator.compare(0, 0) == 0); - } - } - - @Test - public void testCompareUInt1() { - try (UInt1Vector vec = new UInt1Vector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - vec.setNull(0); - vec.set(1, -2); - vec.set(2, -1); - vec.set(3, 0); - vec.set(4, 1); - vec.set(5, 2); - vec.set(6, -2); - vec.setNull(7); - vec.set(8, Byte.MAX_VALUE); - vec.set(9, Byte.MIN_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(1, 2) < 0); - assertTrue(comparator.compare(1, 3) > 0); - assertTrue(comparator.compare(2, 5) > 0); - assertTrue(comparator.compare(4, 5) < 0); - assertTrue(comparator.compare(1, 6) == 0); - assertTrue(comparator.compare(0, 7) == 0); - assertTrue(comparator.compare(8, 9) < 0); - assertTrue(comparator.compare(4, 8) < 0); - assertTrue(comparator.compare(5, 9) < 0); - assertTrue(comparator.compare(2, 9) > 0); - } 
- } - - @Test - public void testCompareUInt2() { - try (UInt2Vector vec = new UInt2Vector("", allocator)) { - vec.allocateNew(10); - - ValueVectorDataPopulator.setVector( - vec, - null, - (char) (Character.MAX_VALUE - 1), - Character.MAX_VALUE, - (char) 0, - (char) 1, - (char) 2, - (char) (Character.MAX_VALUE - 1), - null, - '\u7FFF', // value for the max 16-byte signed integer - '\u8000' // value for the min 16-byte signed integer - ); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(1, 2) < 0); - assertTrue(comparator.compare(1, 3) > 0); - assertTrue(comparator.compare(2, 5) > 0); - assertTrue(comparator.compare(4, 5) < 0); - assertEquals(0, comparator.compare(1, 6)); - assertEquals(0, comparator.compare(0, 7)); - assertTrue(comparator.compare(8, 9) < 0); - assertTrue(comparator.compare(4, 8) < 0); - assertTrue(comparator.compare(5, 9) < 0); - assertTrue(comparator.compare(2, 9) > 0); - } - } - - @Test - public void testCompareUInt4() { - try (UInt4Vector vec = new UInt4Vector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - vec.setNull(0); - vec.set(1, -2); - vec.set(2, -1); - vec.set(3, 0); - vec.set(4, 1); - vec.set(5, 2); - vec.set(6, -2); - vec.setNull(7); - vec.set(8, Integer.MAX_VALUE); - vec.set(9, Integer.MIN_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(1, 2) < 0); - assertTrue(comparator.compare(1, 3) > 0); - assertTrue(comparator.compare(2, 5) > 0); - assertTrue(comparator.compare(4, 5) < 0); - assertTrue(comparator.compare(1, 6) == 0); - assertTrue(comparator.compare(0, 7) == 0); - assertTrue(comparator.compare(8, 9) < 0); - assertTrue(comparator.compare(4, 8) < 0); - assertTrue(comparator.compare(5, 9) < 0); - 
assertTrue(comparator.compare(2, 9) > 0); - } - } - - @Test - public void testCompareUInt8() { - try (UInt8Vector vec = new UInt8Vector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - vec.setNull(0); - vec.set(1, -2); - vec.set(2, -1); - vec.set(3, 0); - vec.set(4, 1); - vec.set(5, 2); - vec.set(6, -2); - vec.setNull(7); - vec.set(8, Long.MAX_VALUE); - vec.set(9, Long.MIN_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(1, 2) < 0); - assertTrue(comparator.compare(1, 3) > 0); - assertTrue(comparator.compare(2, 5) > 0); - assertTrue(comparator.compare(4, 5) < 0); - assertTrue(comparator.compare(1, 6) == 0); - assertTrue(comparator.compare(0, 7) == 0); - assertTrue(comparator.compare(8, 9) < 0); - assertTrue(comparator.compare(4, 8) < 0); - assertTrue(comparator.compare(5, 9) < 0); - assertTrue(comparator.compare(2, 9) > 0); - } - } - - @Test - public void testCompareFloat4() { - try (Float4Vector vec = new Float4Vector("", allocator)) { - vec.allocateNew(9); - ValueVectorDataPopulator.setVector( - vec, - -1.1f, - 0.0f, - 1.0f, - null, - 1.0f, - 2.0f, - Float.NaN, - Float.NaN, - Float.POSITIVE_INFINITY, - Float.NEGATIVE_INFINITY); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - assertTrue(comparator.compare(8, 3) > 0); - - // NaN behavior. 
- assertTrue(comparator.compare(6, 7) == 0); - assertTrue(comparator.compare(7, 6) == 0); - assertTrue(comparator.compare(7, 7) == 0); - assertTrue(comparator.compare(6, 0) > 0); - assertTrue(comparator.compare(6, 8) > 0); - assertTrue(comparator.compare(6, 3) > 0); - } - } - - @Test - public void testCompareFloat8() { - try (Float8Vector vec = new Float8Vector("", allocator)) { - vec.allocateNew(9); - ValueVectorDataPopulator.setVector( - vec, - -1.1, - 0.0, - 1.0, - null, - 1.0, - 2.0, - Double.NaN, - Double.NaN, - Double.POSITIVE_INFINITY, - Double.NEGATIVE_INFINITY); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - assertTrue(comparator.compare(8, 3) > 0); - - // NaN behavior. 
- assertTrue(comparator.compare(6, 7) == 0); - assertTrue(comparator.compare(7, 6) == 0); - assertTrue(comparator.compare(7, 7) == 0); - assertTrue(comparator.compare(6, 0) > 0); - assertTrue(comparator.compare(6, 8) > 0); - assertTrue(comparator.compare(6, 3) > 0); - } - } - - @Test - public void testCompareLong() { - try (BigIntVector vec = new BigIntVector("", allocator)) { - vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareInt() { - try (IntVector vec = new IntVector("", allocator)) { - vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, -1, 0, 1, null, 1, 5, Integer.MIN_VALUE + 1, Integer.MAX_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - 
assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareShort() { - try (SmallIntVector vec = new SmallIntVector("", allocator)) { - vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, - (short) -1, - (short) 0, - (short) 1, - null, - (short) 1, - (short) 5, - (short) (Short.MIN_VALUE + 1), - Short.MAX_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareByte() { - try (TinyIntVector vec = new TinyIntVector("", allocator)) { - vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, - (byte) -1, - (byte) 0, - (byte) 1, - null, - (byte) 1, - (byte) 5, - (byte) (Byte.MIN_VALUE + 1), - Byte.MAX_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareBit() { - try (BitVector 
vec = new BitVector("", allocator)) { - vec.allocateNew(6); - ValueVectorDataPopulator.setVector(vec, 1, 2, 0, 0, -1, null); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) == 0); - assertTrue(comparator.compare(0, 2) > 0); - assertTrue(comparator.compare(0, 4) == 0); - assertTrue(comparator.compare(2, 1) < 0); - assertTrue(comparator.compare(2, 4) < 0); - - // null first - assertTrue(comparator.compare(5, 0) < 0); - assertTrue(comparator.compare(5, 2) < 0); - } - } - - @Test - public void testCompareDateDay() { - try (DateDayVector vec = new DateDayVector("", allocator)) { - vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, -1, 0, 1, null, 1, 5, Integer.MIN_VALUE + 1, Integer.MAX_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareDateMilli() { - try (DateMilliVector vec = new DateMilliVector("", allocator)) { - vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // 
test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareDecimal() { - try (DecimalVector vec = new DecimalVector("", allocator, 10, 1)) { - vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareDecimal256() { - try (Decimal256Vector vec = new Decimal256Vector("", allocator, 10, 1)) { - vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 
3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareDuration() { - try (DurationVector vec = - new DurationVector( - "", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { - vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareIntervalDay() { - try (IntervalDayVector vec = - new IntervalDayVector( - "", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { - vec.allocateNew(8); - vec.set(0, -1, 0); - vec.set(1, 0, 0); - vec.set(2, 1, 0); - vec.setNull(3); - vec.set(4, -1, -1); - vec.set(5, 1, 1); - vec.set(6, 1, 1); - vec.set(7, -1, -1); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - assertTrue(comparator.compare(2, 5) < 0); - assertTrue(comparator.compare(0, 4) > 0); - - // test equality - assertTrue(comparator.compare(5, 6) == 0); - assertTrue(comparator.compare(4, 7) == 0); - - // null first - 
assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - } - } - - @Test - public void testCompareTimeMicro() { - try (TimeMicroVector vec = new TimeMicroVector("", allocator)) { - vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareTimeMilli() { - try (TimeMilliVector vec = new TimeMilliVector("", allocator)) { - vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, -1, 0, 1, null, 1, 5, Integer.MIN_VALUE + 1, Integer.MAX_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareTimeNano() { - try (TimeNanoVector vec = new TimeNanoVector("", allocator)) { - 
vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareTimeSec() { - try (TimeSecVector vec = new TimeSecVector("", allocator)) { - vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, -1, 0, 1, null, 1, 5, Integer.MIN_VALUE + 1, Integer.MAX_VALUE); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareTimeStamp() { - try (TimeStampMilliVector vec = new TimeStampMilliVector("", allocator)) { - vec.allocateNew(8); - ValueVectorDataPopulator.setVector( - vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); - - VectorValueComparator comparator = - 
DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertTrue(comparator.compare(0, 1) < 0); - assertTrue(comparator.compare(0, 2) < 0); - assertTrue(comparator.compare(2, 1) > 0); - - // test equality - assertTrue(comparator.compare(5, 5) == 0); - assertTrue(comparator.compare(2, 4) == 0); - - // null first - assertTrue(comparator.compare(3, 4) < 0); - assertTrue(comparator.compare(5, 3) > 0); - - // potential overflow - assertTrue(comparator.compare(6, 7) < 0); - assertTrue(comparator.compare(7, 6) > 0); - assertTrue(comparator.compare(7, 7) == 0); - } - } - - @Test - public void testCompareFixedSizeBinary() { - try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 2); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { - vector1.allocateNew(); - vector2.allocateNew(); - vector1.set(0, new byte[] {1, 1}); - vector2.set(0, new byte[] {1, 1, 0}); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector1); - comparator.attachVectors(vector1, vector2); - - // prefix is smaller - assertTrue(comparator.compare(0, 0) < 0); - } - - try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 3); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { - vector1.allocateNew(); - vector2.allocateNew(); - vector1.set(0, new byte[] {1, 1, 0}); - vector2.set(0, new byte[] {1, 1, 1}); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector1); - comparator.attachVectors(vector1, vector2); - - // breaking tie by the last element - assertTrue(comparator.compare(0, 0) < 0); - } - - try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 3); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { - vector1.allocateNew(); - vector2.allocateNew(); - vector1.set(0, new byte[] {1, 1, 1}); - 
vector2.set(0, new byte[] {1, 1, 1}); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector1); - comparator.attachVectors(vector1, vector2); - - // list vector elements equal - assertTrue(comparator.compare(0, 0) == 0); - } - } - - @Test - public void testCompareNull() { - try (NullVector vec = - new NullVector("test", FieldType.notNullable(new ArrowType.Int(32, false)))) { - vec.setValueCount(2); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - assertEquals(DefaultVectorComparators.NullComparator.class, comparator.getClass()); - assertEquals(0, comparator.compare(0, 1)); - } - } - - @Test - public void testCheckNullsOnCompareIsFalseForNonNullableVector() { - try (IntVector vec = - new IntVector( - "not nullable", FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { - - ValueVectorDataPopulator.setVector(vec, 1, 2, 3, 4); - - final VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - - assertFalse(comparator.checkNullsOnCompare()); - } - } - - @Test - public void testCheckNullsOnCompareIsTrueForNullableVector() { - try (IntVector vec = - new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); - IntVector vec2 = - new IntVector( - "not-nullable", FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { - - ValueVectorDataPopulator.setVector(vec, 1, null, 3, 4); - ValueVectorDataPopulator.setVector(vec2, 1, 2, 3, 4); - - final VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - assertTrue(comparator.checkNullsOnCompare()); - - comparator.attachVectors(vec, vec2); - assertTrue(comparator.checkNullsOnCompare()); - } - } - - @Test - public void testCheckNullsOnCompareIsFalseWithNoNulls() { - try (IntVector vec = - new IntVector("nullable", 
FieldType.nullable(new ArrowType.Int(32, false)), allocator); - IntVector vec2 = - new IntVector( - "also-nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator)) { - - // no null values - ValueVectorDataPopulator.setVector(vec, 1, 2, 3, 4); - ValueVectorDataPopulator.setVector(vec2, 1, 2, 3, 4); - - final VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec); - assertFalse(comparator.checkNullsOnCompare()); - - comparator.attachVectors(vec, vec2); - assertFalse(comparator.checkNullsOnCompare()); - } - } - - @Test - public void testCheckNullsOnCompareIsTrueWithEmptyVectors() { - try (IntVector vec = - new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); - IntVector vec2 = - new IntVector( - "also-nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator)) { - - final VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - comparator.attachVector(vec2); - assertTrue(comparator.checkNullsOnCompare()); - - comparator.attachVectors(vec, vec2); - assertTrue(comparator.checkNullsOnCompare()); - } - } - - @Test - public void testVariableWidthDefaultComparators() { - try (VarCharVector vec = new VarCharVector("test", allocator)) { - verifyVariableWidthComparatorReturned(vec); - } - try (VarBinaryVector vec = new VarBinaryVector("test", allocator)) { - verifyVariableWidthComparatorReturned(vec); - } - try (LargeVarCharVector vec = new LargeVarCharVector("test", allocator)) { - verifyVariableWidthComparatorReturned(vec); - } - try (LargeVarBinaryVector vec = new LargeVarBinaryVector("test", allocator)) { - verifyVariableWidthComparatorReturned(vec); - } - } - - private static void verifyVariableWidthComparatorReturned(V vec) { - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); - assertEquals(DefaultVectorComparators.VariableWidthComparator.class, 
comparator.getClass()); - } - - @Test - public void testRepeatedDefaultComparators() { - final FieldType type = FieldType.nullable(Types.MinorType.INT.getType()); - try (final LargeListVector vector = new LargeListVector("list", allocator, type, null)) { - vector.addOrGetVector(FieldType.nullable(type.getType())); - verifyRepeatedComparatorReturned(vector); - } - } - - private static void verifyRepeatedComparatorReturned(V vec) { - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); - assertEquals(DefaultVectorComparators.RepeatedValueComparator.class, comparator.getClass()); - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java deleted file mode 100644 index d0f59219a8cfc..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.sort; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.stream.IntStream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link FixedWidthInPlaceVectorSorter}. */ -public class TestFixedWidthInPlaceVectorSorter { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testSortInt() { - try (IntVector vec = new IntVector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - // fill data to sort - vec.set(0, 10); - vec.set(1, 8); - vec.setNull(2); - vec.set(3, 10); - vec.set(4, 12); - vec.set(5, 17); - vec.setNull(6); - vec.set(7, 23); - vec.set(8, 35); - vec.set(9, 2); - - // sort the vector - FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - sorter.sortInPlace(vec, comparator); - - // verify results - assertEquals(10, vec.getValueCount()); - - assertTrue(vec.isNull(0)); - assertTrue(vec.isNull(1)); - assertEquals(2, vec.get(2)); - assertEquals(8, vec.get(3)); - assertEquals(10, vec.get(4)); - assertEquals(10, vec.get(5)); - assertEquals(12, vec.get(6)); - assertEquals(17, vec.get(7)); - assertEquals(23, vec.get(8)); - assertEquals(35, vec.get(9)); - } - } - - /** - * Tests the worst case for quick sort. 
It may cause stack overflow if the algorithm is - * implemented as a recursive algorithm. - */ - @Test - public void testSortLargeIncreasingInt() { - final int vectorLength = 20000; - try (IntVector vec = new IntVector("", allocator)) { - vec.allocateNew(vectorLength); - vec.setValueCount(vectorLength); - - // fill data to sort - for (int i = 0; i < vectorLength; i++) { - vec.set(i, i); - } - - // sort the vector - FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - sorter.sortInPlace(vec, comparator); - - // verify results - assertEquals(vectorLength, vec.getValueCount()); - - for (int i = 0; i < vectorLength; i++) { - assertEquals(i, vec.get(i)); - } - } - } - - @Test - public void testChoosePivot() { - final int vectorLength = 100; - try (IntVector vec = new IntVector("", allocator)) { - vec.allocateNew(vectorLength); - - // the vector is sorted, so the pivot should be in the middle - for (int i = 0; i < vectorLength; i++) { - vec.set(i, i * 100); - } - vec.setValueCount(vectorLength); - - FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - try (IntVector pivotBuffer = (IntVector) vec.getField().createVector(allocator)) { - // setup internal data structures - pivotBuffer.allocateNew(1); - sorter.pivotBuffer = pivotBuffer; - sorter.comparator = comparator; - sorter.vec = vec; - comparator.attachVectors(vec, pivotBuffer); - - int low = 5; - int high = 6; - int pivotValue = vec.get(low); - assertTrue(high - low + 1 < FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD); - - // the range is small enough, so the pivot is simply selected as the low value - sorter.choosePivot(low, high); - assertEquals(pivotValue, vec.get(low)); - - low = 30; - high = 80; - pivotValue = vec.get((low + high) / 2); - assertTrue(high - low + 
1 >= FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD); - - // the range is large enough, so the median is selected as the pivot - sorter.choosePivot(low, high); - assertEquals(pivotValue, vec.get(low)); - } - } - } - - /** Evaluates choosing pivot for all possible permutations of 3 numbers. */ - @Test - public void testChoosePivotAllPermutes() { - try (IntVector vec = new IntVector("", allocator)) { - vec.allocateNew(3); - - FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - try (IntVector pivotBuffer = (IntVector) vec.getField().createVector(allocator)) { - // setup internal data structures - pivotBuffer.allocateNew(1); - sorter.pivotBuffer = pivotBuffer; - sorter.comparator = comparator; - sorter.vec = vec; - comparator.attachVectors(vec, pivotBuffer); - - int low = 0; - int high = 2; - - ValueVectorDataPopulator.setVector(vec, 11, 22, 33); - sorter.choosePivot(low, high); - assertEquals(22, vec.get(0)); - - ValueVectorDataPopulator.setVector(vec, 11, 33, 22); - sorter.choosePivot(low, high); - assertEquals(22, vec.get(0)); - - ValueVectorDataPopulator.setVector(vec, 22, 11, 33); - sorter.choosePivot(low, high); - assertEquals(22, vec.get(0)); - - ValueVectorDataPopulator.setVector(vec, 22, 33, 11); - sorter.choosePivot(low, high); - assertEquals(22, vec.get(0)); - - ValueVectorDataPopulator.setVector(vec, 33, 11, 22); - sorter.choosePivot(low, high); - assertEquals(22, vec.get(0)); - - ValueVectorDataPopulator.setVector(vec, 33, 22, 11); - sorter.choosePivot(low, high); - assertEquals(22, vec.get(0)); - } - } - } - - @Test - public void testSortInt2() { - try (IntVector vector = new IntVector("vector", allocator)) { - ValueVectorDataPopulator.setVector( - vector, 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, 8, 9, 10, - 11, 36, 37, 38, 39, 40, 41, 66, 67, 68, 69, 70, 71); - - 
FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); - - sorter.sortInPlace(vector, comparator); - - int[] actual = new int[vector.getValueCount()]; - IntStream.range(0, vector.getValueCount()).forEach(i -> actual[i] = vector.get(i)); - - assertArrayEquals( - new int[] { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71 - }, - actual); - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java deleted file mode 100644 index e1e5167831235..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java +++ /dev/null @@ -1,387 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.sort; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.stream.IntStream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.MethodSource; - -/** Test cases for {@link FixedWidthOutOfPlaceVectorSorter}. */ -public class TestFixedWidthOutOfPlaceVectorSorter extends TestOutOfPlaceVectorSorter { - - private BufferAllocator allocator; - - OutOfPlaceVectorSorter getSorter(boolean generalSorter) { - return generalSorter - ? 
new GeneralOutOfPlaceVectorSorter<>() - : new FixedWidthOutOfPlaceVectorSorter<>(); - } - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @ParameterizedTest - @MethodSource("getParameter") - public void testSortByte(boolean generalSorter) { - try (TinyIntVector vec = new TinyIntVector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - // fill data to sort - vec.set(0, 10); - vec.set(1, 8); - vec.setNull(2); - vec.set(3, 10); - vec.set(4, 12); - vec.set(5, 17); - vec.setNull(6); - vec.set(7, 23); - vec.set(8, 35); - vec.set(9, 2); - - // sort the vector - OutOfPlaceVectorSorter sorter = getSorter(generalSorter); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - TinyIntVector sortedVec = - (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); - sortedVec.allocateNew(vec.getValueCount()); - sortedVec.setValueCount(vec.getValueCount()); - - sorter.sortOutOfPlace(vec, sortedVec, comparator); - - // verify results - assertEquals(vec.getValueCount(), sortedVec.getValueCount()); - - assertTrue(sortedVec.isNull(0)); - assertTrue(sortedVec.isNull(1)); - assertEquals((byte) 2, sortedVec.get(2)); - assertEquals((byte) 8, sortedVec.get(3)); - assertEquals((byte) 10, sortedVec.get(4)); - assertEquals((byte) 10, sortedVec.get(5)); - assertEquals((byte) 12, sortedVec.get(6)); - assertEquals((byte) 17, sortedVec.get(7)); - assertEquals((byte) 23, sortedVec.get(8)); - assertEquals((byte) 35, sortedVec.get(9)); - - sortedVec.close(); - } - } - - @ParameterizedTest - @MethodSource("getParameter") - public void testSortShort(boolean generalSorter) { - try (SmallIntVector vec = new SmallIntVector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - // fill data to sort - vec.set(0, 10); - vec.set(1, 8); - vec.setNull(2); - vec.set(3, 10); - vec.set(4, 12); - 
vec.set(5, 17); - vec.setNull(6); - vec.set(7, 23); - vec.set(8, 35); - vec.set(9, 2); - - // sort the vector - OutOfPlaceVectorSorter sorter = getSorter(generalSorter); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - SmallIntVector sortedVec = - (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); - sortedVec.allocateNew(vec.getValueCount()); - sortedVec.setValueCount(vec.getValueCount()); - - sorter.sortOutOfPlace(vec, sortedVec, comparator); - - // verify results - assertEquals(vec.getValueCount(), sortedVec.getValueCount()); - - assertTrue(sortedVec.isNull(0)); - assertTrue(sortedVec.isNull(1)); - assertEquals((short) 2, sortedVec.get(2)); - assertEquals((short) 8, sortedVec.get(3)); - assertEquals((short) 10, sortedVec.get(4)); - assertEquals((short) 10, sortedVec.get(5)); - assertEquals((short) 12, sortedVec.get(6)); - assertEquals((short) 17, sortedVec.get(7)); - assertEquals((short) 23, sortedVec.get(8)); - assertEquals((short) 35, sortedVec.get(9)); - - sortedVec.close(); - } - } - - @ParameterizedTest - @MethodSource("getParameter") - public void testSortInt(boolean generalSorter) { - try (IntVector vec = new IntVector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - // fill data to sort - vec.set(0, 10); - vec.set(1, 8); - vec.setNull(2); - vec.set(3, 10); - vec.set(4, 12); - vec.set(5, 17); - vec.setNull(6); - vec.set(7, 23); - vec.set(8, 35); - vec.set(9, 2); - - // sort the vector - OutOfPlaceVectorSorter sorter = getSorter(generalSorter); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - IntVector sortedVec = - (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); - sortedVec.allocateNew(vec.getValueCount()); - sortedVec.setValueCount(vec.getValueCount()); - - sorter.sortOutOfPlace(vec, sortedVec, comparator); - - // verify results - 
assertEquals(vec.getValueCount(), sortedVec.getValueCount()); - - assertTrue(sortedVec.isNull(0)); - assertTrue(sortedVec.isNull(1)); - assertEquals(2, sortedVec.get(2)); - assertEquals(8, sortedVec.get(3)); - assertEquals(10, sortedVec.get(4)); - assertEquals(10, sortedVec.get(5)); - assertEquals(12, sortedVec.get(6)); - assertEquals(17, sortedVec.get(7)); - assertEquals(23, sortedVec.get(8)); - assertEquals(35, sortedVec.get(9)); - - sortedVec.close(); - } - } - - @ParameterizedTest - @MethodSource("getParameter") - public void testSortLong(boolean generalSorter) { - try (BigIntVector vec = new BigIntVector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - // fill data to sort - vec.set(0, 10L); - vec.set(1, 8L); - vec.setNull(2); - vec.set(3, 10L); - vec.set(4, 12L); - vec.set(5, 17L); - vec.setNull(6); - vec.set(7, 23L); - vec.set(8, 1L << 35L); - vec.set(9, 2L); - - // sort the vector - OutOfPlaceVectorSorter sorter = getSorter(generalSorter); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - BigIntVector sortedVec = - (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); - sortedVec.allocateNew(vec.getValueCount()); - sortedVec.setValueCount(vec.getValueCount()); - - sorter.sortOutOfPlace(vec, sortedVec, comparator); - - // verify results - assertEquals(vec.getValueCount(), sortedVec.getValueCount()); - - assertTrue(sortedVec.isNull(0)); - assertTrue(sortedVec.isNull(1)); - assertEquals(2L, sortedVec.get(2)); - assertEquals(8L, sortedVec.get(3)); - assertEquals(10L, sortedVec.get(4)); - assertEquals(10L, sortedVec.get(5)); - assertEquals(12L, sortedVec.get(6)); - assertEquals(17L, sortedVec.get(7)); - assertEquals(23L, sortedVec.get(8)); - assertEquals(1L << 35L, sortedVec.get(9)); - - sortedVec.close(); - } - } - - @ParameterizedTest - @MethodSource("getParameter") - public void testSortFloat(boolean generalSorter) { - try (Float4Vector vec = new 
Float4Vector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - // fill data to sort - vec.set(0, 10f); - vec.set(1, 8f); - vec.setNull(2); - vec.set(3, 10f); - vec.set(4, 12f); - vec.set(5, 17f); - vec.setNull(6); - vec.set(7, 23f); - vec.set(8, Float.NaN); - vec.set(9, 2f); - - // sort the vector - OutOfPlaceVectorSorter sorter = getSorter(generalSorter); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - Float4Vector sortedVec = - (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); - sortedVec.allocateNew(vec.getValueCount()); - sortedVec.setValueCount(vec.getValueCount()); - - sorter.sortOutOfPlace(vec, sortedVec, comparator); - - // verify results - assertEquals(vec.getValueCount(), sortedVec.getValueCount()); - - assertTrue(sortedVec.isNull(0)); - assertTrue(sortedVec.isNull(1)); - assertEquals(2f, sortedVec.get(2), 0f); - assertEquals(8f, sortedVec.get(3), 0f); - assertEquals(10f, sortedVec.get(4), 0f); - assertEquals(10f, sortedVec.get(5), 0f); - assertEquals(12f, sortedVec.get(6), 0f); - assertEquals(17f, sortedVec.get(7), 0f); - assertEquals(23f, sortedVec.get(8), 0f); - assertEquals(Float.NaN, sortedVec.get(9), 0f); - - sortedVec.close(); - } - } - - @ParameterizedTest - @MethodSource("getParameter") - public void testSortDouble(boolean generalSorter) { - try (Float8Vector vec = new Float8Vector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - // fill data to sort - vec.set(0, 10); - vec.set(1, 8); - vec.setNull(2); - vec.set(3, 10); - vec.set(4, 12); - vec.set(5, 17); - vec.setNull(6); - vec.set(7, Double.NaN); - vec.set(8, 35); - vec.set(9, 2); - - // sort the vector - OutOfPlaceVectorSorter sorter = getSorter(generalSorter); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - Float8Vector sortedVec = - (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, 
null); - sortedVec.allocateNew(vec.getValueCount()); - sortedVec.setValueCount(vec.getValueCount()); - - sorter.sortOutOfPlace(vec, sortedVec, comparator); - - // verify results - assertEquals(vec.getValueCount(), sortedVec.getValueCount()); - - assertTrue(sortedVec.isNull(0)); - assertTrue(sortedVec.isNull(1)); - assertEquals(2, sortedVec.get(2), 0); - assertEquals(8, sortedVec.get(3), 0); - assertEquals(10, sortedVec.get(4), 0); - assertEquals(10, sortedVec.get(5), 0); - assertEquals(12, sortedVec.get(6), 0); - assertEquals(17, sortedVec.get(7), 0); - assertEquals(35, sortedVec.get(8), 0); - assertEquals(Double.NaN, sortedVec.get(9), 0); - - sortedVec.close(); - } - } - - @ParameterizedTest - @MethodSource("getParameter") - public void testSortInt2(boolean generalSorter) { - try (IntVector vec = new IntVector("", allocator)) { - ValueVectorDataPopulator.setVector( - vec, 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, 8, 9, 10, 11, - 36, 37, 38, 39, 40, 41, 66, 67, 68, 69, 70, 71); - - // sort the vector - OutOfPlaceVectorSorter sorter = getSorter(generalSorter); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - try (IntVector sortedVec = - (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { - sortedVec.allocateNew(vec.getValueCount()); - sortedVec.setValueCount(vec.getValueCount()); - - sorter.sortOutOfPlace(vec, sortedVec, comparator); - - // verify results - int[] actual = new int[sortedVec.getValueCount()]; - IntStream.range(0, sortedVec.getValueCount()).forEach(i -> actual[i] = sortedVec.get(i)); - - assertArrayEquals( - new int[] { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71 - }, - actual); - } - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java 
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java deleted file mode 100644 index d9056d08f4988..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import java.util.ArrayList; -import java.util.List; -import java.util.function.Function; -import java.util.stream.Stream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** Test sorting fixed width vectors with random data. 
*/ -public class TestFixedWidthSorting> { - - static final int[] VECTOR_LENGTHS = new int[] {2, 5, 10, 50, 100, 1000, 3000}; - - static final double[] NULL_FRACTIONS = {0, 0.1, 0.3, 0.5, 0.7, 0.9, 1}; - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(Integer.MAX_VALUE); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @ParameterizedTest - @MethodSource("getParameters") - public void testSort( - boolean inPlace, - int length, - double nullFraction, - String desc, - Function vectorGenerator, - TestSortingUtil.DataGenerator dataGenerator) { - if (inPlace) { - sortInPlace(length, nullFraction, vectorGenerator, dataGenerator); - } else { - sortOutOfPlace(length, nullFraction, vectorGenerator, dataGenerator); - } - } - - void sortInPlace( - int length, - double nullFraction, - Function vectorGenerator, - TestSortingUtil.DataGenerator dataGenerator) { - try (V vector = vectorGenerator.apply(allocator)) { - U[] array = dataGenerator.populate(vector, length, nullFraction); - TestSortingUtil.sortArray(array); - - FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); - - sorter.sortInPlace(vector, comparator); - - TestSortingUtil.verifyResults(vector, array); - } - } - - void sortOutOfPlace( - int length, - double nullFraction, - Function vectorGenerator, - TestSortingUtil.DataGenerator dataGenerator) { - try (V vector = vectorGenerator.apply(allocator)) { - U[] array = dataGenerator.populate(vector, length, nullFraction); - TestSortingUtil.sortArray(array); - - // sort the vector - FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); - - try (V sortedVec = - (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { - 
sortedVec.allocateNew(vector.getValueCount()); - sortedVec.setValueCount(vector.getValueCount()); - - sorter.sortOutOfPlace(vector, sortedVec, comparator); - - // verify results - TestSortingUtil.verifyResults(sortedVec, array); - } - } - } - - public static Stream getParameters() { - List params = new ArrayList<>(); - for (int length : VECTOR_LENGTHS) { - for (double nullFrac : NULL_FRACTIONS) { - for (boolean inPlace : new boolean[] {true, false}) { - params.add( - Arguments.of( - inPlace, - length, - nullFrac, - "TinyIntVector", - (Function) - allocator -> new TinyIntVector("vector", allocator), - TestSortingUtil.TINY_INT_GENERATOR)); - - params.add( - Arguments.of( - inPlace, - length, - nullFrac, - "SmallIntVector", - (Function) - allocator -> new SmallIntVector("vector", allocator), - TestSortingUtil.SMALL_INT_GENERATOR)); - - params.add( - Arguments.of( - inPlace, - length, - nullFrac, - "IntVector", - (Function) - allocator -> new IntVector("vector", allocator), - TestSortingUtil.INT_GENERATOR)); - - params.add( - Arguments.of( - inPlace, - length, - nullFrac, - "BigIntVector", - (Function) - allocator -> new BigIntVector("vector", allocator), - TestSortingUtil.LONG_GENERATOR)); - - params.add( - Arguments.of( - inPlace, - length, - nullFrac, - "Float4Vector", - (Function) - allocator -> new Float4Vector("vector", allocator), - TestSortingUtil.FLOAT_GENERATOR)); - - params.add( - Arguments.of( - inPlace, - length, - nullFrac, - "Float8Vector", - (Function) - allocator -> new Float8Vector("vector", allocator), - TestSortingUtil.DOUBLE_GENERATOR)); - } - } - } - return params.stream(); - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java deleted file mode 100644 index 29f3331ff0c79..0000000000000 --- 
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link GeneralOutOfPlaceVectorSorter}. 
*/ -public class TestGeneralOutOfPlaceVectorSorter { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - VectorValueComparator getComparator(StructVector structVector) { - IntVector child0 = structVector.getChild("column0", IntVector.class); - VectorValueComparator childComp0 = - DefaultVectorComparators.createDefaultComparator(child0); - childComp0.attachVector(child0); - - IntVector child1 = structVector.getChild("column1", IntVector.class); - VectorValueComparator childComp1 = - DefaultVectorComparators.createDefaultComparator(child1); - childComp1.attachVector(child1); - - VectorValueComparator comp = - new VectorValueComparator() { - - @Override - public int compareNotNull(int index1, int index2) { - // compare values by lexicographic order - int result0 = childComp0.compare(index1, index2); - if (result0 != 0) { - return result0; - } - return childComp1.compare(index1, index2); - } - - @Override - public VectorValueComparator createNew() { - return this; - } - }; - - return comp; - } - - @Test - public void testSortStructVector() { - final int vectorLength = 7; - try (StructVector srcVector = StructVector.empty("src struct", allocator); - StructVector dstVector = StructVector.empty("dst struct", allocator)) { - - IntVector srcChild0 = - srcVector.addOrGet( - "column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - IntVector srcChild1 = - srcVector.addOrGet( - "column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - - IntVector dstChild0 = - dstVector.addOrGet( - "column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - IntVector dstChild1 = - dstVector.addOrGet( - "column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - - // src struct vector values: - // [ - // (2, 1) - // (3, 4) - // (5, 4) - // (null, 3) - // (7, null) 
- // (null, null) - // (6, 6) - // ] - - ValueVectorDataPopulator.setVector(srcChild0, 2, 3, 5, null, 7, null, 6); - ValueVectorDataPopulator.setVector(srcChild1, 1, 4, 4, 3, null, null, 6); - srcVector.setIndexDefined(0); - srcVector.setIndexDefined(1); - srcVector.setIndexDefined(2); - srcVector.setIndexDefined(3); - srcVector.setIndexDefined(4); - srcVector.setIndexDefined(6); - srcVector.setValueCount(vectorLength); - - dstChild0.allocateNew(vectorLength); - dstChild1.allocateNew(vectorLength); - dstVector.setValueCount(vectorLength); - - // construct the comparator - VectorValueComparator comp = getComparator(srcVector); - - // sort the vector - GeneralOutOfPlaceVectorSorter sorter = new GeneralOutOfPlaceVectorSorter<>(); - sorter.sortOutOfPlace(srcVector, dstVector, comp); - - // validate results - assertEquals(vectorLength, dstVector.getValueCount()); - assertEquals( - "[" - + "null, " - + "{\"column1\":3}, " - + "{\"column0\":2,\"column1\":1}, " - + "{\"column0\":3,\"column1\":4}, " - + "{\"column0\":5,\"column1\":4}, " - + "{\"column0\":6,\"column1\":6}, " - + "{\"column0\":7}" - + "]", - dstVector.toString()); - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java deleted file mode 100644 index ec6d2b71a3718..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link IndexSorter}. */ -public class TestIndexSorter { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testIndexSort() { - try (IntVector vec = new IntVector("", allocator)) { - vec.allocateNew(10); - vec.setValueCount(10); - - // fill data to sort - ValueVectorDataPopulator.setVector(vec, 11, 8, 33, 10, 12, 17, null, 23, 35, 2); - - // sort the index - IndexSorter indexSorter = new IndexSorter<>(); - DefaultVectorComparators.IntComparator intComparator = - new DefaultVectorComparators.IntComparator(); - intComparator.attachVector(vec); - - IntVector indices = new IntVector("", allocator); - indices.setValueCount(10); - indexSorter.sort(vec, indices, intComparator); - - int[] expected = new int[] {6, 9, 1, 3, 0, 4, 5, 7, 2, 8}; - - for (int i = 0; i < expected.length; i++) { - assertTrue(!indices.isNull(i)); - assertEquals(expected[i], indices.get(i)); - } - 
indices.close(); - } - } - - /** - * Tests the worst case for quick sort. It may cause stack overflow if the algorithm is - * implemented as a recursive algorithm. - */ - @Test - public void testSortLargeIncreasingInt() { - final int vectorLength = 20000; - try (IntVector vec = new IntVector("", allocator)) { - vec.allocateNew(vectorLength); - vec.setValueCount(vectorLength); - - // fill data to sort - for (int i = 0; i < vectorLength; i++) { - vec.set(i, i); - } - - // sort the vector - IndexSorter indexSorter = new IndexSorter<>(); - DefaultVectorComparators.IntComparator intComparator = - new DefaultVectorComparators.IntComparator(); - intComparator.attachVector(vec); - - try (IntVector indices = new IntVector("", allocator)) { - indices.setValueCount(vectorLength); - indexSorter.sort(vec, indices, intComparator); - - for (int i = 0; i < vectorLength; i++) { - assertTrue(!indices.isNull(i)); - assertEquals(i, indices.get(i)); - } - } - } - } - - @Test - public void testChoosePivot() { - final int vectorLength = 100; - try (IntVector vec = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { - vec.allocateNew(vectorLength); - indices.allocateNew(vectorLength); - - // the vector is sorted, so the pivot should be in the middle - for (int i = 0; i < vectorLength; i++) { - vec.set(i, i * 100); - indices.set(i, i); - } - vec.setValueCount(vectorLength); - indices.setValueCount(vectorLength); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - // setup internal data structures - comparator.attachVector(vec); - - int low = 5; - int high = 6; - assertTrue(high - low + 1 < FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD); - - // the range is small enough, so the pivot is simply selected as the low value - int pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator); - assertEquals(pivotIndex, low); - assertEquals(pivotIndex, indices.get(low)); - - low = 30; 
- high = 80; - assertTrue(high - low + 1 >= FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD); - - // the range is large enough, so the median is selected as the pivot - pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator); - assertEquals(pivotIndex, (low + high) / 2); - assertEquals(pivotIndex, indices.get(low)); - } - } - - /** Evaluates choosing pivot for all possible permutations of 3 numbers. */ - @Test - public void testChoosePivotAllPermutes() { - try (IntVector vec = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { - vec.allocateNew(); - indices.allocateNew(); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - // setup internal data structures - comparator.attachVector(vec); - int low = 0; - int high = 2; - - // test all the 6 permutations of 3 numbers - ValueVectorDataPopulator.setVector(indices, 0, 1, 2); - ValueVectorDataPopulator.setVector(vec, 11, 22, 33); - int pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator); - assertEquals(1, pivotIndex); - assertEquals(1, indices.get(low)); - - ValueVectorDataPopulator.setVector(indices, 0, 1, 2); - ValueVectorDataPopulator.setVector(vec, 11, 33, 22); - pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator); - assertEquals(2, pivotIndex); - assertEquals(2, indices.get(low)); - - ValueVectorDataPopulator.setVector(indices, 0, 1, 2); - ValueVectorDataPopulator.setVector(vec, 22, 11, 33); - pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator); - assertEquals(0, pivotIndex); - assertEquals(0, indices.get(low)); - - ValueVectorDataPopulator.setVector(indices, 0, 1, 2); - ValueVectorDataPopulator.setVector(vec, 22, 33, 11); - pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator); - assertEquals(0, pivotIndex); - assertEquals(0, indices.get(low)); - - ValueVectorDataPopulator.setVector(indices, 0, 1, 2); - 
ValueVectorDataPopulator.setVector(vec, 33, 11, 22); - pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator); - assertEquals(2, pivotIndex); - assertEquals(2, indices.get(low)); - - ValueVectorDataPopulator.setVector(indices, 0, 1, 2); - ValueVectorDataPopulator.setVector(vec, 33, 22, 11); - pivotIndex = IndexSorter.choosePivot(low, high, indices, comparator); - assertEquals(1, pivotIndex); - assertEquals(1, indices.get(low)); - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java deleted file mode 100644 index d0c1f6d6e5c62..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.sort; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link InsertionSorter}. */ -public class TestInsertionSorter { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - private static final int VECTOR_LENGTH = 10; - - private void testSortIntVectorRange(int start, int end, int[] expected) { - try (IntVector vector = new IntVector("vector", allocator); - IntVector buffer = new IntVector("buffer", allocator)) { - - buffer.allocateNew(1); - - ValueVectorDataPopulator.setVector(vector, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - assertEquals(VECTOR_LENGTH, vector.getValueCount()); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); - InsertionSorter.insertionSort(vector, start, end, comparator, buffer); - - assertEquals(VECTOR_LENGTH, expected.length); - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertFalse(vector.isNull(i)); - assertEquals(expected[i], vector.get(i)); - } - } - } - - @Test - public void testSortIntVector() { - testSortIntVectorRange(2, 5, new int[] {9, 8, 4, 5, 6, 7, 3, 2, 1, 0}); - testSortIntVectorRange(3, 7, new int[] {9, 8, 7, 2, 3, 4, 5, 6, 1, 0}); - testSortIntVectorRange(3, 4, new int[] {9, 8, 7, 5, 6, 4, 3, 2, 1, 0}); - testSortIntVectorRange(7, 7, new int[] {9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); - testSortIntVectorRange(0, 5, new int[] {4, 5, 6, 7, 8, 9, 3, 2, 1, 0}); - testSortIntVectorRange(8, 9, new 
int[] {9, 8, 7, 6, 5, 4, 3, 2, 0, 1}); - testSortIntVectorRange(0, 9, new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); - } - - private void testSortIndicesRange(int start, int end, int[] expectedIndices) { - try (IntVector vector = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { - - ValueVectorDataPopulator.setVector(vector, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - ValueVectorDataPopulator.setVector(indices, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); - - assertEquals(VECTOR_LENGTH, vector.getValueCount()); - assertEquals(VECTOR_LENGTH, indices.getValueCount()); - - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); - comparator.attachVector(vector); - - InsertionSorter.insertionSort(indices, start, end, comparator); - - // verify results - assertEquals(VECTOR_LENGTH, expectedIndices.length); - for (int i = 0; i < VECTOR_LENGTH; i++) { - assertFalse(indices.isNull(i)); - assertEquals(expectedIndices[i], indices.get(i)); - } - } - } - - @Test - public void testSortIndices() { - testSortIndicesRange(2, 5, new int[] {0, 1, 5, 4, 3, 2, 6, 7, 8, 9}); - testSortIndicesRange(3, 7, new int[] {0, 1, 2, 7, 6, 5, 4, 3, 8, 9}); - testSortIndicesRange(3, 4, new int[] {0, 1, 2, 4, 3, 5, 6, 7, 8, 9}); - testSortIndicesRange(7, 7, new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); - testSortIndicesRange(0, 5, new int[] {5, 4, 3, 2, 1, 0, 6, 7, 8, 9}); - testSortIndicesRange(8, 9, new int[] {0, 1, 2, 3, 4, 5, 6, 7, 9, 8}); - testSortIndicesRange(0, 9, new int[] {9, 8, 7, 6, 5, 4, 3, 2, 1, 0}); - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java deleted file mode 100644 index f0c4bc86f7cf5..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under 
one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link OffHeapIntStack}. 
*/ -public class TestOffHeapIntStack { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testPushPop() { - try (OffHeapIntStack stack = new OffHeapIntStack(allocator)) { - assertTrue(stack.isEmpty()); - - final int elemCount = 500; - for (int i = 0; i < elemCount; i++) { - stack.push(i); - assertEquals(i, stack.getTop()); - } - - assertEquals(elemCount, stack.getCount()); - - for (int i = 0; i < elemCount; i++) { - assertEquals(elemCount - i - 1, stack.getTop()); - assertEquals(elemCount - i - 1, stack.pop()); - } - - assertTrue(stack.isEmpty()); - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java deleted file mode 100644 index 9358ef93b086e..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.sort; - -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Stream; -import org.junit.jupiter.params.provider.Arguments; - -/** Test cases for out-of-place sorters. */ -public abstract class TestOutOfPlaceVectorSorter { - - public static Stream getParameter() { - List args = new ArrayList<>(); - for (boolean generalSorter : new boolean[] {false, true}) { - args.add(Arguments.of(generalSorter)); - } - return args.stream(); - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java deleted file mode 100644 index 24b2c752d0863..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.sort; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.lang.reflect.Array; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Random; -import java.util.function.BiConsumer; -import java.util.function.Supplier; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.testing.RandomDataGenerator; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; - -/** Utilities for sorting related utilities. */ -public class TestSortingUtil { - - static final Random random = new Random(0); - - static final DataGenerator TINY_INT_GENERATOR = - new DataGenerator<>( - RandomDataGenerator.TINY_INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), - Byte.class); - - static final DataGenerator SMALL_INT_GENERATOR = - new DataGenerator<>( - RandomDataGenerator.SMALL_INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), - Short.class); - - static final DataGenerator INT_GENERATOR = - new DataGenerator<>( - RandomDataGenerator.INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), - Integer.class); - - static final DataGenerator LONG_GENERATOR = - new DataGenerator<>( - RandomDataGenerator.LONG_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), - Long.class); - - static final DataGenerator FLOAT_GENERATOR = - new DataGenerator<>( - RandomDataGenerator.FLOAT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), - Float.class); - - static final DataGenerator DOUBLE_GENERATOR = - 
new DataGenerator<>( - RandomDataGenerator.DOUBLE_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), - Double.class); - - static final DataGenerator STRING_GENERATOR = - new DataGenerator<>( - () -> { - int strLength = random.nextInt(20) + 1; - return generateRandomString(strLength); - }, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), - String.class); - - private TestSortingUtil() {} - - /** Verify that a vector is equal to an array. */ - public static void verifyResults(V vector, U[] expected) { - assertEquals(vector.getValueCount(), expected.length); - for (int i = 0; i < expected.length; i++) { - assertEquals(vector.getObject(i), expected[i]); - } - } - - /** Sort an array with null values come first. */ - public static > void sortArray(U[] array) { - Arrays.sort( - array, - (a, b) -> { - if (a == null || b == null) { - if (a == null && b == null) { - return 0; - } - - // exactly one is null - if (a == null) { - return -1; - } else { - return 1; - } - } - return a.compareTo(b); - }); - } - - /** Generate a string with alphabetic characters only. */ - static String generateRandomString(int length) { - byte[] str = new byte[length]; - final int lower = 'a'; - final int upper = 'z'; - - for (int i = 0; i < length; i++) { - // make r non-negative - int r = random.nextInt() & Integer.MAX_VALUE; - str[i] = (byte) (r % (upper - lower + 1) + lower); - } - - return new String(str, StandardCharsets.UTF_8); - } - - /** - * Utility to generate data for testing. - * - * @param vector type. - * @param data element type. - */ - static class DataGenerator> { - - final Supplier dataGenerator; - - final BiConsumer vectorPopulator; - - final Class clazz; - - DataGenerator(Supplier dataGenerator, BiConsumer vectorPopulator, Class clazz) { - this.dataGenerator = dataGenerator; - this.vectorPopulator = vectorPopulator; - this.clazz = clazz; - } - - /** - * Populate the vector according to the specified parameters. 
- * - * @param vector the vector to populate. - * @param length vector length. - * @param nullFraction the fraction of null values. - * @return An array with the same data as the vector. - */ - U[] populate(V vector, int length, double nullFraction) { - U[] array = (U[]) Array.newInstance(clazz, length); - for (int i = 0; i < length; i++) { - double r = Math.random(); - U value = r < nullFraction ? null : dataGenerator.get(); - array[i] = value; - } - vectorPopulator.accept(vector, array); - return array; - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java deleted file mode 100644 index 09ac963332e31..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.sort; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Objects; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VarCharVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link StableVectorComparator}. */ -public class TestStableVectorComparator { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testCompare() { - try (VarCharVector vec = new VarCharVector("", allocator)) { - vec.allocateNew(100, 5); - vec.setValueCount(10); - - // fill data to sort - vec.set(0, "ba".getBytes(StandardCharsets.UTF_8)); - vec.set(1, "abc".getBytes(StandardCharsets.UTF_8)); - vec.set(2, "aa".getBytes(StandardCharsets.UTF_8)); - vec.set(3, "abc".getBytes(StandardCharsets.UTF_8)); - vec.set(4, "a".getBytes(StandardCharsets.UTF_8)); - - VectorValueComparator comparator = new TestVarCharSorter(); - VectorValueComparator stableComparator = - new StableVectorComparator<>(comparator); - stableComparator.attachVector(vec); - - assertTrue(stableComparator.compare(0, 1) > 0); - assertTrue(stableComparator.compare(1, 2) < 0); - assertTrue(stableComparator.compare(2, 3) < 0); - assertTrue(stableComparator.compare(1, 3) < 0); - assertTrue(stableComparator.compare(3, 1) > 0); - assertEquals(0, stableComparator.compare(3, 3)); - } - } - - @Test - public void testStableSortString() { - try (VarCharVector vec = new VarCharVector("", allocator)) { - vec.allocateNew(100, 10); - vec.setValueCount(10); - - // fill data to sort - vec.set(0, "a".getBytes(StandardCharsets.UTF_8)); - 
vec.set(1, "abc".getBytes(StandardCharsets.UTF_8)); - vec.set(2, "aa".getBytes(StandardCharsets.UTF_8)); - vec.set(3, "a1".getBytes(StandardCharsets.UTF_8)); - vec.set(4, "abcdefg".getBytes(StandardCharsets.UTF_8)); - vec.set(5, "accc".getBytes(StandardCharsets.UTF_8)); - vec.set(6, "afds".getBytes(StandardCharsets.UTF_8)); - vec.set(7, "0".getBytes(StandardCharsets.UTF_8)); - vec.set(8, "01".getBytes(StandardCharsets.UTF_8)); - vec.set(9, "0c".getBytes(StandardCharsets.UTF_8)); - - // sort the vector - VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); - VectorValueComparator comparator = new TestVarCharSorter(); - VectorValueComparator stableComparator = - new StableVectorComparator<>(comparator); - - try (VarCharVector sortedVec = - (VarCharVector) - vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { - sortedVec.allocateNew(vec.getByteCapacity(), vec.getValueCount()); - sortedVec.setLastSet(vec.getValueCount() - 1); - sortedVec.setValueCount(vec.getValueCount()); - - sorter.sortOutOfPlace(vec, sortedVec, stableComparator); - - // verify results - // the results are stable - assertEquals( - "0", new String(Objects.requireNonNull(sortedVec.get(0)), StandardCharsets.UTF_8)); - assertEquals( - "01", new String(Objects.requireNonNull(sortedVec.get(1)), StandardCharsets.UTF_8)); - assertEquals( - "0c", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); - assertEquals( - "a", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); - assertEquals( - "abc", new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); - assertEquals( - "aa", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); - assertEquals( - "a1", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); - assertEquals( - "abcdefg", - new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); - assertEquals( 
- "accc", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); - assertEquals( - "afds", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); - } - } - } - - /** Utility comparator that compares varchars by the first character. */ - private static class TestVarCharSorter extends VectorValueComparator { - - @Override - public int compareNotNull(int index1, int index2) { - byte b1 = vector1.get(index1)[0]; - byte b2 = vector2.get(index2)[0]; - return b1 - b2; - } - - @Override - public VectorValueComparator createNew() { - return new TestVarCharSorter(); - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java deleted file mode 100644 index 6addec9e266c3..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.sort; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Objects; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.VarCharVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.MethodSource; - -/** Test cases for {@link VariableWidthOutOfPlaceVectorSorter}. */ -public class TestVariableWidthOutOfPlaceVectorSorter extends TestOutOfPlaceVectorSorter { - - private BufferAllocator allocator; - - OutOfPlaceVectorSorter getSorter(boolean generalSorter) { - return generalSorter - ? new GeneralOutOfPlaceVectorSorter<>() - : new VariableWidthOutOfPlaceVectorSorter(); - } - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @ParameterizedTest - @MethodSource("getParameter") - public void testSortString(boolean generalSorter) { - try (VarCharVector vec = new VarCharVector("", allocator)) { - vec.allocateNew(100, 10); - vec.setValueCount(10); - - // fill data to sort - vec.set(0, "hello".getBytes(StandardCharsets.UTF_8)); - vec.set(1, "abc".getBytes(StandardCharsets.UTF_8)); - vec.setNull(2); - vec.set(3, "world".getBytes(StandardCharsets.UTF_8)); - vec.set(4, "12".getBytes(StandardCharsets.UTF_8)); - vec.set(5, "dictionary".getBytes(StandardCharsets.UTF_8)); - vec.setNull(6); - vec.set(7, "hello".getBytes(StandardCharsets.UTF_8)); - vec.set(8, "good".getBytes(StandardCharsets.UTF_8)); - vec.set(9, "yes".getBytes(StandardCharsets.UTF_8)); - - // sort the vector - OutOfPlaceVectorSorter sorter = getSorter(generalSorter); - 
VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); - - VarCharVector sortedVec = - (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); - sortedVec.allocateNew(vec.getByteCapacity(), vec.getValueCount()); - sortedVec.setLastSet(vec.getValueCount() - 1); - sortedVec.setValueCount(vec.getValueCount()); - - sorter.sortOutOfPlace(vec, sortedVec, comparator); - - // verify results - assertEquals(vec.getValueCount(), sortedVec.getValueCount()); - assertEquals(vec.getByteCapacity(), sortedVec.getByteCapacity()); - assertEquals(vec.getLastSet(), sortedVec.getLastSet()); - - assertTrue(sortedVec.isNull(0)); - assertTrue(sortedVec.isNull(1)); - assertEquals( - "12", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); - assertEquals( - "abc", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); - assertEquals( - "dictionary", - new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); - assertEquals( - "good", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); - assertEquals( - "hello", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); - assertEquals( - "hello", new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); - assertEquals( - "world", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); - assertEquals( - "yes", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); - - sortedVec.close(); - } - } -} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java deleted file mode 100644 index 60a5600764a36..0000000000000 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java +++ /dev/null @@ -1,155 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.algorithm.sort; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.List; -import java.util.function.Function; -import java.util.stream.Stream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.util.Text; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** Test sorting variable width vectors with random data. 
*/ -public class TestVariableWidthSorting> { - - static final int[] VECTOR_LENGTHS = new int[] {2, 5, 10, 50, 100, 1000, 3000}; - - static final double[] NULL_FRACTIONS = {0, 0.1, 0.3, 0.5, 0.7, 0.9, 1}; - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(Integer.MAX_VALUE); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @ParameterizedTest - @MethodSource("getParameters") - public void testSort( - int length, - double nullFraction, - Function vectorGenerator, - TestSortingUtil.DataGenerator dataGenerator) { - sortOutOfPlace(length, nullFraction, vectorGenerator, dataGenerator); - } - - void sortOutOfPlace( - int length, - double nullFraction, - Function vectorGenerator, - TestSortingUtil.DataGenerator dataGenerator) { - try (V vector = vectorGenerator.apply(allocator)) { - U[] array = dataGenerator.populate(vector, length, nullFraction); - Arrays.sort(array, (Comparator) new StringComparator()); - - // sort the vector - VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); - VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); - - try (V sortedVec = - (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { - int dataSize = vector.getOffsetBuffer().getInt(vector.getValueCount() * 4L); - sortedVec.allocateNew(dataSize, vector.getValueCount()); - sortedVec.setValueCount(vector.getValueCount()); - - sorter.sortOutOfPlace(vector, sortedVec, comparator); - - // verify results - verifyResults(sortedVec, (String[]) array); - } - } - } - - public static Stream getParameters() { - List params = new ArrayList<>(); - for (int length : VECTOR_LENGTHS) { - for (double nullFrac : NULL_FRACTIONS) { - params.add( - Arguments.of( - length, - nullFrac, - (Function) - allocator -> new VarCharVector("vector", allocator), - TestSortingUtil.STRING_GENERATOR)); - } - } - return params.stream(); - } - 
- /** Verify results as byte arrays. */ - public static void verifyResults(V vector, String[] expected) { - assertEquals(vector.getValueCount(), expected.length); - for (int i = 0; i < expected.length; i++) { - if (expected[i] == null) { - assertTrue(vector.isNull(i)); - } else { - assertArrayEquals( - ((Text) vector.getObject(i)).getBytes(), expected[i].getBytes(StandardCharsets.UTF_8)); - } - } - } - - /** - * String comparator with the same behavior as that of {@link - * DefaultVectorComparators.VariableWidthComparator}. - */ - static class StringComparator implements Comparator { - - @Override - public int compare(String str1, String str2) { - if (str1 == null || str2 == null) { - if (str1 == null && str2 == null) { - return 0; - } - - return str1 == null ? -1 : 1; - } - - byte[] bytes1 = str1.getBytes(StandardCharsets.UTF_8); - byte[] bytes2 = str2.getBytes(StandardCharsets.UTF_8); - - for (int i = 0; i < bytes1.length && i < bytes2.length; i++) { - if (bytes1[i] != bytes2[i]) { - return (bytes1[i] & 0xff) < (bytes2[i] & 0xff) ? -1 : 1; - } - } - return bytes1.length - bytes2.length; - } - } -} diff --git a/java/algorithm/src/test/resources/logback.xml b/java/algorithm/src/test/resources/logback.xml deleted file mode 100644 index 4c54d18a210ff..0000000000000 --- a/java/algorithm/src/test/resources/logback.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - - diff --git a/java/api-changes.md b/java/api-changes.md deleted file mode 100644 index 22003c3351765..0000000000000 --- a/java/api-changes.md +++ /dev/null @@ -1,32 +0,0 @@ - - -# Arrow Java API Changes - -This document tracks behavior changes to Java APIs, as listed below. 
- -- **[ARROW-5973](https://issues.apache.org/jira/browse/ARROW-5973)**: - * **Start date**: 2019/07/18 - * **Resolve date**: 2019/07/20 - * **Brief description**: The semantics of the get methods for [VarCharVector](./vector/scr/main/org/apache/arrow/vector/VarCharVector.java), [VarBinaryVector](./vector/scr/main/org/apache/arrow/vector/VarBinaryVector.java), and [FixedSizeBinaryVector](./vector/scr/main/org/apache/arrow/vector/FixedSizeBinaryVector.java) changes. In the past, if the validity bit is clear, the methods throw throws an IllegalStateException when NULL_CHECKING_ENABLED is set, or returns an empty object when the flag is not set. Now, the get methods return a null if the validity bit is clear. - -- **[ARROW-5842](https://issues.apache.org/jira/browse/ARROW-5842)**: - * **Start date**: 2019/07/04 - * **Resolve date**: 2019/07/11 - * **Brief description**: The semantics of lastSet member in class [ListVector](./vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java) changes. In the past, it refers to the next index that will be set. After the change it points to the last index that is actually set. 
diff --git a/java/bom/pom.xml b/java/bom/pom.xml deleted file mode 100644 index ccb70d5fb339d..0000000000000 --- a/java/bom/pom.xml +++ /dev/null @@ -1,283 +0,0 @@ - - - - 4.0.0 - - - org.apache - apache - 33 - - - - org.apache.arrow - arrow-bom - 19.0.0-SNAPSHOT - pom - - Arrow Bill of Materials - Arrow Bill of Materials - https://arrow.apache.org/ - - - - Developer List - dev-subscribe@arrow.apache.org - dev-unsubscribe@arrow.apache.org - dev@arrow.apache.org - https://lists.apache.org/list.html?dev@arrow.apache.org - - - Commits List - commits-subscribe@arrow.apache.org - commits-unsubscribe@arrow.apache.org - commits@arrow.apache.org - https://lists.apache.org/list.html?commits@arrow.apache.org - - - Issues List - issues-subscribe@arrow.apache.org - issues-unsubscribe@arrow.apache.org - https://lists.apache.org/list.html?issues@arrow.apache.org - - - GitHub List - github-subscribe@arrow.apache.org - github-unsubscribe@arrow.apache.org - https://lists.apache.org/list.html?github@arrow.apache.org - - - - - scm:git:https://github.com/apache/arrow.git - scm:git:https://github.com/apache/arrow.git - main - https://github.com/apache/arrow/tree/${project.scm.tag} - - - - GitHub - https://github.com/apache/arrow/issues - - - - - - 11 - 11 - 11 - 11 - - - - - - org.apache.arrow - arrow-vector - ${project.version} - - - org.apache.arrow - arrow-vector - ${project.version} - ${arrow.vector.classifier} - - - org.apache.arrow - arrow-avro - ${project.version} - - - org.apache.arrow - arrow-jdbc - ${project.version} - - - org.apache.arrow - arrow-orc - ${project.version} - - - org.apache.arrow - arrow-algorithm - ${project.version} - - - org.apache.arrow - arrow-c-data - ${project.version} - - - org.apache.arrow - arrow-compression - ${project.version} - - - org.apache.arrow - arrow-dataset - ${project.version} - - - org.apache.arrow - flight-core - ${project.version} - - - org.apache.arrow - flight-integration-tests - ${project.version} - - - org.apache.arrow - flight-sql - 
${project.version} - - - org.apache.arrow - flight-sql-jdbc-core - ${project.version} - - - org.apache.arrow - flight-sql-jdbc-driver - ${project.version} - - - org.apache.arrow - arrow-format - ${project.version} - - - org.apache.arrow - arrow-gandiva - ${project.version} - - - org.apache.arrow - arrow-memory-core - ${project.version} - - - org.apache.arrow - arrow-memory-netty - ${project.version} - - - org.apache.arrow - arrow-memory-unsafe - ${project.version} - - - org.apache.arrow - arrow-performance - ${project.version} - - - org.apache.arrow - arrow-tools - ${project.version} - - - - - - - - - com.diffplug.spotless - spotless-maven-plugin - 2.30.0 - - - org.codehaus.mojo - versions-maven-plugin - 2.18.0 - - - - - - org.apache.maven.plugins - maven-project-info-reports-plugin - - - org.apache.maven.plugins - maven-site-plugin - - - com.diffplug.spotless - spotless-maven-plugin - - - - ${maven.multiModuleProjectDirectory}/dev/license/asf-xml.license - (<configuration|<project) - - - - - - - spotless-check - - check - - - - - - - - - - - org.apache.maven.plugins - maven-project-info-reports-plugin - - - org.apache.maven.plugins - maven-site-plugin - - - - - - - apache-release - - - - org.apache.maven.plugins - maven-assembly-plugin - - - source-release-assembly - - - true - - - - - - - - - diff --git a/java/c/CMakeLists.txt b/java/c/CMakeLists.txt deleted file mode 100644 index 83909c5e13e1b..0000000000000 --- a/java/c/CMakeLists.txt +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} - ${JNI_INCLUDE_DIRS} ${JNI_HEADERS_DIR}) - -add_jar(arrow_java_jni_cdata_jar - src/main/java/org/apache/arrow/c/jni/CDataJniException.java - src/main/java/org/apache/arrow/c/jni/JniLoader.java - src/main/java/org/apache/arrow/c/jni/JniWrapper.java - src/main/java/org/apache/arrow/c/jni/PrivateData.java - GENERATE_NATIVE_HEADERS - arrow_java_jni_cdata_headers) - -add_library(arrow_java_jni_cdata SHARED src/main/cpp/jni_wrapper.cc) -set_property(TARGET arrow_java_jni_cdata PROPERTY OUTPUT_NAME "arrow_cdata_jni") -target_link_libraries(arrow_java_jni_cdata arrow_java_jni_cdata_headers jni) - -set(ARROW_JAVA_JNI_C_LIBDIR - "${CMAKE_INSTALL_PREFIX}/lib/arrow_cdata_jni/${ARROW_JAVA_JNI_ARCH_DIR}") -set(ARROW_JAVA_JNI_C_BINDIR - "${CMAKE_INSTALL_PREFIX}/bin/arrow_cdata_jni/${ARROW_JAVA_JNI_ARCH_DIR}") - -install(TARGETS arrow_java_jni_cdata - LIBRARY DESTINATION ${ARROW_JAVA_JNI_C_LIBDIR} - RUNTIME DESTINATION ${ARROW_JAVA_JNI_C_BINDIR}) diff --git a/java/c/README.md b/java/c/README.md deleted file mode 100644 index db2a2403b374f..0000000000000 --- a/java/c/README.md +++ /dev/null @@ -1,54 +0,0 @@ - - -# C Interfaces for Arrow Java - -## Setup Build Environment - -install: - - Java 8 or later - - Maven 3.3 or later - - A C++17-enabled compiler - - CMake 3.11 or later - - Make or ninja build utilities - -## Building JNI wrapper shared library - -``` -mkdir -p build -pushd build -cmake .. -cmake --build . 
-popd -``` - -## Building and running tests - -Run tests with - -``` -mvn test -``` - -To install Apache Arrow (Java) with this module enabled run the following from the project root directory: - -``` -cd java -mvn -Parrow-c-data install -``` diff --git a/java/c/pom.xml b/java/c/pom.xml deleted file mode 100644 index c90b6dc0efef4..0000000000000 --- a/java/c/pom.xml +++ /dev/null @@ -1,106 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - - - arrow-c-data - jar - Arrow Java C Data Interface - Java implementation of C Data Interface - - - ./build - - - - - org.apache.arrow - arrow-vector - ${arrow.vector.classifier} - compile - - - org.apache.arrow - arrow-vector - ${project.version} - test-jar - test - - - org.apache.arrow - arrow-memory-core - compile - - - org.slf4j - slf4j-api - - - org.immutables - value-annotations - - - org.apache.arrow - arrow-memory-unsafe - test - - - org.apache.arrow - arrow-format - test - - - com.google.guava - guava - test - - - org.assertj - assertj-core - test - - - - - - ${arrow.c.jni.dist.dir} - - **/*arrow_cdata_jni.* - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - -Werror - - - - - - diff --git a/java/c/src/main/cpp/abi.h b/java/c/src/main/cpp/abi.h deleted file mode 100644 index d58417e6fbcf2..0000000000000 --- a/java/c/src/main/cpp/abi.h +++ /dev/null @@ -1,111 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef ARROW_C_DATA_INTERFACE -#define ARROW_C_DATA_INTERFACE - -#define ARROW_FLAG_DICTIONARY_ORDERED 1 -#define ARROW_FLAG_NULLABLE 2 -#define ARROW_FLAG_MAP_KEYS_SORTED 4 - -struct ArrowSchema { - // Array type description - const char* format; - const char* name; - const char* metadata; - int64_t flags; - int64_t n_children; - struct ArrowSchema** children; - struct ArrowSchema* dictionary; - - // Release callback - void (*release)(struct ArrowSchema*); - // Opaque producer-specific data - void* private_data; -}; - -struct ArrowArray { - // Array data description - int64_t length; - int64_t null_count; - int64_t offset; - int64_t n_buffers; - int64_t n_children; - const void** buffers; - struct ArrowArray** children; - struct ArrowArray* dictionary; - - // Release callback - void (*release)(struct ArrowArray*); - // Opaque producer-specific data - void* private_data; -}; - -#endif // ARROW_C_DATA_INTERFACE - -#ifndef ARROW_C_STREAM_INTERFACE -#define ARROW_C_STREAM_INTERFACE - -struct ArrowArrayStream { - // Callback to get the stream type - // (will be the same for all arrays in the stream). - // - // Return value: 0 if successful, an `errno`-compatible error code otherwise. - // - // If successful, the ArrowSchema must be released independently from the stream. 
- int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); - - // Callback to get the next array - // (if no error and the array is released, the stream has ended) - // - // Return value: 0 if successful, an `errno`-compatible error code otherwise. - // - // If successful, the ArrowArray must be released independently from the stream. - int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); - - // Callback to get optional detailed error information. - // This must only be called if the last stream operation failed - // with a non-0 return code. - // - // Return value: pointer to a null-terminated character array describing - // the last error, or NULL if no description is available. - // - // The returned pointer is only valid until the next operation on this stream - // (including release). - const char* (*get_last_error)(struct ArrowArrayStream*); - - // Release callback: release the stream's own resources. - // Note that arrays returned by `get_next` must be individually released. - void (*release)(struct ArrowArrayStream*); - - // Opaque producer-specific data - void* private_data; -}; - -#endif // ARROW_C_STREAM_INTERFACE - -#ifdef __cplusplus -} -#endif diff --git a/java/c/src/main/cpp/jni_wrapper.cc b/java/c/src/main/cpp/jni_wrapper.cc deleted file mode 100644 index 35c2b7787e779..0000000000000 --- a/java/c/src/main/cpp/jni_wrapper.cc +++ /dev/null @@ -1,523 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include -#include -#include -#include -#include -#include - -#include "./abi.h" -#include "org_apache_arrow_c_jni_JniWrapper.h" - -namespace { - -jclass kObjectClass; -jclass kRuntimeExceptionClass; -jclass kPrivateDataClass; -jclass kCDataExceptionClass; -jclass kStreamPrivateDataClass; - -jfieldID kPrivateDataLastErrorField; - -jmethodID kObjectToStringMethod; -jmethodID kPrivateDataCloseMethod; -jmethodID kPrivateDataGetNextMethod; -jmethodID kPrivateDataGetSchemaMethod; -jmethodID kCDataExceptionConstructor; - -jint JNI_VERSION = JNI_VERSION_10; - -class JniPendingException : public std::runtime_error { - public: - explicit JniPendingException(const std::string& arg) : std::runtime_error(arg) {} -}; - -void ThrowPendingException(const std::string& message) { - throw JniPendingException(message); -} - -void JniThrow(std::string message) { ThrowPendingException(message); } - -jclass CreateGlobalClassReference(JNIEnv* env, const char* class_name) { - jclass local_class = env->FindClass(class_name); - if (!local_class) { - std::string message = "Could not find class "; - message += class_name; - ThrowPendingException(message); - } - jclass global_class = (jclass)env->NewGlobalRef(local_class); - if (!global_class) { - std::string message = "Could not create global reference to class "; - message += class_name; - ThrowPendingException(message); - } - env->DeleteLocalRef(local_class); - return global_class; -} - -jmethodID GetMethodID(JNIEnv* env, jclass this_class, const char* name, const char* sig) { - jmethodID ret = 
env->GetMethodID(this_class, name, sig); - if (ret == nullptr) { - std::string error_message = "Unable to find method " + std::string(name) + - " with signature " + std::string(sig); - ThrowPendingException(error_message); - } - return ret; -} - -jfieldID GetFieldID(JNIEnv* env, jclass this_class, const char* name, const char* sig) { - jfieldID fieldId = env->GetFieldID(this_class, name, sig); - if (fieldId == nullptr) { - std::string error_message = "Unable to find field " + std::string(name) + - " with signature " + std::string(sig); - ThrowPendingException(error_message); - } - return fieldId; -} - -class InnerPrivateData { - public: - InnerPrivateData(JavaVM* vm, jobject private_data) - : vm_(vm), j_private_data_(private_data) {} - - JavaVM* vm_; - jobject j_private_data_; - // Only for ArrowArrayStream - std::string last_error_; -}; - -class JNIEnvGuard { - public: - explicit JNIEnvGuard(JavaVM* vm) : vm_(vm), should_detach_(false) { - JNIEnv* env; - jint code = vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); - if (code == JNI_EDETACHED) { - JavaVMAttachArgs args; - args.version = JNI_VERSION; - args.name = NULL; - args.group = NULL; - code = vm->AttachCurrentThread(reinterpret_cast(&env), &args); - should_detach_ = (code == JNI_OK); - } - if (code != JNI_OK) { - ThrowPendingException("Failed to attach the current thread to a Java VM"); - } - env_ = env; - } - - JNIEnv* env() { return env_; } - - ~JNIEnvGuard() { - if (should_detach_) { - vm_->DetachCurrentThread(); - should_detach_ = false; - } - } - - private: - bool should_detach_; - JavaVM* vm_; - JNIEnv* env_; -}; - -template -void release_exported(T* base) { - // This should not be called on already released structure - assert(base->release != nullptr); - - // Release children - for (int64_t i = 0; i < base->n_children; ++i) { - T* child = base->children[i]; - if (child->release != nullptr) { - child->release(child); - assert(child->release == nullptr); - } - } - - // Release dictionary - T* dict = 
base->dictionary; - if (dict != nullptr && dict->release != nullptr) { - dict->release(dict); - assert(dict->release == nullptr); - } - - // Release all data directly owned by the struct - InnerPrivateData* private_data = - reinterpret_cast(base->private_data); - - // It is possible for the JVM to be shut down when this is called; - // guard against that. Example: Python code using JPype may shut - // down the JVM before releasing the stream. - try { - JNIEnvGuard guard(private_data->vm_); - JNIEnv* env = guard.env(); - - env->CallObjectMethod(private_data->j_private_data_, kPrivateDataCloseMethod); - if (env->ExceptionCheck()) { - // Can't signal this to caller, so log and then try to free things - // as best we can - env->ExceptionDescribe(); - env->ExceptionClear(); - } - env->DeleteGlobalRef(private_data->j_private_data_); - } catch (const JniPendingException& e) { - std::cerr << "WARNING: Failed to release Java C Data resource: " << e.what() - << std::endl; - } - delete private_data; - base->private_data = nullptr; - - // Mark released - base->release = nullptr; -} - -// Attempt to copy the JVM-side lastError to the C++ side -void TryCopyLastError(JNIEnv* env, InnerPrivateData* private_data) { - jobject error_data = - env->GetObjectField(private_data->j_private_data_, kPrivateDataLastErrorField); - if (!error_data) { - private_data->last_error_.clear(); - return; - } - - auto arr = reinterpret_cast(error_data); - jbyte* error_bytes = env->GetByteArrayElements(arr, nullptr); - if (!error_bytes) { - private_data->last_error_.clear(); - return; - } - - char* error_str = reinterpret_cast(error_bytes); - private_data->last_error_ = std::string(error_str, std::strlen(error_str)); - - env->ReleaseByteArrayElements(arr, error_bytes, JNI_ABORT); -} - -// Normally the Java side catches all exceptions and populates -// lastError. If that fails we check for an exception and try to -// populate last_error_ ourselves. 
-void TryHandleUncaughtException(JNIEnv* env, InnerPrivateData* private_data, - jthrowable exc) { - jstring message = - reinterpret_cast(env->CallObjectMethod(exc, kObjectToStringMethod)); - if (!message) { - private_data->last_error_.clear(); - return; - } - const char* str = env->GetStringUTFChars(message, 0); - if (!str) { - private_data->last_error_.clear(); - return; - } - private_data->last_error_ = str; - env->ReleaseStringUTFChars(message, 0); -} - -int ArrowArrayStreamGetSchema(ArrowArrayStream* stream, ArrowSchema* out) { - assert(stream->private_data != nullptr); - InnerPrivateData* private_data = - reinterpret_cast(stream->private_data); - JNIEnvGuard guard(private_data->vm_); - JNIEnv* env = guard.env(); - - const jlong out_addr = static_cast(reinterpret_cast(out)); - const int err_code = env->CallIntMethod(private_data->j_private_data_, - kPrivateDataGetSchemaMethod, out_addr); - if (jthrowable exc = env->ExceptionOccurred()) { - TryHandleUncaughtException(env, private_data, exc); - env->ExceptionClear(); - return EIO; - } else if (err_code != 0) { - TryCopyLastError(env, private_data); - } - return err_code; -} - -int ArrowArrayStreamGetNext(ArrowArrayStream* stream, ArrowArray* out) { - assert(stream->private_data != nullptr); - InnerPrivateData* private_data = - reinterpret_cast(stream->private_data); - JNIEnvGuard guard(private_data->vm_); - JNIEnv* env = guard.env(); - - const jlong out_addr = static_cast(reinterpret_cast(out)); - const int err_code = env->CallIntMethod(private_data->j_private_data_, - kPrivateDataGetNextMethod, out_addr); - if (jthrowable exc = env->ExceptionOccurred()) { - TryHandleUncaughtException(env, private_data, exc); - env->ExceptionClear(); - return EIO; - } else if (err_code != 0) { - TryCopyLastError(env, private_data); - } - return err_code; -} - -const char* ArrowArrayStreamGetLastError(ArrowArrayStream* stream) { - assert(stream->private_data != nullptr); - InnerPrivateData* private_data = - 
reinterpret_cast(stream->private_data); - JNIEnvGuard guard(private_data->vm_); - JNIEnv* env = guard.env(); - - if (private_data->last_error_.empty()) return nullptr; - return private_data->last_error_.c_str(); -} - -void ArrowArrayStreamRelease(ArrowArrayStream* stream) { - // This should not be called on already released structure - assert(stream->release != nullptr); - // Release all data directly owned by the struct - InnerPrivateData* private_data = - reinterpret_cast(stream->private_data); - - // It is possible for the JVM to be shut down (see above) - try { - JNIEnvGuard guard(private_data->vm_); - JNIEnv* env = guard.env(); - - env->CallObjectMethod(private_data->j_private_data_, kPrivateDataCloseMethod); - if (env->ExceptionCheck()) { - env->ExceptionDescribe(); - env->ExceptionClear(); - } - env->DeleteGlobalRef(private_data->j_private_data_); - } catch (const JniPendingException& e) { - std::cerr << "WARNING: Failed to release Java ArrowArrayStream: " << e.what() - << std::endl; - } - delete private_data; - stream->private_data = nullptr; - - // Mark released - stream->release = nullptr; -} - -} // namespace - -#define JNI_METHOD_START try { -// macro ended - -#define JNI_METHOD_END(fallback_expr) \ - } \ - catch (JniPendingException & e) { \ - env->ThrowNew(kRuntimeExceptionClass, e.what()); \ - return fallback_expr; \ - } -// macro ended - -jint JNI_OnLoad(JavaVM* vm, void* reserved) { - JNIEnv* env; - if (vm->GetEnv(reinterpret_cast(&env), JNI_VERSION) != JNI_OK) { - return JNI_ERR; - } - JNI_METHOD_START - kObjectClass = CreateGlobalClassReference(env, "Ljava/lang/Object;"); - kRuntimeExceptionClass = - CreateGlobalClassReference(env, "Ljava/lang/RuntimeException;"); - kPrivateDataClass = - CreateGlobalClassReference(env, "Lorg/apache/arrow/c/jni/PrivateData;"); - kCDataExceptionClass = - CreateGlobalClassReference(env, "Lorg/apache/arrow/c/jni/CDataJniException;"); - kStreamPrivateDataClass = CreateGlobalClassReference( - env, 
"Lorg/apache/arrow/c/ArrayStreamExporter$ExportedArrayStreamPrivateData;"); - - kPrivateDataLastErrorField = - GetFieldID(env, kStreamPrivateDataClass, "lastError", "[B"); - - kObjectToStringMethod = - GetMethodID(env, kObjectClass, "toString", "()Ljava/lang/String;"); - kPrivateDataCloseMethod = GetMethodID(env, kPrivateDataClass, "close", "()V"); - kPrivateDataGetNextMethod = - GetMethodID(env, kStreamPrivateDataClass, "getNext", "(J)I"); - kPrivateDataGetSchemaMethod = - GetMethodID(env, kStreamPrivateDataClass, "getSchema", "(J)I"); - kCDataExceptionConstructor = - GetMethodID(env, kCDataExceptionClass, "", "(ILjava/lang/String;)V"); - - return JNI_VERSION; - JNI_METHOD_END(JNI_ERR) -} - -void JNI_OnUnload(JavaVM* vm, void* reserved) { - JNIEnv* env; - vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); - env->DeleteGlobalRef(kObjectClass); - env->DeleteGlobalRef(kRuntimeExceptionClass); - env->DeleteGlobalRef(kPrivateDataClass); - env->DeleteGlobalRef(kCDataExceptionClass); - env->DeleteGlobalRef(kStreamPrivateDataClass); -} - -/* - * Class: org_apache_arrow_c_jni_JniWrapper - * Method: releaseSchema - * Signature: (J)V - */ -JNIEXPORT void JNICALL Java_org_apache_arrow_c_jni_JniWrapper_releaseSchema( - JNIEnv* env, jobject, jlong address) { - JNI_METHOD_START - ArrowSchema* schema = reinterpret_cast(address); - if (schema->release != nullptr) { - schema->release(schema); - } - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_c_jni_JniWrapper - * Method: releaseArray - * Signature: (J)V - */ -JNIEXPORT void JNICALL -Java_org_apache_arrow_c_jni_JniWrapper_releaseArray(JNIEnv* env, jobject, jlong address) { - JNI_METHOD_START - ArrowArray* array = reinterpret_cast(address); - if (array->release != nullptr) { - array->release(array); - } - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_c_jni_JniWrapper - * Method: getNextArrayStream - * Signature: (JJ)V - */ -JNIEXPORT void JNICALL Java_org_apache_arrow_c_jni_JniWrapper_getNextArrayStream( - JNIEnv* env, 
jobject, jlong address, jlong out_address) { - JNI_METHOD_START - auto* stream = reinterpret_cast(address); - auto* out = reinterpret_cast(out_address); - const int err_code = stream->get_next(stream, out); - if (err_code != 0) { - const char* message = stream->get_last_error(stream); - if (!message) message = std::strerror(err_code); - jstring java_message = env->NewStringUTF(message); - jthrowable exception = static_cast(env->NewObject( - kCDataExceptionClass, kCDataExceptionConstructor, err_code, java_message)); - env->Throw(exception); - } - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_c_jni_JniWrapper - * Method: getSchemaArrayStream - * Signature: (JJ)V - */ -JNIEXPORT void JNICALL Java_org_apache_arrow_c_jni_JniWrapper_getSchemaArrayStream( - JNIEnv* env, jobject, jlong address, jlong out_address) { - JNI_METHOD_START - auto* stream = reinterpret_cast(address); - auto* out = reinterpret_cast(out_address); - const int err_code = stream->get_schema(stream, out); - if (err_code != 0) { - const char* message = stream->get_last_error(stream); - if (!message) message = std::strerror(err_code); - jstring java_message = env->NewStringUTF(message); - jthrowable exception = static_cast(env->NewObject( - kCDataExceptionClass, kCDataExceptionConstructor, err_code, java_message)); - env->Throw(exception); - } - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_c_jni_JniWrapper - * Method: releaseArrayStream - * Signature: (J)V - */ -JNIEXPORT void JNICALL Java_org_apache_arrow_c_jni_JniWrapper_releaseArrayStream( - JNIEnv* env, jobject, jlong address) { - JNI_METHOD_START - auto* stream = reinterpret_cast(address); - if (stream->release != nullptr) { - stream->release(stream); - } - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_c_jni_JniWrapper - * Method: exportSchema - * Signature: (JLorg/apache/arrow/c/jni/PrivateData;)V - */ -JNIEXPORT void JNICALL Java_org_apache_arrow_c_jni_JniWrapper_exportSchema( - JNIEnv* env, jobject, jlong address, jobject 
private_data) { - JNI_METHOD_START - ArrowSchema* schema = reinterpret_cast(address); - - JavaVM* vm; - if (env->GetJavaVM(&vm) != JNI_OK) { - JniThrow("Unable to get JavaVM instance"); - } - jobject private_data_ref = env->NewGlobalRef(private_data); - - schema->private_data = new InnerPrivateData(vm, private_data_ref); - schema->release = &release_exported; - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_c_jni_JniWrapper - * Method: exportArray - * Signature: (JLorg/apache/arrow/c/jni/PrivateData;)V - */ -JNIEXPORT void JNICALL Java_org_apache_arrow_c_jni_JniWrapper_exportArray( - JNIEnv* env, jobject, jlong address, jobject private_data) { - JNI_METHOD_START - ArrowArray* array = reinterpret_cast(address); - - JavaVM* vm; - if (env->GetJavaVM(&vm) != JNI_OK) { - JniThrow("Unable to get JavaVM instance"); - } - jobject private_data_ref = env->NewGlobalRef(private_data); - - array->private_data = new InnerPrivateData(vm, private_data_ref); - array->release = &release_exported; - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_c_jni_JniWrapper - * Method: exportArrayStream - * Signature: (JLorg/apache/arrow/c/jni/PrivateData;)V - */ -JNIEXPORT void JNICALL Java_org_apache_arrow_c_jni_JniWrapper_exportArrayStream( - JNIEnv* env, jobject, jlong address, jobject private_data) { - JNI_METHOD_START - auto* stream = reinterpret_cast(address); - - JavaVM* vm; - if (env->GetJavaVM(&vm) != JNI_OK) { - JniThrow("Unable to get JavaVM instance"); - } - jobject private_data_ref = env->NewGlobalRef(private_data); - - stream->get_schema = &ArrowArrayStreamGetSchema; - stream->get_next = &ArrowArrayStreamGetNext; - stream->get_last_error = &ArrowArrayStreamGetLastError; - stream->release = &ArrowArrayStreamRelease; - stream->private_data = new InnerPrivateData(vm, private_data_ref); - JNI_METHOD_END() -} diff --git a/java/c/src/main/java/module-info.java b/java/c/src/main/java/module-info.java deleted file mode 100644 index 48af1bc6b7215..0000000000000 --- 
a/java/c/src/main/java/module-info.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -open module org.apache.arrow.c { - exports org.apache.arrow.c; - exports org.apache.arrow.c.jni; - - requires flatbuffers.java; - requires jdk.unsupported; - requires org.apache.arrow.memory.core; - requires org.apache.arrow.vector; - requires org.slf4j; -} diff --git a/java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java b/java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java deleted file mode 100644 index 0c6b5de4486bc..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c; - -import static org.apache.arrow.c.NativeUtil.NULL; -import static org.apache.arrow.c.NativeUtil.addressOrNull; -import static org.apache.arrow.util.Preconditions.checkNotNull; - -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.c.jni.JniWrapper; -import org.apache.arrow.c.jni.PrivateData; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; - -/** Exporter for {@link ArrowArray}. */ -final class ArrayExporter { - private final BufferAllocator allocator; - - public ArrayExporter(BufferAllocator allocator) { - this.allocator = allocator; - } - - /** Private data structure for exported arrays. 
*/ - static class ExportedArrayPrivateData implements PrivateData { - ArrowBuf buffers_ptrs; - List buffers; - ArrowBuf children_ptrs; - List children; - ArrowArray dictionary; - - @Override - public void close() { - NativeUtil.closeBuffer(buffers_ptrs); - - if (buffers != null) { - for (ArrowBuf buffer : buffers) { - NativeUtil.closeBuffer(buffer); - } - } - NativeUtil.closeBuffer(children_ptrs); - - if (children != null) { - for (ArrowArray child : children) { - child.close(); - } - } - - if (dictionary != null) { - dictionary.close(); - } - } - } - - void export(ArrowArray array, FieldVector vector, DictionaryProvider dictionaryProvider) { - List children = vector.getChildrenFromFields(); - int valueCount = vector.getValueCount(); - int nullCount = vector.getNullCount(); - DictionaryEncoding dictionaryEncoding = vector.getField().getDictionary(); - - ExportedArrayPrivateData data = new ExportedArrayPrivateData(); - try { - if (children != null) { - data.children = new ArrayList<>(children.size()); - data.children_ptrs = allocator.buffer((long) children.size() * Long.BYTES); - for (int i = 0; i < children.size(); i++) { - ArrowArray child = ArrowArray.allocateNew(allocator); - data.children.add(child); - data.children_ptrs.writeLong(child.memoryAddress()); - } - } - - data.buffers = new ArrayList<>(vector.getExportedCDataBufferCount()); - data.buffers_ptrs = - allocator.buffer((long) vector.getExportedCDataBufferCount() * Long.BYTES); - vector.exportCDataBuffers(data.buffers, data.buffers_ptrs, NULL); - - if (dictionaryEncoding != null) { - Dictionary dictionary = dictionaryProvider.lookup(dictionaryEncoding.getId()); - checkNotNull(dictionary, "Dictionary lookup failed on export of dictionary encoded array"); - - data.dictionary = ArrowArray.allocateNew(allocator); - FieldVector dictionaryVector = dictionary.getVector(); - export(data.dictionary, dictionaryVector, dictionaryProvider); - } - - ArrowArray.Snapshot snapshot = new ArrowArray.Snapshot(); - 
snapshot.length = valueCount; - snapshot.null_count = nullCount; - snapshot.offset = 0; - snapshot.n_buffers = (data.buffers != null) ? data.buffers.size() : 0; - snapshot.n_children = (data.children != null) ? data.children.size() : 0; - snapshot.buffers = addressOrNull(data.buffers_ptrs); - snapshot.children = addressOrNull(data.children_ptrs); - snapshot.dictionary = addressOrNull(data.dictionary); - snapshot.release = NULL; - array.save(snapshot); - - // sets release and private data - JniWrapper.get().exportArray(array.memoryAddress(), data); - } catch (Exception e) { - data.close(); - throw e; - } - - // Export children - if (children != null) { - for (int i = 0; i < children.size(); i++) { - FieldVector childVector = children.get(i); - ArrowArray child = data.children.get(i); - export(child, childVector, dictionaryProvider); - } - } - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/ArrayImporter.java b/java/c/src/main/java/org/apache/arrow/c/ArrayImporter.java deleted file mode 100644 index b74fb1b47345d..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/ArrayImporter.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.apache.arrow.c.NativeUtil.NULL; -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.apache.arrow.util.Preconditions.checkNotNull; -import static org.apache.arrow.util.Preconditions.checkState; - -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; - -/** Importer for {@link ArrowArray}. */ -final class ArrayImporter { - private static final int MAX_IMPORT_RECURSION_LEVEL = 64; - - private final BufferAllocator allocator; - private final FieldVector vector; - private final DictionaryProvider dictionaryProvider; - - private ReferenceCountedArrowArray underlyingAllocation; - private int recursionLevel; - - ArrayImporter( - BufferAllocator allocator, FieldVector vector, DictionaryProvider dictionaryProvider) { - this.allocator = Preconditions.checkNotNull(allocator); - this.vector = Preconditions.checkNotNull(vector); - this.dictionaryProvider = dictionaryProvider; - } - - void importArray(ArrowArray src) { - ArrowArray.Snapshot snapshot = src.snapshot(); - checkState(snapshot.release != NULL, "Cannot import released ArrowArray"); - - // Move imported array - ArrowArray ownedArray = ArrowArray.allocateNew(allocator); - ownedArray.save(snapshot); - src.markReleased(); - src.close(); - - recursionLevel = 0; - - // This keeps the array alive as long as there are any buffers that need it - underlyingAllocation = new ReferenceCountedArrowArray(ownedArray); - try { - doImport(snapshot); - } finally { - underlyingAllocation.release(); - } - } - - private void 
importChild(ArrayImporter parent, ArrowArray src) { - ArrowArray.Snapshot snapshot = src.snapshot(); - checkState(snapshot.release != NULL, "Cannot import released ArrowArray"); - recursionLevel = parent.recursionLevel + 1; - checkState( - recursionLevel <= MAX_IMPORT_RECURSION_LEVEL, - "Recursion level in ArrowArray struct exceeded"); - // Child buffers will keep the entire parent import alive. - underlyingAllocation = parent.underlyingAllocation; - doImport(snapshot); - } - - private void doImport(ArrowArray.Snapshot snapshot) { - // First import children (required for reconstituting parent array data) - long[] children = - NativeUtil.toJavaArray(snapshot.children, checkedCastToInt(snapshot.n_children)); - if (children != null && children.length > 0) { - List childVectors = vector.getChildrenFromFields(); - checkState( - children.length == childVectors.size(), - "ArrowArray struct has %s children (expected %s)", - children.length, - childVectors.size()); - for (int i = 0; i < children.length; i++) { - checkState(children[i] != NULL, "ArrowArray struct has NULL child at position %s", i); - ArrayImporter childImporter = - new ArrayImporter(allocator, childVectors.get(i), dictionaryProvider); - childImporter.importChild(this, ArrowArray.wrap(children[i])); - } - } - - // Handle import of a dictionary encoded vector - if (snapshot.dictionary != NULL) { - DictionaryEncoding encoding = vector.getField().getDictionary(); - checkNotNull(encoding, "Missing encoding on import of ArrowArray with dictionary"); - - Dictionary dictionary = dictionaryProvider.lookup(encoding.getId()); - checkNotNull(dictionary, "Dictionary lookup failed on import of ArrowArray with dictionary"); - - // reset the dictionary vector to the initial state - dictionary.getVector().clear(); - - ArrayImporter dictionaryImporter = - new ArrayImporter(allocator, dictionary.getVector(), dictionaryProvider); - dictionaryImporter.importChild(this, ArrowArray.wrap(snapshot.dictionary)); - } - - // Import 
main data - ArrowFieldNode fieldNode = new ArrowFieldNode(snapshot.length, snapshot.null_count); - long[] bufferPointers = - NativeUtil.toJavaArray(snapshot.buffers, checkedCastToInt(snapshot.n_buffers)); - - try (final BufferImportTypeVisitor visitor = - new BufferImportTypeVisitor(allocator, underlyingAllocation, fieldNode, bufferPointers)) { - final List buffers; - if (bufferPointers == null || bufferPointers.length == 0) { - buffers = Collections.emptyList(); - } else { - buffers = vector.getField().getType().accept(visitor); - } - vector.loadFieldBuffers(fieldNode, buffers); - } catch (Exception e) { - throw new IllegalArgumentException( - "Could not load buffers for field " - + vector.getField() - + ". error message: " - + e.getMessage(), - e); - } - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/ArrayStreamExporter.java b/java/c/src/main/java/org/apache/arrow/c/ArrayStreamExporter.java deleted file mode 100644 index cb2508c83d220..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/ArrayStreamExporter.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.nio.charset.StandardCharsets; -import org.apache.arrow.c.jni.JniWrapper; -import org.apache.arrow.c.jni.PrivateData; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.types.pojo.Schema; - -/** Utility to export an {@link ArrowReader} as an ArrowArrayStream. */ -final class ArrayStreamExporter { - private final BufferAllocator allocator; - - ArrayStreamExporter(BufferAllocator allocator) { - this.allocator = allocator; - } - - /** Java-side state for the exported stream. */ - static class ExportedArrayStreamPrivateData implements PrivateData { - final BufferAllocator allocator; - final ArrowReader reader; - // Read by the JNI side for get_last_error - byte[] lastError; - - ExportedArrayStreamPrivateData(BufferAllocator allocator, ArrowReader reader) { - this.allocator = allocator; - this.reader = reader; - } - - private int setLastError(Throwable err) { - // Do not let exceptions propagate up to JNI - try { - StringWriter buf = new StringWriter(); - PrintWriter writer = new PrintWriter(buf); - err.printStackTrace(writer); - lastError = buf.toString().getBytes(StandardCharsets.UTF_8); - } catch (Throwable e) { - // Bail out of setting the error message - we'll still return an error code - lastError = null; - } - return 5; // = EIO - } - - @SuppressWarnings("unused") // Used by JNI - int getNext(long arrayAddress) { - try (ArrowArray out = ArrowArray.wrap(arrayAddress)) { - if (reader.loadNextBatch()) { - Data.exportVectorSchemaRoot(allocator, reader.getVectorSchemaRoot(), reader, out); - } else { - out.markReleased(); - } - return 0; - } catch (Throwable e) { - return setLastError(e); - } - } - - @SuppressWarnings("unused") // Used by JNI - int getSchema(long schemaAddress) { - try (ArrowSchema out = ArrowSchema.wrap(schemaAddress)) { - final Schema 
schema = reader.getVectorSchemaRoot().getSchema(); - Data.exportSchema(allocator, schema, reader, out); - return 0; - } catch (Throwable e) { - return setLastError(e); - } - } - - @Override - public void close() { - try { - reader.close(); - } catch (IOException e) { - // XXX: C Data Interface gives us no way to signal errors to the caller, - // but the JNI side will catch this and log an error. - throw new RuntimeException(e); - } - } - } - - void export(ArrowArrayStream stream, ArrowReader reader) { - ExportedArrayStreamPrivateData data = new ExportedArrayStreamPrivateData(allocator, reader); - try { - JniWrapper.get().exportArrayStream(stream.memoryAddress(), data); - } catch (Exception e) { - data.close(); - throw e; - } - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/ArrowArray.java b/java/c/src/main/java/org/apache/arrow/c/ArrowArray.java deleted file mode 100644 index 3667d13856d04..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/ArrowArray.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.apache.arrow.c.NativeUtil.NULL; -import static org.apache.arrow.util.Preconditions.checkNotNull; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import org.apache.arrow.c.jni.JniWrapper; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReferenceManager; -import org.apache.arrow.memory.util.MemoryUtil; -import org.apache.arrow.util.VisibleForTesting; - -/** - * C Data Interface ArrowArray. - * - *

    Represents a wrapper for the following C structure: - * - *

    - * struct ArrowArray {
    - *     // Array data description
    - *     int64_t length;
    - *     int64_t null_count;
    - *     int64_t offset;
    - *     int64_t n_buffers;
    - *     int64_t n_children;
    - *     const void** buffers;
    - *     struct ArrowArray** children;
    - *     struct ArrowArray* dictionary;
    - *
    - *     // Release callback
    - *     void (*release)(struct ArrowArray*);
    - *     // Opaque producer-specific data
    - *     void* private_data;
    - * };
    - * 
    - */ -public class ArrowArray implements BaseStruct { - private static final int SIZE_OF = 80; - private static final int INDEX_RELEASE_CALLBACK = 64; - - private ArrowBuf data; - - /** Snapshot of the ArrowArray raw data. */ - public static class Snapshot { - public long length; - public long null_count; - public long offset; - public long n_buffers; - public long n_children; - public long buffers; - public long children; - public long dictionary; - public long release; - public long private_data; - - /** Initialize empty ArrowArray snapshot. */ - public Snapshot() { - length = NULL; - null_count = NULL; - offset = NULL; - n_buffers = NULL; - n_children = NULL; - buffers = NULL; - children = NULL; - dictionary = NULL; - release = NULL; - private_data = NULL; - } - } - - /** - * Create ArrowArray from an existing memory address. - * - *

    The resulting ArrowArray does not own the memory. - * - * @param memoryAddress Memory address to wrap - * @return A new ArrowArray instance - */ - public static ArrowArray wrap(long memoryAddress) { - return new ArrowArray( - new ArrowBuf(ReferenceManager.NO_OP, null, ArrowArray.SIZE_OF, memoryAddress)); - } - - /** - * Create ArrowArray by allocating memory. - * - *

    The resulting ArrowArray owns the memory. - * - * @param allocator Allocator for memory allocations - * @return A new ArrowArray instance - */ - public static ArrowArray allocateNew(BufferAllocator allocator) { - ArrowArray array = new ArrowArray(allocator.buffer(ArrowArray.SIZE_OF)); - array.markReleased(); - return array; - } - - ArrowArray(ArrowBuf data) { - checkNotNull(data, "ArrowArray initialized with a null buffer"); - this.data = data; - } - - /** Mark the array as released. */ - public void markReleased() { - directBuffer().putLong(INDEX_RELEASE_CALLBACK, NULL); - } - - @Override - public long memoryAddress() { - checkNotNull(data, "ArrowArray is already closed"); - return data.memoryAddress(); - } - - @Override - public void release() { - long address = memoryAddress(); - JniWrapper.get().releaseArray(address); - } - - @Override - public void close() { - if (data != null) { - data.close(); - data = null; - } - } - - @VisibleForTesting - boolean isClosed() { - return data == null; - } - - private ByteBuffer directBuffer() { - return MemoryUtil.directBuffer(memoryAddress(), ArrowArray.SIZE_OF) - .order(ByteOrder.nativeOrder()); - } - - /** - * Take a snapshot of the ArrowArray raw values. - * - * @return snapshot - */ - public Snapshot snapshot() { - ByteBuffer data = directBuffer(); - Snapshot snapshot = new Snapshot(); - snapshot.length = data.getLong(); - snapshot.null_count = data.getLong(); - snapshot.offset = data.getLong(); - snapshot.n_buffers = data.getLong(); - snapshot.n_children = data.getLong(); - snapshot.buffers = data.getLong(); - snapshot.children = data.getLong(); - snapshot.dictionary = data.getLong(); - snapshot.release = data.getLong(); - snapshot.private_data = data.getLong(); - return snapshot; - } - - /** Write values from Snapshot to the underlying ArrowArray memory buffer. 
*/ - public void save(Snapshot snapshot) { - directBuffer() - .putLong(snapshot.length) - .putLong(snapshot.null_count) - .putLong(snapshot.offset) - .putLong(snapshot.n_buffers) - .putLong(snapshot.n_children) - .putLong(snapshot.buffers) - .putLong(snapshot.children) - .putLong(snapshot.dictionary) - .putLong(snapshot.release) - .putLong(snapshot.private_data); - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/ArrowArrayStream.java b/java/c/src/main/java/org/apache/arrow/c/ArrowArrayStream.java deleted file mode 100644 index 45dc70084b21b..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/ArrowArrayStream.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.apache.arrow.c.NativeUtil.NULL; -import static org.apache.arrow.util.Preconditions.checkNotNull; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import org.apache.arrow.c.jni.CDataJniException; -import org.apache.arrow.c.jni.JniWrapper; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReferenceManager; -import org.apache.arrow.memory.util.MemoryUtil; - -/** - * C Stream Interface ArrowArrayStream. - * - *

    Represents a wrapper for the following C structure: - * - *

    - * struct ArrowArrayStream {
    - *   int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
    - *   int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
    - *   const char* (*get_last_error)(struct ArrowArrayStream*);
    - *   void (*release)(struct ArrowArrayStream*);
    - *   void* private_data;
    - * };
    - * 
    - */ -public class ArrowArrayStream implements BaseStruct { - private static final int SIZE_OF = 40; - private static final int INDEX_RELEASE_CALLBACK = 24; - - private ArrowBuf data; - - /** Snapshot of the ArrowArrayStream raw data. */ - public static class Snapshot { - public long get_schema; - public long get_next; - public long get_last_error; - public long release; - public long private_data; - - /** Initialize empty ArrowArray snapshot. */ - public Snapshot() { - get_schema = NULL; - get_next = NULL; - get_last_error = NULL; - release = NULL; - private_data = NULL; - } - } - - /** - * Create ArrowArrayStream from an existing memory address. - * - *

    The resulting ArrowArrayStream does not own the memory. - * - * @param memoryAddress Memory address to wrap - * @return A new ArrowArrayStream instance - */ - public static ArrowArrayStream wrap(long memoryAddress) { - return new ArrowArrayStream( - new ArrowBuf(ReferenceManager.NO_OP, null, ArrowArrayStream.SIZE_OF, memoryAddress)); - } - - /** - * Create ArrowArrayStream by allocating memory. - * - *

    The resulting ArrowArrayStream owns the memory. - * - * @param allocator Allocator for memory allocations - * @return A new ArrowArrayStream instance - */ - public static ArrowArrayStream allocateNew(BufferAllocator allocator) { - ArrowArrayStream array = new ArrowArrayStream(allocator.buffer(ArrowArrayStream.SIZE_OF)); - array.markReleased(); - return array; - } - - ArrowArrayStream(ArrowBuf data) { - checkNotNull(data, "ArrowArrayStream initialized with a null buffer"); - this.data = data; - } - - /** Mark the array as released. */ - public void markReleased() { - directBuffer().putLong(INDEX_RELEASE_CALLBACK, NULL); - } - - @Override - public long memoryAddress() { - checkNotNull(data, "ArrowArrayStream is already closed"); - return data.memoryAddress(); - } - - @Override - public void release() { - long address = memoryAddress(); - JniWrapper.get().releaseArrayStream(address); - } - - /** - * Get the schema of the stream. - * - * @param schema The ArrowSchema struct to output to - * @throws IOException if the stream returns an error - */ - public void getSchema(ArrowSchema schema) throws IOException { - long address = memoryAddress(); - try { - JniWrapper.get().getSchemaArrayStream(address, schema.memoryAddress()); - } catch (CDataJniException e) { - throw new IOException("[errno " + e.getErrno() + "] " + e.getMessage()); - } - } - - /** - * Get the next batch in the stream. 
- * - * @param array The ArrowArray struct to output to - * @throws IOException if the stream returns an error - */ - public void getNext(ArrowArray array) throws IOException { - long address = memoryAddress(); - try { - JniWrapper.get().getNextArrayStream(address, array.memoryAddress()); - } catch (CDataJniException e) { - throw new IOException("[errno " + e.getErrno() + "] " + e.getMessage()); - } - } - - @Override - public void close() { - if (data != null) { - data.close(); - data = null; - } - } - - private ByteBuffer directBuffer() { - return MemoryUtil.directBuffer(memoryAddress(), ArrowArrayStream.SIZE_OF) - .order(ByteOrder.nativeOrder()); - } - - /** - * Take a snapshot of the ArrowArrayStream raw values. - * - * @return snapshot - */ - public ArrowArrayStream.Snapshot snapshot() { - ByteBuffer data = directBuffer(); - ArrowArrayStream.Snapshot snapshot = new ArrowArrayStream.Snapshot(); - snapshot.get_schema = data.getLong(); - snapshot.get_next = data.getLong(); - snapshot.get_last_error = data.getLong(); - snapshot.release = data.getLong(); - snapshot.private_data = data.getLong(); - return snapshot; - } - - /** Write values from Snapshot to the underlying ArrowArrayStream memory buffer. */ - public void save(ArrowArrayStream.Snapshot snapshot) { - directBuffer() - .putLong(snapshot.get_schema) - .putLong(snapshot.get_next) - .putLong(snapshot.get_last_error) - .putLong(snapshot.release) - .putLong(snapshot.private_data); - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/ArrowArrayStreamReader.java b/java/c/src/main/java/org/apache/arrow/c/ArrowArrayStreamReader.java deleted file mode 100644 index 07a88cd8d7dab..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/ArrowArrayStreamReader.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c; - -import static org.apache.arrow.c.NativeUtil.NULL; -import static org.apache.arrow.util.Preconditions.checkState; - -import java.io.IOException; -import java.util.Map; -import java.util.function.Function; -import java.util.stream.Collectors; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.types.pojo.Schema; - -/** An implementation of an {@link ArrowReader} backed by an ArrowArrayStream. 
*/ -final class ArrowArrayStreamReader extends ArrowReader { - private final ArrowArrayStream ownedStream; - private final CDataDictionaryProvider provider; - - ArrowArrayStreamReader(BufferAllocator allocator, ArrowArrayStream stream) { - super(allocator); - this.provider = new CDataDictionaryProvider(); - - ArrowArrayStream.Snapshot snapshot = stream.snapshot(); - checkState(snapshot.release != NULL, "Cannot import released ArrowArrayStream"); - - // Move imported stream - this.ownedStream = ArrowArrayStream.allocateNew(allocator); - this.ownedStream.save(snapshot); - stream.markReleased(); - stream.close(); - } - - @Override - public Map getDictionaryVectors() { - return provider.getDictionaryIds().stream() - .collect(Collectors.toMap(Function.identity(), provider::lookup)); - } - - @Override - public Dictionary lookup(long id) { - return provider.lookup(id); - } - - @Override - public boolean loadNextBatch() throws IOException { - try (ArrowArray array = ArrowArray.allocateNew(allocator)) { - ownedStream.getNext(array); - if (array.snapshot().release == NULL) { - return false; - } - Data.importIntoVectorSchemaRoot(allocator, array, getVectorSchemaRoot(), provider); - return true; - } - } - - @Override - public long bytesRead() { - return 0; - } - - @Override - protected void closeReadSource() { - ownedStream.release(); - ownedStream.close(); - provider.close(); - } - - @Override - protected Schema readSchema() throws IOException { - try (ArrowSchema schema = ArrowSchema.allocateNew(allocator)) { - ownedStream.getSchema(schema); - return Data.importSchema(allocator, schema, provider); - } - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/ArrowSchema.java b/java/c/src/main/java/org/apache/arrow/c/ArrowSchema.java deleted file mode 100644 index ad9f16ae9ceed..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/ArrowSchema.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * 
contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c; - -import static org.apache.arrow.c.NativeUtil.NULL; -import static org.apache.arrow.util.Preconditions.checkNotNull; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import org.apache.arrow.c.jni.JniWrapper; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReferenceManager; -import org.apache.arrow.memory.util.MemoryUtil; - -/** - * C Data Interface ArrowSchema. - * - *

    Represents a wrapper for the following C structure: - * - *

    - * struct ArrowSchema {
    - *     // Array type description
    - *     const char* format;
    - *     const char* name;
    - *     const char* metadata;
    - *     int64_t flags;
    - *     int64_t n_children;
    - *     struct ArrowSchema** children;
    - *     struct ArrowSchema* dictionary;
    - *
    - *     // Release callback
    - *     void (*release)(struct ArrowSchema*);
    - *     // Opaque producer-specific data
    - *     void* private_data;
    - * };
    - * 
    - */ -public class ArrowSchema implements BaseStruct { - private static final int SIZE_OF = 72; - private static final int INDEX_RELEASE_CALLBACK = 56; - - private ArrowBuf data; - - /** Snapshot of the ArrowSchema raw data. */ - public static class Snapshot { - public long format; - public long name; - public long metadata; - public long flags; - public long n_children; - public long children; - public long dictionary; - public long release; - public long private_data; - - /** Initialize empty ArrowSchema snapshot. */ - public Snapshot() { - format = NULL; - name = NULL; - metadata = NULL; - flags = NULL; - n_children = NULL; - children = NULL; - dictionary = NULL; - release = NULL; - private_data = NULL; - } - } - - /** - * Create ArrowSchema from an existing memory address. - * - *

    The resulting ArrowSchema does not own the memory. - * - * @param memoryAddress Memory address to wrap - * @return A new ArrowSchema instance - */ - public static ArrowSchema wrap(long memoryAddress) { - return new ArrowSchema( - new ArrowBuf(ReferenceManager.NO_OP, null, ArrowSchema.SIZE_OF, memoryAddress)); - } - - /** - * Create ArrowSchema by allocating memory. - * - *

    The resulting ArrowSchema owns the memory. - * - * @param allocator Allocator for memory allocations - * @return A new ArrowSchema instance - */ - public static ArrowSchema allocateNew(BufferAllocator allocator) { - ArrowSchema schema = new ArrowSchema(allocator.buffer(ArrowSchema.SIZE_OF)); - schema.markReleased(); - return schema; - } - - ArrowSchema(ArrowBuf data) { - checkNotNull(data, "ArrowSchema initialized with a null buffer"); - this.data = data; - } - - /** Mark the schema as released. */ - public void markReleased() { - directBuffer().putLong(INDEX_RELEASE_CALLBACK, NULL); - } - - @Override - public long memoryAddress() { - checkNotNull(data, "ArrowSchema is already closed"); - return data.memoryAddress(); - } - - @Override - public void release() { - long address = memoryAddress(); - JniWrapper.get().releaseSchema(address); - } - - @Override - public void close() { - if (data != null) { - data.close(); - data = null; - } - } - - private ByteBuffer directBuffer() { - return MemoryUtil.directBuffer(memoryAddress(), ArrowSchema.SIZE_OF) - .order(ByteOrder.nativeOrder()); - } - - /** - * Take a snapshot of the ArrowSchema raw values. - * - * @return snapshot - */ - public Snapshot snapshot() { - ByteBuffer data = directBuffer(); - Snapshot snapshot = new Snapshot(); - snapshot.format = data.getLong(); - snapshot.name = data.getLong(); - snapshot.metadata = data.getLong(); - snapshot.flags = data.getLong(); - snapshot.n_children = data.getLong(); - snapshot.children = data.getLong(); - snapshot.dictionary = data.getLong(); - snapshot.release = data.getLong(); - snapshot.private_data = data.getLong(); - return snapshot; - } - - /** Write values from Snapshot to the underlying ArrowSchema memory buffer. 
*/ - public void save(Snapshot snapshot) { - directBuffer() - .putLong(snapshot.format) - .putLong(snapshot.name) - .putLong(snapshot.metadata) - .putLong(snapshot.flags) - .putLong(snapshot.n_children) - .putLong(snapshot.children) - .putLong(snapshot.dictionary) - .putLong(snapshot.release) - .putLong(snapshot.private_data); - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/BaseStruct.java b/java/c/src/main/java/org/apache/arrow/c/BaseStruct.java deleted file mode 100644 index d64d2a8506911..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/BaseStruct.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c; - -/** Base interface for C Data Interface structures. */ -public interface BaseStruct extends AutoCloseable { - /** - * Get memory address. - * - * @return Memory address - */ - long memoryAddress(); - - /** - * Call the release callback of an ArrowArray. - * - *

    This function must not be called for child arrays. - */ - void release(); - - /** Close to release the main buffer. */ - @Override - void close(); -} diff --git a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java deleted file mode 100644 index 2661c12cda3af..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java +++ /dev/null @@ -1,423 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.apache.arrow.c.NativeUtil.NULL; -import static org.apache.arrow.util.Preconditions.checkState; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.VisibleForTesting; -import org.apache.arrow.vector.BaseVariableWidthViewVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.LargeListViewVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.util.DataSizeRoundingUtil; - -/** Import buffers from a C Data Interface struct. 
*/ -class BufferImportTypeVisitor implements ArrowType.ArrowTypeVisitor>, AutoCloseable { - private final BufferAllocator allocator; - private final ReferenceCountedArrowArray underlyingAllocation; - private final ArrowFieldNode fieldNode; - private final long[] buffers; - private final List imported; - - BufferImportTypeVisitor( - BufferAllocator allocator, - ReferenceCountedArrowArray underlyingAllocation, - ArrowFieldNode fieldNode, - long[] buffers) { - this.allocator = allocator; - this.underlyingAllocation = underlyingAllocation; - this.fieldNode = fieldNode; - this.buffers = buffers; - this.imported = new ArrayList<>(); - } - - @Override - public void close() throws Exception { - AutoCloseables.close(imported); - } - - @VisibleForTesting - ArrowBuf importBuffer(ArrowType type, int index, long capacity) { - checkState( - buffers.length > index, - "Expected at least %s buffers for type %s, but found %s", - index + 1, - type, - buffers.length); - long bufferPtr = buffers[index]; - - if (bufferPtr == NULL) { - // C array may be NULL but only accept that if expected capacity is zero too - if (capacity != 0) { - throw new IllegalStateException( - String.format("Buffer %s for type %s cannot be null", index, type)); - } else { - // no data in the C array, return an empty buffer - return allocator.getEmpty(); - } - } - - ArrowBuf buf = underlyingAllocation.unsafeAssociateAllocation(allocator, capacity, bufferPtr); - imported.add(buf); - return buf; - } - - private ArrowBuf importFixedBits(ArrowType type, int index, long bitsPerSlot) { - final long capacity = DataSizeRoundingUtil.divideBy8Ceil(bitsPerSlot * fieldNode.getLength()); - return importBuffer(type, index, capacity); - } - - private ArrowBuf importFixedBytes(ArrowType type, int index, long bytesPerSlot) { - final long capacity = bytesPerSlot * fieldNode.getLength(); - return importBuffer(type, index, capacity); - } - - private ArrowBuf importOffsets(ArrowType type, long bytesPerSlot) { - final long capacity = 
bytesPerSlot * (fieldNode.getLength() + 1); - return importBuffer(type, 1, capacity); - } - - private ArrowBuf importData(ArrowType type, long capacity) { - return importBuffer(type, 2, capacity); - } - - private ArrowBuf maybeImportBitmap(ArrowType type) { - checkState( - buffers.length > 0, - "Expected at least %s buffers for type %s, but found %s", - 1, - type, - buffers.length); - if (buffers[0] == NULL) { - return null; - } - return importFixedBits(type, 0, /*bitsPerSlot=*/ 1); - } - - @Override - public List visit(ArrowType.Null type) { - checkState( - buffers.length == 0, - "Expected %s buffers for type %s, but found %s", - 0, - type, - buffers.length); - return Collections.emptyList(); - } - - @Override - public List visit(ArrowType.Struct type) { - return Collections.singletonList(maybeImportBitmap(type)); - } - - @Override - public List visit(ArrowType.List type) { - return Arrays.asList(maybeImportBitmap(type), importOffsets(type, ListVector.OFFSET_WIDTH)); - } - - @Override - public List visit(ArrowType.LargeList type) { - return Arrays.asList( - maybeImportBitmap(type), importOffsets(type, LargeListVector.OFFSET_WIDTH)); - } - - @Override - public List visit(ArrowType.FixedSizeList type) { - return Collections.singletonList(maybeImportBitmap(type)); - } - - @Override - public List visit(ArrowType.Union type) { - switch (type.getMode()) { - case Sparse: - return Collections.singletonList(importFixedBytes(type, 0, UnionVector.TYPE_WIDTH)); - case Dense: - return Arrays.asList( - importFixedBytes(type, 0, DenseUnionVector.TYPE_WIDTH), - importFixedBytes(type, 1, DenseUnionVector.OFFSET_WIDTH)); - default: - throw new UnsupportedOperationException("Importing buffers for union type: " + type); - } - } - - @Override - public List visit(ArrowType.RunEndEncoded type) { - return List.of(); - } - - @Override - public List visit(ArrowType.Map type) { - return Arrays.asList(maybeImportBitmap(type), importOffsets(type, MapVector.OFFSET_WIDTH)); - } - - @Override - 
public List visit(ArrowType.Int type) { - return Arrays.asList(maybeImportBitmap(type), importFixedBits(type, 1, type.getBitWidth())); - } - - @Override - public List visit(ArrowType.FloatingPoint type) { - switch (type.getPrecision()) { - case HALF: - return Arrays.asList( - maybeImportBitmap(type), importFixedBytes(type, 1, /*bytesPerSlot=*/ 2)); - case SINGLE: - return Arrays.asList( - maybeImportBitmap(type), importFixedBytes(type, 1, Float4Vector.TYPE_WIDTH)); - case DOUBLE: - return Arrays.asList( - maybeImportBitmap(type), importFixedBytes(type, 1, Float8Vector.TYPE_WIDTH)); - default: - throw new UnsupportedOperationException("Importing buffers for type: " + type); - } - } - - @Override - public List visit(ArrowType.Utf8 type) { - try (ArrowBuf offsets = importOffsets(type, VarCharVector.OFFSET_WIDTH)) { - final int start = offsets.getInt(0); - final int end = offsets.getInt(fieldNode.getLength() * (long) VarCharVector.OFFSET_WIDTH); - checkState( - end >= start, - "Offset buffer for type %s is malformed: start: %s, end: %s", - type, - start, - end); - final int len = end - start; - offsets.getReferenceManager().retain(); - return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len)); - } - } - - private List visitVariableWidthView(ArrowType type) { - final int viewBufferIndex = 1; - final int variadicSizeBufferIndex = this.buffers.length - 1; - final long numOfVariadicBuffers = this.buffers.length - 3L; - final long variadicSizeBufferCapacity = numOfVariadicBuffers * Long.BYTES; - List buffers = new ArrayList<>(); - - ArrowBuf variadicSizeBuffer = - importBuffer(type, variadicSizeBufferIndex, variadicSizeBufferCapacity); - - ArrowBuf view = - importFixedBytes(type, viewBufferIndex, BaseVariableWidthViewVector.ELEMENT_SIZE); - buffers.add(maybeImportBitmap(type)); - buffers.add(view); - - // 0th buffer is validity buffer - // 1st buffer is view buffer - // 2nd buffer onwards are variadic buffer - // N-1 (this.buffers.length - 1) buffer is 
variadic size buffer - final int variadicBufferReadOffset = 2; - for (int i = 0; i < numOfVariadicBuffers; i++) { - long size = variadicSizeBuffer.getLong((long) i * Long.BYTES); - buffers.add(importBuffer(type, i + variadicBufferReadOffset, size)); - } - - return buffers; - } - - @Override - public List visit(ArrowType.Utf8View type) { - return visitVariableWidthView(type); - } - - @Override - public List visit(ArrowType.LargeUtf8 type) { - try (ArrowBuf offsets = importOffsets(type, LargeVarCharVector.OFFSET_WIDTH)) { - final long start = offsets.getLong(0); - final long end = - offsets.getLong(fieldNode.getLength() * (long) LargeVarCharVector.OFFSET_WIDTH); - checkState( - end >= start, - "Offset buffer for type %s is malformed: start: %s, end: %s", - type, - start, - end); - final long len = end - start; - offsets.getReferenceManager().retain(); - return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len)); - } - } - - @Override - public List visit(ArrowType.Binary type) { - try (ArrowBuf offsets = importOffsets(type, VarBinaryVector.OFFSET_WIDTH)) { - final int start = offsets.getInt(0); - final int end = offsets.getInt(fieldNode.getLength() * (long) VarBinaryVector.OFFSET_WIDTH); - checkState( - end >= start, - "Offset buffer for type %s is malformed: start: %s, end: %s", - type, - start, - end); - final int len = end - start; - offsets.getReferenceManager().retain(); - return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len)); - } - } - - @Override - public List visit(ArrowType.BinaryView type) { - return visitVariableWidthView(type); - } - - @Override - public List visit(ArrowType.LargeBinary type) { - try (ArrowBuf offsets = importOffsets(type, LargeVarBinaryVector.OFFSET_WIDTH)) { - final long start = offsets.getLong(0); - // TODO: need better tests to cover the failure when I forget to multiply by offset width - final long end = - offsets.getLong(fieldNode.getLength() * (long) LargeVarBinaryVector.OFFSET_WIDTH); - 
checkState( - end >= start, - "Offset buffer for type %s is malformed: start: %s, end: %s", - type, - start, - end); - final long len = end - start; - offsets.getReferenceManager().retain(); - return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len)); - } - } - - @Override - public List visit(ArrowType.FixedSizeBinary type) { - return Arrays.asList(maybeImportBitmap(type), importFixedBytes(type, 1, type.getByteWidth())); - } - - @Override - public List visit(ArrowType.Bool type) { - return Arrays.asList(maybeImportBitmap(type), importFixedBits(type, 1, /*bitsPerSlot=*/ 1)); - } - - @Override - public List visit(ArrowType.Decimal type) { - return Arrays.asList(maybeImportBitmap(type), importFixedBits(type, 1, type.getBitWidth())); - } - - @Override - public List visit(ArrowType.Date type) { - switch (type.getUnit()) { - case DAY: - return Arrays.asList( - maybeImportBitmap(type), importFixedBytes(type, 1, DateDayVector.TYPE_WIDTH)); - case MILLISECOND: - return Arrays.asList( - maybeImportBitmap(type), importFixedBytes(type, 1, DateMilliVector.TYPE_WIDTH)); - default: - throw new UnsupportedOperationException("Importing buffers for type: " + type); - } - } - - @Override - public List visit(ArrowType.Time type) { - switch (type.getUnit()) { - case SECOND: - return Arrays.asList( - maybeImportBitmap(type), importFixedBytes(type, 1, TimeSecVector.TYPE_WIDTH)); - case MILLISECOND: - return Arrays.asList( - maybeImportBitmap(type), importFixedBytes(type, 1, TimeMilliVector.TYPE_WIDTH)); - case MICROSECOND: - return Arrays.asList( - maybeImportBitmap(type), importFixedBytes(type, 1, TimeMicroVector.TYPE_WIDTH)); - case NANOSECOND: - return Arrays.asList( - maybeImportBitmap(type), importFixedBytes(type, 1, TimeNanoVector.TYPE_WIDTH)); - default: - throw new UnsupportedOperationException("Importing buffers for type: " + type); - } - } - - @Override - public List visit(ArrowType.Timestamp type) { - return Arrays.asList( - maybeImportBitmap(type), 
importFixedBytes(type, 1, TimeStampVector.TYPE_WIDTH)); - } - - @Override - public List visit(ArrowType.Interval type) { - switch (type.getUnit()) { - case YEAR_MONTH: - return Arrays.asList( - maybeImportBitmap(type), importFixedBytes(type, 1, IntervalYearVector.TYPE_WIDTH)); - case DAY_TIME: - return Arrays.asList( - maybeImportBitmap(type), importFixedBytes(type, 1, IntervalDayVector.TYPE_WIDTH)); - case MONTH_DAY_NANO: - return Arrays.asList( - maybeImportBitmap(type), - importFixedBytes(type, 1, IntervalMonthDayNanoVector.TYPE_WIDTH)); - default: - throw new UnsupportedOperationException("Importing buffers for type: " + type); - } - } - - @Override - public List visit(ArrowType.Duration type) { - return Arrays.asList( - maybeImportBitmap(type), importFixedBytes(type, 1, DurationVector.TYPE_WIDTH)); - } - - @Override - public List visit(ArrowType.ListView type) { - return Arrays.asList( - maybeImportBitmap(type), - importFixedBytes(type, 1, ListViewVector.OFFSET_WIDTH), - importFixedBytes(type, 2, ListViewVector.SIZE_WIDTH)); - } - - @Override - public List visit(ArrowType.LargeListView type) { - return Arrays.asList( - maybeImportBitmap(type), - importFixedBytes(type, 1, LargeListViewVector.OFFSET_WIDTH), - importFixedBytes(type, 2, LargeListViewVector.SIZE_WIDTH)); - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/CDataDictionaryProvider.java b/java/c/src/main/java/org/apache/arrow/c/CDataDictionaryProvider.java deleted file mode 100644 index 2b49e6e4b5bc1..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/CDataDictionaryProvider.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c; - -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; - -/** - * A DictionaryProvider that is used in C Data Interface for imports. - * - *

    CDataDictionaryProvider is similar to {@link DictionaryProvider.MapDictionaryProvider} with a - * key difference that the dictionaries are owned by the provider so it must eventually be closed. - * - *

    The typical usage is to create the CDataDictionaryProvider and pass it to {@link - * Data#importField} or {@link Data#importSchema} to allocate empty dictionaries based on the - * information in {@link ArrowSchema}. Then you can re-use the same dictionary provider in any - * function that imports an {@link ArrowArray} that has the same schema. - */ -public class CDataDictionaryProvider implements DictionaryProvider, AutoCloseable { - - private final Map map; - - public CDataDictionaryProvider() { - this.map = new HashMap<>(); - } - - void put(Dictionary dictionary) { - Dictionary previous = map.put(dictionary.getEncoding().getId(), dictionary); - if (previous != null) { - previous.getVector().close(); - } - } - - @Override - public final Set getDictionaryIds() { - return map.keySet(); - } - - @Override - public Dictionary lookup(long id) { - return map.get(id); - } - - @Override - public void close() { - for (Dictionary dictionary : map.values()) { - dictionary.getVector().close(); - } - map.clear(); - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/Data.java b/java/c/src/main/java/org/apache/arrow/c/Data.java deleted file mode 100644 index 0b4da33b4eedc..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/Data.java +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.table.Table; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; - -/** - * Functions for working with the C data interface. - * - *

    This API is EXPERIMENTAL. Note that currently only 64bit systems are supported. - */ -public final class Data { - - private Data() {} - - /** - * Export Java Field using the C data interface format. - * - * @param allocator Buffer allocator for allocating C data interface fields - * @param field Field object to export - * @param provider Dictionary provider for dictionary encoded fields (optional) - * @param out C struct where to export the field - */ - public static void exportField( - BufferAllocator allocator, Field field, DictionaryProvider provider, ArrowSchema out) { - SchemaExporter exporter = new SchemaExporter(allocator); - exporter.export(out, field, provider); - } - - /** - * Export Java Schema using the C data interface format. - * - * @param allocator Buffer allocator for allocating C data interface fields - * @param schema Schema object to export - * @param provider Dictionary provider for dictionary encoded fields (optional) - * @param out C struct where to export the field - */ - public static void exportSchema( - BufferAllocator allocator, Schema schema, DictionaryProvider provider, ArrowSchema out) { - // Convert to a struct field equivalent to the input schema - FieldType fieldType = - new FieldType(false, new ArrowType.Struct(), null, schema.getCustomMetadata()); - Field field = new Field("", fieldType, schema.getFields()); - exportField(allocator, field, provider, out); - } - - /** - * Export Java FieldVector using the C data interface format. - * - *

    The resulting ArrowArray struct keeps the array data and buffers alive until its release - * callback is called by the consumer. - * - * @param allocator Buffer allocator for allocating C data interface fields - * @param vector Vector object to export - * @param provider Dictionary provider for dictionary encoded vectors (optional) - * @param out C struct where to export the array - */ - public static void exportVector( - BufferAllocator allocator, FieldVector vector, DictionaryProvider provider, ArrowArray out) { - exportVector(allocator, vector, provider, out, null); - } - - /** - * Export Java FieldVector using the C data interface format. - * - *

    The resulting ArrowArray struct keeps the array data and buffers alive until its release - * callback is called by the consumer. - * - * @param allocator Buffer allocator for allocating C data interface fields - * @param vector Vector object to export - * @param provider Dictionary provider for dictionary encoded vectors (optional) - * @param out C struct where to export the array - * @param outSchema C struct where to export the array type (optional) - */ - public static void exportVector( - BufferAllocator allocator, - FieldVector vector, - DictionaryProvider provider, - ArrowArray out, - ArrowSchema outSchema) { - if (outSchema != null) { - exportField(allocator, vector.getField(), provider, outSchema); - } - - ArrayExporter exporter = new ArrayExporter(allocator); - exporter.export(out, vector, provider); - } - - /** - * Export the current contents of a Java Table using the C data interface format. - * - *

    The table is exported as if it were a struct array. The resulting ArrowArray struct keeps - * the record batch data and buffers alive until its release callback is called by the consumer. - * - * @param allocator Buffer allocator for allocating C data interface fields - * @param table Table to export - * @param out C struct where to export the record batch - */ - public static void exportTable(BufferAllocator allocator, Table table, ArrowArray out) { - exportTable(allocator, table, table.getDictionaryProvider(), out, null); - } - - /** - * Export the current contents of a Java Table using the C data interface format. - * - *

    The table is exported as if it were a struct array. The resulting ArrowArray struct keeps - * the record batch data and buffers alive until its release callback is called by the consumer. - * - * @param allocator Buffer allocator for allocating C data interface fields - * @param table Table to export - * @param provider Dictionary provider for dictionary encoded vectors (optional) - * @param out C struct where to export the record batch - */ - public static void exportTable( - BufferAllocator allocator, Table table, DictionaryProvider provider, ArrowArray out) { - exportTable(allocator, table, provider, out, null); - } - - /** - * Export the current contents of a Java Table using the C data interface format. - * - *

    The table is exported as if it were a struct array. The resulting ArrowArray struct keeps - * the record batch data and buffers alive until its release callback is called by the consumer. - * - * @param allocator Buffer allocator for allocating C data interface fields - * @param table Table to export - * @param provider Dictionary provider for dictionary encoded vectors (optional) - * @param out C struct where to export the record batch - * @param outSchema C struct where to export the record batch schema (optional) - */ - public static void exportTable( - BufferAllocator allocator, - Table table, - DictionaryProvider provider, - ArrowArray out, - ArrowSchema outSchema) { - try (VectorSchemaRoot root = table.toVectorSchemaRoot()) { - exportVectorSchemaRoot(allocator, root, provider, out, outSchema); - } - } - - /** - * Export the current contents of a Java VectorSchemaRoot using the C data interface format. - * - *

    The vector schema root is exported as if it were a struct array. The resulting ArrowArray - * struct keeps the record batch data and buffers alive until its release callback is called by - * the consumer. - * - * @param allocator Buffer allocator for allocating C data interface fields - * @param vsr Vector schema root to export - * @param provider Dictionary provider for dictionary encoded vectors (optional) - * @param out C struct where to export the record batch - */ - public static void exportVectorSchemaRoot( - BufferAllocator allocator, - VectorSchemaRoot vsr, - DictionaryProvider provider, - ArrowArray out) { - exportVectorSchemaRoot(allocator, vsr, provider, out, null); - } - - /** - * Export the current contents of a Java VectorSchemaRoot using the C data interface format. - * - *

    The vector schema root is exported as if it were a struct array. The resulting ArrowArray - * struct keeps the record batch data and buffers alive until its release callback is called by - * the consumer. - * - * @param allocator Buffer allocator for allocating C data interface fields - * @param vsr Vector schema root to export - * @param provider Dictionary provider for dictionary encoded vectors (optional) - * @param out C struct where to export the record batch - * @param outSchema C struct where to export the record batch schema (optional) - */ - public static void exportVectorSchemaRoot( - BufferAllocator allocator, - VectorSchemaRoot vsr, - DictionaryProvider provider, - ArrowArray out, - ArrowSchema outSchema) { - if (outSchema != null) { - exportSchema(allocator, vsr.getSchema(), provider, outSchema); - } - - VectorUnloader unloader = new VectorUnloader(vsr); - try (ArrowRecordBatch recordBatch = unloader.getRecordBatch()) { - StructVectorLoader loader = new StructVectorLoader(vsr.getSchema()); - try (StructVector vector = loader.load(allocator, recordBatch)) { - exportVector(allocator, vector, provider, out); - } - } - } - - /** - * Export a reader as an ArrowArrayStream using the C Stream Interface. - * - * @param allocator Buffer allocator for allocating C data interface fields - * @param reader Reader to export - * @param out C struct to export the stream - */ - public static void exportArrayStream( - BufferAllocator allocator, ArrowReader reader, ArrowArrayStream out) { - new ArrayStreamExporter(allocator).export(out, reader); - } - - /** - * Import Java Field from the C data interface. - * - *

    The given ArrowSchema struct is released (as per the C data interface specification), even - * if this function fails. - * - * @param allocator Buffer allocator for allocating dictionary vectors - * @param schema C data interface struct representing the field [inout] - * @param provider A dictionary provider will be initialized with empty dictionary vectors - * (optional) - * @return Imported field object - */ - public static Field importField( - BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) { - try { - SchemaImporter importer = new SchemaImporter(allocator); - return importer.importField(schema, provider); - } finally { - schema.release(); - schema.close(); - } - } - - /** - * Import Java Schema from the C data interface. - * - *

    The given ArrowSchema struct is released (as per the C data interface specification), even - * if this function fails. - * - * @param allocator Buffer allocator for allocating dictionary vectors - * @param schema C data interface struct representing the field - * @param provider A dictionary provider will be initialized with empty dictionary vectors - * (optional) - * @return Imported schema object - */ - public static Schema importSchema( - BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) { - Field structField = importField(allocator, schema, provider); - if (structField.getType().getTypeID() != ArrowTypeID.Struct) { - throw new IllegalArgumentException( - "Cannot import schema: ArrowSchema describes non-struct type"); - } - return new Schema(structField.getChildren(), structField.getMetadata()); - } - - /** - * Import Java vector from the C data interface. - * - *

    The ArrowArray struct has its contents moved (as per the C data interface specification) to - * a private object held alive by the resulting array. - * - * @param allocator Buffer allocator - * @param array C data interface struct holding the array data - * @param vector Imported vector object [out] - * @param provider Dictionary provider to load dictionary vectors to (optional) - */ - public static void importIntoVector( - BufferAllocator allocator, - ArrowArray array, - FieldVector vector, - DictionaryProvider provider) { - ArrayImporter importer = new ArrayImporter(allocator, vector, provider); - importer.importArray(array); - } - - /** - * Import Java vector and its type from the C data interface. - * - *

    The ArrowArray struct has its contents moved (as per the C data interface specification) to - * a private object held alive by the resulting vector. The ArrowSchema struct is released, even - * if this function fails. - * - * @param allocator Buffer allocator for allocating the output FieldVector - * @param array C data interface struct holding the array data - * @param schema C data interface struct holding the array type - * @param provider Dictionary provider to load dictionary vectors to (optional) - * @return Imported vector object - */ - public static FieldVector importVector( - BufferAllocator allocator, - ArrowArray array, - ArrowSchema schema, - CDataDictionaryProvider provider) { - Field field = importField(allocator, schema, provider); - FieldVector vector = field.createVector(allocator); - importIntoVector(allocator, array, vector, provider); - return vector; - } - - /** - * Import record batch from the C data interface into vector schema root. - * - *

    The ArrowArray struct has its contents moved (as per the C data interface specification) to - * a private object held alive by the resulting vector schema root. - * - *

    The schema of the vector schema root must match the input array (undefined behavior - * otherwise). - * - * @param allocator Buffer allocator - * @param array C data interface struct holding the record batch data - * @param root vector schema root to load into - * @param provider Dictionary provider to load dictionary vectors to (optional) - */ - public static void importIntoVectorSchemaRoot( - BufferAllocator allocator, - ArrowArray array, - VectorSchemaRoot root, - DictionaryProvider provider) { - try (StructVector structVector = StructVector.emptyWithDuplicates("", allocator)) { - structVector.initializeChildrenFromFields(root.getSchema().getFields()); - importIntoVector(allocator, array, structVector, provider); - StructVectorUnloader unloader = new StructVectorUnloader(structVector); - VectorLoader loader = new VectorLoader(root); - try (ArrowRecordBatch recordBatch = unloader.getRecordBatch()) { - loader.load(recordBatch); - } - } - } - - /** - * Import Java vector schema root from a C data interface Schema. - * - *

    The type represented by the ArrowSchema struct must be a struct type array. - * - *

    The ArrowSchema struct is released, even if this function fails. - * - * @param allocator Buffer allocator for allocating the output VectorSchemaRoot - * @param schema C data interface struct holding the record batch schema - * @param provider Dictionary provider to load dictionary vectors to (optional) - * @return Imported vector schema root - */ - public static VectorSchemaRoot importVectorSchemaRoot( - BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) { - return importVectorSchemaRoot(allocator, null, schema, provider); - } - - /** - * Import Java vector schema root from the C data interface. - * - *

    The type represented by the ArrowSchema struct must be a struct type array. - * - *

    The ArrowArray struct has its contents moved (as per the C data interface specification) to - * a private object held alive by the resulting record batch. The ArrowSchema struct is released, - * even if this function fails. - * - *

    Prefer {@link #importIntoVectorSchemaRoot} for loading array data while reusing the same - * vector schema root. - * - * @param allocator Buffer allocator for allocating the output VectorSchemaRoot - * @param array C data interface struct holding the record batch data (optional) - * @param schema C data interface struct holding the record batch schema - * @param provider Dictionary provider to load dictionary vectors to (optional) - * @return Imported vector schema root - */ - public static VectorSchemaRoot importVectorSchemaRoot( - BufferAllocator allocator, - ArrowArray array, - ArrowSchema schema, - CDataDictionaryProvider provider) { - VectorSchemaRoot vsr = - VectorSchemaRoot.create(importSchema(allocator, schema, provider), allocator); - if (array != null) { - importIntoVectorSchemaRoot(allocator, array, vsr, provider); - } - return vsr; - } - - /** - * Import an ArrowArrayStream as an {@link ArrowReader}. - * - * @param allocator Buffer allocator for allocating the output data. - * @param stream C stream interface struct to import. - * @return Imported reader - */ - public static ArrowReader importArrayStream(BufferAllocator allocator, ArrowArrayStream stream) { - return new ArrowArrayStreamReader(allocator, stream); - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/Flags.java b/java/c/src/main/java/org/apache/arrow/c/Flags.java deleted file mode 100644 index dd38afb261a78..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/Flags.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c; - -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; -import org.apache.arrow.vector.types.pojo.Field; - -/** Flags as defined in the C data interface specification. */ -final class Flags { - static final int ARROW_FLAG_DICTIONARY_ORDERED = 1; - static final int ARROW_FLAG_NULLABLE = 2; - static final int ARROW_FLAG_MAP_KEYS_SORTED = 4; - - private Flags() {} - - static long forField(Field field) { - long flags = 0L; - if (field.isNullable()) { - flags |= ARROW_FLAG_NULLABLE; - } - if (field.getDictionary() != null && field.getDictionary().isOrdered()) { - flags |= ARROW_FLAG_DICTIONARY_ORDERED; - } - if (field.getType().getTypeID() == ArrowTypeID.Map) { - ArrowType.Map map = (ArrowType.Map) field.getType(); - if (map.getKeysSorted()) { - flags |= ARROW_FLAG_MAP_KEYS_SORTED; - } - } - return flags; - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/Format.java b/java/c/src/main/java/org/apache/arrow/c/Format.java deleted file mode 100644 index 7ce99614d2a7a..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/Format.java +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c; - -import java.util.Arrays; -import java.util.Locale; -import java.util.stream.Collectors; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.IntervalUnit; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; - -/** - * Conversion between {@link ArrowType} and string formats, as per C data interface specification. 
- */ -final class Format { - - private Format() {} - - static String asString(ArrowType arrowType) { - if (arrowType instanceof ExtensionType) { - ArrowType innerType = ((ExtensionType) arrowType).storageType(); - return asString(innerType); - } - - switch (arrowType.getTypeID()) { - case Binary: - return "z"; - case Bool: - return "b"; - case Date: - { - ArrowType.Date type = (ArrowType.Date) arrowType; - switch (type.getUnit()) { - case DAY: - return "tdD"; - case MILLISECOND: - return "tdm"; - default: - throw new UnsupportedOperationException( - String.format("Date type with unit %s is unsupported", type.getUnit())); - } - } - case Decimal: - { - ArrowType.Decimal type = (ArrowType.Decimal) arrowType; - if (type.getBitWidth() == 128) { - return String.format("d:%d,%d", type.getPrecision(), type.getScale()); - } - return String.format( - "d:%d,%d,%d", type.getPrecision(), type.getScale(), type.getBitWidth()); - } - case Duration: - { - ArrowType.Duration type = (ArrowType.Duration) arrowType; - switch (type.getUnit()) { - case SECOND: - return "tDs"; - case MILLISECOND: - return "tDm"; - case MICROSECOND: - return "tDu"; - case NANOSECOND: - return "tDn"; - default: - throw new UnsupportedOperationException( - String.format("Duration type with unit %s is unsupported", type.getUnit())); - } - } - case FixedSizeBinary: - { - ArrowType.FixedSizeBinary type = (ArrowType.FixedSizeBinary) arrowType; - return String.format("w:%d", type.getByteWidth()); - } - case FixedSizeList: - { - ArrowType.FixedSizeList type = (ArrowType.FixedSizeList) arrowType; - return String.format("+w:%d", type.getListSize()); - } - case FloatingPoint: - { - ArrowType.FloatingPoint type = (ArrowType.FloatingPoint) arrowType; - switch (type.getPrecision()) { - case HALF: - return "e"; - case SINGLE: - return "f"; - case DOUBLE: - return "g"; - default: - throw new UnsupportedOperationException( - String.format( - "FloatingPoint type with precision %s is unsupported", type.getPrecision())); - } 
- } - case Int: - { - String format; - ArrowType.Int type = (ArrowType.Int) arrowType; - switch (type.getBitWidth()) { - case Byte.SIZE: - format = "C"; - break; - case Short.SIZE: - format = "S"; - break; - case Integer.SIZE: - format = "I"; - break; - case Long.SIZE: - format = "L"; - break; - default: - throw new UnsupportedOperationException( - String.format("Int type with bitwidth %d is unsupported", type.getBitWidth())); - } - if (type.getIsSigned()) { - format = format.toLowerCase(Locale.ROOT); - } - return format; - } - case Interval: - { - ArrowType.Interval type = (ArrowType.Interval) arrowType; - switch (type.getUnit()) { - case DAY_TIME: - return "tiD"; - case YEAR_MONTH: - return "tiM"; - case MONTH_DAY_NANO: - return "tin"; - default: - throw new UnsupportedOperationException( - String.format("Interval type with unit %s is unsupported", type.getUnit())); - } - } - case LargeBinary: - return "Z"; - case LargeList: - return "+L"; - case LargeUtf8: - return "U"; - case List: - return "+l"; - case Map: - return "+m"; - case Null: - return "n"; - case Struct: - return "+s"; - case Time: - { - ArrowType.Time type = (ArrowType.Time) arrowType; - if (type.getUnit() == TimeUnit.SECOND && type.getBitWidth() == 32) { - return "tts"; - } else if (type.getUnit() == TimeUnit.MILLISECOND && type.getBitWidth() == 32) { - return "ttm"; - } else if (type.getUnit() == TimeUnit.MICROSECOND && type.getBitWidth() == 64) { - return "ttu"; - } else if (type.getUnit() == TimeUnit.NANOSECOND && type.getBitWidth() == 64) { - return "ttn"; - } else { - throw new UnsupportedOperationException( - String.format( - "Time type with unit %s and bitwidth %d is unsupported", - type.getUnit(), type.getBitWidth())); - } - } - case Timestamp: - { - String format; - ArrowType.Timestamp type = (ArrowType.Timestamp) arrowType; - switch (type.getUnit()) { - case SECOND: - format = "tss"; - break; - case MILLISECOND: - format = "tsm"; - break; - case MICROSECOND: - format = "tsu"; - break; - 
case NANOSECOND: - format = "tsn"; - break; - default: - throw new UnsupportedOperationException( - String.format("Timestamp type with unit %s is unsupported", type.getUnit())); - } - String timezone = type.getTimezone(); - return String.format("%s:%s", format, timezone == null ? "" : timezone); - } - case Union: - ArrowType.Union type = (ArrowType.Union) arrowType; - String typeIDs = - Arrays.stream(type.getTypeIds()) - .mapToObj(String::valueOf) - .collect(Collectors.joining(",")); - switch (type.getMode()) { - case Dense: - return String.format("+ud:%s", typeIDs); - case Sparse: - return String.format("+us:%s", typeIDs); - default: - throw new UnsupportedOperationException( - String.format("Union type with mode %s is unsupported", type.getMode())); - } - case Utf8: - return "u"; - case Utf8View: - return "vu"; - case BinaryView: - return "vz"; - case ListView: - return "+vl"; - case LargeListView: - return "+vL"; - case RunEndEncoded: - return "+r"; - case NONE: - throw new IllegalArgumentException("Arrow type ID is NONE"); - default: - throw new UnsupportedOperationException( - String.format("Unknown type id %s", arrowType.getTypeID())); - } - } - - static ArrowType asType(String format, long flags) - throws NumberFormatException, UnsupportedOperationException, IllegalStateException { - switch (format) { - case "n": - return new ArrowType.Null(); - case "b": - return new ArrowType.Bool(); - case "c": - return new ArrowType.Int(8, true); - case "C": - return new ArrowType.Int(8, false); - case "s": - return new ArrowType.Int(16, true); - case "S": - return new ArrowType.Int(16, false); - case "i": - return new ArrowType.Int(32, true); - case "I": - return new ArrowType.Int(32, false); - case "l": - return new ArrowType.Int(64, true); - case "L": - return new ArrowType.Int(64, false); - case "e": - return new ArrowType.FloatingPoint(FloatingPointPrecision.HALF); - case "f": - return new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE); - case "g": - return 
new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); - case "z": - return new ArrowType.Binary(); - case "Z": - return new ArrowType.LargeBinary(); - case "u": - return new ArrowType.Utf8(); - case "U": - return new ArrowType.LargeUtf8(); - case "tdD": - return new ArrowType.Date(DateUnit.DAY); - case "tdm": - return new ArrowType.Date(DateUnit.MILLISECOND); - case "tts": - return new ArrowType.Time(TimeUnit.SECOND, 32); - case "ttm": - return new ArrowType.Time(TimeUnit.MILLISECOND, 32); - case "ttu": - return new ArrowType.Time(TimeUnit.MICROSECOND, 64); - case "ttn": - return new ArrowType.Time(TimeUnit.NANOSECOND, 64); - case "tDs": - return new ArrowType.Duration(TimeUnit.SECOND); - case "tDm": - return new ArrowType.Duration(TimeUnit.MILLISECOND); - case "tDu": - return new ArrowType.Duration(TimeUnit.MICROSECOND); - case "tDn": - return new ArrowType.Duration(TimeUnit.NANOSECOND); - case "tiM": - return new ArrowType.Interval(IntervalUnit.YEAR_MONTH); - case "tiD": - return new ArrowType.Interval(IntervalUnit.DAY_TIME); - case "tin": - return new ArrowType.Interval(IntervalUnit.MONTH_DAY_NANO); - case "+l": - return new ArrowType.List(); - case "+L": - return new ArrowType.LargeList(); - case "+s": - return new ArrowType.Struct(); - case "+m": - boolean keysSorted = (flags & Flags.ARROW_FLAG_MAP_KEYS_SORTED) != 0; - return new ArrowType.Map(keysSorted); - case "vu": - return new ArrowType.Utf8View(); - case "vz": - return new ArrowType.BinaryView(); - case "+vl": - return new ArrowType.ListView(); - case "+vL": - return new ArrowType.LargeListView(); - case "+r": - return new ArrowType.RunEndEncoded(); - default: - String[] parts = format.split(":", 2); - if (parts.length == 2) { - return parseComplexFormat(parts[0], parts[1]); - } - throw new UnsupportedOperationException( - String.format("Format %s is not supported", format)); - } - } - - private static ArrowType parseComplexFormat(String format, String payload) - throws NumberFormatException, 
UnsupportedOperationException, IllegalStateException { - switch (format) { - case "d": - { - int[] parts = payloadToIntArray(payload); - Preconditions.checkState( - parts.length == 2 || parts.length == 3, "Format %s:%s is illegal", format, payload); - int precision = parts[0]; - int scale = parts[1]; - Integer bitWidth = (parts.length == 3) ? parts[2] : null; - return ArrowType.Decimal.createDecimal(precision, scale, bitWidth); - } - case "w": - return new ArrowType.FixedSizeBinary(Integer.parseInt(payload)); - case "+w": - return new ArrowType.FixedSizeList(Integer.parseInt(payload)); - case "+ud": - return new ArrowType.Union(UnionMode.Dense, payloadToIntArray(payload)); - case "+us": - return new ArrowType.Union(UnionMode.Sparse, payloadToIntArray(payload)); - case "tss": - return new ArrowType.Timestamp(TimeUnit.SECOND, payloadToTimezone(payload)); - case "tsm": - return new ArrowType.Timestamp(TimeUnit.MILLISECOND, payloadToTimezone(payload)); - case "tsu": - return new ArrowType.Timestamp(TimeUnit.MICROSECOND, payloadToTimezone(payload)); - case "tsn": - return new ArrowType.Timestamp(TimeUnit.NANOSECOND, payloadToTimezone(payload)); - default: - throw new UnsupportedOperationException( - String.format("Format %s:%s is not supported", format, payload)); - } - } - - private static int[] payloadToIntArray(String payload) throws NumberFormatException { - return Arrays.stream(payload.split(",")).mapToInt(Integer::parseInt).toArray(); - } - - private static String payloadToTimezone(String payload) { - if (payload.isEmpty()) { - return null; - } - return payload; - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/Metadata.java b/java/c/src/main/java/org/apache/arrow/c/Metadata.java deleted file mode 100644 index 43567b7619388..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/Metadata.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c; - -import static org.apache.arrow.c.NativeUtil.NULL; -import static org.apache.arrow.util.Preconditions.checkState; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.MemoryUtil; - -/** Encode and decode metadata. 
*/ -final class Metadata { - - private Metadata() {} - - static ArrowBuf encode(BufferAllocator allocator, Map metadata) { - if (metadata == null || metadata.size() == 0) { - return null; - } - - List buffers = new ArrayList<>(metadata.size() * 2); - int totalSize = 4 + metadata.size() * 8; // number of key/value pairs + buffer length fields - for (Map.Entry entry : metadata.entrySet()) { - byte[] keyBuffer = entry.getKey().getBytes(StandardCharsets.UTF_8); - byte[] valueBuffer = entry.getValue().getBytes(StandardCharsets.UTF_8); - totalSize += keyBuffer.length; - totalSize += valueBuffer.length; - buffers.add(keyBuffer); - buffers.add(valueBuffer); - } - - ArrowBuf result = allocator.buffer(totalSize); - ByteBuffer writer = - MemoryUtil.directBuffer(result.memoryAddress(), totalSize).order(ByteOrder.nativeOrder()); - writer.putInt(metadata.size()); - for (byte[] buffer : buffers) { - writer.putInt(buffer.length); - writer.put(buffer); - } - return result.slice(0, totalSize); - } - - static Map decode(long bufferAddress) { - if (bufferAddress == NULL) { - return null; - } - - ByteBuffer reader = - MemoryUtil.directBuffer(bufferAddress, Integer.MAX_VALUE).order(ByteOrder.nativeOrder()); - - int size = reader.getInt(); - checkState(size >= 0, "Metadata size must not be negative"); - if (size == 0) { - return null; - } - - Map result = new HashMap<>(size); - for (int i = 0; i < size; i++) { - String key = readString(reader); - String value = readString(reader); - result.put(key, value); - } - return result; - } - - private static String readString(ByteBuffer reader) { - int length = reader.getInt(); - checkState(length >= 0, "Metadata item length must not be negative"); - String result = ""; - if (length > 0) { - byte[] dst = new byte[length]; - reader.get(dst); - result = new String(dst, StandardCharsets.UTF_8); - } - return result; - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/NativeUtil.java b/java/c/src/main/java/org/apache/arrow/c/NativeUtil.java 
deleted file mode 100644 index 1b4c66dea55fe..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/NativeUtil.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c; - -import java.nio.Buffer; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.charset.StandardCharsets; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.MemoryUtil; - -/** Utility functions for working with native memory. */ -public final class NativeUtil { - public static final byte NULL = 0; - static final int MAX_STRING_LENGTH = Short.MAX_VALUE; - - private NativeUtil() {} - - /** - * Convert a pointer to a null terminated string into a Java String. 
- * - * @param cstringPtr pointer to C string - * @return Converted string - */ - public static String toJavaString(long cstringPtr) { - if (cstringPtr == NULL) { - return null; - } - ByteBuffer reader = - MemoryUtil.directBuffer(cstringPtr, MAX_STRING_LENGTH).order(ByteOrder.nativeOrder()); - - int length = 0; - while (reader.get() != NULL) { - length++; - } - byte[] bytes = new byte[length]; - // Force use of base class rewind() to avoid breaking change of ByteBuffer.rewind in JDK9+ - ((ByteBuffer) ((Buffer) reader).rewind()).get(bytes); - return new String(bytes, 0, length, StandardCharsets.UTF_8); - } - - /** - * Convert a native array pointer (void**) to Java array of pointers. - * - * @param arrayPtr Array pointer - * @param size Array size - * @return Array of pointer values as longs - */ - public static long[] toJavaArray(long arrayPtr, int size) { - if (arrayPtr == NULL) { - return null; - } - if (size < 0) { - throw new IllegalArgumentException("Invalid native array size"); - } - - long[] result = new long[size]; - ByteBuffer reader = - MemoryUtil.directBuffer(arrayPtr, Long.BYTES * size).order(ByteOrder.nativeOrder()); - for (int i = 0; i < size; i++) { - result[i] = reader.getLong(); - } - return result; - } - - /** - * Convert Java string to a null terminated string. - * - * @param allocator Buffer allocator for allocating the native string - * @param string Input String to convert - * @return Buffer with a null terminated string or null if the input is null - */ - public static ArrowBuf toNativeString(BufferAllocator allocator, String string) { - if (string == null) { - return null; - } - - byte[] bytes = string.getBytes(StandardCharsets.UTF_8); - ArrowBuf buffer = allocator.buffer(bytes.length + 1); - buffer.writeBytes(bytes); - buffer.writeByte(NULL); - return buffer; - } - - /** - * Close a buffer if it's not null. 
- * - * @param buf Buffer to close - */ - public static void closeBuffer(ArrowBuf buf) { - if (buf != null) { - buf.close(); - } - } - - /** - * Get the address of a buffer or {@value #NULL} if the input buffer is null. - * - * @param buf Buffer to get the address of - * @return Memory address or {@value #NULL} - */ - public static long addressOrNull(ArrowBuf buf) { - if (buf == null) { - return NULL; - } - return buf.memoryAddress(); - } - - /** - * Get the address of a C Data Interface struct or {@value #NULL} if the input struct is null. - * - * @param struct C Data Interface struct to get the address of - * @return Memory address or {@value #NULL} - */ - public static long addressOrNull(BaseStruct struct) { - if (struct == null) { - return NULL; - } - return struct.memoryAddress(); - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/ReferenceCountedArrowArray.java b/java/c/src/main/java/org/apache/arrow/c/ReferenceCountedArrowArray.java deleted file mode 100644 index cf50f9417b602..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/ReferenceCountedArrowArray.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import java.util.concurrent.atomic.AtomicInteger; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ForeignAllocation; - -/** - * The owner of an imported C Data Interface array. - * - *

    There is a fundamental mismatch here between memory allocation schemes: AllocationManager - * represents a single allocation (= a single address and length). But an ArrowArray combines - * multiple allocations behind a single deallocation callback. This class bridges the two by - * tracking a reference count, so that the single callback can be managed by multiple {@link - * ForeignAllocation} instances. - */ -final class ReferenceCountedArrowArray { - private final ArrowArray array; - private final AtomicInteger refCnt; - - ReferenceCountedArrowArray(ArrowArray array) { - this.array = array; - this.refCnt = new AtomicInteger(1); - } - - void retain() { - if (refCnt.addAndGet(1) - 1 <= 0) { - throw new IllegalStateException("Tried to retain a released ArrowArray"); - } - } - - void release() { - int refcnt = refCnt.addAndGet(-1); - if (refcnt == 0) { - array.release(); - array.close(); - } else if (refcnt < 0) { - throw new IllegalStateException("Reference count went negative for imported ArrowArray"); - } - } - - /** - * Create an ArrowBuf wrapping a buffer from this ArrowArray associated with the given - * BufferAllocator. - * - *

    This method is "unsafe" because there is no validation of the given capacity or address. If - * the returned buffer is not freed, a memory leak will occur. - */ - ArrowBuf unsafeAssociateAllocation( - BufferAllocator trackingAllocator, long capacity, long memoryAddress) { - retain(); - return trackingAllocator.wrapForeignAllocation( - new ForeignAllocation(capacity, memoryAddress) { - @Override - protected void release0() { - ReferenceCountedArrowArray.this.release(); - } - }); - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/SchemaExporter.java b/java/c/src/main/java/org/apache/arrow/c/SchemaExporter.java deleted file mode 100644 index 41a73c410f5bf..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/SchemaExporter.java +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.apache.arrow.c.NativeUtil.NULL; -import static org.apache.arrow.c.NativeUtil.addressOrNull; -import static org.apache.arrow.util.Preconditions.checkNotNull; - -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.c.jni.JniWrapper; -import org.apache.arrow.c.jni.PrivateData; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; - -/** Exporter for {@link ArrowSchema}. */ -final class SchemaExporter { - private final BufferAllocator allocator; - - public SchemaExporter(BufferAllocator allocator) { - this.allocator = allocator; - } - - /** Private data structure for exported schemas. */ - static class ExportedSchemaPrivateData implements PrivateData { - ArrowBuf format; - ArrowBuf name; - ArrowBuf metadata; - ArrowBuf children_ptrs; - ArrowSchema dictionary; - List children; - - @Override - public void close() { - NativeUtil.closeBuffer(format); - NativeUtil.closeBuffer(name); - NativeUtil.closeBuffer(metadata); - NativeUtil.closeBuffer(children_ptrs); - if (dictionary != null) { - dictionary.close(); - } - if (children != null) { - for (ArrowSchema child : children) { - child.close(); - } - } - } - } - - void export(ArrowSchema schema, Field field, DictionaryProvider dictionaryProvider) { - String name = field.getName(); - String format = Format.asString(field.getType()); - long flags = Flags.forField(field); - List children = field.getChildren(); - DictionaryEncoding dictionaryEncoding = field.getDictionary(); - - ExportedSchemaPrivateData data = new ExportedSchemaPrivateData(); - try { - data.format = NativeUtil.toNativeString(allocator, format); - data.name = NativeUtil.toNativeString(allocator, name); - data.metadata = 
Metadata.encode(allocator, field.getMetadata()); - - if (children != null) { - data.children = new ArrayList<>(children.size()); - data.children_ptrs = allocator.buffer((long) children.size() * Long.BYTES); - for (int i = 0; i < children.size(); i++) { - ArrowSchema child = ArrowSchema.allocateNew(allocator); - data.children.add(child); - data.children_ptrs.writeLong(child.memoryAddress()); - } - } - - if (dictionaryEncoding != null) { - Dictionary dictionary = dictionaryProvider.lookup(dictionaryEncoding.getId()); - checkNotNull(dictionary, "Dictionary lookup failed on export of field with dictionary"); - - data.dictionary = ArrowSchema.allocateNew(allocator); - export(data.dictionary, dictionary.getVector().getField(), dictionaryProvider); - } - - ArrowSchema.Snapshot snapshot = new ArrowSchema.Snapshot(); - snapshot.format = data.format.memoryAddress(); - snapshot.name = addressOrNull(data.name); - snapshot.metadata = addressOrNull(data.metadata); - snapshot.flags = flags; - snapshot.n_children = (data.children != null) ? 
data.children.size() : 0; - snapshot.children = addressOrNull(data.children_ptrs); - snapshot.dictionary = addressOrNull(data.dictionary); - snapshot.release = NULL; - schema.save(snapshot); - - // sets release and private data - JniWrapper.get().exportSchema(schema.memoryAddress(), data); - } catch (Exception e) { - data.close(); - throw e; - } - - // Export children - if (children != null) { - for (int i = 0; i < children.size(); i++) { - Field childField = children.get(i); - ArrowSchema child = data.children.get(i); - export(child, childField, dictionaryProvider); - } - } - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/SchemaImporter.java b/java/c/src/main/java/org/apache/arrow/c/SchemaImporter.java deleted file mode 100644 index c94a2b3b8d9fc..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/SchemaImporter.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.apache.arrow.c.NativeUtil.NULL; -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.apache.arrow.util.Preconditions.checkNotNull; -import static org.apache.arrow.util.Preconditions.checkState; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Importer for {@link ArrowSchema}. */ -final class SchemaImporter { - private static final Logger logger = LoggerFactory.getLogger(SchemaImporter.class); - - private static final int MAX_IMPORT_RECURSION_LEVEL = 64; - private long nextDictionaryID = 0L; - - private final BufferAllocator allocator; - - public SchemaImporter(BufferAllocator allocator) { - this.allocator = allocator; - } - - Field importField(ArrowSchema schema, CDataDictionaryProvider provider) { - return importField(schema, provider, 0); - } - - private Field importField( - ArrowSchema schema, CDataDictionaryProvider provider, int recursionLevel) { - checkState( - recursionLevel <= MAX_IMPORT_RECURSION_LEVEL, - "Recursion level in ArrowSchema struct exceeded"); - - ArrowSchema.Snapshot snapshot = schema.snapshot(); - checkState(snapshot.release != NULL, "Cannot import released ArrowSchema"); - - String name = NativeUtil.toJavaString(snapshot.name); - String format = NativeUtil.toJavaString(snapshot.format); - checkNotNull(format, "format field must not be null"); - ArrowType arrowType = Format.asType(format, 
snapshot.flags); - boolean nullable = (snapshot.flags & Flags.ARROW_FLAG_NULLABLE) != 0; - Map metadata = Metadata.decode(snapshot.metadata); - - if (metadata != null && metadata.containsKey(ExtensionType.EXTENSION_METADATA_KEY_NAME)) { - final String extensionName = metadata.get(ExtensionType.EXTENSION_METADATA_KEY_NAME); - final String extensionMetadata = - metadata.getOrDefault(ExtensionType.EXTENSION_METADATA_KEY_METADATA, ""); - ExtensionType extensionType = ExtensionTypeRegistry.lookup(extensionName); - if (extensionType != null) { - arrowType = extensionType.deserialize(arrowType, extensionMetadata); - } else { - // Otherwise, we haven't registered the type - logger.info("Unrecognized extension type: {}", extensionName); - } - } - - // Handle dictionary encoded vectors - DictionaryEncoding dictionaryEncoding = null; - if (snapshot.dictionary != NULL && provider != null) { - boolean ordered = (snapshot.flags & Flags.ARROW_FLAG_DICTIONARY_ORDERED) != 0; - ArrowType.Int indexType = (ArrowType.Int) arrowType; - dictionaryEncoding = new DictionaryEncoding(nextDictionaryID++, ordered, indexType); - - ArrowSchema dictionarySchema = ArrowSchema.wrap(snapshot.dictionary); - Field dictionaryField = importField(dictionarySchema, provider, recursionLevel + 1); - provider.put(new Dictionary(dictionaryField.createVector(allocator), dictionaryEncoding)); - } - - FieldType fieldType = new FieldType(nullable, arrowType, dictionaryEncoding, metadata); - - List children = null; - long[] childrenIds = - NativeUtil.toJavaArray(snapshot.children, checkedCastToInt(snapshot.n_children)); - if (childrenIds != null && childrenIds.length > 0) { - children = new ArrayList<>(childrenIds.length); - for (long childAddress : childrenIds) { - ArrowSchema childSchema = ArrowSchema.wrap(childAddress); - Field field = importField(childSchema, provider, recursionLevel + 1); - children.add(field); - } - } - return new Field(name, fieldType, children); - } -} diff --git 
a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java deleted file mode 100644 index 6a962cc342114..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.apache.arrow.util.Preconditions.checkArgument; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Collections2; -import org.apache.arrow.vector.BaseVariableWidthViewVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.TypeLayout; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.vector.compression.CompressionUtil; -import org.apache.arrow.vector.compression.NoCompressionCodec; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; - -/** Loads buffers into {@link StructVector}. */ -public class StructVectorLoader { - - private final Schema schema; - private final CompressionCodec.Factory factory; - - /** - * A flag indicating if decompression is needed. This will affect the behavior of releasing - * buffers. - */ - private boolean decompressionNeeded; - - /** - * Construct with a schema. - * - *

    The schema referred to here can be obtained from the struct vector. The schema here should - * be the children of a struct vector, not a schema containing the struct field itself. For - * example: - * Schema schema = new Schema(structVector.getField().getChildren()); - * - * - * @param schema buffers are added based on schema. - */ - public StructVectorLoader(Schema schema) { - this(schema, NoCompressionCodec.Factory.INSTANCE); - } - - /** - * Construct with a schema and a compression codec factory. - * - * @param schema buffers are added based on schema. - * @param factory the factory to create codec. - */ - public StructVectorLoader(Schema schema, CompressionCodec.Factory factory) { - this.schema = schema; - this.factory = factory; - } - - /** - * Loads the record batch into the struct vector. - * - *

    This will not close the record batch. - * - * @param recordBatch the batch to load - */ - public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch) { - StructVector result = StructVector.emptyWithDuplicates("", allocator); - result.initializeChildrenFromFields(this.schema.getFields()); - - Iterator buffers = recordBatch.getBuffers().iterator(); - Iterator nodes = recordBatch.getNodes().iterator(); - CompressionUtil.CodecType codecType = - CompressionUtil.CodecType.fromCompressionType(recordBatch.getBodyCompression().getCodec()); - decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; - CompressionCodec codec = - decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; - Iterator variadicBufferCounts = Collections.emptyIterator(); - if (recordBatch.getVariadicBufferCounts() != null - && !recordBatch.getVariadicBufferCounts().isEmpty()) { - variadicBufferCounts = recordBatch.getVariadicBufferCounts().iterator(); - } - for (FieldVector fieldVector : result.getChildrenFromFields()) { - loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec, variadicBufferCounts); - } - result.loadFieldBuffers( - new ArrowFieldNode(recordBatch.getLength(), 0), Collections.singletonList(null)); - if (nodes.hasNext() || buffers.hasNext() || variadicBufferCounts.hasNext()) { - throw new IllegalArgumentException( - "not all nodes, buffers and variadicBufferCounts were consumed. 
nodes: " - + Collections2.toString(nodes) - + " buffers: " - + Collections2.toString(buffers) - + " variadicBufferCounts: " - + Collections2.toString(variadicBufferCounts)); - } - return result; - } - - private void loadBuffers( - FieldVector vector, - Field field, - Iterator buffers, - Iterator nodes, - CompressionCodec codec, - Iterator variadicBufferCounts) { - checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); - ArrowFieldNode fieldNode = nodes.next(); - // variadicBufferLayoutCount will be 0 for vectors of a type except BaseVariableWidthViewVector - long variadicBufferLayoutCount = 0; - if (vector instanceof BaseVariableWidthViewVector) { - if (variadicBufferCounts.hasNext()) { - variadicBufferLayoutCount = variadicBufferCounts.next(); - } else { - throw new IllegalStateException( - "No variadicBufferCounts available for BaseVariableWidthViewVector"); - } - } - int bufferLayoutCount = - (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType())); - List ownBuffers = new ArrayList<>(bufferLayoutCount); - for (int j = 0; j < bufferLayoutCount; j++) { - ArrowBuf nextBuf = buffers.next(); - // for vectors without nulls, the buffer is empty, so there is no need to - // decompress it. - ArrowBuf bufferToAdd = - nextBuf.writerIndex() > 0 ? codec.decompress(vector.getAllocator(), nextBuf) : nextBuf; - ownBuffers.add(bufferToAdd); - if (decompressionNeeded) { - // decompression performed - nextBuf.getReferenceManager().retain(); - } - } - try { - vector.loadFieldBuffers(fieldNode, ownBuffers); - if (decompressionNeeded) { - for (ArrowBuf buf : ownBuffers) { - buf.close(); - } - } - } catch (RuntimeException e) { - throw new IllegalArgumentException( - "Could not load buffers for field " + field + ". 
error message: " + e.getMessage(), e); - } - List children = field.getChildren(); - if (children.size() > 0) { - List childrenFromFields = vector.getChildrenFromFields(); - checkArgument( - children.size() == childrenFromFields.size(), - "should have as many children as in the schema: found %s expected %s", - childrenFromFields.size(), - children.size()); - for (int i = 0; i < childrenFromFields.size(); i++) { - Field child = children.get(i); - FieldVector fieldVector = childrenFromFields.get(i); - loadBuffers(fieldVector, child, buffers, nodes, codec, variadicBufferCounts); - } - } - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java deleted file mode 100644 index 6f094b92c7991..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.BaseVariableWidthViewVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.TypeLayout; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.vector.compression.CompressionUtil; -import org.apache.arrow.vector.compression.NoCompressionCodec; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; - -/** Helper class that handles converting a {@link StructVector} to a {@link ArrowRecordBatch}. */ -public class StructVectorUnloader { - - private final StructVector root; - private final boolean includeNullCount; - private final CompressionCodec codec; - private final boolean alignBuffers; - - /** Constructs a new instance of the given struct vector. */ - public StructVectorUnloader(StructVector root) { - this(root, true, NoCompressionCodec.INSTANCE, true); - } - - /** - * Constructs a new instance. - * - * @param root The struct vector to serialize to an {@link ArrowRecordBatch}. - * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch} - * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries. - */ - public StructVectorUnloader(StructVector root, boolean includeNullCount, boolean alignBuffers) { - this(root, includeNullCount, NoCompressionCodec.INSTANCE, alignBuffers); - } - - /** - * Constructs a new instance. - * - * @param root The struct vector to serialize to an {@link ArrowRecordBatch}. - * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch} - * @param codec the codec for compressing data. If it is null, then no compression is needed. - * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries. 
- */ - public StructVectorUnloader( - StructVector root, boolean includeNullCount, CompressionCodec codec, boolean alignBuffers) { - this.root = root; - this.includeNullCount = includeNullCount; - this.codec = codec; - this.alignBuffers = alignBuffers; - } - - /** - * Performs the depth first traversal of the Vectors to create an {@link ArrowRecordBatch} - * suitable for serialization. - */ - public ArrowRecordBatch getRecordBatch() { - List nodes = new ArrayList<>(); - List buffers = new ArrayList<>(); - List variadicBufferCounts = new ArrayList<>(); - for (FieldVector vector : root.getChildrenFromFields()) { - appendNodes(vector, nodes, buffers, variadicBufferCounts); - } - return new ArrowRecordBatch( - root.getValueCount(), - nodes, - buffers, - CompressionUtil.createBodyCompression(codec), - variadicBufferCounts, - alignBuffers); - } - - private long getVariadicBufferCount(FieldVector vector) { - if (vector instanceof BaseVariableWidthViewVector) { - return ((BaseVariableWidthViewVector) vector).getDataBuffers().size(); - } - return 0L; - } - - private void appendNodes( - FieldVector vector, - List nodes, - List buffers, - List variadicBufferCounts) { - nodes.add( - new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); - List fieldBuffers = vector.getFieldBuffers(); - long variadicBufferCount = getVariadicBufferCount(vector); - int expectedBufferCount = - (int) (TypeLayout.getTypeBufferCount(vector.getField().getType()) + variadicBufferCount); - // only update variadicBufferCounts for vectors that have variadic buffers - if (vector instanceof BaseVariableWidthViewVector) { - variadicBufferCounts.add(variadicBufferCount); - } - if (fieldBuffers.size() != expectedBufferCount) { - throw new IllegalArgumentException( - String.format( - "wrong number of buffers for field %s in vector %s. 
found: %s", - vector.getField(), vector.getClass().getSimpleName(), fieldBuffers)); - } - for (ArrowBuf buf : fieldBuffers) { - buffers.add(codec.compress(vector.getAllocator(), buf)); - } - for (FieldVector child : vector.getChildrenFromFields()) { - appendNodes(child, nodes, buffers, variadicBufferCounts); - } - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/jni/CDataJniException.java b/java/c/src/main/java/org/apache/arrow/c/jni/CDataJniException.java deleted file mode 100644 index ec02f13d58df9..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/jni/CDataJniException.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c.jni; - -/** An exception raised by the JNI side of the C Data bridge. */ -public final class CDataJniException extends Exception { - private final int errno; - - public CDataJniException(int errno, String message) { - super(message); - this.errno = errno; - } - - /** The original error code returned from C. 
*/ - public int getErrno() { - return errno; - } - - @Override - public String getMessage() { - return "CDataJniException{" + "errno=" + errno + ", message=" + super.getMessage() + '}'; - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/jni/JniLoader.java b/java/c/src/main/java/org/apache/arrow/c/jni/JniLoader.java deleted file mode 100644 index f712b400bfd4e..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/jni/JniLoader.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c.jni; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.StandardCopyOption; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Locale; -import java.util.Set; - -/** The JniLoader for C Data Interface API's native implementation. 
*/ -public class JniLoader { - private static final JniLoader INSTANCE = - new JniLoader(Collections.singletonList("arrow_cdata_jni")); - - public static JniLoader get() { - return INSTANCE; - } - - private final Set librariesToLoad; - - private JniLoader(List libraryNames) { - librariesToLoad = new HashSet<>(libraryNames); - } - - private boolean finished() { - return librariesToLoad.isEmpty(); - } - - /** If required JNI libraries are not loaded, then load them. */ - public void ensureLoaded() { - if (finished()) { - return; - } - loadRemaining(); - } - - private synchronized void loadRemaining() { - // The method is protected by a mutex via synchronized, if more than one thread - // race to call - // loadRemaining, at same time only one will do the actual loading and the - // others will wait for - // the mutex to be acquired then check on the remaining list: if there are - // libraries that were not - // successfully loaded then the mutex owner will try to load them again. - if (finished()) { - return; - } - List libs = new ArrayList<>(librariesToLoad); - for (String lib : libs) { - load(lib); - librariesToLoad.remove(lib); - } - } - - private void load(String name) { - final String libraryToLoad = - name + "/" + getNormalizedArch() + "/" + System.mapLibraryName(name); - try { - File temp = - File.createTempFile("jnilib-", ".tmp", new File(System.getProperty("java.io.tmpdir"))); - temp.deleteOnExit(); - try (final InputStream is = - JniWrapper.class.getClassLoader().getResourceAsStream(libraryToLoad)) { - if (is == null) { - throw new FileNotFoundException(libraryToLoad); - } - Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); - System.load(temp.getAbsolutePath()); - } - } catch (IOException e) { - throw new IllegalStateException("error loading native libraries: " + e); - } - } - - private String getNormalizedArch() { - String arch = System.getProperty("os.arch").toLowerCase(Locale.US); - switch (arch) { - case "amd64": - arch = "x86_64"; - 
break; - case "aarch64": - arch = "aarch_64"; - break; - default: - break; - } - return arch; - } -} diff --git a/java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java b/java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java deleted file mode 100644 index 12b0d0fa21ef5..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/jni/JniWrapper.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c.jni; - -/** JniWrapper for C Data Interface API implementation. 
*/ -public class JniWrapper { - private static final JniWrapper INSTANCE = new JniWrapper(); - - public static JniWrapper get() { - JniLoader.get().ensureLoaded(); - return INSTANCE; - } - - private JniWrapper() { - // A best effort to error on 32-bit systems - String dataModel = System.getProperty("sun.arch.data.model"); - if (dataModel != null && dataModel.equals("32")) { - throw new UnsupportedOperationException( - "The Java C Data Interface implementation is currently only supported on 64-bit systems"); - } - } - - public native void releaseSchema(long memoryAddress); - - public native void releaseArray(long memoryAddress); - - public native void getNextArrayStream(long streamAddress, long arrayAddress) - throws CDataJniException; - - public native void getSchemaArrayStream(long streamAddress, long arrayAddress) - throws CDataJniException; - - public native void releaseArrayStream(long memoryAddress); - - public native void exportSchema(long memoryAddress, PrivateData privateData); - - public native void exportArray(long memoryAddress, PrivateData data); - - public native void exportArrayStream(long memoryAddress, PrivateData data); -} diff --git a/java/c/src/main/java/org/apache/arrow/c/jni/PrivateData.java b/java/c/src/main/java/org/apache/arrow/c/jni/PrivateData.java deleted file mode 100644 index 60db6831f9ca6..0000000000000 --- a/java/c/src/main/java/org/apache/arrow/c/jni/PrivateData.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.c.jni; - -import java.io.Closeable; - -/** - * Interface for Java objects stored in C data interface private data. - * - *

    This interface is used for exported structures. - */ -public interface PrivateData extends Closeable { - - @Override - void close(); -} diff --git a/java/c/src/test/java/org/apache/arrow/c/ArrowArrayUtilityTest.java b/java/c/src/test/java/org/apache/arrow/c/ArrowArrayUtilityTest.java deleted file mode 100644 index 511358a5e62fa..0000000000000 --- a/java/c/src/test/java/org/apache/arrow/c/ArrowArrayUtilityTest.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReferenceManager; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.memory.util.MemoryUtil; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class ArrowArrayUtilityTest { - BufferAllocator allocator; - ArrowArray arrowArray; - ReferenceCountedArrowArray dummyHandle; - - @BeforeEach - void beforeEach() { - allocator = new RootAllocator(); - arrowArray = ArrowArray.allocateNew(allocator); - dummyHandle = new ReferenceCountedArrowArray(arrowArray); - } - - @AfterEach - void afterEach() { - dummyHandle.release(); - allocator.close(); - } - - @Test - void arraySchemaInit() { - ArrowSchema schema = ArrowSchema.allocateNew(allocator); - assertThat(schema.snapshot().release).isEqualTo(0); - schema.close(); - } - - // ------------------------------------------------------------ - // BufferImportTypeVisitor - - @Test - void importBuffer() throws Exception { - // Note values are all dummy values here - try (BufferImportTypeVisitor notEmptyDataVisitor = - new BufferImportTypeVisitor( - allocator, dummyHandle, new ArrowFieldNode(/* length= */ 1, 0), new long[] {0})) { - - // Too few buffers - assertThrows( - IllegalStateException.class, - () -> notEmptyDataVisitor.importBuffer(new ArrowType.Bool(), 1, 1)); - - // Null where one isn't expected - assertThrows( - IllegalStateException.class, - () -> notEmptyDataVisitor.importBuffer(new ArrowType.Bool(), 0, 1)); - - // Expected capacity not zero but c array ptr is NULL (zero) - 
assertThrows( - IllegalStateException.class, - () -> notEmptyDataVisitor.importBuffer(new ArrowType.Bool(), 0, 1)); - - // Expected capacity is zero and c array ptr is NULL (zero) - assertThat(notEmptyDataVisitor.importBuffer(new ArrowType.Bool(), 0, 0)) - .isEqualTo(allocator.getEmpty()); - } - - try (BufferImportTypeVisitor emptyDataVisitor = - new BufferImportTypeVisitor( - allocator, dummyHandle, new ArrowFieldNode(/* length= */ 0, 0), new long[] {0})) { - - // Too few buffers - assertThrows( - IllegalStateException.class, - () -> emptyDataVisitor.importBuffer(new ArrowType.Bool(), 1, 1)); - - // Expected capacity not zero but c array ptr is NULL (zero) - assertThrows( - IllegalStateException.class, - () -> emptyDataVisitor.importBuffer(new ArrowType.Bool(), 0, 1)); - - // Expected capacity is zero and c array ptr is NULL (zero) - assertThat(emptyDataVisitor.importBuffer(new ArrowType.Bool(), 0, 0)) - .isEqualTo(allocator.getEmpty()); - } - } - - @Test - void cleanupAfterFailure() throws Exception { - // Note values are all dummy values here - long address = MemoryUtil.allocateMemory(16); - try (BufferImportTypeVisitor visitor = - new BufferImportTypeVisitor( - allocator, dummyHandle, new ArrowFieldNode(0, 0), new long[] {address})) { - // This fails, but only after we've already imported a buffer. 
- assertThrows(IllegalStateException.class, () -> visitor.visit(new ArrowType.Int(32, true))); - } finally { - MemoryUtil.freeMemory(address); - } - } - - @Test - void bufferAssociatedWithAllocator() throws Exception { - // Note values are all dummy values here - final long bufferSize = 16; - final long fieldLength = bufferSize / IntVector.TYPE_WIDTH; - long address = MemoryUtil.allocateMemory(bufferSize); - long baseline = allocator.getAllocatedMemory(); - ArrowFieldNode fieldNode = new ArrowFieldNode(fieldLength, 0); - try (BufferImportTypeVisitor visitor = - new BufferImportTypeVisitor(allocator, dummyHandle, fieldNode, new long[] {0, address})) { - List buffers = visitor.visit(new ArrowType.Int(32, true)); - assertThat(buffers).hasSize(2); - assertThat(buffers.get(0)).isNull(); - assertThat(buffers.get(1)) - .isNotNull() - .extracting(ArrowBuf::getReferenceManager) - .extracting(ReferenceManager::getAllocator) - .isEqualTo(allocator); - assertThat(allocator.getAllocatedMemory()).isEqualTo(baseline + bufferSize); - } finally { - MemoryUtil.freeMemory(address); - } - assertThat(allocator.getAllocatedMemory()).isEqualTo(baseline); - } - - // ------------------------------------------------------------ - // ReferenceCountedArrowArray - - @Test - void releaseRetain() { - ArrowArray array = ArrowArray.allocateNew(allocator); - ReferenceCountedArrowArray handle = new ReferenceCountedArrowArray(array); - assertThat(array.isClosed()).isFalse(); - handle.retain(); - assertThat(array.isClosed()).isFalse(); - handle.release(); - assertThat(array.isClosed()).isFalse(); - handle.release(); - assertThat(array.isClosed()).isTrue(); - - assertThrows(IllegalStateException.class, handle::release); - assertThrows(IllegalStateException.class, handle::retain); - } - - @Test - void associate() { - final long bufferSize = 16; - final long address = MemoryUtil.allocateMemory(bufferSize); - try { - ArrowArray array = ArrowArray.allocateNew(allocator); - ReferenceCountedArrowArray handle 
= new ReferenceCountedArrowArray(array); - assertThat(array.isClosed()).isFalse(); - ArrowBuf buf = handle.unsafeAssociateAllocation(allocator, bufferSize, address); - assertThat(array.isClosed()).isFalse(); - buf.close(); - assertThat(array.isClosed()).isFalse(); - handle.release(); - assertThat(array.isClosed()).isTrue(); - } finally { - MemoryUtil.freeMemory(address); - } - } -} diff --git a/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java b/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java deleted file mode 100644 index 8cd4913f22dd2..0000000000000 --- a/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.nio.charset.StandardCharsets; -import java.util.Collections; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ViewVarCharVector; -import org.apache.arrow.vector.compare.VectorEqualsVisitor; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryEncoder; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.ArrowStreamReader; -import org.apache.arrow.vector.ipc.ArrowStreamWriter; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class DictionaryTest { - private RootAllocator allocator = null; - - @BeforeEach - public void setUp() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void tearDown() { - allocator.close(); - } - - void roundtrip(FieldVector vector, 
DictionaryProvider provider, Class clazz) { - // Consumer allocates empty structures - try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator); - ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) { - - // Producer creates structures from existing memory pointers - try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); - ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { - // Producer exports vector into the C Data Interface structures - Data.exportVector(allocator, vector, provider, arrowArray, arrowSchema); - } - - // Consumer imports vector - try (CDataDictionaryProvider cDictionaryProvider = new CDataDictionaryProvider(); - FieldVector imported = - Data.importVector( - allocator, consumerArrowArray, consumerArrowSchema, cDictionaryProvider); ) { - assertTrue( - clazz.isInstance(imported), - String.format("expected %s but was %s", clazz, imported.getClass())); - assertTrue( - VectorEqualsVisitor.vectorEquals(vector, imported), "vectors are not equivalent"); - for (long id : cDictionaryProvider.getDictionaryIds()) { - ValueVector exportedDictionaryVector = provider.lookup(id).getVector(); - ValueVector importedDictionaryVector = cDictionaryProvider.lookup(id).getVector(); - assertTrue( - VectorEqualsVisitor.vectorEquals(exportedDictionaryVector, importedDictionaryVector), - String.format("Dictionary vectors for ID %d are not equivalent", id)); - } - } - } - } - - @Test - public void testWithDictionary() throws Exception { - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - // create dictionary and provider - final VarCharVector dictVector = new VarCharVector("dict", allocator); - dictVector.allocateNewSafe(); - dictVector.setSafe(0, "aa".getBytes(StandardCharsets.UTF_8)); - dictVector.setSafe(1, "bb".getBytes(StandardCharsets.UTF_8)); - dictVector.setSafe(2, "cc".getBytes(StandardCharsets.UTF_8)); - 
dictVector.setValueCount(3); - - Dictionary dictionary = - new Dictionary(dictVector, new DictionaryEncoding(0L, false, /* indexType= */ null)); - provider.put(dictionary); - - // create vector and encode it - final VarCharVector vector = new VarCharVector("vector", allocator); - vector.allocateNewSafe(); - vector.setSafe(0, "bb".getBytes(StandardCharsets.UTF_8)); - vector.setSafe(1, "bb".getBytes(StandardCharsets.UTF_8)); - vector.setSafe(2, "cc".getBytes(StandardCharsets.UTF_8)); - vector.setSafe(3, "aa".getBytes(StandardCharsets.UTF_8)); - vector.setValueCount(4); - - // get the encoded vector - IntVector encodedVector = (IntVector) DictionaryEncoder.encode(vector, dictionary); - - // Perform roundtrip using C Data Interface - roundtrip(encodedVector, provider, IntVector.class); - - // Close all - AutoCloseables.close((AutoCloseable) vector, encodedVector, dictVector); - } - - @Test - public void testRoundtripMultipleBatches() throws IOException { - try (ArrowStreamReader reader = createMultiBatchReader(); - ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator)) { - // Load first batch - reader.loadNextBatch(); - // Producer fills consumer schema structure - Data.exportSchema( - allocator, reader.getVectorSchemaRoot().getSchema(), reader, consumerArrowSchema); - // Consumer loads it as an empty vector schema root - try (CDataDictionaryProvider consumerDictionaryProvider = new CDataDictionaryProvider(); - VectorSchemaRoot consumerRoot = - Data.importVectorSchemaRoot( - allocator, consumerArrowSchema, consumerDictionaryProvider)) { - do { - try (ArrowArray consumerArray = ArrowArray.allocateNew(allocator)) { - // Producer exports next data - Data.exportVectorSchemaRoot( - allocator, reader.getVectorSchemaRoot(), reader, consumerArray); - // Consumer loads next data - Data.importIntoVectorSchemaRoot( - allocator, consumerArray, consumerRoot, consumerDictionaryProvider); - - // Roundtrip validation - assertTrue( - 
consumerRoot.equals(reader.getVectorSchemaRoot()), - "vector schema roots are not equivalent"); - for (long id : consumerDictionaryProvider.getDictionaryIds()) { - ValueVector exportedDictionaryVector = reader.lookup(id).getVector(); - ValueVector importedDictionaryVector = - consumerDictionaryProvider.lookup(id).getVector(); - assertTrue( - VectorEqualsVisitor.vectorEquals( - exportedDictionaryVector, importedDictionaryVector), - String.format("Dictionary vectors for ID %d are not equivalent", id)); - } - } - } while (reader.loadNextBatch()); - } - } - } - - private ArrowStreamReader createMultiBatchReader() throws IOException { - ByteArrayOutputStream os = new ByteArrayOutputStream(); - try (final VarCharVector dictVector = new VarCharVector("dict", allocator); - IntVector vector = new IntVector("foo", allocator)) { - // create dictionary and provider - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - dictVector.allocateNewSafe(); - dictVector.setSafe(0, "aa".getBytes(StandardCharsets.UTF_8)); - dictVector.setSafe(1, "bb".getBytes(StandardCharsets.UTF_8)); - dictVector.setSafe(2, "cc".getBytes(StandardCharsets.UTF_8)); - dictVector.setSafe(3, "dd".getBytes(StandardCharsets.UTF_8)); - dictVector.setSafe(4, "ee".getBytes(StandardCharsets.UTF_8)); - dictVector.setValueCount(5); - Dictionary dictionary = - new Dictionary(dictVector, new DictionaryEncoding(0L, false, /* indexType= */ null)); - provider.put(dictionary); - - Schema schema = new Schema(Collections.singletonList(vector.getField())); - try (VectorSchemaRoot root = - new VectorSchemaRoot( - schema, Collections.singletonList(vector), vector.getValueCount()); - ArrowStreamWriter writer = - new ArrowStreamWriter(root, provider, Channels.newChannel(os)); ) { - - writer.start(); - - // Batch 1 - vector.setNull(0); - vector.setSafe(1, 1); - vector.setSafe(2, 2); - vector.setNull(3); - vector.setSafe(4, 1); - vector.setValueCount(5); - root.setRowCount(5); - 
writer.writeBatch(); - - // Batch 2 - vector.setNull(0); - vector.setSafe(1, 1); - vector.setSafe(2, 2); - vector.setValueCount(3); - root.setRowCount(3); - writer.writeBatch(); - - // Batch 3 - vector.setSafe(0, 0); - vector.setSafe(1, 1); - vector.setSafe(2, 2); - vector.setSafe(3, 3); - vector.setSafe(4, 4); - vector.setValueCount(5); - root.setRowCount(5); - writer.writeBatch(); - - writer.end(); - } - } - - ByteArrayInputStream in = new ByteArrayInputStream(os.toByteArray()); - return new ArrowStreamReader(in, allocator); - } - - private void createStructVector(StructVector vector) { - final ViewVarCharVector child1 = - vector.addOrGet( - "f0", FieldType.nullable(MinorType.VIEWVARCHAR.getType()), ViewVarCharVector.class); - final IntVector child2 = - vector.addOrGet("f1", FieldType.nullable(MinorType.INT.getType()), IntVector.class); - - // Write the values to child 1 - child1.allocateNew(); - child1.set(0, "01234567890".getBytes(StandardCharsets.UTF_8)); - child1.set(1, "012345678901234567".getBytes(StandardCharsets.UTF_8)); - vector.setIndexDefined(0); - - // Write the values to child 2 - child2.allocateNew(); - child2.set(0, 10); - child2.set(1, 11); - vector.setIndexDefined(1); - - vector.setValueCount(2); - } - - private void createStructVectorInline(StructVector vector) { - final ViewVarCharVector child1 = - vector.addOrGet( - "f0", FieldType.nullable(MinorType.VIEWVARCHAR.getType()), ViewVarCharVector.class); - final IntVector child2 = - vector.addOrGet("f1", FieldType.nullable(MinorType.INT.getType()), IntVector.class); - - // Write the values to child 1 - child1.allocateNew(); - child1.set(0, "012345678".getBytes(StandardCharsets.UTF_8)); - child1.set(1, "01234".getBytes(StandardCharsets.UTF_8)); - vector.setIndexDefined(0); - - // Write the values to child 2 - child2.allocateNew(); - child2.set(0, 10); - child2.set(1, 11); - vector.setIndexDefined(1); - - vector.setValueCount(2); - } - - @Test - public void testVectorLoadUnloadOnStructVector() { - 
try (final StructVector structVector1 = StructVector.empty("struct", allocator)) { - createStructVector(structVector1); - Field field1 = structVector1.getField(); - Schema schema = new Schema(field1.getChildren()); - StructVectorUnloader vectorUnloader = new StructVectorUnloader(structVector1); - - try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - BufferAllocator finalVectorsAllocator = - allocator.newChildAllocator("struct", 0, Long.MAX_VALUE); ) { - // validating recordBatch contains an output for variadicBufferCounts - assertFalse(recordBatch.getVariadicBufferCounts().isEmpty()); - assertEquals(1, recordBatch.getVariadicBufferCounts().size()); - assertEquals(1, recordBatch.getVariadicBufferCounts().get(0)); - - StructVectorLoader vectorLoader = new StructVectorLoader(schema); - try (StructVector structVector2 = vectorLoader.load(finalVectorsAllocator, recordBatch)) { - // Improve this after fixing https://github.com/apache/arrow/issues/41933 - // assertTrue(VectorEqualsVisitor.vectorEquals(structVector1, structVector2), "vectors are - // not equivalent"); - assertTrue( - VectorEqualsVisitor.vectorEquals( - structVector1.getChild("f0"), structVector2.getChild("f0")), - "vectors are not equivalent"); - assertTrue( - VectorEqualsVisitor.vectorEquals( - structVector1.getChild("f1"), structVector2.getChild("f1")), - "vectors are not equivalent"); - } - } - } - } - - @Test - public void testVectorLoadUnloadOnStructVectorWithInline() { - try (final StructVector structVector1 = StructVector.empty("struct", allocator)) { - createStructVectorInline(structVector1); - Field field1 = structVector1.getField(); - Schema schema = new Schema(field1.getChildren()); - StructVectorUnloader vectorUnloader = new StructVectorUnloader(structVector1); - - try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - BufferAllocator finalVectorsAllocator = - allocator.newChildAllocator("struct", 0, Long.MAX_VALUE); ) { - // validating recordBatch contains an 
output for variadicBufferCounts - assertFalse(recordBatch.getVariadicBufferCounts().isEmpty()); - assertEquals(1, recordBatch.getVariadicBufferCounts().size()); - assertEquals(0, recordBatch.getVariadicBufferCounts().get(0)); - - StructVectorLoader vectorLoader = new StructVectorLoader(schema); - try (StructVector structVector2 = vectorLoader.load(finalVectorsAllocator, recordBatch)) { - // Improve this after fixing https://github.com/apache/arrow/issues/41933 - // assertTrue(VectorEqualsVisitor.vectorEquals(structVector1, structVector2), "vectors are - // not equivalent"); - assertTrue( - VectorEqualsVisitor.vectorEquals( - structVector1.getChild("f0"), structVector2.getChild("f0")), - "vectors are not equivalent"); - assertTrue( - VectorEqualsVisitor.vectorEquals( - structVector1.getChild("f1"), structVector2.getChild("f1")), - "vectors are not equivalent"); - } - } - } - } -} diff --git a/java/c/src/test/java/org/apache/arrow/c/FlagsTest.java b/java/c/src/test/java/org/apache/arrow/c/FlagsTest.java deleted file mode 100644 index 1e8843ce82363..0000000000000 --- a/java/c/src/test/java/org/apache/arrow/c/FlagsTest.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.util.ArrayList; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.Test; - -public class FlagsTest { - @Test - public void testForFieldNullableOrderedDict() { - FieldType fieldType = - new FieldType( - true, - ArrowType.Binary.INSTANCE, - new DictionaryEncoding(123L, true, new ArrowType.Int(8, true))); - - assertEquals( - Flags.ARROW_FLAG_DICTIONARY_ORDERED | Flags.ARROW_FLAG_NULLABLE, - Flags.forField(new Field("Name", fieldType, new ArrayList<>()))); - } - - @Test - public void testForFieldOrderedDict() { - FieldType fieldType = - new FieldType( - false, - ArrowType.Binary.INSTANCE, - new DictionaryEncoding(123L, true, new ArrowType.Int(8, true))); - assertEquals( - Flags.ARROW_FLAG_DICTIONARY_ORDERED, - Flags.forField(new Field("Name", fieldType, new ArrayList<>()))); - } - - @Test - public void testForFieldNullableDict() { - FieldType fieldType = - new FieldType( - true, - ArrowType.Binary.INSTANCE, - new DictionaryEncoding(123L, false, new ArrowType.Int(8, true))); - assertEquals( - Flags.ARROW_FLAG_NULLABLE, Flags.forField(new Field("Name", fieldType, new ArrayList<>()))); - } - - @Test - public void testForFieldNullable() { - FieldType fieldType = new FieldType(true, ArrowType.Binary.INSTANCE, null); - assertEquals( - Flags.ARROW_FLAG_NULLABLE, Flags.forField(new Field("Name", fieldType, new ArrayList<>()))); - } - - @Test - public void testForFieldNullableOrderedSortedMap() { - ArrowType.Map type = new ArrowType.Map(true); - FieldType fieldType = - new FieldType(true, type, new DictionaryEncoding(123L, true, new ArrowType.Int(8, true))); - assertEquals( - Flags.ARROW_FLAG_DICTIONARY_ORDERED - | Flags.ARROW_FLAG_NULLABLE - | 
Flags.ARROW_FLAG_MAP_KEYS_SORTED, - Flags.forField(new Field("Name", fieldType, new ArrayList<>()))); - } - - @Test - public void testForFieldNullableOrderedMap() { - ArrowType.Map type = new ArrowType.Map(false); - FieldType fieldType = - new FieldType(true, type, new DictionaryEncoding(123L, true, new ArrowType.Int(8, true))); - assertEquals( - Flags.ARROW_FLAG_DICTIONARY_ORDERED | Flags.ARROW_FLAG_NULLABLE, - Flags.forField(new Field("Name", fieldType, new ArrayList<>()))); - } -} diff --git a/java/c/src/test/java/org/apache/arrow/c/FormatTest.java b/java/c/src/test/java/org/apache/arrow/c/FormatTest.java deleted file mode 100644 index c77332433097d..0000000000000 --- a/java/c/src/test/java/org/apache/arrow/c/FormatTest.java +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.IntervalUnit; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.jupiter.api.Test; - -public class FormatTest { - @Test - public void testAsString() { - assertEquals("z", Format.asString(new ArrowType.Binary())); - assertEquals("b", Format.asString(new ArrowType.Bool())); - assertEquals("tdD", Format.asString(new ArrowType.Date(DateUnit.DAY))); - assertEquals("tdm", Format.asString(new ArrowType.Date(DateUnit.MILLISECOND))); - assertEquals("d:1,1", Format.asString(new ArrowType.Decimal(1, 1, 128))); - assertEquals("d:1,1,1", Format.asString(new ArrowType.Decimal(1, 1, 1))); - assertEquals("d:9,1,1", Format.asString(new ArrowType.Decimal(9, 1, 1))); - assertEquals("tDs", Format.asString(new ArrowType.Duration(TimeUnit.SECOND))); - assertEquals("tDm", Format.asString(new ArrowType.Duration(TimeUnit.MILLISECOND))); - assertEquals("tDu", Format.asString(new ArrowType.Duration(TimeUnit.MICROSECOND))); - assertEquals("tDn", Format.asString(new ArrowType.Duration(TimeUnit.NANOSECOND))); - assertEquals("w:1", Format.asString(new ArrowType.FixedSizeBinary(1))); - assertEquals("+w:3", Format.asString(new ArrowType.FixedSizeList(3))); - assertEquals("e", Format.asString(new ArrowType.FloatingPoint(FloatingPointPrecision.HALF))); - assertEquals("f", Format.asString(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE))); - assertEquals("g", Format.asString(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE))); - assertEquals("c", Format.asString(new ArrowType.Int(Byte.SIZE, true))); - 
assertEquals("C", Format.asString(new ArrowType.Int(Byte.SIZE, false))); - assertEquals("s", Format.asString(new ArrowType.Int(Short.SIZE, true))); - assertEquals("S", Format.asString(new ArrowType.Int(Short.SIZE, false))); - assertEquals("i", Format.asString(new ArrowType.Int(Integer.SIZE, true))); - assertEquals("I", Format.asString(new ArrowType.Int(Integer.SIZE, false))); - assertEquals("l", Format.asString(new ArrowType.Int(Long.SIZE, true))); - assertEquals("L", Format.asString(new ArrowType.Int(Long.SIZE, false))); - assertEquals("tiD", Format.asString(new ArrowType.Interval(IntervalUnit.DAY_TIME))); - assertEquals("tiM", Format.asString(new ArrowType.Interval(IntervalUnit.YEAR_MONTH))); - assertEquals("Z", Format.asString(new ArrowType.LargeBinary())); - assertEquals("+L", Format.asString(new ArrowType.LargeList())); - assertEquals("U", Format.asString(new ArrowType.LargeUtf8())); - assertEquals("+l", Format.asString(new ArrowType.List())); - assertEquals("+m", Format.asString(new ArrowType.Map(true))); - assertEquals("n", Format.asString(new ArrowType.Null())); - assertEquals("+s", Format.asString(new ArrowType.Struct())); - assertEquals("tts", Format.asString(new ArrowType.Time(TimeUnit.SECOND, 32))); - assertEquals("ttm", Format.asString(new ArrowType.Time(TimeUnit.MILLISECOND, 32))); - assertEquals("ttu", Format.asString(new ArrowType.Time(TimeUnit.MICROSECOND, 64))); - assertEquals("ttn", Format.asString(new ArrowType.Time(TimeUnit.NANOSECOND, 64))); - assertEquals( - "tss:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.SECOND, "Timezone"))); - assertEquals( - "tsm:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "Timezone"))); - assertEquals( - "tsu:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "Timezone"))); - assertEquals( - "tsn:Timezone", Format.asString(new ArrowType.Timestamp(TimeUnit.NANOSECOND, "Timezone"))); - assertEquals( - "+us:1,1,1", Format.asString(new 
ArrowType.Union(UnionMode.Sparse, new int[] {1, 1, 1}))); - assertEquals( - "+ud:1,1,1", Format.asString(new ArrowType.Union(UnionMode.Dense, new int[] {1, 1, 1}))); - assertEquals("u", Format.asString(new ArrowType.Utf8())); - - assertThrows( - UnsupportedOperationException.class, () -> Format.asString(new ArrowType.Int(1, true))); - assertThrows( - UnsupportedOperationException.class, - () -> Format.asString(new ArrowType.Time(TimeUnit.SECOND, 1))); - assertThrows( - UnsupportedOperationException.class, - () -> Format.asString(new ArrowType.Time(TimeUnit.MILLISECOND, 64))); - } - - @Test - public void testAsType() - throws IllegalStateException, NumberFormatException, UnsupportedOperationException { - assertTrue(Format.asType("n", 0L) instanceof ArrowType.Null); - assertTrue(Format.asType("b", 0L) instanceof ArrowType.Bool); - assertEquals(new ArrowType.Int(Byte.SIZE, true), Format.asType("c", 0L)); - assertEquals(new ArrowType.Int(Byte.SIZE, false), Format.asType("C", 0L)); - assertEquals(new ArrowType.Int(Short.SIZE, true), Format.asType("s", 0L)); - assertEquals(new ArrowType.Int(Short.SIZE, false), Format.asType("S", 0L)); - assertEquals(new ArrowType.Int(Integer.SIZE, true), Format.asType("i", 0L)); - assertEquals(new ArrowType.Int(Integer.SIZE, false), Format.asType("I", 0L)); - assertEquals(new ArrowType.Int(Long.SIZE, true), Format.asType("l", 0L)); - assertEquals(new ArrowType.Int(Long.SIZE, false), Format.asType("L", 0L)); - assertEquals(new ArrowType.FloatingPoint(FloatingPointPrecision.HALF), Format.asType("e", 0L)); - assertEquals( - new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), Format.asType("f", 0L)); - assertEquals( - new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), Format.asType("g", 0L)); - assertTrue(Format.asType("z", 0L) instanceof ArrowType.Binary); - assertTrue(Format.asType("Z", 0L) instanceof ArrowType.LargeBinary); - assertTrue(Format.asType("u", 0L) instanceof ArrowType.Utf8); - assertTrue(Format.asType("U", 0L) 
instanceof ArrowType.LargeUtf8); - assertEquals(new ArrowType.Date(DateUnit.DAY), Format.asType("tdD", 0L)); - assertEquals(new ArrowType.Date(DateUnit.MILLISECOND), Format.asType("tdm", 0L)); - assertEquals(new ArrowType.Time(TimeUnit.SECOND, Integer.SIZE), Format.asType("tts", 0L)); - assertEquals(new ArrowType.Time(TimeUnit.MILLISECOND, Integer.SIZE), Format.asType("ttm", 0L)); - assertEquals(new ArrowType.Time(TimeUnit.MICROSECOND, Long.SIZE), Format.asType("ttu", 0L)); - assertEquals(new ArrowType.Time(TimeUnit.NANOSECOND, Long.SIZE), Format.asType("ttn", 0L)); - assertEquals(new ArrowType.Duration(TimeUnit.SECOND), Format.asType("tDs", 0L)); - assertEquals(new ArrowType.Duration(TimeUnit.MILLISECOND), Format.asType("tDm", 0L)); - assertEquals(new ArrowType.Duration(TimeUnit.MICROSECOND), Format.asType("tDu", 0L)); - assertEquals(new ArrowType.Duration(TimeUnit.NANOSECOND), Format.asType("tDn", 0L)); - assertEquals(new ArrowType.Interval(IntervalUnit.YEAR_MONTH), Format.asType("tiM", 0L)); - assertEquals(new ArrowType.Interval(IntervalUnit.DAY_TIME), Format.asType("tiD", 0L)); - assertTrue(Format.asType("+l", 0L) instanceof ArrowType.List); - assertTrue(Format.asType("+L", 0L) instanceof ArrowType.LargeList); - assertTrue(Format.asType("+s", 0L) instanceof ArrowType.Struct); - assertEquals(new ArrowType.Map(false), Format.asType("+m", 0L)); - assertEquals(new ArrowType.Map(true), Format.asType("+m", Flags.ARROW_FLAG_MAP_KEYS_SORTED)); - assertEquals(new ArrowType.Decimal(1, 1, 128), Format.asType("d:1,1", 0L)); - assertEquals(new ArrowType.Decimal(1, 1, 1), Format.asType("d:1,1,1", 0L)); - assertEquals(new ArrowType.Decimal(9, 1, 1), Format.asType("d:9,1,1", 0L)); - assertEquals(new ArrowType.FixedSizeBinary(1), Format.asType("w:1", 0L)); - assertEquals(new ArrowType.FixedSizeList(3), Format.asType("+w:3", 0L)); - assertEquals( - new ArrowType.Union(UnionMode.Dense, new int[] {1, 1, 1}), Format.asType("+ud:1,1,1", 0L)); - assertEquals( - new 
ArrowType.Union(UnionMode.Sparse, new int[] {1, 1, 1}), Format.asType("+us:1,1,1", 0L)); - assertEquals( - new ArrowType.Timestamp(TimeUnit.SECOND, "Timezone"), Format.asType("tss:Timezone", 0L)); - assertEquals( - new ArrowType.Timestamp(TimeUnit.MILLISECOND, "Timezone"), - Format.asType("tsm:Timezone", 0L)); - assertEquals( - new ArrowType.Timestamp(TimeUnit.MICROSECOND, "Timezone"), - Format.asType("tsu:Timezone", 0L)); - assertEquals( - new ArrowType.Timestamp(TimeUnit.NANOSECOND, "Timezone"), - Format.asType("tsn:Timezone", 0L)); - - assertThrows(UnsupportedOperationException.class, () -> Format.asType("Format", 0L)); - assertThrows(UnsupportedOperationException.class, () -> Format.asType(":", 0L)); - assertThrows(NumberFormatException.class, () -> Format.asType("w:1,2,3", 0L)); - } -} diff --git a/java/c/src/test/java/org/apache/arrow/c/MetadataTest.java b/java/c/src/test/java/org/apache/arrow/c/MetadataTest.java deleted file mode 100644 index 18ca18c8d295e..0000000000000 --- a/java/c/src/test/java/org/apache/arrow/c/MetadataTest.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.junit.jupiter.api.Assertions.*; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.util.HashMap; -import java.util.Map; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.memory.util.LargeMemoryUtil; -import org.apache.arrow.memory.util.MemoryUtil; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class MetadataTest { - private RootAllocator allocator = null; - - private static Map metadata; - private static byte[] encoded; - - @BeforeAll - static void beforeAll() { - metadata = new HashMap<>(); - metadata.put("key1", ""); - metadata.put("key2", "bar"); - - if (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN) { - encoded = - new byte[] { - 2, 0, 0, 0, 4, 0, 0, 0, 'k', 'e', 'y', '1', 0, 0, 0, 0, 4, 0, 0, 0, 'k', 'e', 'y', '2', - 3, 0, 0, 0, 'b', 'a', 'r' - }; - } else { - encoded = - new byte[] { - 0, 0, 0, 2, 0, 0, 0, 4, 'k', 'e', 'y', '1', 0, 0, 0, 0, 0, 0, 0, 4, 'k', 'e', 'y', '2', - 0, 0, 0, 3, 'b', 'a', 'r' - }; - } - } - - @BeforeEach - public void setUp() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void tearDown() { - allocator.close(); - } - - @Test - public void testEncode() { - try (ArrowBuf buffer = Metadata.encode(allocator, metadata)) { - int totalSize = LargeMemoryUtil.checkedCastToInt(buffer.readableBytes()); - ByteBuffer reader = - MemoryUtil.directBuffer(buffer.memoryAddress(), totalSize).order(ByteOrder.nativeOrder()); - byte[] result = new byte[totalSize]; - reader.get(result); - assertArrayEquals(encoded, result); - } - } - - @Test - public void testDecode() { - try (ArrowBuf buffer = allocator.buffer(31)) { - buffer.setBytes(0, encoded); - Map decoded = Metadata.decode(buffer.memoryAddress()); - assertNotNull(decoded); - assertEquals(metadata, decoded); 
- } - } - - @Test - public void testEncodeEmpty() { - Map metadata = new HashMap<>(); - try (ArrowBuf encoded = Metadata.encode(allocator, metadata)) { - assertNull(encoded); - } - } - - @Test - public void testDecodeEmpty() { - Map decoded = Metadata.decode(NativeUtil.NULL); - assertNull(decoded); - } -} diff --git a/java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java b/java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java deleted file mode 100644 index 10c25ec0d3aba..0000000000000 --- a/java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.memory.util.LargeMemoryUtil; -import org.apache.arrow.memory.util.MemoryUtil; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class NativeUtilTest { - - private RootAllocator allocator = null; - - @BeforeEach - public void setUp() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void tearDown() { - allocator.close(); - } - - @Test - public void testString() { - String javaString = "abc"; - byte[] nativeString = new byte[] {97, 98, 99, 0}; - try (ArrowBuf buffer = NativeUtil.toNativeString(allocator, javaString)) { - int totalSize = LargeMemoryUtil.checkedCastToInt(buffer.readableBytes()); - ByteBuffer reader = - MemoryUtil.directBuffer(buffer.memoryAddress(), totalSize).order(ByteOrder.nativeOrder()); - byte[] result = new byte[totalSize]; - reader.get(result); - assertArrayEquals(nativeString, result); - - assertEquals(javaString, NativeUtil.toJavaString(buffer.memoryAddress())); - } - } - - @Test - public void testToJavaArray() { - long[] nativeArray = new long[] {1, 2, 3}; - try (ArrowBuf buffer = allocator.buffer(Long.BYTES * ((long) nativeArray.length), null)) { - for (long value : nativeArray) { - buffer.writeLong(value); - } - long[] actual = NativeUtil.toJavaArray(buffer.memoryAddress(), nativeArray.length); - assertArrayEquals(nativeArray, actual); - } - } - - @Test - public void testToZeroJavaArray() { - long[] actual = NativeUtil.toJavaArray(0xDEADBEEF, 0); - assertEquals(0, actual.length); - } -} diff --git a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java 
b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java deleted file mode 100644 index 67ab282de5a32..0000000000000 --- a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java +++ /dev/null @@ -1,1095 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.UUID; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import java.util.stream.Stream; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float2Vector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; 
-import org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ViewVarBinaryVector; -import org.apache.arrow.vector.ViewVarCharVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.compare.VectorEqualsVisitor; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.LargeListViewVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.RunEndEncodedVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.impl.UnionMapWriter; -import org.apache.arrow.vector.holders.IntervalDayHolder; -import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder; -import org.apache.arrow.vector.holders.NullableUInt4Holder; -import org.apache.arrow.vector.table.Table; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types.MinorType; -import 
org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; -import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class RoundtripTest { - private static final String EMPTY_SCHEMA_PATH = ""; - private RootAllocator allocator = null; - private BufferAllocator childAllocator = null; - - @BeforeEach - public void setUp() { - allocator = new RootAllocator(Long.MAX_VALUE); - childAllocator = allocator.newChildAllocator("child", 0, Long.MAX_VALUE); - } - - @AfterEach - public void tearDown() { - childAllocator.close(); - allocator.close(); - } - - FieldVector vectorRoundtrip(FieldVector vector) { - // Consumer allocates empty structures - try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator); - ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) { - - // Producer creates structures from existing memory pointers - try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); - ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { - // Producer exports vector into the C Data Interface structures - Data.exportVector(allocator, vector, null, arrowArray, arrowSchema); - } - - // Consumer imports vector - FieldVector imported = - Data.importVector(childAllocator, consumerArrowArray, consumerArrowSchema, null); - if (!(imported instanceof NullVector)) { - assertEquals(childAllocator, imported.getAllocator()); - } - - // Check that transfers work - TransferPair pair = imported.getTransferPair(allocator); - pair.transfer(); - return (FieldVector) pair.getTo(); - } - } - - VectorSchemaRoot 
vectorSchemaRootRoundtrip(VectorSchemaRoot root) { - // Consumer allocates empty structures - try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator); - ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) { - - // Producer creates structures from existing memory pointers - try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); - ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { - // Producer exports vector into the C Data Interface structures - Data.exportVectorSchemaRoot(allocator, root, null, arrowArray, arrowSchema); - } - - // Consumer imports vector - return Data.importVectorSchemaRoot(allocator, consumerArrowArray, consumerArrowSchema, null); - } - } - - boolean roundtrip(FieldVector vector, Class clazz) { - List fieldBuffers = vector.getFieldBuffers(); - List orgRefCnts = - fieldBuffers.stream().map(buf -> buf.refCnt()).collect(Collectors.toList()); - long orgMemorySize = allocator.getAllocatedMemory(); - - boolean result = false; - try (ValueVector imported = vectorRoundtrip(vector)) { - assertTrue( - clazz.isInstance(imported), - String.format("expected %s but was %s", clazz, imported.getClass())); - result = VectorEqualsVisitor.vectorEquals(vector, imported); - } - - // Check that the ref counts of the buffers are the same after the roundtrip - IntStream.range(0, orgRefCnts.size()) - .forEach( - i -> { - ArrowBuf buf = fieldBuffers.get(i); - assertEquals(buf.refCnt(), orgRefCnts.get(i)); - }); - - assertEquals(orgMemorySize, allocator.getAllocatedMemory()); - - return result; - } - - @Test - public void testBitVector() { - BitVector imported; - - try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(1024); - vector.setValueCount(1024); - - // Put and set a few values - vector.set(0, 1); - vector.set(1, 0); - vector.set(100, 0); - vector.set(1022, 1); - - vector.setValueCount(1024); - - imported = (BitVector) 
vectorRoundtrip(vector); - assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported)); - } - - assertEquals(1, imported.get(0)); - assertEquals(0, imported.get(1)); - assertEquals(0, imported.get(100)); - assertEquals(1, imported.get(1022)); - assertEquals(1020, imported.getNullCount()); - imported.close(); - } - - @Test - public void testIntVector() { - IntVector imported; - try (final IntVector vector = new IntVector("v", allocator)) { - setVector(vector, 1, 2, 3, null); - imported = (IntVector) vectorRoundtrip(vector); - assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported)); - } - assertEquals(1, imported.get(0)); - assertEquals(2, imported.get(1)); - assertEquals(3, imported.get(2)); - assertEquals(4, imported.getValueCount()); - assertEquals(1, imported.getNullCount()); - imported.close(); - } - - @Test - public void testBigIntVector() { - BigIntVector imported; - try (final BigIntVector vector = new BigIntVector("v", allocator)) { - setVector(vector, 1L, 2L, 3L, null); - imported = (BigIntVector) vectorRoundtrip(vector); - assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported)); - } - assertEquals(1, imported.get(0)); - assertEquals(2, imported.get(1)); - assertEquals(3, imported.get(2)); - assertEquals(4, imported.getValueCount()); - assertEquals(1, imported.getNullCount()); - imported.close(); - } - - @Test - public void testDateDayVector() { - DateDayVector imported; - try (final DateDayVector vector = new DateDayVector("v", allocator)) { - setVector(vector, 1, 2, 3, null); - imported = (DateDayVector) vectorRoundtrip(vector); - assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported)); - } - assertEquals(1, imported.get(0)); - assertEquals(2, imported.get(1)); - assertEquals(3, imported.get(2)); - assertEquals(4, imported.getValueCount()); - assertEquals(1, imported.getNullCount()); - imported.close(); - } - - @Test - public void testDateMilliVector() { - DateMilliVector imported; - try (final DateMilliVector vector = new 
DateMilliVector("v", allocator)) { - setVector(vector, 1L, 2L, 3L, null); - imported = (DateMilliVector) vectorRoundtrip(vector); - assertTrue(VectorEqualsVisitor.vectorEquals(vector, imported)); - } - assertEquals(1, imported.get(0)); - assertEquals(2, imported.get(1)); - assertEquals(3, imported.get(2)); - assertEquals(4, imported.getValueCount()); - assertEquals(1, imported.getNullCount()); - imported.close(); - } - - @Test - public void testDecimalVector() { - try (final DecimalVector vector = new DecimalVector("v", allocator, 1, 1)) { - setVector(vector, 1L, 2L, 3L, null); - assertTrue(roundtrip(vector, DecimalVector.class)); - } - } - - @Test - public void testDurationVector() { - for (TimeUnit unit : TimeUnit.values()) { - final FieldType fieldType = FieldType.nullable(new ArrowType.Duration(unit)); - try (final DurationVector vector = new DurationVector("v", fieldType, allocator)) { - setVector(vector, 1L, 2L, 3L, null); - assertTrue(roundtrip(vector, DurationVector.class)); - } - } - } - - @Test - public void testZeroVectorEquals() { - try (final ZeroVector vector = new ZeroVector()) { - // A ZeroVector is imported as a NullVector - assertTrue(roundtrip(vector, NullVector.class)); - } - } - - @Test - public void testFixedSizeBinaryVector() { - try (final FixedSizeBinaryVector vector = new FixedSizeBinaryVector("v", allocator, 2)) { - setVector(vector, new byte[] {0b0000, 0b0001}, new byte[] {0b0010, 0b0011}); - assertTrue(roundtrip(vector, FixedSizeBinaryVector.class)); - } - } - - @Test - public void testFloat2Vector() { - try (final Float2Vector vector = new Float2Vector("v", allocator)) { - setVector(vector, 0.1f, 0.2f, 0.3f, null); - assertTrue(roundtrip(vector, Float2Vector.class)); - } - } - - @Test - public void testFloat4Vector() { - try (final Float4Vector vector = new Float4Vector("v", allocator)) { - setVector(vector, 0.1f, 0.2f, 0.3f, null); - assertTrue(roundtrip(vector, Float4Vector.class)); - } - } - - @Test - public void testFloat8Vector() 
{ - try (final Float8Vector vector = new Float8Vector("v", allocator)) { - setVector(vector, 0.1d, 0.2d, 0.3d, null); - assertTrue(roundtrip(vector, Float8Vector.class)); - } - } - - @Test - public void testIntervalDayVector() { - try (final IntervalDayVector vector = new IntervalDayVector("v", allocator)) { - IntervalDayHolder value = new IntervalDayHolder(); - value.days = 5; - value.milliseconds = 100; - setVector(vector, value, null); - assertTrue(roundtrip(vector, IntervalDayVector.class)); - } - } - - @Test - public void testIntervalYearVector() { - try (final IntervalYearVector vector = new IntervalYearVector("v", allocator)) { - setVector(vector, 1990, 2000, 2010, 2020, null); - assertTrue(roundtrip(vector, IntervalYearVector.class)); - } - } - - @Test - public void testSmallIntVector() { - try (final SmallIntVector vector = new SmallIntVector("v", allocator)) { - setVector(vector, (short) 0, (short) 256, null); - assertTrue(roundtrip(vector, SmallIntVector.class)); - } - } - - @Test - public void testTimeMicroVector() { - try (final TimeMicroVector vector = new TimeMicroVector("v", allocator)) { - setVector(vector, 0L, 1L, 2L, 3L, null); - assertTrue(roundtrip(vector, TimeMicroVector.class)); - } - } - - @Test - public void testTimeMilliVector() { - try (final TimeMilliVector vector = new TimeMilliVector("v", allocator)) { - setVector(vector, 0, 1, 2, 3, null); - assertTrue(roundtrip(vector, TimeMilliVector.class)); - } - } - - @Test - public void testTimeNanoVector() { - try (final TimeNanoVector vector = new TimeNanoVector("v", allocator)) { - setVector(vector, 0L, 1L, 2L, 3L, null); - assertTrue(roundtrip(vector, TimeNanoVector.class)); - } - } - - @Test - public void testTimeSecVector() { - try (final TimeSecVector vector = new TimeSecVector("v", allocator)) { - setVector(vector, 0, 1, 2, 3, null); - assertTrue(roundtrip(vector, TimeSecVector.class)); - } - } - - @Test - public void testTimeStampMicroTZVector() { - try (final TimeStampMicroTZVector 
vector = new TimeStampMicroTZVector("v", allocator, "UTC")) { - setVector(vector, 0L, 1L, 2L, 3L, null); - assertTrue(roundtrip(vector, TimeStampMicroTZVector.class)); - } - } - - @Test - public void testTimeStampMicroVector() { - try (final TimeStampMicroVector vector = new TimeStampMicroVector("v", allocator)) { - setVector(vector, 0L, 1L, 2L, 3L, null); - assertTrue(roundtrip(vector, TimeStampMicroVector.class)); - } - } - - @Test - public void testTimeStampMilliTZVector() { - try (final TimeStampMilliTZVector vector = new TimeStampMilliTZVector("v", allocator, "UTC")) { - setVector(vector, 0L, 1L, 2L, 3L, null); - assertTrue(roundtrip(vector, TimeStampMilliTZVector.class)); - } - } - - @Test - public void testTimeStampMilliVector() { - try (final TimeStampMilliVector vector = new TimeStampMilliVector("v", allocator)) { - setVector(vector, 0L, 1L, 2L, 3L, null); - assertTrue(roundtrip(vector, TimeStampMilliVector.class)); - } - } - - @Test - public void testTimeTimeStampNanoTZVector() { - try (final TimeStampNanoTZVector vector = new TimeStampNanoTZVector("v", allocator, "UTC")) { - setVector(vector, 0L, 1L, 2L, 3L, null); - assertTrue(roundtrip(vector, TimeStampNanoTZVector.class)); - } - } - - @Test - public void testTimeStampNanoVector() { - try (final TimeStampNanoVector vector = new TimeStampNanoVector("v", allocator)) { - setVector(vector, 0L, 1L, 2L, 3L, null); - assertTrue(roundtrip(vector, TimeStampNanoVector.class)); - } - } - - @Test - public void testTimeStampSecTZVector() { - try (final TimeStampSecTZVector vector = new TimeStampSecTZVector("v", allocator, "UTC")) { - setVector(vector, 0L, 1L, 2L, 3L, null); - assertTrue(roundtrip(vector, TimeStampSecTZVector.class)); - } - } - - @Test - public void testTimeStampSecVector() { - try (final TimeStampSecVector vector = new TimeStampSecVector("v", allocator)) { - setVector(vector, 0L, 1L, 2L, 3L, null); - assertTrue(roundtrip(vector, TimeStampSecVector.class)); - } - } - - @Test - public void 
testTinyIntVector() { - try (final TinyIntVector vector = new TinyIntVector("v", allocator)) { - setVector(vector, (byte) 0, (byte) 1, null); - assertTrue(roundtrip(vector, TinyIntVector.class)); - } - } - - @Test - public void testUInt1Vector() { - try (final UInt1Vector vector = new UInt1Vector("v", allocator)) { - setVector(vector, (byte) 0, (byte) 1, null); - assertTrue(roundtrip(vector, UInt1Vector.class)); - } - } - - @Test - public void testUInt2Vector() { - try (final UInt2Vector vector = new UInt2Vector("v", allocator)) { - setVector(vector, '0', '1', null); - assertTrue(roundtrip(vector, UInt2Vector.class)); - } - } - - @Test - public void testUInt4Vector() { - try (final UInt4Vector vector = new UInt4Vector("v", allocator)) { - setVector(vector, 0, 1, null); - assertTrue(roundtrip(vector, UInt4Vector.class)); - } - } - - @Test - public void testUInt8Vector() { - try (final UInt8Vector vector = new UInt8Vector("v", allocator)) { - setVector(vector, 0L, 1L, null); - assertTrue(roundtrip(vector, UInt8Vector.class)); - } - } - - @Test - public void testNullVector() { - try (final NullVector vector = new NullVector("v", 1024)) { - assertTrue(roundtrip(vector, NullVector.class)); - } - } - - @Test - public void testVarBinaryVector() { - try (final VarBinaryVector vector = new VarBinaryVector("v", allocator)) { - setVector( - vector, - "abc".getBytes(StandardCharsets.UTF_8), - "def".getBytes(StandardCharsets.UTF_8), - null); - assertTrue(roundtrip(vector, VarBinaryVector.class)); - } - } - - @Test - public void testViewVector() { - // ViewVarCharVector with short strings - try (final ViewVarCharVector vector = new ViewVarCharVector("v1", allocator)) { - setVector( - vector, - "abc".getBytes(StandardCharsets.UTF_8), - "def".getBytes(StandardCharsets.UTF_8), - null); - assertTrue(roundtrip(vector, ViewVarCharVector.class)); - } - - // ViewVarCharVector with long strings - try (final ViewVarCharVector vector = new ViewVarCharVector("v2", allocator)) { - setVector( 
- vector, - "01234567890123".getBytes(StandardCharsets.UTF_8), - "01234567890123567".getBytes(StandardCharsets.UTF_8), - null); - assertTrue(roundtrip(vector, ViewVarCharVector.class)); - } - - // ViewVarBinaryVector with short values - try (final ViewVarBinaryVector vector = new ViewVarBinaryVector("v3", allocator)) { - setVector( - vector, - "abc".getBytes(StandardCharsets.UTF_8), - "def".getBytes(StandardCharsets.UTF_8), - null); - assertTrue(roundtrip(vector, ViewVarBinaryVector.class)); - } - - // ViewVarBinaryVector with long values - try (final ViewVarBinaryVector vector = new ViewVarBinaryVector("v4", allocator)) { - setVector( - vector, - "01234567890123".getBytes(StandardCharsets.UTF_8), - "01234567890123567".getBytes(StandardCharsets.UTF_8), - null); - assertTrue(roundtrip(vector, ViewVarBinaryVector.class)); - } - - List byteArrayList = new ArrayList<>(); - for (int i = 1; i <= 500; i++) { - StringBuilder sb = new StringBuilder(i); - for (int j = 0; j < i; j++) { - sb.append(j); // or any other character - } - byte[] bytes = sb.toString().getBytes(StandardCharsets.UTF_8); - byteArrayList.add(bytes); - } - - // ViewVarCharVector with short long strings with multiple data buffers - try (final ViewVarCharVector vector = new ViewVarCharVector("v5", allocator)) { - setVector(vector, byteArrayList.toArray(new byte[0][])); - assertTrue(roundtrip(vector, ViewVarCharVector.class)); - } - - // ViewVarBinaryVector with short long strings with multiple data buffers - try (final ViewVarBinaryVector vector = new ViewVarBinaryVector("v6", allocator)) { - setVector(vector, byteArrayList.toArray(new byte[0][])); - assertTrue(roundtrip(vector, ViewVarBinaryVector.class)); - } - } - - @Test - public void testVarCharVector() { - try (final VarCharVector vector = new VarCharVector("v", allocator)) { - setVector(vector, "abc", "def", null); - assertTrue(roundtrip(vector, VarCharVector.class)); - } - } - - @Test - public void testLargeVarBinaryVector() { - try (final 
LargeVarBinaryVector vector = new LargeVarBinaryVector("", allocator)) { - vector.allocateNew(5, 1); - - NullableLargeVarBinaryHolder nullHolder = new NullableLargeVarBinaryHolder(); - nullHolder.isSet = 0; - - NullableLargeVarBinaryHolder binHolder = new NullableLargeVarBinaryHolder(); - binHolder.isSet = 1; - - String str = "hello world"; - try (ArrowBuf buf = allocator.buffer(16)) { - buf.setBytes(0, str.getBytes(StandardCharsets.UTF_8)); - binHolder.start = 0; - binHolder.end = str.length(); - binHolder.buffer = buf; - vector.setSafe(0, binHolder); - vector.setSafe(1, nullHolder); - - assertTrue(roundtrip(vector, LargeVarBinaryVector.class)); - } - } - } - - @Test - public void testLargeVarCharVector() { - try (final LargeVarCharVector vector = new LargeVarCharVector("v", allocator)) { - setVector(vector, "abc", "def", null); - assertTrue(roundtrip(vector, LargeVarCharVector.class)); - } - } - - @Test - public void testListVector() { - try (final ListVector vector = ListVector.empty("v", allocator)) { - setVector( - vector, - Arrays.stream(new int[] {1, 2}).boxed().collect(Collectors.toList()), - Arrays.stream(new int[] {3, 4}).boxed().collect(Collectors.toList()), - new ArrayList()); - assertTrue(roundtrip(vector, ListVector.class)); - } - } - - @Test - public void testEmptyListVector() { - try (final ListVector vector = ListVector.empty("v", allocator)) { - setVector(vector, new ArrayList()); - assertTrue(roundtrip(vector, ListVector.class)); - } - } - - @Test - public void testLargeListVector() { - try (final LargeListVector vector = LargeListVector.empty("v", allocator)) { - setVector( - vector, - Arrays.stream(new int[] {1, 2}).boxed().collect(Collectors.toList()), - Arrays.stream(new int[] {3, 4}).boxed().collect(Collectors.toList()), - new ArrayList()); - assertTrue(roundtrip(vector, LargeListVector.class)); - } - } - - @Test - public void testFixedSizeListVector() { - try (final FixedSizeListVector vector = FixedSizeListVector.empty("v", 2, allocator)) 
{ - setVector( - vector, - Arrays.stream(new int[] {1, 2}).boxed().collect(Collectors.toList()), - Arrays.stream(new int[] {3, 4}).boxed().collect(Collectors.toList())); - assertTrue(roundtrip(vector, FixedSizeListVector.class)); - } - } - - @Test - public void testListViewVector() { - try (final ListViewVector vector = ListViewVector.empty("v", allocator)) { - setVector( - vector, - Arrays.stream(new int[] {1, 2}).boxed().collect(Collectors.toList()), - Arrays.stream(new int[] {3, 4}).boxed().collect(Collectors.toList()), - new ArrayList()); - assertTrue(roundtrip(vector, ListViewVector.class)); - } - } - - @Test - public void testEmptyListViewVector() { - try (final ListViewVector vector = ListViewVector.empty("v", allocator)) { - setVector(vector, new ArrayList()); - assertTrue(roundtrip(vector, ListViewVector.class)); - } - } - - @Test - public void testLargeListViewVector() { - try (final LargeListViewVector vector = LargeListViewVector.empty("v", allocator)) { - setVector( - vector, - Arrays.stream(new int[] {1, 2}).boxed().collect(Collectors.toList()), - Arrays.stream(new int[] {3, 4}).boxed().collect(Collectors.toList()), - new ArrayList()); - assertTrue(roundtrip(vector, LargeListViewVector.class)); - } - } - - @Test - public void testEmptyLargeListViewVector() { - try (final LargeListViewVector vector = LargeListViewVector.empty("v", allocator)) { - setVector(vector, new ArrayList()); - assertTrue(roundtrip(vector, LargeListViewVector.class)); - } - } - - @Test - public void testMapVector() { - int count = 5; - try (final MapVector vector = MapVector.empty("v", allocator, false)) { - vector.allocateNew(); - UnionMapWriter mapWriter = vector.getWriter(); - for (int i = 0; i < count; i++) { - mapWriter.startMap(); - for (int j = 0; j < i + 1; j++) { - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(j); - mapWriter.value().integer().writeInt(j); - mapWriter.endEntry(); - } - mapWriter.endMap(); - } - mapWriter.setValueCount(count); - - 
assertTrue(roundtrip(vector, MapVector.class)); - } - } - - @Test - public void testUnionVector() { - final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); - uInt4Holder.value = 100; - uInt4Holder.isSet = 1; - - try (UnionVector vector = UnionVector.empty("v", allocator)) { - vector.allocateNew(); - - // write some data - vector.setType(0, MinorType.UINT4); - vector.setSafe(0, uInt4Holder); - vector.setType(2, MinorType.UINT4); - vector.setSafe(2, uInt4Holder); - vector.setValueCount(4); - - assertTrue(roundtrip(vector, UnionVector.class)); - } - } - - @Test - public void testStructVector() { - try (final StructVector vector = StructVector.empty("v", allocator)) { - Map> data = new HashMap<>(); - data.put("col_1", Arrays.stream(new int[] {1, 2}).boxed().collect(Collectors.toList())); - data.put("col_2", Arrays.stream(new int[] {3, 4}).boxed().collect(Collectors.toList())); - setVector(vector, data); - assertTrue(roundtrip(vector, StructVector.class)); - } - } - - @Test - public void testRunEndEncodedVector() { - try (final RunEndEncodedVector vector = RunEndEncodedVector.empty("v", allocator)) { - setVector(vector, List.of(1, 3), List.of(1, 2)); - assertTrue(roundtrip(vector, RunEndEncodedVector.class)); - } - } - - @Test - public void testEmptyRunEndEncodedVector() { - try (final RunEndEncodedVector vector = RunEndEncodedVector.empty("v", allocator)) { - setVector(vector, List.of(), List.of()); - assertTrue(roundtrip(vector, RunEndEncodedVector.class)); - } - } - - @Test - public void testExtensionTypeVector() { - ExtensionTypeRegistry.register(new UuidType()); - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", new UuidType()))); - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - // Fill with data - UUID u1 = UUID.randomUUID(); - UUID u2 = UUID.randomUUID(); - UuidVector vector = (UuidVector) root.getVector("a"); - vector.setValueCount(2); - vector.set(0, u1); - vector.set(1, u2); - 
root.setRowCount(2); - - // Roundtrip (export + import) - VectorSchemaRoot importedRoot = vectorSchemaRootRoundtrip(root); - - // Verify correctness - assertEquals(root.getSchema(), importedRoot.getSchema()); - - final Field field = importedRoot.getSchema().getFields().get(0); - final UuidType expectedType = new UuidType(); - assertEquals( - field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), - expectedType.extensionName()); - assertEquals( - field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), - expectedType.serialize()); - - final UuidVector deserialized = (UuidVector) importedRoot.getFieldVectors().get(0); - assertEquals(vector.getValueCount(), deserialized.getValueCount()); - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(vector.isNull(i), deserialized.isNull(i)); - if (!vector.isNull(i)) { - assertEquals(vector.getObject(i), deserialized.getObject(i)); - } - } - - importedRoot.close(); - } - } - - @Test - public void testVectorSchemaRoot() { - VectorSchemaRoot imported; - - // Consumer allocates empty structures - try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator); - ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) { - try (VectorSchemaRoot vsr = createTestVSR()) { - // Producer creates structures from existing memory pointers - try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); - ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { - // Producer exports vector into the C Data Interface structures - Data.exportVectorSchemaRoot(allocator, vsr, null, arrowArray, arrowSchema); - } - } - // Consumer imports vector - imported = - Data.importVectorSchemaRoot(allocator, consumerArrowArray, consumerArrowSchema, null); - } - - // Ensure that imported VectorSchemaRoot is valid even after C Data Interface - // structures are closed - try (VectorSchemaRoot original = createTestVSR()) { - 
assertTrue(imported.equals(original)); - } - imported.close(); - } - - /** - * Tests exporting Table and importing back to VSR. Importing back to Table is not supported at - * present. - */ - @Test - public void testTable() { - VectorSchemaRoot imported; - - // Consumer allocates empty structures - try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator); - ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) { - try (VectorSchemaRoot vsr = createTestVSR(); - Table table = new Table(vsr)) { - // Producer creates structures from existing memory pointers - try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); - ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { - // Producer exports vector into the C Data Interface structures - Data.exportTable(allocator, table, null, arrowArray, arrowSchema); - } - } - // Consumer imports vector - imported = - Data.importVectorSchemaRoot(allocator, consumerArrowArray, consumerArrowSchema, null); - } - - // Ensure that imported VectorSchemaRoot is valid even after C Data Interface - // structures are closed - try (VectorSchemaRoot original = createTestVSR()) { - assertTrue(imported.equals(original)); - } - imported.close(); - } - - @Test - public void testVectorSchemaRootWithDuplicatedFieldNames() { - VectorSchemaRoot imported; - - // Consumer allocates empty structures - try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator); - ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) { - - try (VectorSchemaRoot testVSR1 = createTestVSR(); - VectorSchemaRoot testVSR2 = createTestVSR()) { - // Merge two VSRs to produce duplicated field names - final VectorSchemaRoot vsr = - new VectorSchemaRoot( - Stream.concat( - testVSR1.getFieldVectors().stream(), testVSR2.getFieldVectors().stream()) - .collect(Collectors.toList())); - // Producer creates structures from existing memory pointers - try (ArrowSchema arrowSchema = 
ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); - ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { - // Producer exports vector into the C Data Interface structures - Data.exportVectorSchemaRoot(allocator, vsr, null, arrowArray, arrowSchema); - } - } - // Consumer imports vector - imported = - Data.importVectorSchemaRoot(allocator, consumerArrowArray, consumerArrowSchema, null); - } - - // Ensure that imported VectorSchemaRoot is valid even after C Data Interface - // structures are closed - try (VectorSchemaRoot testVSR1 = createTestVSR(); - VectorSchemaRoot testVSR2 = createTestVSR()) { - final VectorSchemaRoot original = - new VectorSchemaRoot( - Stream.concat( - testVSR1.getFieldVectors().stream(), testVSR2.getFieldVectors().stream()) - .collect(Collectors.toList())); - assertTrue(imported.equals(original)); - } - imported.close(); - } - - @Test - public void testSchema() { - Field decimalField = - new Field("inner1", FieldType.nullable(new ArrowType.Decimal(19, 4, 128)), null); - Field strField = new Field("inner2", FieldType.nullable(new ArrowType.Utf8()), null); - Field itemField = - new Field( - "col1", - FieldType.nullable(new ArrowType.Struct()), - Arrays.asList(decimalField, strField)); - Field intField = new Field("col2", FieldType.nullable(new ArrowType.Int(32, true)), null); - Schema schema = new Schema(Arrays.asList(itemField, intField)); - // Consumer allocates empty ArrowSchema - try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator)) { - // Producer fills the schema with data - try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress())) { - Data.exportSchema(allocator, schema, null, arrowSchema); - } - // Consumer imports schema - Schema importedSchema = Data.importSchema(allocator, consumerArrowSchema, null); - assertEquals(schema.toJson(), importedSchema.toJson()); - } - } - - @Test - public void testImportedBufferAsNioBuffer() { - IntVector imported; - try (final 
IntVector vector = new IntVector("v", allocator)) { - setVector(vector, 1, 2, 3, null); - imported = (IntVector) vectorRoundtrip(vector); - } - ArrowBuf dataBuffer = imported.getDataBuffer(); - ByteBuffer nioBuffer = dataBuffer.nioBuffer().asReadOnlyBuffer(); - nioBuffer.order(ByteOrder.nativeOrder()); - assertEquals(1, nioBuffer.getInt(0)); - assertEquals(2, nioBuffer.getInt(1 << 2)); - assertEquals(3, nioBuffer.getInt(2 << 2)); - imported.close(); - } - - @Test - public void testImportReleasedArray() { - // Consumer allocates empty structures - try (ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator); - ArrowArray consumerArrowArray = ArrowArray.allocateNew(allocator)) { - // Producer creates structures from existing memory pointers - try (ArrowSchema arrowSchema = ArrowSchema.wrap(consumerArrowSchema.memoryAddress()); - ArrowArray arrowArray = ArrowArray.wrap(consumerArrowArray.memoryAddress())) { - // Producer exports vector into the C Data Interface structures - try (final NullVector vector = new NullVector()) { - Data.exportVector(allocator, vector, null, arrowArray, arrowSchema); - } - } - - // Release array structure - consumerArrowArray.markReleased(); - - // Consumer tried to imports vector but fails - Exception e = - assertThrows( - IllegalStateException.class, - () -> { - Data.importVector(allocator, consumerArrowArray, consumerArrowSchema, null); - }); - - assertEquals("Cannot import released ArrowArray", e.getMessage()); - } - } - - private VectorSchemaRoot createTestVSR() { - BitVector bitVector = new BitVector("boolean", allocator); - - Map metadata = new HashMap<>(); - metadata.put("key", "value"); - FieldType fieldType = new FieldType(true, ArrowType.Utf8.INSTANCE, null, metadata); - VarCharVector varCharVector = new VarCharVector("varchar", fieldType, allocator); - - bitVector.allocateNew(); - varCharVector.allocateNew(); - for (int i = 0; i < 10; i++) { - bitVector.setSafe(i, i % 2 == 0 ? 
0 : 1); - varCharVector.setSafe(i, ("test" + i).getBytes(StandardCharsets.UTF_8)); - } - bitVector.setValueCount(10); - varCharVector.setValueCount(10); - - List fields = Arrays.asList(bitVector.getField(), varCharVector.getField()); - List vectors = Arrays.asList(bitVector, varCharVector); - - return new VectorSchemaRoot(fields, vectors); - } - - static class UuidType extends ExtensionType { - - @Override - public ArrowType storageType() { - return new ArrowType.FixedSizeBinary(16); - } - - @Override - public String extensionName() { - return "uuid"; - } - - @Override - public boolean extensionEquals(ExtensionType other) { - return other instanceof UuidType; - } - - @Override - public ArrowType deserialize(ArrowType storageType, String serializedData) { - if (!storageType.equals(storageType())) { - throw new UnsupportedOperationException( - "Cannot construct UuidType from underlying type " + storageType); - } - return new UuidType(); - } - - @Override - public String serialize() { - return ""; - } - - @Override - public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { - return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); - } - } - - static class UuidVector extends ExtensionTypeVector { - - public UuidVector( - String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { - super(name, allocator, underlyingVector); - } - - @Override - public UUID getObject(int index) { - final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); - return new UUID(bb.getLong(), bb.getLong()); - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - return getUnderlyingVector().hashCode(index, hasher); - } - - public void set(int index, UUID uuid) { - ByteBuffer bb = ByteBuffer.allocate(16); - bb.putLong(uuid.getMostSignificantBits()); - 
bb.putLong(uuid.getLeastSignificantBits()); - getUnderlyingVector().set(index, bb.array()); - } - } -} diff --git a/java/c/src/test/java/org/apache/arrow/c/StreamTest.java b/java/c/src/test/java/org/apache/arrow/c/StreamTest.java deleted file mode 100644 index 95363fcc3287f..0000000000000 --- a/java/c/src/test/java/org/apache/arrow/c/StreamTest.java +++ /dev/null @@ -1,435 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.c; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.function.Function; -import java.util.stream.Collectors; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.ViewVarBinaryVector; -import org.apache.arrow.vector.ViewVarCharVector; -import org.apache.arrow.vector.compare.Range; -import org.apache.arrow.vector.compare.RangeEqualsVisitor; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -final class StreamTest { - private RootAllocator allocator = null; - - @BeforeEach - public void setUp() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void tearDown() { - allocator.close(); - } - - @Test - public void testRoundtripInts() throws 
Exception { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("ints", new ArrowType.Int(32, true)))); - final List batches = new ArrayList<>(); - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final IntVector ints = (IntVector) root.getVector(0); - VectorUnloader unloader = new VectorUnloader(root); - - root.allocateNew(); - ints.setSafe(0, 1); - ints.setSafe(1, 2); - ints.setSafe(2, 4); - ints.setSafe(3, 8); - root.setRowCount(4); - batches.add(unloader.getRecordBatch()); - - root.allocateNew(); - ints.setSafe(0, 1); - ints.setNull(1); - ints.setSafe(2, 4); - ints.setNull(3); - root.setRowCount(4); - batches.add(unloader.getRecordBatch()); - roundtrip(schema, batches); - } - } - - @Test - public void roundtripStrings() throws Exception { - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("ints", new ArrowType.Int(32, true)), - Field.nullable("strs", new ArrowType.Utf8()))); - final List batches = new ArrayList<>(); - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final IntVector ints = (IntVector) root.getVector(0); - final VarCharVector strs = (VarCharVector) root.getVector(1); - VectorUnloader unloader = new VectorUnloader(root); - - root.allocateNew(); - ints.setSafe(0, 1); - ints.setSafe(1, 2); - ints.setSafe(2, 4); - ints.setSafe(3, 8); - strs.setSafe(0, "".getBytes(StandardCharsets.UTF_8)); - strs.setSafe(1, "a".getBytes(StandardCharsets.UTF_8)); - strs.setSafe(2, "bc".getBytes(StandardCharsets.UTF_8)); - strs.setSafe(3, "defg".getBytes(StandardCharsets.UTF_8)); - root.setRowCount(4); - batches.add(unloader.getRecordBatch()); - - root.allocateNew(); - ints.setSafe(0, 1); - ints.setNull(1); - ints.setSafe(2, 4); - ints.setNull(3); - strs.setSafe(0, "".getBytes(StandardCharsets.UTF_8)); - strs.setNull(1); - strs.setSafe(2, "bc".getBytes(StandardCharsets.UTF_8)); - strs.setNull(3); - root.setRowCount(4); - 
batches.add(unloader.getRecordBatch()); - roundtrip(schema, batches); - } - } - - @Test - public void roundtripStringViews() throws Exception { - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("ints", new ArrowType.Int(32, true)), - Field.nullable("string_views", new ArrowType.Utf8View()))); - final List batches = new ArrayList<>(); - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final IntVector ints = (IntVector) root.getVector(0); - final ViewVarCharVector strs = (ViewVarCharVector) root.getVector(1); - VectorUnloader unloader = new VectorUnloader(root); - - root.allocateNew(); - ints.setSafe(0, 1); - ints.setSafe(1, 2); - ints.setSafe(2, 4); - ints.setSafe(3, 8); - strs.setSafe(0, "".getBytes(StandardCharsets.UTF_8)); - strs.setSafe(1, "a".getBytes(StandardCharsets.UTF_8)); - strs.setSafe(2, "bc1234567890bc".getBytes(StandardCharsets.UTF_8)); - strs.setSafe(3, "defg1234567890defg".getBytes(StandardCharsets.UTF_8)); - root.setRowCount(4); - batches.add(unloader.getRecordBatch()); - - root.allocateNew(); - ints.setSafe(0, 1); - ints.setNull(1); - ints.setSafe(2, 4); - ints.setNull(3); - strs.setSafe(0, "".getBytes(StandardCharsets.UTF_8)); - strs.setNull(1); - strs.setSafe(2, "bc1234567890bc".getBytes(StandardCharsets.UTF_8)); - strs.setNull(3); - root.setRowCount(4); - batches.add(unloader.getRecordBatch()); - roundtrip(schema, batches); - } - } - - @Test - public void roundtripBinaryViews() throws Exception { - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("ints", new ArrowType.Int(32, true)), - Field.nullable("binary_views", new ArrowType.BinaryView()))); - final List batches = new ArrayList<>(); - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final IntVector ints = (IntVector) root.getVector(0); - final ViewVarBinaryVector strs = (ViewVarBinaryVector) root.getVector(1); - VectorUnloader unloader = new VectorUnloader(root); - - 
root.allocateNew(); - ints.setSafe(0, 1); - ints.setSafe(1, 2); - ints.setSafe(2, 4); - ints.setSafe(3, 8); - strs.setSafe(0, new byte[0]); - strs.setSafe(1, new byte[] {97}); - strs.setSafe(2, new byte[] {98, 99, 49, 50, 51, 52, 53, 54, 55, 56, 57, 48, 98, 99}); - strs.setSafe( - 3, - new byte[] { - 100, 101, 102, 103, 49, 50, 51, 52, 53, 54, 55, 56, 57, 48, 100, 101, 102, 103 - }); - root.setRowCount(4); - batches.add(unloader.getRecordBatch()); - - root.allocateNew(); - ints.setSafe(0, 1); - ints.setNull(1); - ints.setSafe(2, 4); - ints.setNull(3); - strs.setSafe(0, new byte[0]); - strs.setNull(1); - strs.setSafe(2, new byte[] {98, 99, 49, 50, 51, 52, 53, 54, 55, 56, 57, 48, 98, 99}); - strs.setNull(3); - root.setRowCount(4); - batches.add(unloader.getRecordBatch()); - roundtrip(schema, batches); - } - } - - @Test - public void roundtripDictionary() throws Exception { - final ArrowType.Int indexType = new ArrowType.Int(32, true); - final DictionaryEncoding encoding = new DictionaryEncoding(0L, false, indexType); - final Schema schema = - new Schema( - Collections.singletonList( - new Field( - "dict", - new FieldType(/*nullable=*/ true, indexType, encoding), - Collections.emptyList()))); - final List batches = new ArrayList<>(); - try (final CDataDictionaryProvider provider = new CDataDictionaryProvider(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final VarCharVector dictionary = new VarCharVector("values", allocator); - dictionary.allocateNew(); - dictionary.setSafe(0, "foo".getBytes(StandardCharsets.UTF_8)); - dictionary.setSafe(1, "bar".getBytes(StandardCharsets.UTF_8)); - dictionary.setNull(2); - dictionary.setValueCount(3); - provider.put(new Dictionary(dictionary, encoding)); - final IntVector encoded = (IntVector) root.getVector(0); - VectorUnloader unloader = new VectorUnloader(root); - - root.allocateNew(); - encoded.setSafe(0, 0); - encoded.setSafe(1, 1); - encoded.setSafe(2, 0); - encoded.setSafe(3, 2); - 
root.setRowCount(4); - batches.add(unloader.getRecordBatch()); - - root.allocateNew(); - encoded.setSafe(0, 0); - encoded.setNull(1); - encoded.setSafe(2, 1); - encoded.setNull(3); - root.setRowCount(4); - batches.add(unloader.getRecordBatch()); - roundtrip(schema, batches, provider); - } - } - - @Test - public void importReleasedStream() { - try (final ArrowArrayStream stream = ArrowArrayStream.allocateNew(allocator)) { - Exception e = - assertThrows( - IllegalStateException.class, () -> Data.importArrayStream(allocator, stream)); - assertThat(e).hasMessageContaining("Cannot import released ArrowArrayStream"); - } - } - - @Test - public void getNextError() throws Exception { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("ints", new ArrowType.Int(32, true)))); - final List batches = new ArrayList<>(); - try (final ArrowReader source = - new InMemoryArrowReader( - allocator, schema, batches, new DictionaryProvider.MapDictionaryProvider()) { - @Override - public boolean loadNextBatch() throws IOException { - throw new IOException("Failed to load batch!"); - } - }; - final ArrowArrayStream stream = ArrowArrayStream.allocateNew(allocator)) { - Data.exportArrayStream(allocator, source, stream); - try (final ArrowReader reader = Data.importArrayStream(allocator, stream)) { - assertThat(reader.getVectorSchemaRoot().getSchema()).isEqualTo(schema); - final IOException e = assertThrows(IOException.class, reader::loadNextBatch); - assertThat(e).hasMessageContaining("Failed to load batch!"); - assertThat(e).hasMessageContaining("[errno "); - } - } - } - - @Test - public void getSchemaError() throws Exception { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("ints", new ArrowType.Int(32, true)))); - final List batches = new ArrayList<>(); - try (final ArrowReader source = - new InMemoryArrowReader( - allocator, schema, batches, new DictionaryProvider.MapDictionaryProvider()) { - @Override - protected Schema 
readSchema() { - throw new IllegalArgumentException("Failed to read schema!"); - } - }; - final ArrowArrayStream stream = ArrowArrayStream.allocateNew(allocator)) { - Data.exportArrayStream(allocator, source, stream); - try (final ArrowReader reader = Data.importArrayStream(allocator, stream)) { - final IOException e = assertThrows(IOException.class, reader::getVectorSchemaRoot); - assertThat(e).hasMessageContaining("Failed to read schema!"); - assertThat(e).hasMessageContaining("[errno "); - } - } - } - - void roundtrip(Schema schema, List batches, DictionaryProvider provider) - throws Exception { - ArrowReader source = new InMemoryArrowReader(allocator, schema, batches, provider); - - try (final ArrowArrayStream stream = ArrowArrayStream.allocateNew(allocator); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final VectorLoader loader = new VectorLoader(root); - Data.exportArrayStream(allocator, source, stream); - - try (final ArrowReader reader = Data.importArrayStream(allocator, stream)) { - assertThat(reader.getVectorSchemaRoot().getSchema()).isEqualTo(schema); - - for (ArrowRecordBatch batch : batches) { - assertThat(reader.loadNextBatch()).isTrue(); - loader.load(batch); - - assertThat(reader.getVectorSchemaRoot().getRowCount()).isEqualTo(root.getRowCount()); - - for (int i = 0; i < root.getFieldVectors().size(); i++) { - final FieldVector expected = root.getVector(i); - final FieldVector actual = reader.getVectorSchemaRoot().getVector(i); - assertVectorsEqual(expected, actual); - } - } - assertThat(reader.loadNextBatch()).isFalse(); - assertThat(reader.getDictionaryIds()).isEqualTo(provider.getDictionaryIds()); - for (Map.Entry entry : reader.getDictionaryVectors().entrySet()) { - final FieldVector expected = provider.lookup(entry.getKey()).getVector(); - final FieldVector actual = entry.getValue().getVector(); - assertVectorsEqual(expected, actual); - } - } - } - } - - void roundtrip(Schema schema, List batches) throws 
Exception { - roundtrip(schema, batches, new CDataDictionaryProvider()); - } - - private static void assertVectorsEqual(FieldVector expected, FieldVector actual) { - assertThat(actual.getField().getType()).isEqualTo(expected.getField().getType()); - assertThat(actual.getValueCount()).isEqualTo(expected.getValueCount()); - final Range range = new Range(/*leftStart=*/ 0, /*rightStart=*/ 0, expected.getValueCount()); - assertThat(new RangeEqualsVisitor(expected, actual).rangeEquals(range)) - .as("Vectors were not equal.\nExpected: %s\nGot: %s", expected, actual) - .isTrue(); - } - - /** An ArrowReader backed by a fixed list of batches. */ - static class InMemoryArrowReader extends ArrowReader { - private final Schema schema; - private final List batches; - private final DictionaryProvider provider; - private int nextBatch; - - InMemoryArrowReader( - BufferAllocator allocator, - Schema schema, - List batches, - DictionaryProvider provider) { - super(allocator); - this.schema = schema; - this.batches = batches; - this.provider = provider; - this.nextBatch = 0; - } - - @Override - public Dictionary lookup(long id) { - return provider.lookup(id); - } - - @Override - public Set getDictionaryIds() { - return provider.getDictionaryIds(); - } - - @Override - public Map getDictionaryVectors() { - return getDictionaryIds().stream() - .collect(Collectors.toMap(Function.identity(), this::lookup)); - } - - @Override - public boolean loadNextBatch() throws IOException { - if (nextBatch < batches.size()) { - VectorLoader loader = new VectorLoader(getVectorSchemaRoot()); - loader.load(batches.get(nextBatch++)); - return true; - } - return false; - } - - @Override - public long bytesRead() { - return 0; - } - - @Override - protected void closeReadSource() throws IOException { - try { - AutoCloseables.close(batches); - } catch (Exception e) { - throw new IOException(e); - } - } - - @Override - protected Schema readSchema() { - return schema; - } - } -} diff --git 
a/java/c/src/test/python/integration_tests.py b/java/c/src/test/python/integration_tests.py deleted file mode 100644 index 3e14be11c4644..0000000000000 --- a/java/c/src/test/python/integration_tests.py +++ /dev/null @@ -1,418 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import decimal -import gc -import os -import sys -import unittest -import xml.etree.ElementTree as ET - -import jpype -import pyarrow as pa -import pyarrow.ipc as ipc -from pyarrow.cffi import ffi - - -def setup_jvm(): - # This test requires Arrow Java to be built in the same source tree - try: - arrow_dir = os.environ["ARROW_SOURCE_DIR"] - except KeyError: - arrow_dir = os.path.join(os.path.dirname( - __file__), '..', '..', '..', '..', '..') - pom_path = os.path.join(arrow_dir, 'java', 'pom.xml') - tree = ET.parse(pom_path) - version = tree.getroot().find( - 'POM:version', - namespaces={ - 'POM': 'http://maven.apache.org/POM/4.0.0' - }).text - jar_path = os.path.join( - arrow_dir, 'java', 'tools', 'target', - 'arrow-tools-{}-jar-with-dependencies.jar'.format(version)) - jar_path = os.getenv("ARROW_TOOLS_JAR", jar_path) - jar_path += ":{}".format(os.path.join(arrow_dir, - "java", "c/target/arrow-c-data-{}.jar".format(version))) - kwargs = {} - # This will be the default behaviour in jpype 0.8+ - kwargs['convertStrings'] = False - - # For debugging purpose please uncomment the following, and include *jvm_args, before **kwargs - # in startJVM function call - # jvm_args = [ - # "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005" - # ] - - jpype.startJVM(jpype.getDefaultJVMPath(), "-Djava.class.path=" + jar_path, **kwargs) - - -class Bridge: - def __init__(self): - self.java_allocator = jpype.JPackage( - "org").apache.arrow.memory.RootAllocator(sys.maxsize) - self.java_c = jpype.JPackage("org").apache.arrow.c - - def java_to_python_field(self, jfield): - c_schema = ffi.new("struct ArrowSchema*") - ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - self.java_c.Data.exportField(self.java_allocator, jfield, None, - self.java_c.ArrowSchema.wrap(ptr_schema)) - return pa.Field._import_from_c(ptr_schema) - - def java_to_python_array(self, vector, dictionary_provider=None): - c_schema = ffi.new("struct ArrowSchema*") - ptr_schema = 
int(ffi.cast("uintptr_t", c_schema)) - c_array = ffi.new("struct ArrowArray*") - ptr_array = int(ffi.cast("uintptr_t", c_array)) - self.java_c.Data.exportVector(self.java_allocator, vector, dictionary_provider, self.java_c.ArrowArray.wrap( - ptr_array), self.java_c.ArrowSchema.wrap(ptr_schema)) - return pa.Array._import_from_c(ptr_array, ptr_schema) - - def java_to_python_record_batch(self, root): - c_schema = ffi.new("struct ArrowSchema*") - ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - c_array = ffi.new("struct ArrowArray*") - ptr_array = int(ffi.cast("uintptr_t", c_array)) - self.java_c.Data.exportVectorSchemaRoot(self.java_allocator, root, None, self.java_c.ArrowArray.wrap( - ptr_array), self.java_c.ArrowSchema.wrap(ptr_schema)) - return pa.RecordBatch._import_from_c(ptr_array, ptr_schema) - - def java_to_python_reader(self, reader): - c_stream = ffi.new("struct ArrowArrayStream*") - ptr_stream = int(ffi.cast("uintptr_t", c_stream)) - self.java_c.Data.exportArrayStream(self.java_allocator, reader, - self.java_c.ArrowArrayStream.wrap(ptr_stream)) - return pa.RecordBatchReader._import_from_c(ptr_stream) - - def python_to_java_field(self, field): - c_schema = self.java_c.ArrowSchema.allocateNew(self.java_allocator) - field._export_to_c(c_schema.memoryAddress()) - return self.java_c.Data.importField(self.java_allocator, c_schema, None) - - def python_to_java_array(self, array, dictionary_provider=None): - c_schema = self.java_c.ArrowSchema.allocateNew(self.java_allocator) - c_array = self.java_c.ArrowArray.allocateNew(self.java_allocator) - array._export_to_c(c_array.memoryAddress(), c_schema.memoryAddress()) - return self.java_c.Data.importVector(self.java_allocator, c_array, c_schema, dictionary_provider) - - def python_to_java_record_batch(self, record_batch): - c_schema = self.java_c.ArrowSchema.allocateNew(self.java_allocator) - c_array = self.java_c.ArrowArray.allocateNew(self.java_allocator) - record_batch._export_to_c( - c_array.memoryAddress(), 
c_schema.memoryAddress()) - return self.java_c.Data.importVectorSchemaRoot(self.java_allocator, c_array, c_schema, None) - - def python_to_java_reader(self, reader): - c_stream = self.java_c.ArrowArrayStream.allocateNew(self.java_allocator) - reader._export_to_c(c_stream.memoryAddress()) - return self.java_c.Data.importArrayStream(self.java_allocator, c_stream) - - def close(self): - self.java_allocator.close() - - -class TestPythonIntegration(unittest.TestCase): - @classmethod - def setUpClass(cls) -> None: - setup_jvm() - - def setUp(self): - gc.collect() - self.old_allocated_python = pa.total_allocated_bytes() - self.bridge = Bridge() - - def tearDown(self): - self.bridge.close() - gc.collect() - diff_python = pa.total_allocated_bytes() - self.old_allocated_python - self.assertEqual( - pa.total_allocated_bytes(), self.old_allocated_python, - f"PyArrow memory was not adequately released: {diff_python} bytes lost") - - def round_trip_field(self, field_generator): - original_field = field_generator() - java_field = self.bridge.python_to_java_field(original_field) - del original_field - new_field = self.bridge.java_to_python_field(java_field) - del java_field - - expected = field_generator() - self.assertEqual(expected, new_field) - - def round_trip_array(self, array_generator, check_metadata=True): - original_arr = array_generator() - with self.bridge.java_c.CDataDictionaryProvider() as dictionary_provider, \ - self.bridge.python_to_java_array(original_arr, dictionary_provider) as vector: - del original_arr - new_array = self.bridge.java_to_python_array(vector, dictionary_provider) - - expected = array_generator() - - self.assertEqual(expected, new_array) - if check_metadata: - self.assertTrue(new_array.type.equals(expected.type, check_metadata=True)) - - def round_trip_record_batch(self, rb_generator): - original_rb = rb_generator() - with self.bridge.python_to_java_record_batch(original_rb) as root: - del original_rb - new_rb = 
self.bridge.java_to_python_record_batch(root) - - expected = rb_generator() - self.assertEqual(expected, new_rb) - - def round_trip_reader(self, schema, batches): - reader = pa.RecordBatchReader.from_batches(schema, batches) - - java_reader = self.bridge.python_to_java_reader(reader) - del reader - py_reader = self.bridge.java_to_python_reader(java_reader) - del java_reader - - actual = list(py_reader) - self.assertEqual(batches, actual) - - def test_string_array(self): - self.round_trip_array(lambda: pa.array([None, "a", "bb", "ccc"])) - - def test_stringview_array(self): - # with nulls short strings - self.round_trip_array(lambda: pa.array([None, "a", "bb", "c"], type=pa.string_view())) - # with nulls long and strings - self.round_trip_array(lambda: pa.array([None, "a", "bb"*10, "c"*13], type=pa.string_view())) - # without nulls short strings - self.round_trip_array(lambda: pa.array(["a", "bb", "c"], type=pa.string_view())) - # without nulls long and strings - self.round_trip_array(lambda: pa.array(["a", "bb"*10, "c"*13], type=pa.string_view())) - # with multiple data buffers - arr1 = pa.array(["a", "bb", "c"], type=pa.string_view()) - arr2 = pa.array(["b", "ee" * 10, "f" * 20], type=pa.string_view()) - arr3 = pa.array(["c", "abc" * 20, "efg" * 30], type=pa.string_view()) - arr4 = pa.array(["d", "abcd" * 100, "efgh" * 200], type=pa.string_view()) - self.round_trip_array(lambda: pa.concat_arrays([arr1, arr2, arr3, arr4])) - # empty strings - self.round_trip_array(lambda: pa.array(["", "bb" * 10, "c", "", "d", ""], type=pa.string_view())) - # null value variations - self.round_trip_array(lambda: pa.array(["bb" * 10, None, "", "d", None], type=pa.string_view())) - # empty array - self.round_trip_array(lambda: pa.array([], type=pa.string_view())) - # all null array - self.round_trip_array(lambda: pa.array([None, None, None], type=pa.string_view())) - - def test_binaryview_array(self): - # with nulls short binary values - self.round_trip_array(lambda: pa.array([None, 
bytes([97]), bytes([98, 98]), bytes([99])], type=pa.binary_view())) - # with nulls long binary values - self.round_trip_array(lambda: pa.array([None, bytes([97]), bytes([98, 98] * 10), bytes([99] * 13)], type=pa.binary_view())) - # without nulls short binary values - self.round_trip_array(lambda: pa.array([bytes([97]), bytes([98, 98]), bytes([99])], type=pa.binary_view())) - # without nulls long binary values - self.round_trip_array(lambda: pa.array([bytes([97]), bytes([98, 98] * 10), bytes([99] * 13)], type=pa.binary_view())) - # with multiple data buffers - arr1 = pa.array([bytes([97]), bytes([98, 98]), bytes([99])], type=pa.binary_view()) - arr2 = pa.array([bytes([98]), bytes([98, 98] * 10), bytes([99] * 13)], type=pa.binary_view()) - arr3 = pa.array([bytes([99]), bytes([98, 100] * 100), bytes([99, 100]) * 30], type=pa.binary_view()) - arr4 = pa.array([bytes([100]), bytes([98, 100, 101] * 200), bytes([98, 99]) * 300], type=pa.binary_view()) - self.round_trip_array(lambda: pa.concat_arrays([arr1, arr2, arr3, arr4])) - # empty binary values - self.round_trip_array(lambda: pa.array([bytes([]), bytes([97, 97]) * 10, bytes([98]), bytes([]), bytes([97]), bytes([])], - type=pa.binary_view())) - # null value variations - self.round_trip_array(lambda: pa.array([bytes([97, 97]) * 10, None, bytes([]), bytes([99]), None], type=pa.binary_view())) - # empty array - self.round_trip_array(lambda: pa.array([], type=pa.binary_view())) - # all null array - self.round_trip_array(lambda: pa.array([None, None, None], type=pa.binary_view())) - - def test_decimal_array(self): - data = [ - round(decimal.Decimal(722.82), 2), - round(decimal.Decimal(-934.11), 2), - None, - ] - self.round_trip_array(lambda: pa.array(data, pa.decimal128(5, 2))) - - def test_int_array(self): - self.round_trip_array(lambda: pa.array([1, 2, 3], type=pa.int32())) - - def test_list_array(self): - self.round_trip_array(lambda: pa.array( - [[], [0], [1, 2], [4, 5, 6]], pa.list_(pa.int64()) - # disabled 
check_metadata since the list internal field name ("item") - # is not preserved during round trips (it becomes "$data$"). - ), check_metadata=False) - - def test_empty_list_array(self): - """Validates GH-37056 fix. - Empty list of int32 produces a vector with empty child data buffer, however with non-zero capacity. - Using streaming forces the c-data array which represent the child data buffer to be NULL (pointer is 0). - On Java side, an attempt to import such array triggered an exception described in GH-37056. - """ - with pa.BufferOutputStream() as bos: - schema = pa.schema([pa.field("f0", pa.list_(pa.int32()), True)]) - with ipc.new_stream(bos, schema) as writer: - src = pa.RecordBatch.from_arrays([pa.array([[]])], schema=schema) - writer.write(src) - data_bytes = bos.getvalue() - - def recreate_batch(): - with pa.input_stream(data_bytes) as ios: - with ipc.open_stream(ios) as reader: - return reader.read_next_batch() - - self.round_trip_record_batch(recreate_batch) - - def test_struct_array(self): - fields = [ - ("f1", pa.int32()), - ("f2", pa.string()), - ] - data = [ - {"f1": 1, "f2": "a"}, - None, - {"f1": 3, "f2": None}, - {"f1": None, "f2": "d"}, - {"f1": None, "f2": None}, - ] - self.round_trip_array(lambda: pa.array(data, type=pa.struct(fields))) - - def test_dict(self): - self.round_trip_array( - lambda: pa.array(["a", "b", None, "d"], pa.dictionary(pa.int64(), pa.utf8()))) - - def test_map(self): - offsets = [0, None, 2, 6] - pykeys = [b"a", b"b", b"c", b"d", b"e", b"f"] - pyitems = [1, 2, 3, None, 4, 5] - keys = pa.array(pykeys, type="binary") - items = pa.array(pyitems, type="i4") - self.round_trip_array( - lambda: pa.MapArray.from_arrays(offsets, keys, items)) - - def test_field(self): - self.round_trip_field(lambda: pa.field("aa", pa.bool_())) - - def test_field_nested(self): - self.round_trip_field(lambda: pa.field( - "test", pa.list_(pa.int32()), nullable=True)) - - def test_field_metadata(self): - self.round_trip_field(lambda: pa.field("aa", 
pa.bool_(), {"a": "b"})) - - def test_record_batch_with_list(self): - data = [ - pa.array([[1], [2], [3], [4, 5, 6]]), - pa.array([1, 2, 3, 4]), - pa.array(['foo', 'bar', 'baz', None]), - pa.array([True, None, False, True]) - ] - self.round_trip_record_batch( - lambda: pa.RecordBatch.from_arrays(data, ['f0', 'f1', 'f2', 'f3'])) - - def test_reader_roundtrip(self): - schema = pa.schema([("ints", pa.int64()), ("strs", pa.string())]) - data = [ - pa.record_batch([[1, 2, 3, None], - ["a", "bc", None, ""]], - schema=schema), - pa.record_batch([[None, 4, 5, 6], - [None, "", "def", "g"]], - schema=schema), - ] - self.round_trip_reader(schema, data) - - def test_reader_complex_roundtrip(self): - schema = pa.schema([ - ("str_dict", pa.dictionary(pa.int8(), pa.string())), - ("int_list", pa.list_(pa.int64())), - ]) - dictionary = pa.array(["a", "bc", None]) - data = [ - pa.record_batch([pa.DictionaryArray.from_arrays([0, 2], dictionary), - [[1, 2, 3], None]], - schema=schema), - pa.record_batch([pa.DictionaryArray.from_arrays([None, 1], dictionary), - [[], [4]]], - schema=schema), - ] - self.round_trip_reader(schema, data) - - def test_listview_array(self): - self.round_trip_array(lambda: pa.array( - [[], [0], [1, 2], [4, 5, 6]], pa.list_view(pa.int64()) - # disabled check_metadata since in Java API the listview - # internal field name ("item") is not preserved - # during round trips (it becomes "$data$"). 
- ), check_metadata=False) - - def test_empty_listview_array(self): - with pa.BufferOutputStream() as bos: - schema = pa.schema([pa.field("f0", pa.list_view(pa.int32()), True)]) - with ipc.new_stream(bos, schema) as writer: - src = pa.RecordBatch.from_arrays( - [pa.array([[]], pa.list_view(pa.int32()))], schema=schema) - writer.write(src) - data_bytes = bos.getvalue() - - def recreate_batch(): - with pa.input_stream(data_bytes) as ios: - with ipc.open_stream(ios) as reader: - return reader.read_next_batch() - - self.round_trip_record_batch(recreate_batch) - - def test_largelistview_array(self): - self.round_trip_array(lambda: pa.array( - [[], [0], [1, 2], [4, 5, 6]], pa.large_list_view(pa.int64()) - # disabled check_metadata since in Java API the listview - # internal field name ("item") is not preserved - # during round trips (it becomes "$data$"). - ), check_metadata=False) - - def test_empty_largelistview_array(self): - with pa.BufferOutputStream() as bos: - schema = pa.schema([pa.field("f0", pa.large_list_view(pa.int32()), True)]) - with ipc.new_stream(bos, schema) as writer: - src = pa.RecordBatch.from_arrays( - [pa.array([[]], pa.large_list_view(pa.int32()))], schema=schema) - writer.write(src) - data_bytes = bos.getvalue() - - def recreate_batch(): - with pa.input_stream(data_bytes) as ios: - with ipc.open_stream(ios) as reader: - return reader.read_next_batch() - - self.round_trip_record_batch(recreate_batch) - - def test_runendencoded_array(self): - # empty vector - self.round_trip_array(lambda: pa.RunEndEncodedArray.from_arrays([], [], pa.run_end_encoded(pa.int64(), pa.int64()))) - - # constant null vector - self.round_trip_array(lambda: pa.RunEndEncodedArray.from_arrays([10], [None])) - # constant int vector - self.round_trip_array(lambda: pa.RunEndEncodedArray.from_arrays([10], [10])) - - # run end int vector - self.round_trip_array(lambda: pa.RunEndEncodedArray.from_arrays([3, 5, 10, 12, 19], [1, 2, 1, None, 3])) - # run end string vector - 
self.round_trip_array(lambda: pa.RunEndEncodedArray.from_arrays([3, 5, 10, 12, 19], ["1", "2", "1", None, "3"])) - -if __name__ == '__main__': - unittest.main(verbosity=2) diff --git a/java/compression/pom.xml b/java/compression/pom.xml deleted file mode 100644 index 8cc4909034abe..0000000000000 --- a/java/compression/pom.xml +++ /dev/null @@ -1,61 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - - arrow-compression - Arrow Compression - (Experimental/Contrib) A library for working with the compression/decompression of Arrow data. - - - - org.apache.arrow - arrow-vector - ${arrow.vector.classifier} - - - org.apache.arrow - arrow-memory-core - - - org.apache.arrow - arrow-memory-unsafe - test - - - org.immutables - value-annotations - - - org.apache.commons - commons-compress - 1.27.1 - - - com.github.luben - zstd-jni - 1.5.6-7 - - - diff --git a/java/compression/src/main/java/module-info.java b/java/compression/src/main/java/module-info.java deleted file mode 100644 index 113a1dba9d45f..0000000000000 --- a/java/compression/src/main/java/module-info.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.arrow.vector.compression.CompressionCodec; - -module org.apache.arrow.compression { - exports org.apache.arrow.compression; - - requires com.github.luben.zstd_jni; - requires org.apache.arrow.memory.core; - requires org.apache.arrow.vector; - requires org.apache.commons.compress; - - // Also defined under META-INF/services to support non-modular applications - provides CompressionCodec.Factory with - org.apache.arrow.compression.CommonsCompressionFactory; -} diff --git a/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java b/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java deleted file mode 100644 index f15c139df1d0c..0000000000000 --- a/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.compression; - -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.vector.compression.CompressionUtil; - -/** - * Default implementation of factory supported LZ4 and ZSTD compression. - * - *

    // TODO(ARROW-12115): Rename this class. - */ -public class CommonsCompressionFactory implements CompressionCodec.Factory { - - public static final CommonsCompressionFactory INSTANCE = new CommonsCompressionFactory(); - - @Override - public CompressionCodec createCodec(CompressionUtil.CodecType codecType) { - switch (codecType) { - case LZ4_FRAME: - return new Lz4CompressionCodec(); - case ZSTD: - return new ZstdCompressionCodec(); - default: - throw new IllegalArgumentException("Compression type not supported: " + codecType); - } - } - - @Override - public CompressionCodec createCodec(CompressionUtil.CodecType codecType, int compressionLevel) { - switch (codecType) { - case LZ4_FRAME: - return new Lz4CompressionCodec(); - case ZSTD: - return new ZstdCompressionCodec(compressionLevel); - default: - throw new IllegalArgumentException("Compression type not supported: " + codecType); - } - } -} diff --git a/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java b/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java deleted file mode 100644 index edd52604bc757..0000000000000 --- a/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.compression; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.compression.AbstractCompressionCodec; -import org.apache.arrow.vector.compression.CompressionUtil; -import org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorInputStream; -import org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorOutputStream; -import org.apache.commons.compress.utils.IOUtils; - -/** Compression codec for the LZ4 algorithm. */ -public class Lz4CompressionCodec extends AbstractCompressionCodec { - - @Override - protected ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) { - Preconditions.checkArgument( - uncompressedBuffer.writerIndex() <= Integer.MAX_VALUE, - "The uncompressed buffer size exceeds the integer limit %s.", - Integer.MAX_VALUE); - - byte[] inBytes = new byte[(int) uncompressedBuffer.writerIndex()]; - uncompressedBuffer.getBytes(/*index=*/ 0, inBytes); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (InputStream in = new ByteArrayInputStream(inBytes); - OutputStream out = new FramedLZ4CompressorOutputStream(baos)) { - IOUtils.copy(in, out); - } catch (IOException e) { - throw new RuntimeException(e); - } - - byte[] outBytes = baos.toByteArray(); - - ArrowBuf compressedBuffer = - allocator.buffer(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + outBytes.length); - compressedBuffer.setBytes(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, outBytes); - compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + outBytes.length); - return compressedBuffer; - } - - @Override - protected 
ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBuffer) { - Preconditions.checkArgument( - compressedBuffer.writerIndex() <= Integer.MAX_VALUE, - "The compressed buffer size exceeds the integer limit %s", - Integer.MAX_VALUE); - - long decompressedLength = readUncompressedLength(compressedBuffer); - - byte[] inBytes = - new byte - [(int) (compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH)]; - compressedBuffer.getBytes(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, inBytes); - ByteArrayOutputStream out = new ByteArrayOutputStream((int) decompressedLength); - try (InputStream in = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(inBytes))) { - IOUtils.copy(in, out); - } catch (IOException e) { - throw new RuntimeException(e); - } - - byte[] outBytes = out.toByteArray(); - ArrowBuf decompressedBuffer = allocator.buffer(outBytes.length); - decompressedBuffer.setBytes(/*index=*/ 0, outBytes); - decompressedBuffer.writerIndex(decompressedLength); - return decompressedBuffer; - } - - @Override - public CompressionUtil.CodecType getCodecType() { - return CompressionUtil.CodecType.LZ4_FRAME; - } -} diff --git a/java/compression/src/main/java/org/apache/arrow/compression/ZstdCompressionCodec.java b/java/compression/src/main/java/org/apache/arrow/compression/ZstdCompressionCodec.java deleted file mode 100644 index 6e48aae71fcfe..0000000000000 --- a/java/compression/src/main/java/org/apache/arrow/compression/ZstdCompressionCodec.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.compression; - -import com.github.luben.zstd.Zstd; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.compression.AbstractCompressionCodec; -import org.apache.arrow.vector.compression.CompressionUtil; - -/** Compression codec for the ZSTD algorithm. */ -public class ZstdCompressionCodec extends AbstractCompressionCodec { - - private int compressionLevel; - private static final int DEFAULT_COMPRESSION_LEVEL = 3; - - public ZstdCompressionCodec() { - this.compressionLevel = DEFAULT_COMPRESSION_LEVEL; - } - - public ZstdCompressionCodec(int compressionLevel) { - this.compressionLevel = compressionLevel; - } - - @Override - protected ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) { - long maxSize = Zstd.compressBound(uncompressedBuffer.writerIndex()); - long dstSize = CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + maxSize; - ArrowBuf compressedBuffer = allocator.buffer(dstSize); - long bytesWritten = - Zstd.compressUnsafe( - compressedBuffer.memoryAddress() + CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, - dstSize, - /*src*/ uncompressedBuffer.memoryAddress(), - /*srcSize=*/ uncompressedBuffer.writerIndex(), - /*level=*/ this.compressionLevel); - if (Zstd.isError(bytesWritten)) { - compressedBuffer.close(); - throw new RuntimeException("Error compressing: " + Zstd.getErrorName(bytesWritten)); - } - compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + bytesWritten); - return compressedBuffer; - } - - 
@Override - protected ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBuffer) { - long decompressedLength = readUncompressedLength(compressedBuffer); - ArrowBuf uncompressedBuffer = allocator.buffer(decompressedLength); - long decompressedSize = - Zstd.decompressUnsafe( - uncompressedBuffer.memoryAddress(), - decompressedLength, - /*src=*/ compressedBuffer.memoryAddress() + CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, - compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH); - if (Zstd.isError(decompressedSize)) { - uncompressedBuffer.close(); - throw new RuntimeException("Error decompressing: " + Zstd.getErrorName(decompressedLength)); - } - if (decompressedLength != decompressedSize) { - uncompressedBuffer.close(); - throw new RuntimeException( - "Expected != actual decompressed length: " - + decompressedLength - + " != " - + decompressedSize); - } - uncompressedBuffer.writerIndex(decompressedLength); - return uncompressedBuffer; - } - - @Override - public CompressionUtil.CodecType getCodecType() { - return CompressionUtil.CodecType.ZSTD; - } -} diff --git a/java/compression/src/main/resources/META-INF/services/org.apache.arrow.vector.compression.CompressionCodec$Factory b/java/compression/src/main/resources/META-INF/services/org.apache.arrow.vector.compression.CompressionCodec$Factory deleted file mode 100644 index ccdcef9aed96a..0000000000000 --- a/java/compression/src/main/resources/META-INF/services/org.apache.arrow.vector.compression.CompressionCodec$Factory +++ /dev/null @@ -1,15 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -org.apache.arrow.compression.CommonsCompressionFactory diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java b/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java deleted file mode 100644 index d7318e306c37d..0000000000000 --- a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.compression; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Optional; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.GenerateSampleData; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.compression.CompressionUtil; -import org.apache.arrow.vector.compression.NoCompressionCodec; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.ArrowFileReader; -import org.apache.arrow.vector.ipc.ArrowFileWriter; -import org.apache.arrow.vector.ipc.ArrowStreamReader; -import org.apache.arrow.vector.ipc.ArrowStreamWriter; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestArrowReaderWriterWithCompression { - - private BufferAllocator allocator; - private ByteArrayOutputStream out; - private VectorSchemaRoot root; - - @BeforeEach - public void setup() { - if (allocator == 
null) { - allocator = new RootAllocator(Integer.MAX_VALUE); - } - out = new ByteArrayOutputStream(); - root = null; - } - - @AfterEach - public void tearDown() { - if (root != null) { - root.close(); - } - if (allocator != null) { - allocator.close(); - } - if (out != null) { - out.reset(); - } - } - - private void createAndWriteArrowFile( - DictionaryProvider provider, CompressionUtil.CodecType codecType) throws IOException { - List fields = new ArrayList<>(); - fields.add(new Field("col", FieldType.notNullable(new ArrowType.Utf8()), new ArrayList<>())); - root = VectorSchemaRoot.create(new Schema(fields), allocator); - - final int rowCount = 10; - GenerateSampleData.generateTestData(root.getVector(0), rowCount); - root.setRowCount(rowCount); - - try (final ArrowFileWriter writer = - new ArrowFileWriter( - root, - provider, - Channels.newChannel(out), - new HashMap<>(), - IpcOption.DEFAULT, - CommonsCompressionFactory.INSTANCE, - codecType, - Optional.of(7))) { - writer.start(); - writer.writeBatch(); - writer.end(); - } - } - - private void createAndWriteArrowStream( - DictionaryProvider provider, CompressionUtil.CodecType codecType) throws IOException { - List fields = new ArrayList<>(); - fields.add(new Field("col", FieldType.notNullable(new ArrowType.Utf8()), new ArrayList<>())); - root = VectorSchemaRoot.create(new Schema(fields), allocator); - - final int rowCount = 10; - GenerateSampleData.generateTestData(root.getVector(0), rowCount); - root.setRowCount(rowCount); - - try (final ArrowStreamWriter writer = - new ArrowStreamWriter( - root, - provider, - Channels.newChannel(out), - IpcOption.DEFAULT, - CommonsCompressionFactory.INSTANCE, - codecType, - Optional.of(7))) { - writer.start(); - writer.writeBatch(); - writer.end(); - } - } - - private Dictionary createDictionary(VarCharVector dictionaryVector) { - setVector( - dictionaryVector, - "foo".getBytes(StandardCharsets.UTF_8), - "bar".getBytes(StandardCharsets.UTF_8), - 
"baz".getBytes(StandardCharsets.UTF_8)); - - return new Dictionary( - dictionaryVector, - new DictionaryEncoding(/*id=*/ 1L, /*ordered=*/ false, /*indexType=*/ null)); - } - - @Test - public void testArrowFileZstdRoundTrip() throws Exception { - createAndWriteArrowFile(null, CompressionUtil.CodecType.ZSTD); - // with compression - try (ArrowFileReader reader = - new ArrowFileReader( - new ByteArrayReadableSeekableByteChannel(out.toByteArray()), - allocator, - CommonsCompressionFactory.INSTANCE)) { - assertEquals(1, reader.getRecordBlocks().size()); - assertTrue(reader.loadNextBatch()); - assertTrue(root.equals(reader.getVectorSchemaRoot())); - assertFalse(reader.loadNextBatch()); - } - // without compression - try (ArrowFileReader reader = - new ArrowFileReader( - new ByteArrayReadableSeekableByteChannel(out.toByteArray()), - allocator, - NoCompressionCodec.Factory.INSTANCE)) { - assertEquals(1, reader.getRecordBlocks().size()); - Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - assertEquals( - "Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", - exception.getMessage()); - } - } - - @Test - public void testArrowStreamZstdRoundTrip() throws Exception { - createAndWriteArrowStream(null, CompressionUtil.CodecType.ZSTD); - // with compression - try (ArrowStreamReader reader = - new ArrowStreamReader( - new ByteArrayReadableSeekableByteChannel(out.toByteArray()), - allocator, - CommonsCompressionFactory.INSTANCE)) { - assertTrue(reader.loadNextBatch()); - assertTrue(root.equals(reader.getVectorSchemaRoot())); - assertFalse(reader.loadNextBatch()); - } - // without compression - try (ArrowStreamReader reader = - new ArrowStreamReader( - new ByteArrayReadableSeekableByteChannel(out.toByteArray()), - allocator, - NoCompressionCodec.Factory.INSTANCE)) { - Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - assertEquals( - "Please add arrow-compression module to 
use CommonsCompressionFactory for ZSTD", - exception.getMessage()); - } - } - - @Test - public void testArrowFileZstdRoundTripWithDictionary() throws Exception { - VarCharVector dictionaryVector = - (VarCharVector) - FieldType.nullable(new ArrowType.Utf8()) - .createNewSingleVector("f1_file", allocator, null); - Dictionary dictionary = createDictionary(dictionaryVector); - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - provider.put(dictionary); - - createAndWriteArrowFile(provider, CompressionUtil.CodecType.ZSTD); - - // with compression - try (ArrowFileReader reader = - new ArrowFileReader( - new ByteArrayReadableSeekableByteChannel(out.toByteArray()), - allocator, - CommonsCompressionFactory.INSTANCE)) { - assertEquals(1, reader.getRecordBlocks().size()); - assertTrue(reader.loadNextBatch()); - assertTrue(root.equals(reader.getVectorSchemaRoot())); - assertFalse(reader.loadNextBatch()); - } - // without compression - try (ArrowFileReader reader = - new ArrowFileReader( - new ByteArrayReadableSeekableByteChannel(out.toByteArray()), - allocator, - NoCompressionCodec.Factory.INSTANCE)) { - assertEquals(1, reader.getRecordBlocks().size()); - Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - assertEquals( - "Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", - exception.getMessage()); - } - dictionaryVector.close(); - } - - @Test - public void testArrowStreamZstdRoundTripWithDictionary() throws Exception { - VarCharVector dictionaryVector = - (VarCharVector) - FieldType.nullable(new ArrowType.Utf8()) - .createNewSingleVector("f1_stream", allocator, null); - Dictionary dictionary = createDictionary(dictionaryVector); - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - provider.put(dictionary); - - createAndWriteArrowStream(provider, CompressionUtil.CodecType.ZSTD); - - // with compression 
- try (ArrowStreamReader reader = - new ArrowStreamReader( - new ByteArrayReadableSeekableByteChannel(out.toByteArray()), - allocator, - CommonsCompressionFactory.INSTANCE)) { - assertTrue(reader.loadNextBatch()); - assertTrue(root.equals(reader.getVectorSchemaRoot())); - assertFalse(reader.loadNextBatch()); - } - // without compression - try (ArrowStreamReader reader = - new ArrowStreamReader( - new ByteArrayReadableSeekableByteChannel(out.toByteArray()), - allocator, - NoCompressionCodec.Factory.INSTANCE)) { - Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - assertEquals( - "Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", - exception.getMessage()); - } - dictionaryVector.close(); - } - - public static void setVector(VarCharVector vector, byte[]... values) { - final int length = values.length; - vector.allocateNewSafe(); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } -} diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java b/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java deleted file mode 100644 index b8fb4e28b9059..0000000000000 --- a/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java +++ /dev/null @@ -1,387 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.compression; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Optional; -import java.util.function.BiConsumer; -import java.util.stream.Stream; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.vector.compression.CompressionUtil; -import org.apache.arrow.vector.compression.NoCompressionCodec; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.ArrowFileReader; -import org.apache.arrow.vector.ipc.ArrowFileWriter; -import org.apache.arrow.vector.ipc.ArrowStreamReader; 
-import org.apache.arrow.vector.ipc.ArrowStreamWriter; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** Test cases for {@link CompressionCodec}s. */ -class TestCompressionCodec { - private BufferAllocator allocator; - - @BeforeEach - void init() { - allocator = new RootAllocator(Integer.MAX_VALUE); - } - - @AfterEach - void terminate() { - allocator.close(); - } - - static Collection codecs() { - List params = new ArrayList<>(); - - int[] lengths = new int[] {10, 100, 1000}; - for (int len : lengths) { - CompressionCodec dumbCodec = NoCompressionCodec.INSTANCE; - params.add(Arguments.arguments(len, dumbCodec)); - - CompressionCodec lz4Codec = new Lz4CompressionCodec(); - params.add(Arguments.arguments(len, lz4Codec)); - - CompressionCodec zstdCodec = new ZstdCompressionCodec(); - params.add(Arguments.arguments(len, zstdCodec)); - - CompressionCodec zstdCodecAndCompressionLevel = new ZstdCompressionCodec(7); - params.add(Arguments.arguments(len, zstdCodecAndCompressionLevel)); - } - return params; - } - - private List compressBuffers(CompressionCodec codec, List inputBuffers) { - List outputBuffers = new ArrayList<>(inputBuffers.size()); - for (ArrowBuf buf : inputBuffers) { - outputBuffers.add(codec.compress(allocator, buf)); - } - return outputBuffers; - } - - private List deCompressBuffers(CompressionCodec codec, List inputBuffers) { - List outputBuffers = new ArrayList<>(inputBuffers.size()); - for (ArrowBuf buf : 
inputBuffers) { - outputBuffers.add(codec.decompress(allocator, buf)); - } - return outputBuffers; - } - - private void assertWriterIndex(List decompressedBuffers) { - for (ArrowBuf decompressedBuf : decompressedBuffers) { - assertTrue(decompressedBuf.writerIndex() > 0); - } - } - - @ParameterizedTest - @MethodSource("codecs") - void testCompressFixedWidthBuffers(int vectorLength, CompressionCodec codec) throws Exception { - // prepare vector to compress - IntVector origVec = new IntVector("vec", allocator); - origVec.allocateNew(vectorLength); - for (int i = 0; i < vectorLength; i++) { - if (i % 10 == 0) { - origVec.setNull(i); - } else { - origVec.set(i, i); - } - } - origVec.setValueCount(vectorLength); - int nullCount = origVec.getNullCount(); - - // compress & decompress - List origBuffers = origVec.getFieldBuffers(); - List compressedBuffers = compressBuffers(codec, origBuffers); - List decompressedBuffers = deCompressBuffers(codec, compressedBuffers); - - assertEquals(2, decompressedBuffers.size()); - assertWriterIndex(decompressedBuffers); - - // orchestrate new vector - IntVector newVec = new IntVector("new vec", allocator); - newVec.loadFieldBuffers(new ArrowFieldNode(vectorLength, nullCount), decompressedBuffers); - - // verify new vector - assertEquals(vectorLength, newVec.getValueCount()); - for (int i = 0; i < vectorLength; i++) { - if (i % 10 == 0) { - assertTrue(newVec.isNull(i)); - } else { - assertEquals(i, newVec.get(i)); - } - } - - newVec.close(); - AutoCloseables.close(decompressedBuffers); - } - - @ParameterizedTest - @MethodSource("codecs") - void testCompressVariableWidthBuffers(int vectorLength, CompressionCodec codec) throws Exception { - // prepare vector to compress - VarCharVector origVec = new VarCharVector("vec", allocator); - origVec.allocateNew(); - for (int i = 0; i < vectorLength; i++) { - if (i % 10 == 0) { - origVec.setNull(i); - } else { - origVec.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); - } - } - 
origVec.setValueCount(vectorLength); - int nullCount = origVec.getNullCount(); - - // compress & decompress - List origBuffers = origVec.getFieldBuffers(); - List compressedBuffers = compressBuffers(codec, origBuffers); - List decompressedBuffers = deCompressBuffers(codec, compressedBuffers); - - assertEquals(3, decompressedBuffers.size()); - assertWriterIndex(decompressedBuffers); - - // orchestrate new vector - VarCharVector newVec = new VarCharVector("new vec", allocator); - newVec.loadFieldBuffers(new ArrowFieldNode(vectorLength, nullCount), decompressedBuffers); - - // verify new vector - assertEquals(vectorLength, newVec.getValueCount()); - for (int i = 0; i < vectorLength; i++) { - if (i % 10 == 0) { - assertTrue(newVec.isNull(i)); - } else { - assertArrayEquals(String.valueOf(i).getBytes(StandardCharsets.UTF_8), newVec.get(i)); - } - } - - newVec.close(); - AutoCloseables.close(decompressedBuffers); - } - - @ParameterizedTest - @MethodSource("codecs") - void testEmptyBuffer(int vectorLength, CompressionCodec codec) throws Exception { - final VarBinaryVector origVec = new VarBinaryVector("vec", allocator); - - origVec.allocateNew(vectorLength); - - // Do not set any values (all missing) - origVec.setValueCount(vectorLength); - - final List origBuffers = origVec.getFieldBuffers(); - final List compressedBuffers = compressBuffers(codec, origBuffers); - final List decompressedBuffers = deCompressBuffers(codec, compressedBuffers); - - // orchestrate new vector - VarBinaryVector newVec = new VarBinaryVector("new vec", allocator); - newVec.loadFieldBuffers(new ArrowFieldNode(vectorLength, vectorLength), decompressedBuffers); - - // verify new vector - assertEquals(vectorLength, newVec.getValueCount()); - for (int i = 0; i < vectorLength; i++) { - assertTrue(newVec.isNull(i)); - } - - newVec.close(); - AutoCloseables.close(decompressedBuffers); - } - - private static Stream codecTypes() { - return Arrays.stream(CompressionUtil.CodecType.values()); - } - - 
@ParameterizedTest - @MethodSource("codecTypes") - void testReadWriteStream(CompressionUtil.CodecType codec) throws Exception { - withRoot( - codec, - (factory, root) -> { - ByteArrayOutputStream compressedStream = new ByteArrayOutputStream(); - try (final ArrowStreamWriter writer = - new ArrowStreamWriter( - root, - new DictionaryProvider.MapDictionaryProvider(), - Channels.newChannel(compressedStream), - IpcOption.DEFAULT, - factory, - codec, - Optional.of(7))) { - writer.start(); - writer.writeBatch(); - writer.end(); - } catch (IOException e) { - throw new RuntimeException(e); - } - - try (ArrowStreamReader reader = - new ArrowStreamReader( - new ByteArrayReadableSeekableByteChannel(compressedStream.toByteArray()), - allocator, - factory)) { - assertTrue(reader.loadNextBatch()); - assertTrue(root.equals(reader.getVectorSchemaRoot())); - assertFalse(reader.loadNextBatch()); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } - - @ParameterizedTest - @MethodSource("codecTypes") - void testReadWriteFile(CompressionUtil.CodecType codec) throws Exception { - withRoot( - codec, - (factory, root) -> { - ByteArrayOutputStream compressedStream = new ByteArrayOutputStream(); - try (final ArrowFileWriter writer = - new ArrowFileWriter( - root, - new DictionaryProvider.MapDictionaryProvider(), - Channels.newChannel(compressedStream), - new HashMap<>(), - IpcOption.DEFAULT, - factory, - codec, - Optional.of(7))) { - writer.start(); - writer.writeBatch(); - writer.end(); - } catch (IOException e) { - throw new RuntimeException(e); - } - - try (ArrowFileReader reader = - new ArrowFileReader( - new ByteArrayReadableSeekableByteChannel(compressedStream.toByteArray()), - allocator, - factory)) { - assertTrue(reader.loadNextBatch()); - assertTrue(root.equals(reader.getVectorSchemaRoot())); - assertFalse(reader.loadNextBatch()); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - } - - /** Unloading a vector should not free source 
buffers. */ - @ParameterizedTest - @MethodSource("codecTypes") - void testUnloadCompressed(CompressionUtil.CodecType codec) { - withRoot( - codec, - (factory, root) -> { - root.getFieldVectors() - .forEach( - (vector) -> { - Arrays.stream(vector.getBuffers(/*clear*/ false)) - .forEach( - (buf) -> { - assertNotEquals(0, buf.getReferenceManager().getRefCount()); - }); - }); - - final VectorUnloader unloader = - new VectorUnloader( - root, /*includeNullCount*/ - true, - factory.createCodec(codec), /*alignBuffers*/ - true); - unloader.getRecordBatch().close(); - - root.getFieldVectors() - .forEach( - (vector) -> { - Arrays.stream(vector.getBuffers(/*clear*/ false)) - .forEach( - (buf) -> { - assertNotEquals(0, buf.getReferenceManager().getRefCount()); - }); - }); - }); - } - - void withRoot( - CompressionUtil.CodecType codec, - BiConsumer testBody) { - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("ints", new ArrowType.Int(32, true)), - Field.nullable("strings", ArrowType.Utf8.INSTANCE))); - CompressionCodec.Factory factory = - codec == CompressionUtil.CodecType.NO_COMPRESSION - ? 
NoCompressionCodec.Factory.INSTANCE - : CommonsCompressionFactory.INSTANCE; - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final IntVector ints = (IntVector) root.getVector(0); - final VarCharVector strings = (VarCharVector) root.getVector(1); - // Doesn't get compressed - ints.setSafe(0, 0x4a3e); - ints.setSafe(1, 0x8aba); - ints.setSafe(2, 0x4362); - ints.setSafe(3, 0x383f); - // Gets compressed - String compressibleString = " "; // 16 bytes - compressibleString = compressibleString + compressibleString; - compressibleString = compressibleString + compressibleString; - compressibleString = compressibleString + compressibleString; - compressibleString = compressibleString + compressibleString; - compressibleString = compressibleString + compressibleString; // 512 bytes - byte[] compressibleData = compressibleString.getBytes(StandardCharsets.UTF_8); - strings.setSafe(0, compressibleData); - strings.setSafe(1, compressibleData); - strings.setSafe(2, compressibleData); - strings.setSafe(3, compressibleData); - root.setRowCount(4); - - testBody.accept(factory, root); - } - } -} diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecServiceProvider.java b/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecServiceProvider.java deleted file mode 100644 index 795e05d7cb123..0000000000000 --- a/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodecServiceProvider.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.compression; - -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.vector.compression.CompressionUtil; -import org.apache.arrow.vector.compression.NoCompressionCodec; -import org.junit.jupiter.api.Test; - -public class TestCompressionCodecServiceProvider { - - /** - * When arrow-compression is in the classpath/module-path, {@link - * CompressionCodec.Factory#INSTANCE} should be able to handle all codec types. - */ - @Test - public void testSupportedCompressionTypes() { - assertThrows( // no-compression doesn't support any actual compression types - IllegalArgumentException.class, - () -> checkAllCodecTypes(NoCompressionCodec.Factory.INSTANCE)); - assertThrows( // commons-compression doesn't support the uncompressed type - IllegalArgumentException.class, - () -> checkAllCodecTypes(CommonsCompressionFactory.INSTANCE)); - checkAllCodecTypes( // and the winner is... 
- CompressionCodec.Factory.INSTANCE); // combines the two above to support all types - } - - private void checkAllCodecTypes(CompressionCodec.Factory factory) { - for (CompressionUtil.CodecType codecType : CompressionUtil.CodecType.values()) { - assertNotNull(factory.createCodec(codecType)); - } - } -} diff --git a/java/compression/src/test/resources/logback.xml b/java/compression/src/test/resources/logback.xml deleted file mode 100644 index 4c54d18a210ff..0000000000000 --- a/java/compression/src/test/resources/logback.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - - diff --git a/java/dataset/CMakeLists.txt b/java/dataset/CMakeLists.txt deleted file mode 100644 index 348850c3be5da..0000000000000 --- a/java/dataset/CMakeLists.txt +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -find_package(ArrowDataset REQUIRED) -find_package(ArrowSubstrait REQUIRED) - -include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} - ${JNI_INCLUDE_DIRS} ${JNI_HEADERS_DIR}) - -add_jar(arrow_java_jni_dataset_jar - src/main/java/org/apache/arrow/dataset/jni/JniLoader.java - src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java - src/main/java/org/apache/arrow/dataset/file/JniWrapper.java - src/main/java/org/apache/arrow/dataset/jni/NativeMemoryPool.java - src/main/java/org/apache/arrow/dataset/jni/ReservationListener.java - src/main/java/org/apache/arrow/dataset/substrait/JniWrapper.java - GENERATE_NATIVE_HEADERS - arrow_java_jni_dataset_headers) - -add_library(arrow_java_jni_dataset SHARED src/main/cpp/jni_wrapper.cc - src/main/cpp/jni_util.cc) -set_property(TARGET arrow_java_jni_dataset PROPERTY OUTPUT_NAME "arrow_dataset_jni") -target_link_libraries(arrow_java_jni_dataset - arrow_java_jni_dataset_headers - jni - ArrowDataset::arrow_dataset_static - ArrowSubstrait::arrow_substrait_static) - -if(BUILD_TESTING) - add_executable(arrow-java-jni-dataset-test src/main/cpp/jni_util_test.cc - src/main/cpp/jni_util.cc) - target_link_libraries(arrow-java-jni-dataset-test arrow_java_test) - add_test(NAME arrow-java-jni-dataset-test COMMAND arrow-java-jni-dataset-test) -endif() - -set(ARROW_JAVA_JNI_DATASET_LIBDIR - "${CMAKE_INSTALL_PREFIX}/lib/arrow_dataset_jni/${ARROW_JAVA_JNI_ARCH_DIR}") - -set(ARROW_JAVA_JNI_DATASET_BINDIR - "${CMAKE_INSTALL_PREFIX}/bin/arrow_dataset_jni/${ARROW_JAVA_JNI_ARCH_DIR}") - -install(TARGETS arrow_java_jni_dataset - LIBRARY DESTINATION ${ARROW_JAVA_JNI_DATASET_LIBDIR} - RUNTIME DESTINATION ${ARROW_JAVA_JNI_DATASET_BINDIR}) diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml deleted file mode 100644 index 0c1f55dd69edb..0000000000000 --- a/java/dataset/pom.xml +++ /dev/null @@ -1,216 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - - - arrow-dataset - jar - Arrow Java Dataset - 
Java implementation of Arrow Dataset API/Framework - - - ../../../cpp/release-build/ - 1.14.4 - 1.12.0 - - - - - org.apache.arrow - arrow-vector - ${arrow.vector.classifier} - compile - - - org.apache.arrow - arrow-memory-core - compile - - - org.apache.arrow - arrow-c-data - compile - - - org.immutables - value-annotations - - - org.apache.arrow - arrow-memory-netty - test - - - org.apache.parquet - parquet-avro - ${parquet.version} - test - - - org.apache.avro - avro - ${avro.version} - test - - - org.apache.parquet - parquet-hadoop - ${parquet.version} - test - - - org.apache.arrow - arrow-vector - ${project.version} - tests - test - - - org.apache.hadoop - hadoop-common - ${dep.hadoop.version} - test - - - commons-logging - commons-logging - - - javax.servlet - servlet-api - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - - - - com.google.guava - guava - test - - - com.fasterxml.jackson.core - jackson-databind - test - - - org.apache.arrow.orc - arrow-orc - ${project.version} - test - - - org.apache.orc - orc-core - 1.9.5 - test - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - - commons-logging - commons-logging - - - - - org.apache.hive - hive-storage-api - 4.0.1 - test - - - commons-io - commons-io - 2.17.0 - test - - - org.hamcrest - hamcrest - test - - - - - - - ${arrow.cpp.build.dir} - - **/*arrow_dataset_jni.* - - - - - - - maven-surefire-plugin - - --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.dataset,org.apache.arrow.memory.core,ALL-UNNAMED - false - - ${project.basedir}/../../testing/data - - - - - org.xolstice.maven.plugins - protobuf-maven-plugin - - - src - - compile - - - ../../cpp/src/jni/dataset/proto - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - -Werror - - - - - - diff --git a/java/dataset/src/main/cpp/jni_util.cc b/java/dataset/src/main/cpp/jni_util.cc deleted file mode 100644 index 1fd15696e6e5f..0000000000000 --- 
a/java/dataset/src/main/cpp/jni_util.cc +++ /dev/null @@ -1,377 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "jni_util.h" - -#include -#include - -#include "arrow/c/bridge.h" -#include "arrow/c/helpers.h" -#include "arrow/util/logging.h" - -namespace arrow { -namespace dataset { -namespace jni { - -jint JNI_VERSION = JNI_VERSION_10; - -class ReservationListenableMemoryPool::Impl { - public: - explicit Impl(arrow::MemoryPool* pool, std::shared_ptr listener, - int64_t block_size) - : pool_(pool), listener_(listener), block_size_(block_size), blocks_reserved_(0) {} - - arrow::Status Allocate(int64_t size, int64_t alignment, uint8_t** out) { - RETURN_NOT_OK(UpdateReservation(size)); - arrow::Status error = pool_->Allocate(size, alignment, out); - if (!error.ok()) { - RETURN_NOT_OK(UpdateReservation(-size)); - return error; - } - return arrow::Status::OK(); - } - - arrow::Status Reallocate(int64_t old_size, int64_t new_size, int64_t alignment, - uint8_t** ptr) { - bool reserved = false; - int64_t diff = new_size - old_size; - if (new_size >= old_size) { - // new_size >= old_size, pre-reserve bytes from listener before allocating - // from underlying pool - RETURN_NOT_OK(UpdateReservation(diff)); - 
reserved = true; - } - arrow::Status error = pool_->Reallocate(old_size, new_size, alignment, ptr); - if (!error.ok()) { - if (reserved) { - // roll back reservations on error - RETURN_NOT_OK(UpdateReservation(-diff)); - } - return error; - } - if (!reserved) { - // otherwise (e.g. new_size < old_size), make updates after calling underlying pool - RETURN_NOT_OK(UpdateReservation(diff)); - } - return arrow::Status::OK(); - } - - void Free(uint8_t* buffer, int64_t size, int64_t alignment) { - pool_->Free(buffer, size, alignment); - // FIXME: See ARROW-11143, currently method ::Free doesn't allow Status return - arrow::Status s = UpdateReservation(-size); - if (!s.ok()) { - ARROW_LOG(FATAL) << "Failed to update reservation while freeing bytes: " - << s.message(); - return; - } - } - - arrow::Status UpdateReservation(int64_t diff) { - int64_t granted = Reserve(diff); - if (granted == 0) { - return arrow::Status::OK(); - } - if (granted < 0) { - RETURN_NOT_OK(listener_->OnRelease(-granted)); - return arrow::Status::OK(); - } - RETURN_NOT_OK(listener_->OnReservation(granted)); - return arrow::Status::OK(); - } - - int64_t Reserve(int64_t diff) { - std::lock_guard lock(mutex_); - if (diff > 0) { - stats_.DidAllocateBytes(diff); - } else if (diff < 0) { - stats_.DidFreeBytes(-diff); - } - int64_t new_block_count; - int64_t bytes_reserved = stats_.bytes_allocated(); - if (bytes_reserved == 0) { - new_block_count = 0; - } else { - // ceil to get the required block number - new_block_count = (bytes_reserved - 1) / block_size_ + 1; - } - int64_t bytes_granted = (new_block_count - blocks_reserved_) * block_size_; - blocks_reserved_ = new_block_count; - return bytes_granted; - } - - int64_t bytes_allocated() { return stats_.bytes_allocated(); } - - int64_t max_memory() { return stats_.max_memory(); } - - int64_t total_bytes_allocated() { return stats_.total_bytes_allocated(); } - - int64_t num_allocations() { return stats_.num_allocations(); } - - std::string backend_name() { 
return pool_->backend_name(); } - - std::shared_ptr get_listener() { return listener_; } - - private: - arrow::MemoryPool* pool_; - std::shared_ptr listener_; - int64_t block_size_; - int64_t blocks_reserved_; - arrow::internal::MemoryPoolStats stats_; - std::mutex mutex_; -}; - -ReservationListenableMemoryPool::ReservationListenableMemoryPool( - MemoryPool* pool, std::shared_ptr listener, int64_t block_size) { - impl_.reset(new Impl(pool, listener, block_size)); -} - -arrow::Status ReservationListenableMemoryPool::Allocate(int64_t size, int64_t alignment, - uint8_t** out) { - return impl_->Allocate(size, alignment, out); -} - -arrow::Status ReservationListenableMemoryPool::Reallocate(int64_t old_size, - int64_t new_size, - int64_t alignment, - uint8_t** ptr) { - return impl_->Reallocate(old_size, new_size, alignment, ptr); -} - -void ReservationListenableMemoryPool::Free(uint8_t* buffer, int64_t size, - int64_t alignment) { - return impl_->Free(buffer, size, alignment); -} - -int64_t ReservationListenableMemoryPool::bytes_allocated() const { - return impl_->bytes_allocated(); -} - -int64_t ReservationListenableMemoryPool::max_memory() const { - return impl_->max_memory(); -} - -int64_t ReservationListenableMemoryPool::total_bytes_allocated() const { - return impl_->total_bytes_allocated(); -} - -int64_t ReservationListenableMemoryPool::num_allocations() const { - return impl_->num_allocations(); -} - -std::string ReservationListenableMemoryPool::backend_name() const { - return impl_->backend_name(); -} - -std::shared_ptr ReservationListenableMemoryPool::get_listener() { - return impl_->get_listener(); -} - -ReservationListenableMemoryPool::~ReservationListenableMemoryPool() {} - -std::string Describe(JNIEnv* env, jthrowable t) { - jclass describer_class = - env->FindClass("org/apache/arrow/dataset/jni/JniExceptionDescriber"); - DCHECK_NE(describer_class, nullptr); - jmethodID describe_method = env->GetStaticMethodID( - describer_class, "describe", 
"(Ljava/lang/Throwable;)Ljava/lang/String;"); - std::string description = JStringToCString( - env, (jstring)env->CallStaticObjectMethod(describer_class, describe_method, t)); - return description; -} - -bool IsErrorInstanceOf(JNIEnv* env, jthrowable t, std::string class_name) { - jclass java_class = env->FindClass(class_name.c_str()); - DCHECK_NE(java_class, nullptr) << "Could not find Java class " << class_name; - return env->IsInstanceOf(t, java_class); -} - -arrow::StatusCode MapJavaError(JNIEnv* env, jthrowable t) { - StatusCode code; - if (IsErrorInstanceOf(env, t, "org/apache/arrow/memory/OutOfMemoryException")) { - code = StatusCode::OutOfMemory; - } else if (IsErrorInstanceOf(env, t, "java/lang/UnsupportedOperationException")) { - code = StatusCode::NotImplemented; - } else if (IsErrorInstanceOf(env, t, "java/io/NotSerializableException")) { - code = StatusCode::SerializationError; - } else if (IsErrorInstanceOf(env, t, "java/io/IOException")) { - code = StatusCode::IOError; - } else if (IsErrorInstanceOf(env, t, "java/lang/IllegalArgumentException")) { - code = StatusCode::Invalid; - } else if (IsErrorInstanceOf(env, t, "java/lang/IllegalStateException")) { - code = StatusCode::Invalid; - } else { - code = StatusCode::UnknownError; - } - return code; -} - -JNIEnv* GetEnvOrAttach(JavaVM* vm) { - JNIEnv* env; - int getEnvStat = vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); - if (getEnvStat == JNI_EDETACHED) { - // Reattach current thread to JVM - getEnvStat = vm->AttachCurrentThread(reinterpret_cast(&env), nullptr); - if (getEnvStat != JNI_OK) { - ARROW_LOG(FATAL) << "Failed to attach current thread to JVM"; - } - } - return env; -} - -const char kJavaErrorDetailTypeId[] = "arrow::dataset::jni::JavaErrorDetail"; - -JavaErrorDetail::JavaErrorDetail(JavaVM* vm, jthrowable cause) : vm_(vm) { - JNIEnv* env = GetEnvOrAttach(vm_); - if (env == nullptr) { - this->cause_ = nullptr; - return; - } - this->cause_ = (jthrowable)env->NewGlobalRef(cause); -} - 
-JavaErrorDetail::~JavaErrorDetail() { - JNIEnv* env = GetEnvOrAttach(vm_); - if (env == nullptr || this->cause_ == nullptr) { - return; - } - env->DeleteGlobalRef(cause_); -} - -const char* JavaErrorDetail::type_id() const { return kJavaErrorDetailTypeId; } - -jthrowable JavaErrorDetail::GetCause() const { - JNIEnv* env = GetEnvOrAttach(vm_); - if (env == nullptr || this->cause_ == nullptr) { - return nullptr; - } - return (jthrowable)env->NewLocalRef(cause_); -} - -std::string JavaErrorDetail::ToString() const { - JNIEnv* env = GetEnvOrAttach(vm_); - if (env == nullptr) { - return "Java Exception, ID: " + std::to_string(reinterpret_cast(cause_)); - } - return "Java Exception: " + Describe(env, cause_); -} - -Status CheckException(JNIEnv* env) { - if (env->ExceptionCheck()) { - jthrowable t = env->ExceptionOccurred(); - env->ExceptionClear(); - arrow::StatusCode code = MapJavaError(env, t); - JavaVM* vm; - if (env->GetJavaVM(&vm) != JNI_OK) { - return Status::Invalid("Error getting JavaVM object"); - } - std::shared_ptr detail = std::make_shared(vm, t); - return {code, detail->ToString(), detail}; - } - return Status::OK(); -} - -jclass CreateGlobalClassReference(JNIEnv* env, const char* class_name) { - jclass local_class = env->FindClass(class_name); - jclass global_class = (jclass)env->NewGlobalRef(local_class); - env->DeleteLocalRef(local_class); - return global_class; -} - -arrow::Result GetMethodID(JNIEnv* env, jclass this_class, const char* name, - const char* sig) { - jmethodID ret = env->GetMethodID(this_class, name, sig); - if (ret == nullptr) { - std::string error_message = "Unable to find method " + std::string(name) + - " within signature" + std::string(sig); - return arrow::Status::Invalid(error_message); - } - return ret; -} - -arrow::Result GetStaticMethodID(JNIEnv* env, jclass this_class, - const char* name, const char* sig) { - jmethodID ret = env->GetStaticMethodID(this_class, name, sig); - if (ret == nullptr) { - std::string error_message = 
"Unable to find static method " + std::string(name) + - " within signature" + std::string(sig); - return arrow::Status::Invalid(error_message); - } - return ret; -} - -std::string JStringToCString(JNIEnv* env, jstring string) { - if (string == nullptr) { - return std::string(); - } - const char* chars = env->GetStringUTFChars(string, nullptr); - std::string ret(chars); - env->ReleaseStringUTFChars(string, chars); - return ret; -} - -std::vector ToStringVector(JNIEnv* env, jobjectArray& str_array) { - int length = env->GetArrayLength(str_array); - std::vector vector; - for (int i = 0; i < length; i++) { - auto string = reinterpret_cast(env->GetObjectArrayElement(str_array, i)); - vector.push_back(JStringToCString(env, string)); - } - return vector; -} - -arrow::Result ToSchemaByteArray(JNIEnv* env, - std::shared_ptr schema) { - ARROW_ASSIGN_OR_RAISE( - std::shared_ptr buffer, - arrow::ipc::SerializeSchema(*schema, arrow::default_memory_pool())) - - jbyteArray out = env->NewByteArray(static_cast(buffer->size())); - auto src = reinterpret_cast(buffer->data()); - env->SetByteArrayRegion(out, 0, static_cast(buffer->size()), src); - return out; -} - -arrow::Result> FromSchemaByteArray( - JNIEnv* env, jbyteArray schemaBytes) { - arrow::ipc::DictionaryMemo in_memo; - int schemaBytes_len = env->GetArrayLength(schemaBytes); - jbyte* schemaBytes_data = env->GetByteArrayElements(schemaBytes, nullptr); - auto serialized_schema = std::make_shared( - reinterpret_cast(schemaBytes_data), schemaBytes_len); - arrow::io::BufferReader buf_reader(serialized_schema); - ARROW_ASSIGN_OR_RAISE(std::shared_ptr schema, - arrow::ipc::ReadSchema(&buf_reader, &in_memo)) - env->ReleaseByteArrayElements(schemaBytes, schemaBytes_data, JNI_ABORT); - return schema; -} -arrow::Status ExportRecordBatch(JNIEnv* env, const std::shared_ptr& batch, - jlong struct_array) { - return arrow::ExportRecordBatch(*batch, - reinterpret_cast(struct_array)); -} - -arrow::Result> ImportRecordBatch( - JNIEnv* env, 
const std::shared_ptr& schema, jlong struct_array) { - return arrow::ImportRecordBatch(reinterpret_cast(struct_array), - schema); -} - -} // namespace jni -} // namespace dataset -} // namespace arrow diff --git a/java/dataset/src/main/cpp/jni_util.h b/java/dataset/src/main/cpp/jni_util.h deleted file mode 100644 index 20482a6c54ddd..0000000000000 --- a/java/dataset/src/main/cpp/jni_util.h +++ /dev/null @@ -1,176 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include - -#include "arrow/array.h" -#include "arrow/io/api.h" -#include "arrow/ipc/api.h" -#include "arrow/memory_pool.h" -#include "arrow/result.h" -#include "arrow/type.h" - -namespace arrow { -namespace dataset { -namespace jni { - -JNIEnv* GetEnvOrAttach(JavaVM* vm); - -Status CheckException(JNIEnv* env); - -jclass CreateGlobalClassReference(JNIEnv* env, const char* class_name); - -arrow::Result GetMethodID(JNIEnv* env, jclass this_class, const char* name, - const char* sig); - -arrow::Result GetStaticMethodID(JNIEnv* env, jclass this_class, - const char* name, const char* sig); - -std::string JStringToCString(JNIEnv* env, jstring string); - -std::vector ToStringVector(JNIEnv* env, jobjectArray& str_array); - -arrow::Result ToSchemaByteArray(JNIEnv* env, - std::shared_ptr schema); - -arrow::Result> FromSchemaByteArray(JNIEnv* env, - jbyteArray schemaBytes); - -/// \brief Export arrow::RecordBatch for Java (or other JVM languages) use. -/// The exported batch is subject to C data interface specification and can be -/// imported from Java side using provided JNI utilities. -arrow::Status ExportRecordBatch(JNIEnv* env, const std::shared_ptr& batch, - jlong struct_array); - -/// \brief Import arrow::RecordBatch from JVM language side. The input data should -/// ideally be exported from specific JNI utilities from JVM language side and should -/// conform to C data interface specification. -arrow::Result> ImportRecordBatch( - JNIEnv* env, const std::shared_ptr& schema, jlong struct_array); - -/// \brief Create a new shared_ptr on heap from shared_ptr t to prevent -/// the managed object from being garbage-collected. -/// -/// \return address of the newly created shared pointer -template -jlong CreateNativeRef(std::shared_ptr t) { - std::shared_ptr* retained_ptr = new std::shared_ptr(t); - return reinterpret_cast(retained_ptr); -} - -/// \brief Get the shared_ptr that was derived via function CreateNativeRef. 
-/// -/// \param[in] ref address of the shared_ptr -/// \return the shared_ptr object -template -std::shared_ptr RetrieveNativeInstance(jlong ref) { - std::shared_ptr* retrieved_ptr = reinterpret_cast*>(ref); - return *retrieved_ptr; -} - -/// \brief Destroy a shared_ptr using its memory address. -/// -/// \param[in] ref address of the shared_ptr -template -void ReleaseNativeRef(jlong ref) { - std::shared_ptr* retrieved_ptr = reinterpret_cast*>(ref); - delete retrieved_ptr; -} - -// Indicate an exception thrown during calling Java method via JNI. -// Not thread safe. -class JavaErrorDetail : public StatusDetail { - public: - JavaErrorDetail(JavaVM* vm, jthrowable cause); - virtual ~JavaErrorDetail(); - - const char* type_id() const override; - std::string ToString() const override; - jthrowable GetCause() const; - - private: - JavaVM* vm_; - jthrowable cause_; -}; - -/// Listener to act on reservations/unreservations from ReservationListenableMemoryPool. -/// -/// Note the memory pool will call this listener only on block-level memory -/// reservation/unreservation is granted. So the invocation parameter "size" is always -/// multiple of block size (by default, 512k) specified in memory pool. -class ReservationListener { - public: - virtual ~ReservationListener() = default; - - virtual arrow::Status OnReservation(int64_t size) = 0; - virtual arrow::Status OnRelease(int64_t size) = 0; - - protected: - ReservationListener() = default; -}; - -/// A memory pool implementation for pre-reserving memory blocks from a -/// customizable listener. This will typically be used when memory allocations -/// have to be subject to another "virtual" resource manager, which just tracks or -/// limits number of bytes of application's overall memory usage. The underlying -/// memory pool will still be responsible for actual malloc/free operations. -class ReservationListenableMemoryPool : public arrow::MemoryPool { - public: - /// \brief Constructor. 
- /// - /// \param[in] pool the underlying memory pool - /// \param[in] listener a listener for block-level reservations/releases. - /// \param[in] block_size size of each block to reserve from the listener - explicit ReservationListenableMemoryPool(MemoryPool* pool, - std::shared_ptr listener, - int64_t block_size = 512 * 1024); - - ~ReservationListenableMemoryPool(); - - using MemoryPool::Allocate; - using MemoryPool::Free; - using MemoryPool::Reallocate; - - arrow::Status Allocate(int64_t size, int64_t alignment, uint8_t** out) override; - - arrow::Status Reallocate(int64_t old_size, int64_t new_size, int64_t alignment, - uint8_t** ptr) override; - - void Free(uint8_t* buffer, int64_t size, int64_t alignment) override; - - int64_t bytes_allocated() const override; - - int64_t max_memory() const override; - - int64_t total_bytes_allocated() const override; - - int64_t num_allocations() const override; - - std::string backend_name() const override; - - std::shared_ptr get_listener(); - - private: - class Impl; - std::unique_ptr impl_; -}; - -} // namespace jni -} // namespace dataset -} // namespace arrow diff --git a/java/dataset/src/main/cpp/jni_util_test.cc b/java/dataset/src/main/cpp/jni_util_test.cc deleted file mode 100644 index a15a9322521a5..0000000000000 --- a/java/dataset/src/main/cpp/jni_util_test.cc +++ /dev/null @@ -1,135 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "jni_util.h" - -#include - -#include "arrow/memory_pool.h" -#include "arrow/testing/gtest_util.h" - -namespace arrow { -namespace dataset { -namespace jni { - -class MyListener : public ReservationListener { - public: - Status OnReservation(int64_t size) override { - bytes_reserved_ += size; - reservation_count_++; - return arrow::Status::OK(); - } - - Status OnRelease(int64_t size) override { - bytes_reserved_ -= size; - release_count_++; - return arrow::Status::OK(); - } - - int64_t bytes_reserved() { return bytes_reserved_; } - - int32_t reservation_count() const { return reservation_count_; } - - int32_t release_count() const { return release_count_; } - - private: - int64_t bytes_reserved_; - int32_t reservation_count_; - int32_t release_count_; -}; - -TEST(ReservationListenableMemoryPool, Basic) { - auto pool = MemoryPool::CreateDefault(); - auto listener = std::make_shared(); - ReservationListenableMemoryPool rlp(pool.get(), listener); - - uint8_t* data; - ASSERT_OK(rlp.Allocate(100, &data)); - - uint8_t* data2; - ASSERT_OK(rlp.Allocate(100, &data2)); - - rlp.Free(data, 100); - rlp.Free(data2, 100); - - ASSERT_EQ(200, rlp.max_memory()); - ASSERT_EQ(200, pool->max_memory()); -} - -TEST(ReservationListenableMemoryPool, Listener) { - auto pool = MemoryPool::CreateDefault(); - auto listener = std::make_shared(); - ReservationListenableMemoryPool rlp(pool.get(), listener); - - uint8_t* data; - ASSERT_OK(rlp.Allocate(100, &data)); - - uint8_t* data2; - ASSERT_OK(rlp.Allocate(100, &data2)); - - ASSERT_EQ(200, rlp.bytes_allocated()); - 
ASSERT_EQ(512 * 1024, listener->bytes_reserved()); - - rlp.Free(data, 100); - rlp.Free(data2, 100); - - ASSERT_EQ(0, rlp.bytes_allocated()); - ASSERT_EQ(0, listener->bytes_reserved()); - ASSERT_EQ(1, listener->reservation_count()); - ASSERT_EQ(1, listener->release_count()); -} - -TEST(ReservationListenableMemoryPool, BlockSize) { - auto pool = MemoryPool::CreateDefault(); - auto listener = std::make_shared(); - ReservationListenableMemoryPool rlp(pool.get(), listener, 100); - - uint8_t* data; - ASSERT_OK(rlp.Allocate(100, &data)); - - ASSERT_EQ(100, rlp.bytes_allocated()); - ASSERT_EQ(100, listener->bytes_reserved()); - - rlp.Free(data, 100); - - ASSERT_EQ(0, rlp.bytes_allocated()); - ASSERT_EQ(0, listener->bytes_reserved()); -} - -TEST(ReservationListenableMemoryPool, BlockSize2) { - auto pool = MemoryPool::CreateDefault(); - auto listener = std::make_shared(); - ReservationListenableMemoryPool rlp(pool.get(), listener, 99); - - uint8_t* data; - ASSERT_OK(rlp.Allocate(100, &data)); - - ASSERT_EQ(100, rlp.bytes_allocated()); - ASSERT_EQ(198, listener->bytes_reserved()); - - rlp.Free(data, 100); - - ASSERT_EQ(0, rlp.bytes_allocated()); - ASSERT_EQ(0, listener->bytes_reserved()); - - ASSERT_EQ(1, listener->reservation_count()); - ASSERT_EQ(1, listener->release_count()); -} - -} // namespace jni -} // namespace dataset -} // namespace arrow diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc deleted file mode 100644 index 49cc85251c8e9..0000000000000 --- a/java/dataset/src/main/cpp/jni_wrapper.cc +++ /dev/null @@ -1,1013 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include - -#include "arrow/array.h" -#include "arrow/array/concatenate.h" -#include "arrow/c/bridge.h" -#include "arrow/c/helpers.h" -#include "arrow/dataset/api.h" -#include "arrow/dataset/file_base.h" -#ifdef ARROW_CSV -#include "arrow/dataset/file_csv.h" -#endif -#include "arrow/filesystem/api.h" -#include "arrow/filesystem/path_util.h" -#include "arrow/engine/substrait/util.h" -#include "arrow/engine/substrait/serde.h" -#include "arrow/engine/substrait/relation.h" -#include "arrow/ipc/api.h" -#include "arrow/util/iterator.h" -#include "jni_util.h" -#include "org_apache_arrow_dataset_file_JniWrapper.h" -#include "org_apache_arrow_dataset_jni_JniWrapper.h" -#include "org_apache_arrow_dataset_jni_NativeMemoryPool.h" -#include "org_apache_arrow_dataset_substrait_JniWrapper.h" - -namespace { - -jclass illegal_access_exception_class; -jclass illegal_argument_exception_class; -jclass runtime_exception_class; - -jclass java_reservation_listener_class; - -jmethodID reserve_memory_method; -jmethodID unreserve_memory_method; - -jlong default_memory_pool_id = -1L; - -jint JNI_VERSION = JNI_VERSION_10; - -class JniPendingException : public std::runtime_error { - public: - explicit JniPendingException(const std::string& arg, jthrowable cause) - : runtime_error(arg), cause_(cause) {} - - jthrowable GetCause() const { return cause_; } - bool HasCause() const { return cause_ != nullptr; } - - private: - jthrowable cause_; -}; - -void ThrowPendingException(const std::string& message, jthrowable cause = nullptr) { - throw 
JniPendingException(message, cause); -} - -void ThrowIfError(const arrow::Status& status) { - const std::shared_ptr& detail = status.detail(); - const std::shared_ptr& maybe_java = - std::dynamic_pointer_cast(detail); - if (maybe_java != nullptr) { - ThrowPendingException(status.message(), maybe_java->GetCause()); - return; - } - if (!status.ok()) { - ThrowPendingException(status.message()); - } -} - -class JNIEnvGuard { - public: - explicit JNIEnvGuard(JavaVM* vm) : vm_(vm), env_(nullptr), should_detach_(false) { - JNIEnv* env; - jint code = vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); - if (code == JNI_EDETACHED) { - JavaVMAttachArgs args; - args.version = JNI_VERSION; - args.name = NULL; - args.group = NULL; - code = vm->AttachCurrentThread(reinterpret_cast(&env), &args); - should_detach_ = (code == JNI_OK); - } - if (code != JNI_OK) { - ThrowPendingException("Failed to attach the current thread to a Java VM"); - } - env_ = env; - } - - JNIEnv* env() { return env_; } - - ~JNIEnvGuard() { - if (should_detach_) { - vm_->DetachCurrentThread(); - should_detach_ = false; - } - } - - private: - JavaVM* vm_; - JNIEnv* env_; - bool should_detach_; -}; - -template -T JniGetOrThrow(arrow::Result result) { - const arrow::Status& status = result.status(); - ThrowIfError(status); - return std::move(result).ValueOrDie(); -} - -void JniAssertOkOrThrow(arrow::Status status) { ThrowIfError(status); } - -void JniThrow(std::string message) { ThrowPendingException(message); } - -arrow::Result> GetFileFormat( - jint file_format_id) { - switch (file_format_id) { - case 0: - return std::make_shared(); - case 1: - return std::make_shared(); -#ifdef ARROW_ORC - case 2: - return std::make_shared(); -#endif -#ifdef ARROW_CSV - case 3: - return std::make_shared(); -#endif -#ifdef ARROW_JSON - case 4: - return std::make_shared(); -#endif - default: - std::string error_message = - "illegal file format id: " + std::to_string(file_format_id); - return arrow::Status::Invalid(error_message); 
- } -} - -class ReserveFromJava : public arrow::dataset::jni::ReservationListener { - public: - ReserveFromJava(JavaVM* vm, jobject java_reservation_listener) - : vm_(vm), java_reservation_listener_(java_reservation_listener) {} - - arrow::Status OnReservation(int64_t size) override { - try { - JNIEnvGuard guard(vm_); - JNIEnv* env = guard.env(); - env->CallObjectMethod(java_reservation_listener_, reserve_memory_method, size); - RETURN_NOT_OK(arrow::dataset::jni::CheckException(env)); - return arrow::Status::OK(); - } catch (const JniPendingException& e) { - return arrow::Status::Invalid(e.what()); - } - } - - arrow::Status OnRelease(int64_t size) override { - try { - JNIEnvGuard guard(vm_); - JNIEnv* env = guard.env(); - env->CallObjectMethod(java_reservation_listener_, unreserve_memory_method, size); - RETURN_NOT_OK(arrow::dataset::jni::CheckException(env)); - return arrow::Status::OK(); - } catch (const JniPendingException& e) { - return arrow::Status::Invalid(e.what()); - } - } - - jobject GetJavaReservationListener() { return java_reservation_listener_; } - - private: - JavaVM* vm_; - jobject java_reservation_listener_; -}; - -/// \class DisposableScannerAdaptor -/// \brief An adaptor that iterates over a Scanner instance then returns RecordBatches -/// directly. -/// -/// This lessens the complexity of the JNI bridge to make sure it to be easier to -/// maintain. On Java-side, NativeScanner can only produces a single NativeScanTask -/// instance during its whole lifecycle. Each task stands for a DisposableScannerAdaptor -/// instance through JNI bridge. 
-/// -class DisposableScannerAdaptor { - public: - DisposableScannerAdaptor(std::shared_ptr scanner, - arrow::dataset::TaggedRecordBatchIterator batch_itr) - : scanner_(std::move(scanner)), batch_itr_(std::move(batch_itr)) {} - - static arrow::Result> Create( - std::shared_ptr scanner) { - ARROW_ASSIGN_OR_RAISE(auto batch_itr, scanner->ScanBatches()); - return std::make_shared(scanner, std::move(batch_itr)); - } - - arrow::Result> Next() { - ARROW_ASSIGN_OR_RAISE(std::shared_ptr batch, NextBatch()); - return batch; - } - - const std::shared_ptr& GetScanner() const { return scanner_; } - - private: - std::shared_ptr scanner_; - arrow::dataset::TaggedRecordBatchIterator batch_itr_; - - arrow::Result> NextBatch() { - ARROW_ASSIGN_OR_RAISE(auto batch, batch_itr_.Next()) - return batch.record_batch; - } -}; - -arrow::Result> SchemaFromColumnNames( - const std::shared_ptr& input, - const std::vector& column_names) { - std::vector> columns; - for (arrow::FieldRef ref : column_names) { - auto maybe_field = ref.GetOne(*input); - if (maybe_field.ok()) { - columns.push_back(std::move(maybe_field).ValueOrDie()); - } else { - return arrow::Status::Invalid("Partition column '", ref.ToString(), "' is not in dataset schema"); - } - } - return schema(std::move(columns))->WithMetadata(input->metadata()); -} -} // namespace - -using arrow::dataset::jni::CreateGlobalClassReference; -using arrow::dataset::jni::CreateNativeRef; -using arrow::dataset::jni::FromSchemaByteArray; -using arrow::dataset::jni::GetMethodID; -using arrow::dataset::jni::JStringToCString; -using arrow::dataset::jni::ReleaseNativeRef; -using arrow::dataset::jni::RetrieveNativeInstance; -using arrow::dataset::jni::ToSchemaByteArray; -using arrow::dataset::jni::ToStringVector; - -using arrow::dataset::jni::ReservationListenableMemoryPool; -using arrow::dataset::jni::ReservationListener; - -#define JNI_METHOD_START try { -// macro ended - -#define JNI_METHOD_END(fallback_expr) \ - } \ - catch (JniPendingException & e) 
{ \ - if (e.HasCause()) { \ - env->Throw(e.GetCause()); \ - return fallback_expr; \ - } \ - env->ThrowNew(runtime_exception_class, e.what()); \ - return fallback_expr; \ - } -// macro ended - -jint JNI_OnLoad(JavaVM* vm, void* reserved) { - JNIEnv* env; - if (vm->GetEnv(reinterpret_cast(&env), JNI_VERSION) != JNI_OK) { - return JNI_ERR; - } - JNI_METHOD_START - illegal_access_exception_class = - CreateGlobalClassReference(env, "Ljava/lang/IllegalAccessException;"); - illegal_argument_exception_class = - CreateGlobalClassReference(env, "Ljava/lang/IllegalArgumentException;"); - runtime_exception_class = - CreateGlobalClassReference(env, "Ljava/lang/RuntimeException;"); - - java_reservation_listener_class = - CreateGlobalClassReference(env, - "Lorg/apache/arrow/" - "dataset/jni/ReservationListener;"); - reserve_memory_method = - JniGetOrThrow(GetMethodID(env, java_reservation_listener_class, "reserve", "(J)V")); - unreserve_memory_method = JniGetOrThrow( - GetMethodID(env, java_reservation_listener_class, "unreserve", "(J)V")); - - default_memory_pool_id = reinterpret_cast(arrow::default_memory_pool()); - return JNI_VERSION; - JNI_METHOD_END(JNI_ERR) -} - -void JNI_OnUnload(JavaVM* vm, void* reserved) { - JNIEnv* env; - vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); - env->DeleteGlobalRef(illegal_access_exception_class); - env->DeleteGlobalRef(illegal_argument_exception_class); - env->DeleteGlobalRef(runtime_exception_class); - env->DeleteGlobalRef(java_reservation_listener_class); - - default_memory_pool_id = -1L; -} - -/// Unpack the named tables passed through JNI. -/// -/// Named tables are encoded as a string array, where every two elements -/// encode (1) the table name and (2) the address of an ArrowArrayStream -/// containing the table data. This function will eagerly read all -/// tables into Tables. 
-std::unordered_map> LoadNamedTables(JNIEnv* env, const jobjectArray& str_array) { - std::unordered_map> map_table_to_record_batch_reader; - int length = env->GetArrayLength(str_array); - if (length % 2 != 0) { - JniThrow("Cannot map odd number of array elements to key/value pairs"); - } - std::shared_ptr output_table; - for (int pos = 0; pos < length; pos++) { - auto j_string_key = reinterpret_cast(env->GetObjectArrayElement(str_array, pos)); - pos++; - auto j_string_value = reinterpret_cast(env->GetObjectArrayElement(str_array, pos)); - uintptr_t memory_address = 0; - try { - memory_address = std::stol(JStringToCString(env, j_string_value)); - } catch(const std::exception& ex) { - JniThrow("Failed to parse memory address from string value. Error: " + std::string(ex.what())); - } catch (...) { - JniThrow("Failed to parse memory address from string value."); - } - auto* arrow_stream_in = reinterpret_cast(memory_address); - std::shared_ptr readerIn = JniGetOrThrow(arrow::ImportRecordBatchReader(arrow_stream_in)); - output_table = JniGetOrThrow(readerIn->ToTable()); - map_table_to_record_batch_reader[JStringToCString(env, j_string_key)] = output_table; - } - return map_table_to_record_batch_reader; -} - -/// Find the arrow Table associated with a given table name -std::shared_ptr GetTableByName(const std::vector& names, - const std::unordered_map>& tables) { - if (names.size() != 1) { - JniThrow("Tables with hierarchical names are not supported"); - } - const auto& it = tables.find(names[0]); - if (it == tables.end()) { - JniThrow("Table is referenced, but not provided: " + names[0]); - } - return it->second; -} - -std::shared_ptr LoadArrowBufferFromByteBuffer(JNIEnv* env, jobject byte_buffer) { - const auto *buff = reinterpret_cast(env->GetDirectBufferAddress(byte_buffer)); - int length = env->GetDirectBufferCapacity(byte_buffer); - std::shared_ptr buffer = JniGetOrThrow(arrow::AllocateBuffer(length)); - std::memcpy(buffer->mutable_data(), buff, length); - return 
buffer; -} - -inline bool ParseBool(const std::string& value) { return value == "true" ? true : false; } - -inline char ParseChar(const std::string& key, const std::string& value) { - if (value.size() != 1) { - JniThrow("Option " + key + " should be a char, but is " + value); - } - return value.at(0); -} - -/// \brief Construct FragmentScanOptions from config map -#ifdef ARROW_CSV - -bool SetCsvConvertOptions(arrow::csv::ConvertOptions& options, const std::string& key, - const std::string& value) { - if (key == "column_types") { - int64_t schema_address = std::stol(value); - ArrowSchema* c_schema = reinterpret_cast(schema_address); - auto schema = JniGetOrThrow(arrow::ImportSchema(c_schema)); - auto& column_types = options.column_types; - for (auto field : schema->fields()) { - column_types[field->name()] = field->type(); - } - } else if (key == "strings_can_be_null") { - options.strings_can_be_null = ParseBool(value); - } else if (key == "check_utf8") { - options.check_utf8 = ParseBool(value); - } else if (key == "null_values") { - options.null_values = {value}; - } else if (key == "true_values") { - options.true_values = {value}; - } else if (key == "false_values") { - options.false_values = {value}; - } else if (key == "quoted_strings_can_be_null") { - options.quoted_strings_can_be_null = ParseBool(value); - } else if (key == "auto_dict_encode") { - options.auto_dict_encode = ParseBool(value); - } else if (key == "auto_dict_max_cardinality") { - options.auto_dict_max_cardinality = std::stoi(value); - } else if (key == "decimal_point") { - options.decimal_point = ParseChar(key, value); - } else if (key == "include_missing_columns") { - options.include_missing_columns = ParseBool(value); - } else { - return false; - } - return true; -} - -bool SetCsvParseOptions(arrow::csv::ParseOptions& options, const std::string& key, - const std::string& value) { - if (key == "delimiter") { - options.delimiter = ParseChar(key, value); - } else if (key == "quoting") { - 
options.quoting = ParseBool(value); - } else if (key == "quote_char") { - options.quote_char = ParseChar(key, value); - } else if (key == "double_quote") { - options.double_quote = ParseBool(value); - } else if (key == "escaping") { - options.escaping = ParseBool(value); - } else if (key == "escape_char") { - options.escape_char = ParseChar(key, value); - } else if (key == "newlines_in_values") { - options.newlines_in_values = ParseBool(value); - } else if (key == "ignore_empty_lines") { - options.ignore_empty_lines = ParseBool(value); - } else { - return false; - } - return true; -} - -bool SetCsvReadOptions(arrow::csv::ReadOptions& options, const std::string& key, - const std::string& value) { - if (key == "use_threads") { - options.use_threads = ParseBool(value); - } else if (key == "block_size") { - options.block_size = std::stoi(value); - } else if (key == "skip_rows") { - options.skip_rows = std::stoi(value); - } else if (key == "skip_rows_after_names") { - options.skip_rows_after_names = std::stoi(value); - } else if (key == "autogenerate_column_names") { - options.autogenerate_column_names = ParseBool(value); - } else { - return false; - } - return true; -} - -std::shared_ptr ToCsvFragmentScanOptions( - const std::unordered_map& configs) { - std::shared_ptr options = - std::make_shared(); - for (const auto& [key, value] : configs) { - bool setValid = SetCsvParseOptions(options->parse_options, key, value) || - SetCsvConvertOptions(options->convert_options, key, value) || - SetCsvReadOptions(options->read_options, key, value); - if (!setValid) { - JniThrow("Config " + key + " is not supported."); - } - } - return options; -} -#endif - -arrow::Result> -GetFragmentScanOptions(jint file_format_id, - const std::unordered_map& configs) { - switch (file_format_id) { -#ifdef ARROW_CSV - case 3: - return ToCsvFragmentScanOptions(configs); -#endif - default: - return arrow::Status::Invalid("Illegal file format id: ", file_format_id); - } -} - -std::unordered_map 
ToStringMap(JNIEnv* env, - jobjectArray& str_array) { - int length = env->GetArrayLength(str_array); - std::unordered_map map; - map.reserve(length / 2); - for (int i = 0; i < length; i += 2) { - auto key = reinterpret_cast(env->GetObjectArrayElement(str_array, i)); - auto value = reinterpret_cast(env->GetObjectArrayElement(str_array, i + 1)); - map[JStringToCString(env, key)] = JStringToCString(env, value); - } - return map; -} - -/* - * Class: org_apache_arrow_dataset_jni_NativeMemoryPool - * Method: getDefaultMemoryPool - * Signature: ()J - */ -JNIEXPORT jlong JNICALL -Java_org_apache_arrow_dataset_jni_NativeMemoryPool_getDefaultMemoryPool(JNIEnv* env, - jclass) { - JNI_METHOD_START - return default_memory_pool_id; - JNI_METHOD_END(-1L) -} - -/* - * Class: org_apache_arrow_dataset_jni_NativeMemoryPool - * Method: createListenableMemoryPool - * Signature: (Lorg/apache/arrow/memory/ReservationListener;)J - */ -JNIEXPORT jlong JNICALL -Java_org_apache_arrow_dataset_jni_NativeMemoryPool_createListenableMemoryPool( - JNIEnv* env, jclass, jobject jlistener) { - JNI_METHOD_START - jobject jlistener_ref = env->NewGlobalRef(jlistener); - JavaVM* vm; - if (env->GetJavaVM(&vm) != JNI_OK) { - JniThrow("Unable to get JavaVM instance"); - } - std::shared_ptr listener = - std::make_shared(vm, jlistener_ref); - auto memory_pool = - new ReservationListenableMemoryPool(arrow::default_memory_pool(), listener); - return reinterpret_cast(memory_pool); - JNI_METHOD_END(-1L) -} - -/* - * Class: org_apache_arrow_dataset_jni_NativeMemoryPool - * Method: releaseMemoryPool - * Signature: (J)V - */ -JNIEXPORT void JNICALL -Java_org_apache_arrow_dataset_jni_NativeMemoryPool_releaseMemoryPool( - JNIEnv* env, jclass, jlong memory_pool_id) { - JNI_METHOD_START - if (memory_pool_id == default_memory_pool_id) { - return; - } - ReservationListenableMemoryPool* pool = - reinterpret_cast(memory_pool_id); - if (pool == nullptr) { - return; - } - std::shared_ptr rm = - 
std::dynamic_pointer_cast(pool->get_listener()); - if (rm == nullptr) { - delete pool; - return; - } - delete pool; - env->DeleteGlobalRef(rm->GetJavaReservationListener()); - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_dataset_jni_NativeMemoryPool - * Method: bytesAllocated - * Signature: (J)J - */ -JNIEXPORT jlong JNICALL Java_org_apache_arrow_dataset_jni_NativeMemoryPool_bytesAllocated( - JNIEnv* env, jclass, jlong memory_pool_id) { - JNI_METHOD_START - arrow::MemoryPool* pool = reinterpret_cast(memory_pool_id); - if (pool == nullptr) { - JniThrow("Memory pool instance not found. It may not exist or have been closed"); - } - return pool->bytes_allocated(); - JNI_METHOD_END(-1L) -} - -/* - * Class: org_apache_arrow_dataset_jni_JniWrapper - * Method: closeDatasetFactory - * Signature: (J)V - */ -JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_closeDatasetFactory( - JNIEnv* env, jobject, jlong id) { - JNI_METHOD_START - ReleaseNativeRef(id); - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_dataset_jni_JniWrapper - * Method: inspectSchema - * Signature: (J)[B - */ -JNIEXPORT jbyteArray JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_inspectSchema( - JNIEnv* env, jobject, jlong dataset_factor_id) { - JNI_METHOD_START - std::shared_ptr d = - RetrieveNativeInstance(dataset_factor_id); - std::shared_ptr schema = JniGetOrThrow(d->Inspect()); - return JniGetOrThrow(ToSchemaByteArray(env, schema)); - JNI_METHOD_END(nullptr) -} - -/* - * Class: org_apache_arrow_dataset_jni_JniWrapper - * Method: createDataset - * Signature: (J[B)J - */ -JNIEXPORT jlong JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_createDataset( - JNIEnv* env, jobject, jlong dataset_factory_id, jbyteArray schema_bytes) { - JNI_METHOD_START - std::shared_ptr d = - RetrieveNativeInstance(dataset_factory_id); - std::shared_ptr schema; - schema = JniGetOrThrow(FromSchemaByteArray(env, schema_bytes)); - std::shared_ptr dataset = JniGetOrThrow(d->Finish(schema)); - 
return CreateNativeRef(dataset); - JNI_METHOD_END(-1L) -} - -/* - * Class: org_apache_arrow_dataset_jni_JniWrapper - * Method: closeDataset - * Signature: (J)V - */ -JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_closeDataset( - JNIEnv* env, jobject, jlong id) { - JNI_METHOD_START - ReleaseNativeRef(id); - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_dataset_jni_JniWrapper - * Method: createScanner - * Signature: - * (J[Ljava/lang/String;Ljava/nio/ByteBuffer;Ljava/nio/ByteBuffer;JI;[Ljava/lang/String;J)J - */ -JNIEXPORT jlong JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_createScanner( - JNIEnv* env, jobject, jlong dataset_id, jobjectArray columns, - jobject substrait_projection, jobject substrait_filter, jlong batch_size, - jint file_format_id, jobjectArray options, jlong memory_pool_id) { - JNI_METHOD_START - arrow::MemoryPool* pool = reinterpret_cast(memory_pool_id); - if (pool == nullptr) { - JniThrow("Memory pool does not exist or has been closed"); - } - std::shared_ptr dataset = - RetrieveNativeInstance(dataset_id); - std::shared_ptr scanner_builder = - JniGetOrThrow(dataset->NewScan()); - JniAssertOkOrThrow(scanner_builder->Pool(pool)); - if (columns != nullptr) { - std::vector column_vector = ToStringVector(env, columns); - JniAssertOkOrThrow(scanner_builder->Project(column_vector)); - } - if (substrait_projection != nullptr) { - std::shared_ptr buffer = LoadArrowBufferFromByteBuffer(env, - substrait_projection); - std::vector project_exprs; - std::vector project_names; - arrow::engine::BoundExpressions bounded_expression = - JniGetOrThrow(arrow::engine::DeserializeExpressions(*buffer)); - for(arrow::engine::NamedExpression& named_expression : - bounded_expression.named_expressions) { - project_exprs.push_back(std::move(named_expression.expression)); - project_names.push_back(std::move(named_expression.name)); - } - JniAssertOkOrThrow(scanner_builder->Project(std::move(project_exprs), std::move(project_names))); - } - 
if (substrait_filter != nullptr) { - std::shared_ptr buffer = LoadArrowBufferFromByteBuffer(env, - substrait_filter); - std::optional filter_expr = std::nullopt; - arrow::engine::BoundExpressions bounded_expression = - JniGetOrThrow(arrow::engine::DeserializeExpressions(*buffer)); - for(arrow::engine::NamedExpression& named_expression : - bounded_expression.named_expressions) { - filter_expr = named_expression.expression; - if (named_expression.expression.type()->id() == arrow::Type::BOOL) { - filter_expr = named_expression.expression; - } else { - JniThrow("There is no filter expression in the expression provided"); - } - } - if (filter_expr == std::nullopt) { - JniThrow("The filter expression has not been provided"); - } - JniAssertOkOrThrow(scanner_builder->Filter(*filter_expr)); - } - if (file_format_id != -1 && options != nullptr) { - std::unordered_map option_map = ToStringMap(env, options); - std::shared_ptr scan_options = - JniGetOrThrow(GetFragmentScanOptions(file_format_id, option_map)); - JniAssertOkOrThrow(scanner_builder->FragmentScanOptions(scan_options)); - } - JniAssertOkOrThrow(scanner_builder->BatchSize(batch_size)); - - auto scanner = JniGetOrThrow(scanner_builder->Finish()); - std::shared_ptr scanner_adaptor = - JniGetOrThrow(DisposableScannerAdaptor::Create(scanner)); - jlong id = CreateNativeRef(scanner_adaptor); - return id; - JNI_METHOD_END(-1L) -} - -/* - * Class: org_apache_arrow_dataset_jni_JniWrapper - * Method: closeScanner - * Signature: (J)V - */ -JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_closeScanner( - JNIEnv* env, jobject, jlong scanner_id) { - JNI_METHOD_START - ReleaseNativeRef(scanner_id); - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_dataset_jni_JniWrapper - * Method: getSchemaFromScanner - * Signature: (J)[B - */ -JNIEXPORT jbyteArray JNICALL -Java_org_apache_arrow_dataset_jni_JniWrapper_getSchemaFromScanner(JNIEnv* env, jobject, - jlong scanner_id) { - JNI_METHOD_START - std::shared_ptr 
schema = - RetrieveNativeInstance(scanner_id) - ->GetScanner() - ->options() - ->projected_schema; - return JniGetOrThrow(ToSchemaByteArray(env, schema)); - JNI_METHOD_END(nullptr) -} - -/* - * Class: org_apache_arrow_dataset_jni_JniWrapper - * Method: nextRecordBatch - * Signature: (JJ)Z - */ -JNIEXPORT jboolean JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_nextRecordBatch( - JNIEnv* env, jobject, jlong scanner_id, jlong struct_array) { - JNI_METHOD_START - std::shared_ptr scanner_adaptor = - RetrieveNativeInstance(scanner_id); - - std::shared_ptr record_batch = - JniGetOrThrow(scanner_adaptor->Next()); - if (record_batch == nullptr) { - return false; // stream ended - } - std::vector> offset_zeroed_arrays; - for (int i = 0; i < record_batch->num_columns(); ++i) { - // TODO: If the array has an offset then we need to de-offset the array - // in order for it to be properly consumed on the Java end. - // This forces a copy, it would be nice to avoid this if Java - // could consume offset-arrays. Perhaps at some point in the future - // using the C data interface. See ARROW-15275 - // - // Generally a non-zero offset will occur whenever the scanner batch - // size is smaller than the batch size of the underlying files. 
- std::shared_ptr array = record_batch->column(i); - if (array->offset() == 0) { - offset_zeroed_arrays.push_back(array); - continue; - } - std::shared_ptr offset_zeroed = - JniGetOrThrow(arrow::Concatenate({array})); - offset_zeroed_arrays.push_back(offset_zeroed); - } - - std::shared_ptr offset_zeroed_batch = arrow::RecordBatch::Make( - record_batch->schema(), record_batch->num_rows(), offset_zeroed_arrays); - JniAssertOkOrThrow( - arrow::dataset::jni::ExportRecordBatch(env, offset_zeroed_batch, struct_array)); - return true; - JNI_METHOD_END(false) -} - -/* - * Class: org_apache_arrow_dataset_jni_JniWrapper - * Method: releaseBuffer - * Signature: (J)V - */ -JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_releaseBuffer( - JNIEnv* env, jobject, jlong id) { - JNI_METHOD_START - ReleaseNativeRef(id); - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_dataset_jni_JniWrapper - * Method: ensureS3Finalized - * Signature: (J)V - */ -JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_ensureS3Finalized( - JNIEnv* env, jobject) { - JNI_METHOD_START -#ifdef ARROW_S3 - JniAssertOkOrThrow(arrow::fs::EnsureS3Finalized()); -#endif - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_dataset_file_JniWrapper - * Method: makeFileSystemDatasetFactory - * Signature: (Ljava/lang/String;II;Ljava/lang/String;Ljava/lang/String)J - */ -JNIEXPORT jlong JNICALL -Java_org_apache_arrow_dataset_file_JniWrapper_makeFileSystemDatasetFactory( - JNIEnv* env, jobject, jstring uri, jint file_format_id, jobjectArray options) { - JNI_METHOD_START - std::shared_ptr file_format = - JniGetOrThrow(GetFileFormat(file_format_id)); - if (options != nullptr) { - std::unordered_map option_map = ToStringMap(env, options); - std::shared_ptr scan_options = - JniGetOrThrow(GetFragmentScanOptions(file_format_id, option_map)); - file_format->default_fragment_scan_options = scan_options; -#ifdef ARROW_CSV - if (file_format_id == 3) { - std::shared_ptr csv_file_format = - 
std::dynamic_pointer_cast(file_format); - csv_file_format->parse_options = - std::dynamic_pointer_cast(scan_options) - ->parse_options; - } -#endif - } - arrow::dataset::FileSystemFactoryOptions options; - std::shared_ptr d = - JniGetOrThrow(arrow::dataset::FileSystemDatasetFactory::Make( - JStringToCString(env, uri), file_format, options)); - return CreateNativeRef(d); - JNI_METHOD_END(-1L) -} - -/* - * Class: org_apache_arrow_dataset_file_JniWrapper - * Method: makeFileSystemDatasetFactoryWithFiles - * Signature: ([Ljava/lang/String;II;[Ljava/lang/String)J - */ -JNIEXPORT jlong JNICALL -Java_org_apache_arrow_dataset_file_JniWrapper_makeFileSystemDatasetFactoryWithFiles( - JNIEnv* env, jobject, jobjectArray uris, jint file_format_id, jobjectArray options) { - JNI_METHOD_START - - std::shared_ptr file_format = - JniGetOrThrow(GetFileFormat(file_format_id)); - if (options != nullptr) { - std::unordered_map option_map = ToStringMap(env, options); - std::shared_ptr scan_options = - JniGetOrThrow(GetFragmentScanOptions(file_format_id, option_map)); - file_format->default_fragment_scan_options = scan_options; -#ifdef ARROW_CSV - if (file_format_id == 3) { - std::shared_ptr csv_file_format = - std::dynamic_pointer_cast(file_format); - csv_file_format->parse_options = - std::dynamic_pointer_cast(scan_options) - ->parse_options; - } -#endif - } - arrow::dataset::FileSystemFactoryOptions options; - - std::vector uri_vec = ToStringVector(env, uris); - if (uri_vec.size() == 0) { - JniThrow("No URIs provided."); - } - - // If not all URIs, throw exception - if (auto elem = std::find_if_not(uri_vec.begin(), uri_vec.end(), arrow::fs::internal::IsLikelyUri); - elem != uri_vec.end()) { - JniThrow("Unrecognized file type in URI: " + *elem); - } - - std::vector output_paths; - std::string first_path; - // We know that uri_vec isn't empty, from the conditional above - auto fs = JniGetOrThrow(arrow::fs::FileSystemFromUri(uri_vec[0], &first_path)); - output_paths.push_back(first_path); 
- - std::transform(uri_vec.begin() + 1, uri_vec.end(), std::back_inserter(output_paths), - [&](const auto& s) -> std::string { - auto result = JniGetOrThrow(fs->PathFromUri(s)); - return std::move(result); - }); - - std::shared_ptr d = - JniGetOrThrow(arrow::dataset::FileSystemDatasetFactory::Make( - std::move(fs), std::move(output_paths), file_format, options)); - return CreateNativeRef(d); - JNI_METHOD_END(-1L) -} - -/* - * Class: org_apache_arrow_dataset_file_JniWrapper - * Method: writeFromScannerToFile - * Signature: - * (JJJLjava/lang/String;[Ljava/lang/String;ILjava/lang/String;)V - */ -JNIEXPORT void JNICALL -Java_org_apache_arrow_dataset_file_JniWrapper_writeFromScannerToFile( - JNIEnv* env, jobject, jlong c_arrow_array_stream_address, - jlong file_format_id, jstring uri, jobjectArray partition_columns, - jint max_partitions, jstring base_name_template) { - JNI_METHOD_START - JavaVM* vm; - if (env->GetJavaVM(&vm) != JNI_OK) { - JniThrow("Unable to get JavaVM instance"); - } - - auto* arrow_stream = reinterpret_cast(c_arrow_array_stream_address); - std::shared_ptr reader = - JniGetOrThrow(arrow::ImportRecordBatchReader(arrow_stream)); - std::shared_ptr scanner_builder = - arrow::dataset::ScannerBuilder::FromRecordBatchReader(reader); - JniAssertOkOrThrow(scanner_builder->Pool(arrow::default_memory_pool())); - auto scanner = JniGetOrThrow(scanner_builder->Finish()); - - std::shared_ptr schema = reader->schema(); - - std::shared_ptr file_format = - JniGetOrThrow(GetFileFormat(file_format_id)); - arrow::dataset::FileSystemDatasetWriteOptions options; - std::string output_path; - auto filesystem = JniGetOrThrow( - arrow::fs::FileSystemFromUri(JStringToCString(env, uri), &output_path)); - std::vector partition_column_vector = - ToStringVector(env, partition_columns); - options.file_write_options = file_format->DefaultWriteOptions(); - options.filesystem = filesystem; - options.base_dir = output_path; - options.basename_template = JStringToCString(env, 
base_name_template); - options.partitioning = std::make_shared( - SchemaFromColumnNames(schema, partition_column_vector).ValueOrDie()); - options.max_partitions = max_partitions; - JniAssertOkOrThrow(arrow::dataset::FileSystemDataset::Write(options, scanner)); - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_dataset_substrait_JniWrapper - * Method: executeSerializedPlan - * Signature: (Ljava/lang/String;[Ljava/lang/String;J)V - */ -JNIEXPORT void JNICALL - Java_org_apache_arrow_dataset_substrait_JniWrapper_executeSerializedPlan__Ljava_lang_String_2_3Ljava_lang_String_2J ( - JNIEnv* env, jobject, jstring plan, jobjectArray table_to_memory_address_input, - jlong memory_address_output) { - JNI_METHOD_START - // get mapping of table name to memory address - std::unordered_map> map_table_to_reader = - LoadNamedTables(env, table_to_memory_address_input); - // create table provider - arrow::engine::NamedTableProvider table_provider = - [&map_table_to_reader](const std::vector& names, const arrow::Schema&) { - std::shared_ptr output_table = GetTableByName(names, map_table_to_reader); - std::shared_ptr options = - std::make_shared(std::move(output_table)); - return arrow::acero::Declaration("table_source", {}, options, "java_source"); - }; - arrow::engine::ConversionOptions conversion_options; - conversion_options.named_table_provider = std::move(table_provider); - // execute plan - std::shared_ptr buffer = JniGetOrThrow(arrow::engine::SerializeJsonPlan( - JStringToCString(env, plan))); - std::shared_ptr reader_out = - JniGetOrThrow(arrow::engine::ExecuteSerializedPlan(*buffer, nullptr, nullptr, conversion_options)); - auto* arrow_stream_out = reinterpret_cast(memory_address_output); - JniAssertOkOrThrow(arrow::ExportRecordBatchReader(reader_out, arrow_stream_out)); - JNI_METHOD_END() -} - -/* - * Class: org_apache_arrow_dataset_substrait_JniWrapper - * Method: executeSerializedPlan - * Signature: (Ljava/nio/ByteBuffer;[Ljava/lang/String;J)V - */ -JNIEXPORT void 
JNICALL - Java_org_apache_arrow_dataset_substrait_JniWrapper_executeSerializedPlan__Ljava_nio_ByteBuffer_2_3Ljava_lang_String_2J ( - JNIEnv* env, jobject, jobject plan, jobjectArray table_to_memory_address_input, - jlong memory_address_output) { - JNI_METHOD_START - // get mapping of table name to memory address - std::unordered_map> map_table_to_reader = - LoadNamedTables(env, table_to_memory_address_input); - // create table provider - arrow::engine::NamedTableProvider table_provider = - [&map_table_to_reader](const std::vector& names, const arrow::Schema&) { - std::shared_ptr output_table = GetTableByName(names, map_table_to_reader); - std::shared_ptr options = - std::make_shared(std::move(output_table)); - return arrow::acero::Declaration("table_source", {}, options, "java_source"); - }; - arrow::engine::ConversionOptions conversion_options; - conversion_options.named_table_provider = std::move(table_provider); - // mapping arrow::Buffer - std::shared_ptr buffer = LoadArrowBufferFromByteBuffer(env, plan); - // execute plan - std::shared_ptr reader_out = - JniGetOrThrow(arrow::engine::ExecuteSerializedPlan(*buffer, nullptr, nullptr, conversion_options)); - auto* arrow_stream_out = reinterpret_cast(memory_address_output); - JniAssertOkOrThrow(arrow::ExportRecordBatchReader(reader_out, arrow_stream_out)); - JNI_METHOD_END() -} diff --git a/java/dataset/src/main/java/module-info.java b/java/dataset/src/main/java/module-info.java deleted file mode 100644 index 3092281a97eae..0000000000000 --- a/java/dataset/src/main/java/module-info.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. 
You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -open module org.apache.arrow.dataset { - exports org.apache.arrow.dataset.file; - exports org.apache.arrow.dataset.source; - exports org.apache.arrow.dataset.jni; - exports org.apache.arrow.dataset.substrait; - exports org.apache.arrow.dataset.scanner; - - requires org.apache.arrow.c; - requires org.apache.arrow.memory.core; - requires org.apache.arrow.vector; -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/file/DatasetFileWriter.java b/java/dataset/src/main/java/org/apache/arrow/dataset/file/DatasetFileWriter.java deleted file mode 100644 index 2d3b43180bd00..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/file/DatasetFileWriter.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.dataset.file; - -import org.apache.arrow.c.ArrowArrayStream; -import org.apache.arrow.c.Data; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.ipc.ArrowReader; - -/** - * JNI-based utility to write datasets into files. It internally depends on C++ static method - * FileSystemDataset::Write. - */ -public class DatasetFileWriter { - - /** - * Write the contents of an ArrowReader as a dataset. - * - * @param reader the datasource for writing - * @param format target file format - * @param uri target file uri - * @param maxPartitions maximum partitions to be included in written files - * @param partitionColumns columns used to partition output files. Empty to disable partitioning - * @param baseNameTemplate file name template used to make partitions. E.g. "dat_{i}", i is - * current partition ID around all written files. - */ - public static void write( - BufferAllocator allocator, - ArrowReader reader, - FileFormat format, - String uri, - String[] partitionColumns, - int maxPartitions, - String baseNameTemplate) { - try (final ArrowArrayStream stream = ArrowArrayStream.allocateNew(allocator)) { - Data.exportArrayStream(allocator, reader, stream); - JniWrapper.get() - .writeFromScannerToFile( - stream.memoryAddress(), - format.id(), - uri, - partitionColumns, - maxPartitions, - baseNameTemplate); - } - } - - /** - * Write the contents of an ArrowReader as a dataset, with default partitioning settings. 
- * - * @param reader the datasource for writing - * @param format target file format - * @param uri target file uri - */ - public static void write( - BufferAllocator allocator, ArrowReader reader, FileFormat format, String uri) { - write(allocator, reader, format, uri, new String[0], 1024, "data_{i}"); - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java b/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java deleted file mode 100644 index 843a72d5aa2a3..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileFormat.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.file; - -/** File format definitions. 
*/ -public enum FileFormat { - PARQUET(0), - ARROW_IPC(1), - ORC(2), - CSV(3), - JSON(4), - NONE(-1); - - private final int id; - - FileFormat(int id) { - this.id = id; - } - - public int id() { - return id; - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileSystemDatasetFactory.java b/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileSystemDatasetFactory.java deleted file mode 100644 index fcf124a61f812..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/file/FileSystemDatasetFactory.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.file; - -import java.util.Optional; -import org.apache.arrow.dataset.jni.NativeDatasetFactory; -import org.apache.arrow.dataset.jni.NativeMemoryPool; -import org.apache.arrow.dataset.scanner.FragmentScanOptions; -import org.apache.arrow.memory.BufferAllocator; - -/** Java binding of the C++ FileSystemDatasetFactory. 
*/ -public class FileSystemDatasetFactory extends NativeDatasetFactory { - - public FileSystemDatasetFactory( - BufferAllocator allocator, NativeMemoryPool memoryPool, FileFormat format, String uri) { - super(allocator, memoryPool, createNative(format, uri, Optional.empty())); - } - - public FileSystemDatasetFactory( - BufferAllocator allocator, - NativeMemoryPool memoryPool, - FileFormat format, - String uri, - Optional fragmentScanOptions) { - super(allocator, memoryPool, createNative(format, uri, fragmentScanOptions)); - } - - public FileSystemDatasetFactory( - BufferAllocator allocator, NativeMemoryPool memoryPool, FileFormat format, String[] uris) { - super(allocator, memoryPool, createNative(format, uris, Optional.empty())); - } - - public FileSystemDatasetFactory( - BufferAllocator allocator, - NativeMemoryPool memoryPool, - FileFormat format, - String[] uris, - Optional fragmentScanOptions) { - super(allocator, memoryPool, createNative(format, uris, fragmentScanOptions)); - } - - private static long createNative( - FileFormat format, String uri, Optional fragmentScanOptions) { - return JniWrapper.get() - .makeFileSystemDatasetFactory( - uri, format.id(), fragmentScanOptions.map(FragmentScanOptions::serialize).orElse(null)); - } - - private static long createNative( - FileFormat format, String[] uris, Optional fragmentScanOptions) { - return JniWrapper.get() - .makeFileSystemDatasetFactoryWithFiles( - uris, - format.id(), - fragmentScanOptions.map(FragmentScanOptions::serialize).orElse(null)); - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/file/JniWrapper.java b/java/dataset/src/main/java/org/apache/arrow/dataset/file/JniWrapper.java deleted file mode 100644 index d2f842f99e588..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/file/JniWrapper.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.file; - -import org.apache.arrow.dataset.jni.JniLoader; - -/** - * JniWrapper for filesystem based {@link org.apache.arrow.dataset.source.Dataset} implementations. - */ -public class JniWrapper { - - private static final JniWrapper INSTANCE = new JniWrapper(); - - public static JniWrapper get() { - JniLoader.get().ensureLoaded(); - return INSTANCE; - } - - private JniWrapper() {} - - /** - * Create FileSystemDatasetFactory and return its native pointer. The pointer is pointing to a - * intermediate shared_ptr of the factory instance. - * - * @param uri file uri to read, either a file or a directory - * @param fileFormat file format ID. - * @param serializedFragmentScanOptions serialized FragmentScanOptions. - * @return the native pointer of the arrow::dataset::FileSystemDatasetFactory instance. - * @see FileFormat - */ - public native long makeFileSystemDatasetFactory( - String uri, int fileFormat, String[] serializedFragmentScanOptions); - - /** - * Create FileSystemDatasetFactory and return its native pointer. The pointer is pointing to a - * intermediate shared_ptr of the factory instance. - * - * @param uris List of file uris to read, each path pointing to an individual file - * @param fileFormat file format ID. 
- * @param serializedFragmentScanOptions serialized FragmentScanOptions. - * @return the native pointer of the arrow::dataset::FileSystemDatasetFactory instance. - * @see FileFormat - */ - public native long makeFileSystemDatasetFactoryWithFiles( - String[] uris, int fileFormat, String[] serializedFragmentScanOptions); - - /** - * Write the content in a {@link org.apache.arrow.c.ArrowArrayStream} into files. This internally - * depends on C++ write API: FileSystemDataset::Write. - * - * @param streamAddress the ArrowArrayStream address - * @param fileFormat target file format (ID) - * @param uri target file uri - * @param partitionColumns columns used to partition output files - * @param maxPartitions maximum partitions to be included in written files - * @param baseNameTemplate file name template used to make partitions. E.g. "dat_{i}", i is - * current partition ID around all written files. - */ - public native void writeFromScannerToFile( - long streamAddress, - long fileFormat, - String uri, - String[] partitionColumns, - int maxPartitions, - String baseNameTemplate); -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/DirectReservationListener.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/DirectReservationListener.java deleted file mode 100644 index 078affa750297..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/DirectReservationListener.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.jni; - -import java.lang.reflect.Field; -import java.lang.reflect.Method; -import java.util.concurrent.atomic.AtomicLong; -import org.apache.arrow.util.VisibleForTesting; - -/** - * Reserving Java direct memory bytes from java.nio.Bits. Used by Java Dataset API's C++ memory pool - * implementation. This makes memory allocated by the pool to be controlled by JVM option - * "-XX:MaxDirectMemorySize". - */ -public class DirectReservationListener implements ReservationListener { - private final Method methodReserve; - private final Method methodUnreserve; - - private DirectReservationListener() { - try { - final Class classBits = Class.forName("java.nio.Bits"); - methodReserve = this.getDeclaredMethodBaseOnJDKVersion(classBits, "reserveMemory"); - methodReserve.setAccessible(true); - methodUnreserve = this.getDeclaredMethodBaseOnJDKVersion(classBits, "unreserveMemory"); - methodUnreserve.setAccessible(true); - } catch (Exception e) { - final RuntimeException failure = - new RuntimeException( - "Failed to initialize DirectReservationListener. 
When starting Java you must include " - + "`--add-opens=java.base/java.nio=org.apache.arrow.dataset,org.apache.arrow.memory.core,ALL-UNNAMED` " - + "(See https://arrow.apache.org/docs/java/install.html)", - e); - failure.printStackTrace(); - throw failure; - } - } - - private static final DirectReservationListener INSTANCE = new DirectReservationListener(); - - public static DirectReservationListener instance() { - return INSTANCE; - } - - /** Reserve bytes by invoking java.nio.java.Bitjava.nio.Bitss#reserveMemory. */ - @Override - public void reserve(long size) { - try { - if (size > Integer.MAX_VALUE) { - throw new IllegalArgumentException( - "reserve size should not be larger than Integer.MAX_VALUE (0x7fffffff)"); - } - methodReserve.invoke(null, (int) size, (int) size); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - /** Unreserve bytes by invoking java.nio.java.Bitjava.nio.Bitss#unreserveMemory. */ - @Override - public void unreserve(long size) { - try { - if (size > Integer.MAX_VALUE) { - throw new IllegalArgumentException( - "unreserve size should not be larger than Integer.MAX_VALUE (0x7fffffff)"); - } - methodUnreserve.invoke(null, (int) size, (int) size); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - /** Get current reservation of jVM direct memory. Visible for testing. */ - @VisibleForTesting - public long getCurrentDirectMemReservation() { - try { - final Class classBits = Class.forName("java.nio.Bits"); - Field f; - try { - f = classBits.getDeclaredField("reservedMemory"); - } catch (NoSuchFieldException e) { - try { - f = classBits.getDeclaredField("RESERVED_MEMORY"); - } catch (NoSuchFieldException ex) { - throw new AssertionError(ex); - } - } - f.setAccessible(true); - return ((AtomicLong) f.get(null)).get(); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - /** - * Get the given method via reflection, searching for different signatures based on the Java - * version. 
- * - * @param classBits The java.nio.Bits class. - * @param name The method being requested. - * @return The method object. - */ - private Method getDeclaredMethodBaseOnJDKVersion(Class classBits, String name) { - try { - return classBits.getDeclaredMethod(name, long.class, int.class); - } catch (NoSuchMethodException e) { - try { - return classBits.getDeclaredMethod(name, long.class, long.class); - } catch (NoSuchMethodException ex) { - throw new AssertionError(ex); - } - } - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniExceptionDescriber.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniExceptionDescriber.java deleted file mode 100644 index 902d21befc993..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniExceptionDescriber.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.jni; - -import java.io.PrintWriter; -import java.io.StringWriter; - -/** For native code to invoke to convert a java/lang/Throwable to jstring. */ -class JniExceptionDescriber { - private JniExceptionDescriber() {} - - /** - * Convert a java/lang/Throwable to jstring. 
See codes in arrow::dataset::jni::CheckException for - * more details. - * - * @param throwable the exception instance. - * @return a String including error message and stack trace of the exception. - */ - static String describe(Throwable throwable) { - StringWriter sw = new StringWriter(); - PrintWriter pw = new PrintWriter(sw, true); - throwable.printStackTrace(pw); - return sw.getBuffer().toString(); - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniLoader.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniLoader.java deleted file mode 100644 index 631b8b1bbed66..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniLoader.java +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.dataset.jni; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.StandardCopyOption; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Locale; -import java.util.Set; - -/** The JniLoader for Dataset API's native implementation. */ -public final class JniLoader { - - private static final JniLoader INSTANCE = - new JniLoader(Collections.singletonList("arrow_dataset_jni")); - - public static JniLoader get() { - return INSTANCE; - } - - private final Set librariesToLoad; - - private JniLoader(List libraryNames) { - librariesToLoad = new HashSet<>(libraryNames); - } - - private boolean finished() { - return librariesToLoad.isEmpty(); - } - - /** If required JNI libraries are not loaded, then load them. */ - public void ensureLoaded() { - if (finished()) { - return; - } - loadRemaining(); - ensureS3FinalizedOnShutdown(); - } - - private synchronized void loadRemaining() { - // The method is protected by a mutex via synchronized, if more than one thread race to call - // loadRemaining, at same time only one will do the actual loading and the others will wait for - // the mutex to be acquired then check on the remaining list: if there are libraries that were - // not - // successfully loaded then the mutex owner will try to load them again. 
- if (finished()) { - return; - } - List libs = new ArrayList<>(librariesToLoad); - for (String lib : libs) { - load(lib); - librariesToLoad.remove(lib); - } - } - - private void load(String name) { - final String libraryToLoad = - name + "/" + getNormalizedArch() + "/" + System.mapLibraryName(name); - try { - File temp = - File.createTempFile("jnilib-", ".tmp", new File(System.getProperty("java.io.tmpdir"))); - temp.deleteOnExit(); - try (final InputStream is = - JniWrapper.class.getClassLoader().getResourceAsStream(libraryToLoad)) { - if (is == null) { - throw new FileNotFoundException(libraryToLoad); - } - Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); - System.load(temp.getAbsolutePath()); - } - } catch (IOException e) { - throw new IllegalStateException("error loading native libraries: " + e); - } - } - - private String getNormalizedArch() { - String arch = System.getProperty("os.arch").toLowerCase(Locale.US); - switch (arch) { - case "amd64": - arch = "x86_64"; - break; - case "aarch64": - arch = "aarch_64"; - break; - default: - break; - } - return arch; - } - - private void ensureS3FinalizedOnShutdown() { - Runtime.getRuntime() - .addShutdownHook( - new Thread( - () -> { - JniWrapper.get().ensureS3Finalized(); - })); - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java deleted file mode 100644 index 6637c113d9edc..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.jni; - -import java.nio.ByteBuffer; - -/** JNI wrapper for Dataset API's native implementation. */ -public class JniWrapper { - - private static final JniWrapper INSTANCE = new JniWrapper(); - - public static JniWrapper get() { - JniLoader.get().ensureLoaded(); - return INSTANCE; - } - - private JniWrapper() {} - - /** - * Release the DatasetFactory by destroying its reference held by JNI wrapper. - * - * @param datasetFactoryId the native pointer of the arrow::dataset::DatasetFactory instance. - */ - public native void closeDatasetFactory(long datasetFactoryId); - - /** - * Get a serialized schema from native instance of a DatasetFactory. - * - * @param datasetFactoryId the native pointer of the arrow::dataset::DatasetFactory instance. - * @return the serialized schema - * @see org.apache.arrow.vector.types.pojo.Schema - */ - public native byte[] inspectSchema(long datasetFactoryId); - - /** - * Create Dataset from a DatasetFactory and get the native pointer of the Dataset. - * - * @param datasetFactoryId the native pointer of the arrow::dataset::DatasetFactory instance. - * @param schema the predefined schema of the resulting Dataset. - * @return the native pointer of the arrow::dataset::Dataset instance. - */ - public native long createDataset(long datasetFactoryId, byte[] schema); - - /** - * Release the Dataset by destroying its reference held by JNI wrapper. - * - * @param datasetId the native pointer of the arrow::dataset::Dataset instance. 
- */ - public native void closeDataset(long datasetId); - - /** - * Create Scanner from a Dataset and get the native pointer of the Dataset. - * - * @param datasetId the native pointer of the arrow::dataset::Dataset instance. - * @param columns desired column names. Columns not in this list will not be emitted when - * performing scan operation. Null equals to "all columns". - * @param substraitProjection substrait extended expression to evaluate for project new columns - * @param substraitFilter substrait extended expression to evaluate for apply filter - * @param batchSize batch size of scanned record batches. - * @param fileFormat file format ID. - * @param serializedFragmentScanOptions serialized FragmentScanOptions. - * @param memoryPool identifier of memory pool used in the native scanner. - * @return the native pointer of the arrow::dataset::Scanner instance. - */ - public native long createScanner( - long datasetId, - String[] columns, - ByteBuffer substraitProjection, - ByteBuffer substraitFilter, - long batchSize, - int fileFormat, - String[] serializedFragmentScanOptions, - long memoryPool); - - /** - * Get a serialized schema from native instance of a Scanner. - * - * @param scannerId the native pointer of the arrow::dataset::Scanner instance. - * @return the serialized schema - * @see org.apache.arrow.vector.types.pojo.Schema - */ - public native byte[] getSchemaFromScanner(long scannerId); - - /** - * Release the Scanner by destroying its reference held by JNI wrapper. - * - * @param scannerId the native pointer of the arrow::dataset::Scanner instance. - */ - public native void closeScanner(long scannerId); - - /** - * Read next record batch from the specified scanner. - * - * @param scannerId the native pointer of the arrow::dataset::Scanner instance. - * @param arrowArray pointer to an empty {@link org.apache.arrow.c.ArrowArray} struct to store C++ - * side record batch that conforms to C data interface. 
- * @return true if valid record batch is returned; false if stream ended. - */ - public native boolean nextRecordBatch(long scannerId, long arrowArray); - - /** - * Release the Buffer by destroying its reference held by JNI wrapper. - * - * @param bufferId the native pointer of the arrow::Buffer instance. - */ - public native void releaseBuffer(long bufferId); - - /** - * Ensure the S3 APIs are shutdown, but only if not already done. If the S3 APIs are - * uninitialized, then this is a noop. - */ - public native void ensureS3Finalized(); -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeContext.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeContext.java deleted file mode 100644 index 175cd5af9fee5..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeContext.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.jni; - -import org.apache.arrow.memory.BufferAllocator; - -/** Context for relevant classes of NativeDataset. */ -public class NativeContext { - private final BufferAllocator allocator; - private final NativeMemoryPool memoryPool; - - /** - * Constructor. 
- * - * @param allocator The allocator in use. - * @param memoryPool Native memory pool. - */ - public NativeContext(BufferAllocator allocator, NativeMemoryPool memoryPool) { - this.allocator = allocator; - this.memoryPool = memoryPool; - } - - /** Returns the allocator which is in use. */ - public BufferAllocator getAllocator() { - return allocator; - } - - /** Returns the native memory pool. */ - public NativeMemoryPool getMemoryPool() { - return memoryPool; - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java deleted file mode 100644 index 8f8cdc49d4877..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.jni; - -import org.apache.arrow.dataset.scanner.FragmentScanOptions; -import org.apache.arrow.dataset.scanner.ScanOptions; -import org.apache.arrow.dataset.source.Dataset; - -/** Native implementation of {@link Dataset}. 
*/ -public class NativeDataset implements Dataset { - - private final NativeContext context; - private final long datasetId; - - private boolean closed = false; - - public NativeDataset(NativeContext context, long datasetId) { - this.context = context; - this.datasetId = datasetId; - } - - @Override - public synchronized NativeScanner newScan(ScanOptions options) { - if (closed) { - throw new NativeInstanceReleasedException(); - } - int fileFormatId = -1; - String[] serialized = null; - if (options.getFragmentScanOptions().isPresent()) { - FragmentScanOptions fragmentScanOptions = options.getFragmentScanOptions().get(); - fileFormatId = fragmentScanOptions.fileFormat().id(); - serialized = fragmentScanOptions.serialize(); - } - long scannerId = - JniWrapper.get() - .createScanner( - datasetId, - options.getColumns().orElse(null), - options.getSubstraitProjection().orElse(null), - options.getSubstraitFilter().orElse(null), - options.getBatchSize(), - fileFormatId, - serialized, - context.getMemoryPool().getNativeInstanceId()); - - return new NativeScanner(context, scannerId); - } - - @Override - public synchronized void close() { - if (closed) { - return; - } - closed = true; - JniWrapper.get().closeDataset(datasetId); - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDatasetFactory.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDatasetFactory.java deleted file mode 100644 index d96d9b69fa81c..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDatasetFactory.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.jni; - -import java.io.IOException; -import org.apache.arrow.dataset.source.DatasetFactory; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.SchemaUtility; - -/** Native implementation of {@link DatasetFactory}. */ -public class NativeDatasetFactory implements DatasetFactory { - private final long datasetFactoryId; - private final NativeMemoryPool memoryPool; - private final BufferAllocator allocator; - - private boolean closed = false; - - /** - * Constructor. - * - * @param allocator a context allocator associated with this factory. Any buffer that will be - * created natively will be then bound to this allocator. - * @param memoryPool the native memory pool associated with this factory. Any buffer created - * natively should request for memory spaces from this memory pool. This is a mapped instance - * of c++ arrow::MemoryPool. - * @param datasetFactoryId an ID, at the same time the native pointer of the underlying native - * instance of this factory. Make sure in c++ side the pointer is pointing to the shared - * pointer wrapping the actual instance so we could successfully decrease the reference count - * once {@link #close} is called. 
- * @see #close() - */ - public NativeDatasetFactory( - BufferAllocator allocator, NativeMemoryPool memoryPool, long datasetFactoryId) { - this.allocator = allocator; - this.memoryPool = memoryPool; - this.datasetFactoryId = datasetFactoryId; - } - - @Override - public Schema inspect() { - final byte[] buffer; - synchronized (this) { - if (closed) { - throw new NativeInstanceReleasedException(); - } - buffer = JniWrapper.get().inspectSchema(datasetFactoryId); - } - try { - return SchemaUtility.deserialize(buffer, allocator); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public NativeDataset finish() { - return finish(inspect()); - } - - @Override - public NativeDataset finish(Schema schema) { - try { - byte[] serialized = SchemaUtility.serialize(schema); - synchronized (this) { - if (closed) { - throw new NativeInstanceReleasedException(); - } - return new NativeDataset( - new NativeContext(allocator, memoryPool), - JniWrapper.get().createDataset(datasetFactoryId, serialized)); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - /** Close this factory by release the pointer of the native instance. */ - @Override - public synchronized void close() { - if (closed) { - return; - } - closed = true; - JniWrapper.get().closeDatasetFactory(datasetFactoryId); - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeInstanceReleasedException.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeInstanceReleasedException.java deleted file mode 100644 index 168086bbb410f..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeInstanceReleasedException.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.jni; - -/** Thrown if trying to operate on a native instance that is already released. */ -public class NativeInstanceReleasedException extends RuntimeException { - public NativeInstanceReleasedException() { - super("Native instance has been released"); - } - - public NativeInstanceReleasedException(String message) { - super(message); - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeMemoryPool.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeMemoryPool.java deleted file mode 100644 index 7ea3b576e6f45..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeMemoryPool.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.jni; - -/** C++ memory pool(arrow::MemoryPool)'s Java mapped instance. */ -public class NativeMemoryPool implements AutoCloseable { - private final long nativeInstanceId; - - private NativeMemoryPool(long nativeInstanceId) { - this.nativeInstanceId = nativeInstanceId; - } - - /** Get the default memory pool. This will return arrow::default_memory_pool() directly. */ - public static NativeMemoryPool getDefault() { - JniLoader.get().ensureLoaded(); - return new NativeMemoryPool(getDefaultMemoryPool()); - } - - /** - * Create a listenable memory pool (see also: arrow::ReservationListenableMemoryPool) with a - * specific listener. All buffers created from the memory pool should take enough reservation from - * the listener in advance. - */ - public static NativeMemoryPool createListenable(ReservationListener listener) { - JniLoader.get().ensureLoaded(); - return new NativeMemoryPool(createListenableMemoryPool(listener)); - } - - /** Return native instance ID of this memory pool. */ - public long getNativeInstanceId() { - return nativeInstanceId; - } - - /** Get current allocated bytes. 
*/ - public long getBytesAllocated() { - return bytesAllocated(nativeInstanceId); - } - - @Override - public void close() throws Exception { - releaseMemoryPool(nativeInstanceId); - } - - private static native long getDefaultMemoryPool(); - - private static native long createListenableMemoryPool(ReservationListener listener); - - private static native void releaseMemoryPool(long id); - - private static native long bytesAllocated(long id); -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanTask.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanTask.java deleted file mode 100644 index bc7bd4b62c774..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanTask.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.jni; - -import org.apache.arrow.dataset.scanner.ScanTask; -import org.apache.arrow.vector.ipc.ArrowReader; - -/** - * Native implementation of {@link ScanTask}. Currently RecordBatches are iterated directly by the - * scanner id via {@link JniWrapper}, thus we allow only one-time execution of method {@link - * #execute()}. 
If a re-scan operation is expected, call {@link NativeDataset#newScan} to create a - * new scanner instance. - */ -@Deprecated -public class NativeScanTask implements ScanTask { - private final NativeScanner scanner; - - /** Constructor. */ - public NativeScanTask(NativeScanner scanner) { - this.scanner = scanner; - } - - @Override - public ArrowReader execute() { - return scanner.execute(); - } - - @Override - public void close() { - scanner.close(); - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanner.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanner.java deleted file mode 100644 index 3367bdd9818fc..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeScanner.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.dataset.jni; - -import java.io.IOException; -import java.util.Collections; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReadWriteLock; -import java.util.concurrent.locks.ReentrantReadWriteLock; -import org.apache.arrow.c.ArrowArray; -import org.apache.arrow.c.Data; -import org.apache.arrow.dataset.scanner.Scanner; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.SchemaUtility; - -/** - * Native implementation of {@link Scanner}. Note that it currently emits only a single scan task of - * type {@link NativeScanTask}, which is internally a combination of all scan task instances - * returned by the native scanner. - */ -public class NativeScanner implements Scanner { - - private final AtomicBoolean executed = new AtomicBoolean(false); - private final NativeContext context; - private final long scannerId; - - private final ReadWriteLock lock = new ReentrantReadWriteLock(); - private final Lock writeLock = lock.writeLock(); - private final Lock readLock = lock.readLock(); - private boolean closed = false; - - public NativeScanner(NativeContext context, long scannerId) { - this.context = context; - this.scannerId = scannerId; - } - - ArrowReader execute() { - if (closed) { - throw new NativeInstanceReleasedException(); - } - if (!executed.compareAndSet(false, true)) { - throw new UnsupportedOperationException( - "NativeScanner cannot be executed more than once. 
Consider creating " - + "new scanner instead"); - } - return new NativeReader(context.getAllocator()); - } - - @Override - public ArrowReader scanBatches() { - if (closed) { - throw new NativeInstanceReleasedException(); - } - if (!executed.compareAndSet(false, true)) { - throw new UnsupportedOperationException( - "NativeScanner can only be executed once. Create a " + "new scanner instead"); - } - return new NativeReader(context.getAllocator()); - } - - @Override - @Deprecated - public Iterable scan() { - if (closed) { - throw new NativeInstanceReleasedException(); - } - return Collections.singletonList(new NativeScanTask(this)); - } - - @Override - public Schema schema() { - readLock.lock(); - try { - if (closed) { - throw new NativeInstanceReleasedException(); - } - return SchemaUtility.deserialize( - JniWrapper.get().getSchemaFromScanner(scannerId), context.getAllocator()); - } catch (IOException e) { - throw new RuntimeException(e); - } finally { - readLock.unlock(); - } - } - - @Override - public void close() { - writeLock.lock(); - try { - if (closed) { - return; - } - closed = true; - JniWrapper.get().closeScanner(scannerId); - } finally { - writeLock.unlock(); - } - } - - /** {@link ArrowReader} implementation for NativeDataset. 
*/ - public class NativeReader extends ArrowReader { - - private NativeReader(BufferAllocator allocator) { - super(allocator); - } - - @Override - protected void loadRecordBatch(ArrowRecordBatch batch) { - throw new UnsupportedOperationException(); - } - - @Override - protected void loadDictionary(ArrowDictionaryBatch dictionaryBatch) { - throw new UnsupportedOperationException(); - } - - @Override - public boolean loadNextBatch() throws IOException { - readLock.lock(); - try { - if (closed) { - throw new NativeInstanceReleasedException(); - } - try (ArrowArray arrowArray = ArrowArray.allocateNew(context.getAllocator())) { - if (!JniWrapper.get().nextRecordBatch(scannerId, arrowArray.memoryAddress())) { - return false; - } - final VectorSchemaRoot vsr = getVectorSchemaRoot(); - Data.importIntoVectorSchemaRoot(context.getAllocator(), arrowArray, vsr, this); - } - } finally { - readLock.unlock(); - } - return true; - } - - @Override - public long bytesRead() { - return 0L; - } - - @Override - protected void closeReadSource() throws IOException { - // no-op - } - - @Override - protected Schema readSchema() throws IOException { - return schema(); - } - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/ReservationListener.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/ReservationListener.java deleted file mode 100644 index 900d5fd895ec4..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/ReservationListener.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.jni; - -/** Listener of buffer memory reservation. Used by native datasets. */ -public interface ReservationListener { - - /** - * Reserve bytes. - * - * @throws RuntimeException if request size cannot be granted - */ - void reserve(long size); - - /** Unreserve bytes. */ - void unreserve(long size); -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ArrowScannerReader.java b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ArrowScannerReader.java deleted file mode 100644 index acbbe889e5f85..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ArrowScannerReader.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.dataset.scanner; - -import java.io.IOException; -import java.util.Iterator; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.Schema; - -/** An implementation of {@link ArrowReader} that reads the dataset from {@link Scanner}. */ -public class ArrowScannerReader extends ArrowReader { - private final Scanner scanner; - - private Iterator taskIterator; - - private ScanTask currentTask = null; - private ArrowReader currentReader = null; - - /** - * Constructs a scanner reader using a Scanner. - * - * @param scanner scanning data over dataset - * @param allocator to allocate new buffers - */ - public ArrowScannerReader(Scanner scanner, BufferAllocator allocator) { - super(allocator); - this.scanner = scanner; - this.taskIterator = scanner.scan().iterator(); - if (taskIterator.hasNext()) { - currentTask = taskIterator.next(); - currentReader = currentTask.execute(); - } - } - - @Override - protected void loadRecordBatch(ArrowRecordBatch batch) { - throw new UnsupportedOperationException(); - } - - @Override - protected void loadDictionary(ArrowDictionaryBatch dictionaryBatch) { - throw new UnsupportedOperationException(); - } - - @Override - public boolean loadNextBatch() throws IOException { - if (currentReader == null) { - return false; - } - boolean result = currentReader.loadNextBatch(); - - if (!result) { - try { - currentTask.close(); - currentReader.close(); - } catch (Exception e) { - throw new IOException(e); - } - - while (!result) { - if (!taskIterator.hasNext()) { - return false; - } else { - currentTask = taskIterator.next(); - currentReader = currentTask.execute(); - result = currentReader.loadNextBatch(); - } - } - } 
- - VectorLoader loader = new VectorLoader(this.getVectorSchemaRoot()); - VectorUnloader unloader = new VectorUnloader(currentReader.getVectorSchemaRoot()); - try (ArrowRecordBatch recordBatch = unloader.getRecordBatch()) { - loader.load(recordBatch); - } - return true; - } - - @Override - public long bytesRead() { - return 0L; - } - - @Override - protected void closeReadSource() throws IOException { - try { - currentTask.close(); - currentReader.close(); - scanner.close(); - } catch (Exception e) { - throw new IOException(e); - } - } - - @Override - protected Schema readSchema() throws IOException { - return scanner.schema(); - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/FragmentScanOptions.java b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/FragmentScanOptions.java deleted file mode 100644 index d48d0bd2b76b9..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/FragmentScanOptions.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.scanner; - -import org.apache.arrow.dataset.file.FileFormat; - -/** The file fragment scan options interface. 
It is used to transfer to JNI call. */ -public interface FragmentScanOptions { - FileFormat fileFormat(); - - String[] serialize(); -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java deleted file mode 100644 index 68fc3943b3edd..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.scanner; - -import java.nio.ByteBuffer; -import java.util.Optional; -import org.apache.arrow.util.Preconditions; - -/** Options used during scanning. */ -public class ScanOptions { - private final long batchSize; - private final Optional columns; - private final Optional substraitProjection; - private final Optional substraitFilter; - - private final Optional fragmentScanOptions; - - /** - * Constructor. - * - * @param columns Projected columns. Empty for scanning all columns. - * @param batchSize Maximum row number of each returned {@link - * org.apache.arrow.vector.ipc.message.ArrowRecordBatch} - * @deprecated Deprecated. 
Use {@link #ScanOptions(long, Optional)} instead. - */ - @Deprecated - public ScanOptions(String[] columns, long batchSize) { - this( - batchSize, - Optional.of(columns) - .map( - present -> { - if (present.length == 0) { - // Backwards compatibility: See ARROW-13257, in the new constructor, we now use - // null to scan for all columns. - return null; - } - return present; - })); - } - - /** - * Constructor. - * - * @param batchSize Maximum row number of each returned {@link - * org.apache.arrow.vector.ipc.message.ArrowRecordBatch} - * @param columns (Optional) Projected columns. {@link Optional#empty()} for scanning all columns. - * Otherwise, Only columns present in the Array will be scanned. - */ - public ScanOptions(long batchSize, Optional columns) { - Preconditions.checkNotNull(columns); - this.batchSize = batchSize; - this.columns = columns; - this.substraitProjection = Optional.empty(); - this.substraitFilter = Optional.empty(); - this.fragmentScanOptions = Optional.empty(); - } - - public ScanOptions(long batchSize) { - this(batchSize, Optional.empty()); - } - - public Optional getColumns() { - return columns; - } - - public long getBatchSize() { - return batchSize; - } - - public Optional getSubstraitProjection() { - return substraitProjection; - } - - public Optional getSubstraitFilter() { - return substraitFilter; - } - - public Optional getFragmentScanOptions() { - return fragmentScanOptions; - } - - /** Builder for Options used during scanning. */ - public static class Builder { - private final long batchSize; - private Optional columns; - private ByteBuffer substraitProjection; - private ByteBuffer substraitFilter; - private FragmentScanOptions fragmentScanOptions; - - /** - * Constructor. - * - * @param batchSize Maximum row number of each returned {@link - * org.apache.arrow.vector.ipc.message.ArrowRecordBatch} - */ - public Builder(long batchSize) { - this.batchSize = batchSize; - } - - /** - * Set the Projected columns. 
Empty for scanning all columns. - * - * @param columns Projected columns. Empty for scanning all columns. - * @return the ScanOptions configured. - */ - public Builder columns(Optional columns) { - Preconditions.checkNotNull(columns); - this.columns = columns; - return this; - } - - /** - * Set the Substrait extended expression for Projection new columns. - * - * @param substraitProjection Expressions to evaluate for project new columns. - * @return the ScanOptions configured. - */ - public Builder substraitProjection(ByteBuffer substraitProjection) { - Preconditions.checkNotNull(substraitProjection); - this.substraitProjection = substraitProjection; - return this; - } - - /** - * Set the Substrait extended expression for Filter. - * - * @param substraitFilter Expressions to evaluate for apply Filter. - * @return the ScanOptions configured. - */ - public Builder substraitFilter(ByteBuffer substraitFilter) { - Preconditions.checkNotNull(substraitFilter); - this.substraitFilter = substraitFilter; - return this; - } - - /** - * Set the FragmentScanOptions. - * - * @param fragmentScanOptions fragment scan options - * @return the ScanOptions configured. 
- */ - public Builder fragmentScanOptions(FragmentScanOptions fragmentScanOptions) { - Preconditions.checkNotNull(fragmentScanOptions); - this.fragmentScanOptions = fragmentScanOptions; - return this; - } - - public ScanOptions build() { - return new ScanOptions(this); - } - } - - private ScanOptions(Builder builder) { - batchSize = builder.batchSize; - columns = builder.columns; - substraitProjection = Optional.ofNullable(builder.substraitProjection); - substraitFilter = Optional.ofNullable(builder.substraitFilter); - fragmentScanOptions = Optional.ofNullable(builder.fragmentScanOptions); - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanTask.java b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanTask.java deleted file mode 100644 index ad19b451a067f..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanTask.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.scanner; - -import java.io.Reader; -import org.apache.arrow.vector.ipc.ArrowReader; - -/** - * Read record batches from a range of a single data fragment. 
A ScanTask is meant to be a unit of - * work to be dispatched. The implementation must be thread and concurrent safe. - */ -@Deprecated -public interface ScanTask extends AutoCloseable { - - /** Execute this ScanTask and return a {@link Reader} instance. */ - ArrowReader execute(); -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/Scanner.java b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/Scanner.java deleted file mode 100644 index 0131db51a7451..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/Scanner.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.scanner; - -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.types.pojo.Schema; - -/** A high level interface for scanning data over dataset. */ -public interface Scanner extends AutoCloseable { - - /** - * Read the dataset as a stream of record batches. - * - * @return a {@link ArrowReader}. - */ - ArrowReader scanBatches(); - - /** - * Perform the scan operation. - * - * @return a iterable set of {@link ScanTask}s. 
Each task is considered independent and it is - * allowed to execute the tasks concurrently to gain better performance. - * @deprecated use {@link #scanBatches()} instead. - */ - @Deprecated - Iterable scan(); - - /** - * Get the schema of this Scanner. - * - * @return the schema instance - */ - Schema schema(); -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvConvertOptions.java b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvConvertOptions.java deleted file mode 100644 index 15e257896b80e..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvConvertOptions.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.dataset.scanner.csv; - -import java.util.Map; -import java.util.Optional; -import org.apache.arrow.c.ArrowSchema; - -public class CsvConvertOptions { - - private final Map configs; - - private Optional cSchema = Optional.empty(); - - public CsvConvertOptions(Map configs) { - this.configs = configs; - } - - public Optional getArrowSchema() { - return cSchema; - } - - public Map getConfigs() { - return configs; - } - - public void set(String key, String value) { - configs.put(key, value); - } - - public void setArrowSchema(ArrowSchema cSchema) { - this.cSchema = Optional.of(cSchema); - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvFragmentScanOptions.java b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvFragmentScanOptions.java deleted file mode 100644 index dddc36d38714e..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvFragmentScanOptions.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.dataset.scanner.csv; - -import java.util.Map; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.arrow.dataset.file.FileFormat; -import org.apache.arrow.dataset.scanner.FragmentScanOptions; -import org.apache.arrow.dataset.utils.MapUtil; - -public class CsvFragmentScanOptions implements FragmentScanOptions { - private final CsvConvertOptions convertOptions; - private final Map readOptions; - private final Map parseOptions; - - /** - * CSV scan options, map to CPP struct CsvFragmentScanOptions. The key in config map is the field - * name of mapping cpp struct - * - *

    Currently, multi-valued options (which are std::vector values in C++) only support having a - * single value set. For example, for the null_values option, only one string can be set as the - * null value. - * - * @param convertOptions similar to CsvFragmentScanOptions#convert_options in CPP, the ArrowSchema - * represents column_types, convert data option such as null value recognition. - * @param readOptions similar to CsvFragmentScanOptions#read_options in CPP, specify how to read - * the file such as block_size - * @param parseOptions similar to CsvFragmentScanOptions#parse_options in CPP, parse file option - * such as delimiter - */ - public CsvFragmentScanOptions( - CsvConvertOptions convertOptions, - Map readOptions, - Map parseOptions) { - this.convertOptions = convertOptions; - this.readOptions = readOptions; - this.parseOptions = parseOptions; - } - - /** - * File format. - * - * @return file format. - */ - @Override - public FileFormat fileFormat() { - return FileFormat.CSV; - } - - /** - * This is an internal function to invoke by serializer. Serialize this class to string array and - * then called by JNI call. - * - * @return string array as Map JNI bridge format. 
- */ - @Override - public String[] serialize() { - Map options = - Stream.concat( - Stream.concat(readOptions.entrySet().stream(), parseOptions.entrySet().stream()), - convertOptions.getConfigs().entrySet().stream()) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - - if (convertOptions.getArrowSchema().isPresent()) { - options.put( - "column_types", Long.toString(convertOptions.getArrowSchema().get().memoryAddress())); - } - return MapUtil.convertMapToStringArray(options); - } - - public CsvConvertOptions getConvertOptions() { - return convertOptions; - } - - public Map getReadOptions() { - return readOptions; - } - - public Map getParseOptions() { - return parseOptions; - } -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/source/Dataset.java b/java/dataset/src/main/java/org/apache/arrow/dataset/source/Dataset.java deleted file mode 100644 index 44b575f7c23c8..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/source/Dataset.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.dataset.source; - -import org.apache.arrow.dataset.scanner.ScanOptions; -import org.apache.arrow.dataset.scanner.Scanner; - -/** A container of Fragments which are the internal iterable unit of read data. */ -public interface Dataset extends AutoCloseable { - - /** - * Create a new Scanner using the provided scan options. - * - * @param options options used during creating Scanner - * @return the Scanner instance - */ - Scanner newScan(ScanOptions options); -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/source/DatasetFactory.java b/java/dataset/src/main/java/org/apache/arrow/dataset/source/DatasetFactory.java deleted file mode 100644 index 9f2fd8c4e7126..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/source/DatasetFactory.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.source; - -import org.apache.arrow.vector.types.pojo.Schema; - -/** - * DatasetFactory provides a way to inspect a Dataset potential schema before materializing it. - * Thus, the user can peek the schema for data sources and decide on a unified schema. 
- */ -public interface DatasetFactory extends AutoCloseable { - - /** - * Get unified schema for the resulting Dataset. - * - * @return the schema object inspected - */ - Schema inspect(); - - /** - * Create a Dataset with auto-inferred schema. Which means, the schema of the resulting Dataset - * will be the same with calling {@link #inspect()} manually. - * - * @return the Dataset instance - */ - Dataset finish(); - - /** - * Create a Dataset with predefined schema. Schema inference will not be performed. - * - * @param schema a predefined schema - * @return the Dataset instance - */ - Dataset finish(Schema schema); -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/substrait/AceroSubstraitConsumer.java b/java/dataset/src/main/java/org/apache/arrow/dataset/substrait/AceroSubstraitConsumer.java deleted file mode 100644 index a866d6958b1b7..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/substrait/AceroSubstraitConsumer.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.dataset.substrait; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import org.apache.arrow.c.ArrowArrayStream; -import org.apache.arrow.c.Data; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.ipc.ArrowReader; - -/** - * Class to expose Java Substrait API for end users, currently operations supported are only to - * Consume Substrait Plan in Plan format (JSON) or Binary format (ByteBuffer). - */ -public final class AceroSubstraitConsumer { - private final BufferAllocator allocator; - - public AceroSubstraitConsumer(BufferAllocator allocator) { - this.allocator = allocator; - } - - /** - * Run Substrait plan. - * - * @param plan The JSON Substrait plan. - * @return the ArrowReader to iterate for record batches. - */ - public ArrowReader runQuery(String plan) throws Exception { - return runQuery(plan, Collections.emptyMap()); - } - - /** - * Run Substrait plan. - * - * @param plan The JSON Substrait plan. - * @param namedTables A mapping of named tables referenced by the plan to an ArrowReader providing - * the data for the table. Contains the Table Name to Query as a Key and ArrowReader as a - * Value. - *

    {@code ArrowReader nationReader = scanner.scanBatches();
    -   * Map namedTables = new HashMap<>();
    -   * namedTables.put("NATION", nationReader);}
    - * - * @return the ArrowReader to iterate for record batches. - */ - public ArrowReader runQuery(String plan, Map namedTables) throws Exception { - return execute(plan, namedTables); - } - - /** - * Run Substrait plan. - * - * @param plan the binary Substrait plan. - * @return the ArrowReader to iterate for record batches. - */ - public ArrowReader runQuery(ByteBuffer plan) throws Exception { - return runQuery(plan, Collections.emptyMap()); - } - - /** - * Read binary Substrait plan, execute and return an ArrowReader to read Schema and - * ArrowRecordBatches. - * - * @param plan the binary Substrait plan. - * @param namedTables A mapping of named tables referenced by the plan to an ArrowReader providing - * the data for the table. Contains the Table Name to Query as a Key and ArrowReader as a - * Value. - *
    {@code ArrowReader nationReader = scanner.scanBatches();
    -   * Map namedTables = new HashMap<>();
    -   * namedTables.put("NATION", nationReader);}
    - * - * @return the ArrowReader to iterate for record batches. - */ - public ArrowReader runQuery(ByteBuffer plan, Map namedTables) - throws Exception { - return execute(plan, namedTables); - } - - private ArrowReader execute(String plan, Map namedTables) throws Exception { - List arrowArrayStream = new ArrayList<>(); - try (ArrowArrayStream streamOutput = ArrowArrayStream.allocateNew(this.allocator)) { - String[] mapTableToMemoryAddress = getMapTableToMemoryAddress(namedTables, arrowArrayStream); - JniWrapper.get() - .executeSerializedPlan(plan, mapTableToMemoryAddress, streamOutput.memoryAddress()); - return Data.importArrayStream(this.allocator, streamOutput); - } finally { - AutoCloseables.close(arrowArrayStream); - } - } - - private ArrowReader execute(ByteBuffer plan, Map namedTables) - throws Exception { - List arrowArrayStream = new ArrayList<>(); - try (ArrowArrayStream streamOutput = ArrowArrayStream.allocateNew(this.allocator)) { - String[] mapTableToMemoryAddress = getMapTableToMemoryAddress(namedTables, arrowArrayStream); - JniWrapper.get() - .executeSerializedPlan(plan, mapTableToMemoryAddress, streamOutput.memoryAddress()); - return Data.importArrayStream(this.allocator, streamOutput); - } finally { - AutoCloseables.close(arrowArrayStream); - } - } - - private String[] getMapTableToMemoryAddress( - Map mapTableToArrowReader, List listStreamInput) { - String[] mapTableToMemoryAddress = new String[mapTableToArrowReader.size() * 2]; - ArrowArrayStream streamInput; - int pos = 0; - for (Map.Entry entries : mapTableToArrowReader.entrySet()) { - streamInput = ArrowArrayStream.allocateNew(this.allocator); - listStreamInput.add(streamInput); - Data.exportArrayStream(this.allocator, entries.getValue(), streamInput); - mapTableToMemoryAddress[pos] = entries.getKey(); - mapTableToMemoryAddress[pos + 1] = String.valueOf(streamInput.memoryAddress()); - pos += 2; - } - return mapTableToMemoryAddress; - } -} diff --git 
a/java/dataset/src/main/java/org/apache/arrow/dataset/substrait/JniWrapper.java b/java/dataset/src/main/java/org/apache/arrow/dataset/substrait/JniWrapper.java deleted file mode 100644 index ff50ecfb994cb..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/substrait/JniWrapper.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.substrait; - -import java.nio.ByteBuffer; -import org.apache.arrow.dataset.jni.JniLoader; - -/** - * Class that contains Native methods to call Acero C++ Substrait API. It internally depends on C++ - * function arrow::engine::ExecuteSerializedPlan. Currently supported input parameters supported - * are: - * - *
    - * - arrow::Buffer: Substrait Plan (JSON or Binary format).
    - * - arrow::engine::ConversionOptions: Mapping for arrow::engine::NamedTableProvider.
    - * 
    - */ -final class JniWrapper { - private static final JniWrapper INSTANCE = new JniWrapper(); - - private JniWrapper() {} - - public static JniWrapper get() { - JniLoader.get().ensureLoaded(); - return INSTANCE; - } - - /** - * Consume the JSON Substrait Plan that contains Named Tables and export the RecordBatchReader - * into C-Data Interface ArrowArrayStream. - * - * @param planInput the JSON Substrait plan. - * @param mapTableToMemoryAddressInput the mapping name of Tables Name on position `i` and theirs - * Memory Address representation on `i+1` position linearly. - *
    {@code String[] mapTableToMemoryAddress = new String[2];
    -   * mapTableToMemoryAddress[0]="NATION";
    -   * mapTableToMemoryAddress[1]="140650250895360";}
    - * - * @param memoryAddressOutput the memory address where RecordBatchReader is exported. - */ - public native void executeSerializedPlan( - String planInput, String[] mapTableToMemoryAddressInput, long memoryAddressOutput); - - /** - * Consume the binary Substrait Plan that contains Named Tables and export the RecordBatchReader - * into C-Data Interface ArrowArrayStream. - * - * @param planInput the binary Substrait plan. - * @param mapTableToMemoryAddressInput the mapping name of Tables Name on position `i` and theirs - * Memory Address representation on `i+1` position linearly. - *
    {@code String[] mapTableToMemoryAddress = new String[2];
    -   * mapTableToMemoryAddress[0]="NATION";
    -   * mapTableToMemoryAddress[1]="140650250895360";}
    - * - * @param memoryAddressOutput the memory address where RecordBatchReader is exported. - */ - public native void executeSerializedPlan( - ByteBuffer planInput, String[] mapTableToMemoryAddressInput, long memoryAddressOutput); -} diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/utils/MapUtil.java b/java/dataset/src/main/java/org/apache/arrow/dataset/utils/MapUtil.java deleted file mode 100644 index 4df6cf1e0e05e..0000000000000 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/utils/MapUtil.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.utils; - -import java.util.Map; - -/** The utility class for Map. */ -public class MapUtil { - private MapUtil() {} - - /** - * Convert the map to string array as JNI bridge. 
- * - * @param config config map - * @return string array for serialization - */ - public static String[] convertMapToStringArray(Map config) { - if (config.isEmpty()) { - return null; - } - String[] configs = new String[config.size() * 2]; - int i = 0; - for (Map.Entry entry : config.entrySet()) { - configs[i++] = entry.getKey(); - configs[i++] = entry.getValue(); - } - return configs; - } -} diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/OrcWriteSupport.java b/java/dataset/src/test/java/org/apache/arrow/dataset/OrcWriteSupport.java deleted file mode 100644 index e4daa001c6ee8..0000000000000 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/OrcWriteSupport.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.dataset; - -import java.io.IOException; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.orc.OrcFile; -import org.apache.orc.TypeDescription; -import org.apache.orc.Writer; - -public class OrcWriteSupport { - public static void writeTempFile(TypeDescription orcSchema, Path path, Integer[] values) - throws IOException { - Writer writer = - OrcFile.createWriter(path, OrcFile.writerOptions(new Configuration()).setSchema(orcSchema)); - VectorizedRowBatch batch = orcSchema.createRowBatch(); - LongColumnVector longColumnVector = (LongColumnVector) batch.cols[0]; - for (int idx = 0; idx < values.length; idx++) { - longColumnVector.vector[idx] = values[idx]; - } - batch.size = values.length; - writer.addRowBatch(batch); - writer.close(); - } -} diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/ParquetWriteSupport.java b/java/dataset/src/test/java/org/apache/arrow/dataset/ParquetWriteSupport.java deleted file mode 100644 index ae121424b6a64..0000000000000 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/ParquetWriteSupport.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset; - -import java.io.File; -import java.io.InputStream; -import java.lang.reflect.Method; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Random; -import org.apache.arrow.util.Preconditions; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericRecord; -import org.apache.parquet.avro.AvroParquetWriter; -import org.apache.parquet.hadoop.ParquetWriter; - -/** Utility class for writing Parquet files using Avro based tools. */ -public class ParquetWriteSupport implements AutoCloseable { - - private final String path; - private final String uri; - private final ParquetWriter writer; - private final Schema avroSchema; - private final List writtenRecords = new ArrayList<>(); - private final GenericRecordListBuilder recordListBuilder = new GenericRecordListBuilder(); - private final Random random = new Random(); - - public ParquetWriteSupport(String schemaName, File outputFolder) throws Exception { - avroSchema = getSchema(schemaName); - path = outputFolder.getPath() + "/" + "generated-" + random.nextLong() + ".parquet"; - uri = "file://" + path; - writer = - AvroParquetWriter.builder(new org.apache.hadoop.fs.Path(path)) - .withSchema(avroSchema) - .build(); - } - - public static Schema getSchema(String schemaName) throws Exception { - try { - // Attempt to use JDK 9 behavior of getting the module then the resource stream from the - // module. - // Note that this code is caller-sensitive. 
- Method getModuleMethod = Class.class.getMethod("getModule"); - Object module = getModuleMethod.invoke(ParquetWriteSupport.class); - Method getResourceAsStreamFromModule = - module.getClass().getMethod("getResourceAsStream", String.class); - try (InputStream is = - (InputStream) getResourceAsStreamFromModule.invoke(module, "/avroschema/" + schemaName)) { - return new Schema.Parser().parse(is); - } - } catch (NoSuchMethodException ex) { - // Use JDK8 behavior. - try (InputStream is = - ParquetWriteSupport.class.getResourceAsStream("/avroschema/" + schemaName)) { - return new Schema.Parser().parse(is); - } - } - } - - public static ParquetWriteSupport writeTempFile( - String schemaName, File outputFolder, Object... values) throws Exception { - try (final ParquetWriteSupport writeSupport = - new ParquetWriteSupport(schemaName, outputFolder)) { - writeSupport.writeRecords(values); - return writeSupport; - } - } - - public void writeRecords(Object... values) throws Exception { - final List valueList = getRecordListBuilder().createRecordList(values); - writeRecords(valueList); - } - - public void writeRecords(List records) throws Exception { - for (GenericRecord record : records) { - writeRecord(record); - } - } - - public void writeRecord(GenericRecord record) throws Exception { - writtenRecords.add(record); - writer.write(record); - } - - public String getOutputURI() { - return uri; - } - - public Schema getAvroSchema() { - return avroSchema; - } - - public GenericRecordListBuilder getRecordListBuilder() { - return recordListBuilder; - } - - public List getWrittenRecords() { - return Collections.unmodifiableList(writtenRecords); - } - - @Override - public void close() throws Exception { - writer.close(); - } - - public class GenericRecordListBuilder { - public final List createRecordList(Object... 
values) { - final int fieldCount = avroSchema.getFields().size(); - Preconditions.checkArgument( - values.length % fieldCount == 0, "arg count of values should be divide by field number"); - final List recordList = new ArrayList<>(); - for (int i = 0; i < values.length / fieldCount; i++) { - final GenericRecord record = new GenericData.Record(avroSchema); - for (int j = 0; j < fieldCount; j++) { - record.put(j, values[i * fieldCount + j]); - } - recordList.add(record); - } - return Collections.unmodifiableList(recordList); - } - } -} diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java b/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java deleted file mode 100644 index eb73663191414..0000000000000 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java +++ /dev/null @@ -1,284 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.dataset; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.nio.channels.SeekableByteChannel; -import java.nio.channels.WritableByteChannel; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Objects; -import org.apache.arrow.dataset.file.DatasetFileWriter; -import org.apache.arrow.dataset.file.FileFormat; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.Float16; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float2Vector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; 
-import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter; -import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.ipc.ArrowStreamReader; -import org.apache.arrow.vector.ipc.ArrowStreamWriter; -import org.apache.arrow.vector.test.util.ArrowTestDataUtil; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -import org.apache.arrow.vector.util.Text; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -public class TestAllTypes extends TestDataset { - - @TempDir public Path TMP; - - private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) { - // Notes: - // - IntervalMonthDayNano is not supported by Parquet. - // - Map (GH-38250) and SparseUnion are resulting in serialization errors when writing with the - // Dataset API. 
- // "Unhandled type for Arrow to Parquet schema conversion" errors: IntervalDay, IntervalYear, - // DenseUnion - List childFields = new ArrayList<>(); - childFields.add( - new Field( - "int-child", new FieldType(false, new ArrowType.Int(32, true), null, null), null)); - Field structField = - new Field( - "struct", new FieldType(true, ArrowType.Struct.INSTANCE, null, null), childFields); - Field[] fields = - new Field[] { - Field.nullablePrimitive("null", ArrowType.Null.INSTANCE), - Field.nullablePrimitive("bool", ArrowType.Bool.INSTANCE), - Field.nullablePrimitive("int8", new ArrowType.Int(8, true)), - Field.nullablePrimitive("int16", new ArrowType.Int(16, true)), - Field.nullablePrimitive("int32", new ArrowType.Int(32, true)), - Field.nullablePrimitive("int64", new ArrowType.Int(64, true)), - Field.nullablePrimitive("uint8", new ArrowType.Int(8, false)), - Field.nullablePrimitive("uint16", new ArrowType.Int(16, false)), - Field.nullablePrimitive("uint32", new ArrowType.Int(32, false)), - Field.nullablePrimitive("uint64", new ArrowType.Int(64, false)), - Field.nullablePrimitive( - "float16", new ArrowType.FloatingPoint(FloatingPointPrecision.HALF)), - Field.nullablePrimitive( - "float32", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), - Field.nullablePrimitive( - "float64", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Field.nullablePrimitive("utf8", ArrowType.Utf8.INSTANCE), - Field.nullablePrimitive("binary", ArrowType.Binary.INSTANCE), - Field.nullablePrimitive("largeutf8", ArrowType.LargeUtf8.INSTANCE), - Field.nullablePrimitive("largebinary", ArrowType.LargeBinary.INSTANCE), - Field.nullablePrimitive("fixed_size_binary", new ArrowType.FixedSizeBinary(1)), - Field.nullablePrimitive("date_ms", new ArrowType.Date(DateUnit.MILLISECOND)), - Field.nullablePrimitive("time_ms", new ArrowType.Time(TimeUnit.MILLISECOND, 32)), - Field.nullablePrimitive( - "timestamp_ms", new ArrowType.Timestamp(TimeUnit.MILLISECOND, null)), - 
Field.nullablePrimitive( - "timestamptz_ms", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")), - Field.nullablePrimitive("time_ns", new ArrowType.Time(TimeUnit.NANOSECOND, 64)), - Field.nullablePrimitive( - "timestamp_ns", new ArrowType.Timestamp(TimeUnit.NANOSECOND, null)), - Field.nullablePrimitive( - "timestamptz_ns", new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC")), - Field.nullablePrimitive("duration", new ArrowType.Duration(TimeUnit.MILLISECOND)), - Field.nullablePrimitive("decimal128", new ArrowType.Decimal(10, 2, 128)), - Field.nullablePrimitive("decimal256", new ArrowType.Decimal(10, 2, 256)), - new Field( - "list", - FieldType.nullable(ArrowType.List.INSTANCE), - Collections.singletonList(Field.nullable("items", new ArrowType.Int(32, true)))), - new Field( - "largelist", - FieldType.nullable(ArrowType.LargeList.INSTANCE), - Collections.singletonList(Field.nullable("items", new ArrowType.Int(32, true)))), - new Field( - "fixedsizelist", - FieldType.nullable(new ArrowType.FixedSizeList(2)), - Collections.singletonList(Field.nullable("items", new ArrowType.Int(32, true)))), - structField - }; - VectorSchemaRoot root = VectorSchemaRoot.create(new Schema(Arrays.asList(fields)), allocator); - root.allocateNew(); - root.setRowCount(2); - - root.getVector("null").setNull(0); - root.getVector("bool").setNull(0); - root.getVector("int8").setNull(0); - root.getVector("int16").setNull(0); - root.getVector("int32").setNull(0); - root.getVector("int64").setNull(0); - root.getVector("uint8").setNull(0); - root.getVector("uint16").setNull(0); - root.getVector("uint32").setNull(0); - root.getVector("uint64").setNull(0); - root.getVector("float16").setNull(0); - root.getVector("float32").setNull(0); - root.getVector("float64").setNull(0); - root.getVector("utf8").setNull(0); - root.getVector("binary").setNull(0); - root.getVector("largeutf8").setNull(0); - root.getVector("largebinary").setNull(0); - root.getVector("fixed_size_binary").setNull(0); - 
root.getVector("date_ms").setNull(0); - root.getVector("time_ms").setNull(0); - root.getVector("time_ns").setNull(0); - root.getVector("timestamp_ms").setNull(0); - root.getVector("timestamp_ns").setNull(0); - root.getVector("timestamptz_ms").setNull(0); - root.getVector("timestamptz_ns").setNull(0); - root.getVector("duration").setNull(0); - root.getVector("decimal128").setNull(0); - root.getVector("decimal256").setNull(0); - root.getVector("fixedsizelist").setNull(0); - root.getVector("list").setNull(0); - root.getVector("largelist").setNull(0); - root.getVector("struct").setNull(0); - - root.getVector("null").setNull(1); - ((BitVector) root.getVector("bool")).set(1, 1); - ((TinyIntVector) root.getVector("int8")).set(1, 1); - ((SmallIntVector) root.getVector("int16")).set(1, 1); - ((IntVector) root.getVector("int32")).set(1, 1); - ((BigIntVector) root.getVector("int64")).set(1, 1); - ((UInt1Vector) root.getVector("uint8")).set(1, 1); - ((UInt2Vector) root.getVector("uint16")).set(1, 1); - ((UInt4Vector) root.getVector("uint32")).set(1, 1); - ((UInt8Vector) root.getVector("uint64")).set(1, 1); - ((Float2Vector) root.getVector("float16")).set(1, Float16.toFloat16(+32.875f)); - ((Float4Vector) root.getVector("float32")).set(1, 1.0f); - ((Float8Vector) root.getVector("float64")).set(1, 1.0); - ((VarCharVector) root.getVector("utf8")).set(1, new Text("a")); - ((VarBinaryVector) root.getVector("binary")).set(1, new byte[] {0x01}); - ((LargeVarCharVector) root.getVector("largeutf8")).set(1, new Text("a")); - ((LargeVarBinaryVector) root.getVector("largebinary")).set(1, new byte[] {0x01}); - ((FixedSizeBinaryVector) root.getVector("fixed_size_binary")).set(1, new byte[] {0x01}); - ((DateMilliVector) root.getVector("date_ms")).set(1, 0); - ((TimeMilliVector) root.getVector("time_ms")).set(1, 0); - ((TimeNanoVector) root.getVector("time_ns")).set(1, 0); - ((TimeStampMilliVector) root.getVector("timestamp_ms")).set(1, 0); - ((TimeStampNanoVector) 
root.getVector("timestamp_ns")).set(1, 0); - ((TimeStampMilliTZVector) root.getVector("timestamptz_ms")).set(1, 0); - ((TimeStampNanoTZVector) root.getVector("timestamptz_ns")).set(1, 0); - ((DurationVector) root.getVector("duration")).set(1, 0); - ((DecimalVector) root.getVector("decimal128")).set(1, 0); - ((Decimal256Vector) root.getVector("decimal256")).set(1, 0); - UnionFixedSizeListWriter fixedListWriter = - ((FixedSizeListVector) root.getVector("fixedsizelist")).getWriter(); - fixedListWriter.allocate(); - fixedListWriter.setPosition(1); - fixedListWriter.startList(); - fixedListWriter.integer().writeInt(1); - fixedListWriter.endList(); - - UnionListWriter listWriter = ((ListVector) root.getVector("list")).getWriter(); - listWriter.allocate(); - listWriter.setPosition(1); - listWriter.startList(); - listWriter.integer().writeInt(1); - listWriter.endList(); - - UnionLargeListWriter largeListWriter = - ((LargeListVector) root.getVector("largelist")).getWriter(); - largeListWriter.allocate(); - largeListWriter.setPosition(1); - largeListWriter.startList(); - largeListWriter.integer().writeInt(1); - largeListWriter.endList(); - - ((StructVector) root.getVector("struct")).getChild("int-child", IntVector.class).set(1, 1); - return root; - } - - private byte[] serializeFile(VectorSchemaRoot root) { - try (ByteArrayOutputStream out = new ByteArrayOutputStream(); - WritableByteChannel channel = Channels.newChannel(out); - ArrowStreamWriter writer = new ArrowStreamWriter(root, null, channel)) { - writer.start(); - writer.writeBatch(); - writer.end(); - return out.toByteArray(); - } catch (IOException e) { - throw new IllegalArgumentException("Failed to serialize arrow file", e); - } - } - - @Test - public void testAllTypesParquet() throws Exception { - try (VectorSchemaRoot root = generateAllTypesVector(rootAllocator())) { - byte[] featherData = serializeFile(root); - try (SeekableByteChannel channel = new ByteArrayReadableSeekableByteChannel(featherData)) { - try 
(ArrowStreamReader reader = new ArrowStreamReader(channel, rootAllocator())) { - final Path writtenFolder = Files.createTempDirectory(TMP, "writtenFolder"); - final String writtenParquet = writtenFolder.toUri().toString(); - DatasetFileWriter.write(rootAllocator(), reader, FileFormat.PARQUET, writtenParquet); - - // Load the reference file from the test resources and write to a temporary file on the - // OS. - String referenceFile = - ArrowTestDataUtil.getTestDataRoot() - .resolve("parquet") - .resolve("alltypes-java.parquet") - .toUri() - .toString(); - assertParquetFileEquals( - referenceFile, - Objects.requireNonNull(writtenFolder.toFile().listFiles())[0].toURI().toString()); - } - } - } - } -} diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/TestDataset.java b/java/dataset/src/test/java/org/apache/arrow/dataset/TestDataset.java deleted file mode 100644 index f3ca04d77b39b..0000000000000 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/TestDataset.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.dataset; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Spliterator; -import java.util.Spliterators; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import java.util.stream.StreamSupport; -import org.apache.arrow.dataset.file.FileFormat; -import org.apache.arrow.dataset.file.FileSystemDatasetFactory; -import org.apache.arrow.dataset.jni.NativeMemoryPool; -import org.apache.arrow.dataset.scanner.ScanOptions; -import org.apache.arrow.dataset.scanner.Scanner; -import org.apache.arrow.dataset.source.Dataset; -import org.apache.arrow.dataset.source.DatasetFactory; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.compare.VectorEqualsVisitor; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; - -public abstract class TestDataset { - private BufferAllocator allocator = null; - - @BeforeEach - public void setUp() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void tearDown() { - allocator.close(); - } - - protected BufferAllocator rootAllocator() { - return allocator; - } - - protected List collectResultFromFactory( - DatasetFactory factory, ScanOptions options) { - final Dataset dataset = factory.finish(); - final Scanner scanner = dataset.newScan(options); - try { - final List ret = 
collectTaskData(scanner); - AutoCloseables.close(scanner, dataset); - return ret; - } catch (RuntimeException e) { - throw e; - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - protected List collectTaskData(Scanner scan) { - try (ArrowReader reader = scan.scanBatches()) { - List batches = new ArrayList<>(); - while (reader.loadNextBatch()) { - VectorSchemaRoot root = reader.getVectorSchemaRoot(); - final VectorUnloader unloader = new VectorUnloader(root); - batches.add(unloader.getRecordBatch()); - } - return batches; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - protected Schema inferResultSchemaFromFactory(DatasetFactory factory, ScanOptions options) { - final Dataset dataset = factory.finish(); - final Scanner scanner = dataset.newScan(options); - final Schema schema = scanner.schema(); - try { - AutoCloseables.close(scanner, dataset); - } catch (Exception e) { - throw new RuntimeException(e); - } - return schema; - } - - protected void assertParquetFileEquals(String expectedURI, String actualURI) throws Exception { - final FileSystemDatasetFactory expectedFactory = - new FileSystemDatasetFactory( - rootAllocator(), NativeMemoryPool.getDefault(), FileFormat.PARQUET, expectedURI); - final FileSystemDatasetFactory actualFactory = - new FileSystemDatasetFactory( - rootAllocator(), NativeMemoryPool.getDefault(), FileFormat.PARQUET, actualURI); - List expectedBatches = - collectResultFromFactory(expectedFactory, new ScanOptions(new String[0], 100)); - List actualBatches = - collectResultFromFactory(actualFactory, new ScanOptions(new String[0], 100)); - try (VectorSchemaRoot expectVsr = - VectorSchemaRoot.create(expectedFactory.inspect(), rootAllocator()); - VectorSchemaRoot actualVsr = - VectorSchemaRoot.create(actualFactory.inspect(), rootAllocator())) { - - // fast-fail by comparing metadata - assertEquals(expectedBatches.toString(), actualBatches.toString()); - // compare ArrowRecordBatches - 
assertEquals(expectedBatches.size(), actualBatches.size()); - VectorLoader expectLoader = new VectorLoader(expectVsr); - VectorLoader actualLoader = new VectorLoader(actualVsr); - for (int i = 0; i < expectedBatches.size(); i++) { - expectLoader.load(expectedBatches.get(i)); - actualLoader.load(actualBatches.get(i)); - for (int j = 0; j < expectVsr.getFieldVectors().size(); j++) { - FieldVector vector = expectVsr.getFieldVectors().get(i); - FieldVector otherVector = actualVsr.getFieldVectors().get(i); - // TODO: ARROW-18140 Use VectorSchemaRoot#equals() method to compare - assertTrue(VectorEqualsVisitor.vectorEquals(vector, otherVector)); - } - } - } finally { - AutoCloseables.close(expectedBatches, actualBatches); - } - } - - protected Stream stream(Iterable iterable) { - return StreamSupport.stream(iterable.spliterator(), false); - } - - protected List collect(Iterable iterable) { - return stream(iterable).collect(Collectors.toList()); - } - - protected Stream stream(Iterator iterator) { - return StreamSupport.stream( - Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false); - } - - protected List collect(Iterator iterator) { - return stream(iterator).collect(Collectors.toList()); - } -} diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/TestFragmentScanOptions.java b/java/dataset/src/test/java/org/apache/arrow/dataset/TestFragmentScanOptions.java deleted file mode 100644 index ed6344f0f9cb7..0000000000000 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/TestFragmentScanOptions.java +++ /dev/null @@ -1,346 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import com.google.common.collect.ImmutableMap; -import java.util.Arrays; -import java.util.Collections; -import java.util.Map; -import java.util.Optional; -import org.apache.arrow.c.ArrowSchema; -import org.apache.arrow.c.CDataDictionaryProvider; -import org.apache.arrow.c.Data; -import org.apache.arrow.dataset.file.FileFormat; -import org.apache.arrow.dataset.file.FileSystemDatasetFactory; -import org.apache.arrow.dataset.jni.NativeMemoryPool; -import org.apache.arrow.dataset.scanner.ScanOptions; -import org.apache.arrow.dataset.scanner.Scanner; -import org.apache.arrow.dataset.scanner.csv.CsvConvertOptions; -import org.apache.arrow.dataset.scanner.csv.CsvFragmentScanOptions; -import org.apache.arrow.dataset.source.Dataset; -import org.apache.arrow.dataset.source.DatasetFactory; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.Text; -import org.hamcrest.collection.IsIterableContainingInOrder; -import org.junit.jupiter.api.Test; - 
-public class TestFragmentScanOptions { - - private CsvFragmentScanOptions create( - ArrowSchema cSchema, - Map convertOptionsMap, - Map readOptions, - Map parseOptions) { - CsvConvertOptions convertOptions = new CsvConvertOptions(convertOptionsMap); - convertOptions.setArrowSchema(cSchema); - return new CsvFragmentScanOptions(convertOptions, readOptions, parseOptions); - } - - @Test - public void testCsvConvertOptions() throws Exception { - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("Id", new ArrowType.Int(32, true)), - Field.nullable("Name", new ArrowType.Utf8()), - Field.nullable("Language", new ArrowType.Utf8())), - null); - String path = "file://" + getClass().getResource("/").getPath() + "/data/student.csv"; - BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - try (ArrowSchema cSchema = ArrowSchema.allocateNew(allocator); - ArrowSchema cSchema2 = ArrowSchema.allocateNew(allocator); - CDataDictionaryProvider provider = new CDataDictionaryProvider()) { - Data.exportSchema(allocator, schema, provider, cSchema); - Data.exportSchema(allocator, schema, provider, cSchema2); - CsvFragmentScanOptions fragmentScanOptions1 = - create(cSchema, ImmutableMap.of(), ImmutableMap.of(), ImmutableMap.of("delimiter", ";")); - CsvFragmentScanOptions fragmentScanOptions2 = - create(cSchema2, ImmutableMap.of(), ImmutableMap.of(), ImmutableMap.of("delimiter", ";")); - ScanOptions options = - new ScanOptions.Builder(/*batchSize*/ 32768) - .columns(Optional.empty()) - .fragmentScanOptions(fragmentScanOptions1) - .build(); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - allocator, - NativeMemoryPool.getDefault(), - FileFormat.CSV, - path, - Optional.of(fragmentScanOptions2)); - Dataset dataset = datasetFactory.finish(); - Scanner scanner = dataset.newScan(options); - ArrowReader reader = scanner.scanBatches()) { - assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields()); - int rowCount = 0; 
- while (reader.loadNextBatch()) { - final ValueIterableVector idVector = - (ValueIterableVector) reader.getVectorSchemaRoot().getVector("Id"); - assertThat(idVector.getValueIterable(), IsIterableContainingInOrder.contains(1, 2, 3)); - rowCount += reader.getVectorSchemaRoot().getRowCount(); - } - assertEquals(3, rowCount); - } - } - } - - @Test - public void testCsvConvertOptionsDelimiterNotSet() throws Exception { - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("Id", new ArrowType.Int(32, true)), - Field.nullable("Name", new ArrowType.Utf8()), - Field.nullable("Language", new ArrowType.Utf8())), - null); - String path = "file://" + getClass().getResource("/").getPath() + "/data/student.csv"; - BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - try (ArrowSchema cSchema = ArrowSchema.allocateNew(allocator); - ArrowSchema cSchema2 = ArrowSchema.allocateNew(allocator); - CDataDictionaryProvider provider = new CDataDictionaryProvider()) { - Data.exportSchema(allocator, schema, provider, cSchema); - Data.exportSchema(allocator, schema, provider, cSchema2); - CsvFragmentScanOptions fragmentScanOptions1 = - create(cSchema, ImmutableMap.of(), ImmutableMap.of(), ImmutableMap.of()); - CsvFragmentScanOptions fragmentScanOptions2 = - create(cSchema2, ImmutableMap.of(), ImmutableMap.of(), ImmutableMap.of()); - ScanOptions options = - new ScanOptions.Builder(/*batchSize*/ 32768) - .columns(Optional.empty()) - .fragmentScanOptions(fragmentScanOptions1) - .build(); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - allocator, - NativeMemoryPool.getDefault(), - FileFormat.CSV, - path, - Optional.of(fragmentScanOptions2)); - Dataset dataset = datasetFactory.finish(); - Scanner scanner = dataset.newScan(options); - ArrowReader reader = scanner.scanBatches()) { - int rowCount = 0; - while (reader.loadNextBatch()) { - final ValueIterableVector idVector = - (ValueIterableVector) - 
reader.getVectorSchemaRoot().getVector("Id;Name;Language"); - assertThat( - idVector.getValueIterable(), - IsIterableContainingInOrder.contains( - new Text("1;Juno;Java"), new Text("2;Peter;Python"), new Text("3;Celin;C++"))); - rowCount += reader.getVectorSchemaRoot().getRowCount(); - } - assertEquals(3, rowCount); - } - } - } - - @Test - public void testCsvConvertOptionsNoOption() throws Exception { - final Schema schema = - new Schema( - Collections.singletonList(Field.nullable("Id;Name;Language", new ArrowType.Utf8())), - null); - String path = "file://" + getClass().getResource("/").getPath() + "/data/student.csv"; - BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - ScanOptions options = - new ScanOptions.Builder(/*batchSize*/ 32768).columns(Optional.empty()).build(); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - allocator, NativeMemoryPool.getDefault(), FileFormat.CSV, path); - Dataset dataset = datasetFactory.finish(); - Scanner scanner = dataset.newScan(options); - ArrowReader reader = scanner.scanBatches()) { - - assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields()); - int rowCount = 0; - while (reader.loadNextBatch()) { - final ValueIterableVector idVector = - (ValueIterableVector) reader.getVectorSchemaRoot().getVector("Id;Name;Language"); - assertThat( - idVector.getValueIterable(), - IsIterableContainingInOrder.contains( - new Text("1;Juno;Java"), new Text("2;Peter;Python"), new Text("3;Celin;C++"))); - rowCount += reader.getVectorSchemaRoot().getRowCount(); - } - assertEquals(3, rowCount); - } - } - - @Test - public void testCsvReadParseAndReadOptions() throws Exception { - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("Id", new ArrowType.Int(64, true)), - Field.nullable("Name", new ArrowType.Utf8()), - Field.nullable("Language", new ArrowType.Utf8())), - null); - String path = "file://" + getClass().getResource("/").getPath() + "/data/student.csv"; - 
BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - CsvFragmentScanOptions fragmentScanOptions = - new CsvFragmentScanOptions( - new CsvConvertOptions(ImmutableMap.of()), - ImmutableMap.of("skip_rows_after_names", "1"), - ImmutableMap.of("delimiter", ";")); - ScanOptions options = - new ScanOptions.Builder(/*batchSize*/ 32768) - .columns(Optional.empty()) - .fragmentScanOptions(fragmentScanOptions) - .build(); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - allocator, - NativeMemoryPool.getDefault(), - FileFormat.CSV, - path, - Optional.of(fragmentScanOptions)); - Dataset dataset = datasetFactory.finish(); - Scanner scanner = dataset.newScan(options); - ArrowReader reader = scanner.scanBatches()) { - - assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields()); - int rowCount = 0; - while (reader.loadNextBatch()) { - final ValueIterableVector idVector = - (ValueIterableVector) reader.getVectorSchemaRoot().getVector("Id"); - assertThat(idVector.getValueIterable(), IsIterableContainingInOrder.contains(2L, 3L)); - rowCount += reader.getVectorSchemaRoot().getRowCount(); - } - assertEquals(2, rowCount); - } - } - - @Test - public void testCsvReadOtherOptions() throws Exception { - String path = "file://" + getClass().getResource("/").getPath() + "/data/student.csv"; - BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - Map convertOption = - ImmutableMap.of( - "check_utf8", - "true", - "null_values", - "NULL", - "true_values", - "True", - "false_values", - "False", - "quoted_strings_can_be_null", - "true", - "auto_dict_encode", - "false", - "auto_dict_max_cardinality", - "3456", - "decimal_point", - ".", - "include_missing_columns", - "false"); - Map readOption = - ImmutableMap.of( - "use_threads", - "true", - "block_size", - "1024", - "skip_rows", - "12", - "skip_rows_after_names", - "12", - "autogenerate_column_names", - "false"); - Map parseOption = - ImmutableMap.of( - "delimiter", - ".", 
- "quoting", - "true", - "quote_char", - "'", - "double_quote", - "False", - "escaping", - "true", - "escape_char", - "v", - "newlines_in_values", - "false", - "ignore_empty_lines", - "true"); - CsvFragmentScanOptions fragmentScanOptions = - new CsvFragmentScanOptions(new CsvConvertOptions(convertOption), readOption, parseOption); - ScanOptions options = - new ScanOptions.Builder(/*batchSize*/ 32768) - .columns(Optional.empty()) - .fragmentScanOptions(fragmentScanOptions) - .build(); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - allocator, NativeMemoryPool.getDefault(), FileFormat.CSV, path); - Dataset dataset = datasetFactory.finish(); - Scanner scanner = dataset.newScan(options)) { - assertNotNull(scanner); - } - } - - @Test - public void testCsvInvalidOption() throws Exception { - String path = "file://" + getClass().getResource("/").getPath() + "/data/student.csv"; - BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - Map convertOption = ImmutableMap.of("not_exists_key_check_utf8", "true"); - CsvFragmentScanOptions fragmentScanOptions = - new CsvFragmentScanOptions( - new CsvConvertOptions(convertOption), ImmutableMap.of(), ImmutableMap.of()); - ScanOptions options = - new ScanOptions.Builder(/*batchSize*/ 32768) - .columns(Optional.empty()) - .fragmentScanOptions(fragmentScanOptions) - .build(); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - allocator, NativeMemoryPool.getDefault(), FileFormat.CSV, path); - Dataset dataset = datasetFactory.finish()) { - assertThrows(RuntimeException.class, () -> dataset.newScan(options)); - } - - CsvFragmentScanOptions fragmentScanOptionsFaultValue = - new CsvFragmentScanOptions( - new CsvConvertOptions(ImmutableMap.of()), - ImmutableMap.of("", ""), - ImmutableMap.of("escape_char", "vbvb")); - ScanOptions optionsFault = - new ScanOptions.Builder(/*batchSize*/ 32768) - .columns(Optional.empty()) - .fragmentScanOptions(fragmentScanOptionsFaultValue) - .build(); - 
try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - allocator, NativeMemoryPool.getDefault(), FileFormat.CSV, path); - Dataset dataset = datasetFactory.finish()) { - assertThrows(RuntimeException.class, () -> dataset.newScan(optionsFault)); - } - } -} diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/TextBasedWriteSupport.java b/java/dataset/src/test/java/org/apache/arrow/dataset/TextBasedWriteSupport.java deleted file mode 100644 index e3495bd81ca79..0000000000000 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/TextBasedWriteSupport.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.dataset; - -import java.io.File; -import java.io.IOException; -import java.io.Writer; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.StandardOpenOption; -import java.util.Random; - -public class TextBasedWriteSupport { - private final URI uri; - private final Random random = new Random(); - - public TextBasedWriteSupport(File outputFolder, String fileExtension) throws URISyntaxException { - uri = - new URI( - "file", - outputFolder.getPath() - + File.separator - + "generated-" - + random.nextLong() - + fileExtension, - null); - } - - public static TextBasedWriteSupport writeTempFile( - File outputFolder, String fileExtension, String... values) - throws URISyntaxException, IOException { - TextBasedWriteSupport writer = new TextBasedWriteSupport(outputFolder, fileExtension); - try (Writer addValues = - Files.newBufferedWriter( - new File(writer.uri).toPath(), - StandardCharsets.UTF_8, - StandardOpenOption.CREATE, - StandardOpenOption.APPEND)) { - for (Object value : values) { - addValues.write(value + "\n"); - } - } - return writer; - } - - public String getOutputURI() { - return uri.toString(); - } -} diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestDatasetFileWriter.java b/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestDatasetFileWriter.java deleted file mode 100644 index ba99392a6c4b7..0000000000000 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestDatasetFileWriter.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.file; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.File; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Objects; -import java.util.Set; -import java.util.stream.Collectors; -import org.apache.arrow.dataset.ParquetWriteSupport; -import org.apache.arrow.dataset.TestDataset; -import org.apache.arrow.dataset.jni.NativeMemoryPool; -import org.apache.arrow.dataset.scanner.ArrowScannerReader; -import org.apache.arrow.dataset.scanner.ScanOptions; -import org.apache.arrow.dataset.scanner.Scanner; -import org.apache.arrow.dataset.source.Dataset; -import org.apache.commons.io.FileUtils; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -public class TestDatasetFileWriter extends TestDataset { - - @TempDir public File TMP; - - public static final String AVRO_SCHEMA_USER = "user.avsc"; - - @Test - public void testParquetWriteSimple() throws Exception { - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a", 2, "b", 3, "c", 2, "d"); - String sampleParquet = writeSupport.getOutputURI(); - ScanOptions options = new ScanOptions(new String[0], 100); - final File writtenFolder = new File(TMP, "writtenFolder"); - writtenFolder.mkdirs(); - final String writtenParquet = writtenFolder.toURI().toString(); - try (FileSystemDatasetFactory 
factory = - new FileSystemDatasetFactory( - rootAllocator(), NativeMemoryPool.getDefault(), FileFormat.PARQUET, sampleParquet); - final Dataset dataset = factory.finish(); - final Scanner scanner = dataset.newScan(options); - final ArrowScannerReader reader = new ArrowScannerReader(scanner, rootAllocator()); ) { - DatasetFileWriter.write(rootAllocator(), reader, FileFormat.PARQUET, writtenParquet); - assertParquetFileEquals( - sampleParquet, Objects.requireNonNull(writtenFolder.listFiles())[0].toURI().toString()); - } - } - - @Test - public void testParquetWriteWithPartitions() throws Exception { - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a", 2, "b", 3, "c", 2, "d"); - String sampleParquet = writeSupport.getOutputURI(); - ScanOptions options = new ScanOptions(new String[0], 100); - final File writtenFolder = new File(TMP, "writtenFolder"); - writtenFolder.mkdirs(); - final String writtenParquet = writtenFolder.toURI().toString(); - - try (FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), NativeMemoryPool.getDefault(), FileFormat.PARQUET, sampleParquet); - final Dataset dataset = factory.finish(); - final Scanner scanner = dataset.newScan(options); - final ArrowScannerReader reader = new ArrowScannerReader(scanner, rootAllocator()); ) { - DatasetFileWriter.write( - rootAllocator(), - reader, - FileFormat.PARQUET, - writtenParquet, - new String[] {"id", "name"}, - 100, - "data_{i}"); - final Set expectedOutputFiles = - new HashSet<>( - Arrays.asList( - "id=1/name=a/data_0", - "id=2/name=b/data_0", - "id=3/name=c/data_0", - "id=2/name=d/data_0")); - final Set outputFiles = - FileUtils.listFiles(writtenFolder, null, true).stream() - .map( - file -> { - return writtenFolder.toURI().relativize(file.toURI()).toString(); - }) - .collect(Collectors.toSet()); - assertEquals(expectedOutputFiles, outputFiles); - } - } -} diff --git 
a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java b/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java deleted file mode 100644 index 89ce208e8c6f6..0000000000000 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java +++ /dev/null @@ -1,598 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.dataset.file; - -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.primitives.Primitives; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.stream.Collectors; -import org.apache.arrow.dataset.OrcWriteSupport; -import org.apache.arrow.dataset.ParquetWriteSupport; -import org.apache.arrow.dataset.TextBasedWriteSupport; -import org.apache.arrow.dataset.jni.NativeDataset; -import org.apache.arrow.dataset.jni.NativeInstanceReleasedException; -import org.apache.arrow.dataset.jni.NativeMemoryPool; -import org.apache.arrow.dataset.jni.NativeScanner; -import org.apache.arrow.dataset.jni.TestNativeDataset; -import org.apache.arrow.dataset.scanner.ScanOptions; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowFileWriter; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.GenericRecordBuilder; 
-import org.apache.hadoop.fs.Path; -import org.apache.orc.TypeDescription; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -public class TestFileSystemDataset extends TestNativeDataset { - - @TempDir public File TMP; - - public static final String AVRO_SCHEMA_USER = "user.avsc"; - - @Test - public void testBaseParquetRead() throws Exception { - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a"); - - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - ScanOptions options = new ScanOptions(100); - Schema schema = inferResultSchemaFromFactory(factory, options); - List datum = collectResultFromFactory(factory, options); - - assertScanBatchesProduced(factory, options); - assertEquals(1, datum.size()); - assertEquals(2, schema.getFields().size()); - assertEquals("id", schema.getFields().get(0).getName()); - assertEquals("name", schema.getFields().get(1).getName()); - assertEquals(Types.MinorType.INT.getType(), schema.getFields().get(0).getType()); - assertEquals(Types.MinorType.VARCHAR.getType(), schema.getFields().get(1).getType()); - checkParquetReadResult(schema, writeSupport.getWrittenRecords(), datum); - - AutoCloseables.close(datum); - AutoCloseables.close(factory); - } - - @Test - public void testMultipleParquetReadFromUris() throws Exception { - ParquetWriteSupport writeSupport1 = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a"); - ParquetWriteSupport writeSupport2 = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 2, "b"); - String expectedJsonUnordered = "[[1,\"a\"],[2,\"b\"]]"; - - ScanOptions options = new ScanOptions(1); - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - new String[] {writeSupport1.getOutputURI(), 
writeSupport2.getOutputURI()}); - Schema schema = inferResultSchemaFromFactory(factory, options); - List datum = collectResultFromFactory(factory, options); - - assertScanBatchesProduced(factory, options); - assertEquals(2, datum.size()); - datum.forEach(batch -> assertEquals(1, batch.getLength())); - checkParquetReadResult(schema, expectedJsonUnordered, datum); - - AutoCloseables.close(datum); - AutoCloseables.close(factory); - } - - @Test - public void testMultipleParquetInvalidUri() throws Exception { - RuntimeException exc = - assertThrows( - RuntimeException.class, - () -> - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - new String[] {"https://example.com", "file:///test/location"})); - assertEquals("Unrecognized filesystem type in URI: https://example.com", exc.getMessage()); - } - - @Test - public void testMultipleParquetMultipleFilesystemTypes() throws Exception { - RuntimeException exc = - assertThrows( - RuntimeException.class, - () -> - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - new String[] {"file:///test/location", "s3:///test/bucket/file"})); - assertTrue( - exc.getMessage() - .startsWith( - "The filesystem expected a URI with one of the schemes (file) but received s3")); - } - - @Test - public void testParquetProjectSingleColumn() throws Exception { - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a"); - - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - ScanOptions options = new ScanOptions(100, Optional.of(new String[] {"id"})); - Schema schema = inferResultSchemaFromFactory(factory, options); - List datum = collectResultFromFactory(factory, options); - org.apache.avro.Schema expectedSchema = truncateAvroSchema(writeSupport.getAvroSchema(), 0, 1); - - 
assertScanBatchesProduced(factory, options); - assertEquals(1, schema.getFields().size()); - assertEquals("id", schema.getFields().get(0).getName()); - assertEquals(Types.MinorType.INT.getType(), schema.getFields().get(0).getType()); - assertEquals(1, datum.size()); - checkParquetReadResult( - schema, - Collections.singletonList(new GenericRecordBuilder(expectedSchema).set("id", 1).build()), - datum); - - AutoCloseables.close(datum); - AutoCloseables.close(factory); - } - - @Test - public void testParquetBatchSize() throws Exception { - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a", 2, "b", 3, "c"); - - ScanOptions options = new ScanOptions(1); - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - Schema schema = inferResultSchemaFromFactory(factory, options); - List datum = collectResultFromFactory(factory, options); - - assertScanBatchesProduced(factory, options); - assertEquals(3, datum.size()); - datum.forEach(batch -> assertEquals(1, batch.getLength())); - checkParquetReadResult(schema, writeSupport.getWrittenRecords(), datum); - - AutoCloseables.close(datum); - AutoCloseables.close(factory); - } - - @Test - public void testParquetDirectoryRead() throws Exception { - final File outputFolder = TMP; - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, outputFolder, 1, "a", 2, "b", 3, "c"); - ParquetWriteSupport.writeTempFile( - AVRO_SCHEMA_USER, outputFolder, 4, "e", 5, "f", 6, "g", 7, "h"); - String expectedJsonUnordered = - "[[1,\"a\"],[2,\"b\"],[3,\"c\"],[4,\"e\"],[5,\"f\"],[6,\"g\"],[7,\"h\"]]"; - - ScanOptions options = new ScanOptions(new String[0], 1); - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - outputFolder.toURI().toString()); - Schema schema = 
inferResultSchemaFromFactory(factory, options); - List datum = collectResultFromFactory(factory, options); - - assertScanBatchesProduced(factory, options); - assertEquals(7, datum.size()); - datum.forEach(batch -> assertEquals(1, batch.getLength())); - checkParquetReadResult(schema, expectedJsonUnordered, datum); - - AutoCloseables.close(datum); - } - - @Test - public void testEmptyProjectSelectsZeroColumns() throws Exception { - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a"); - - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - ScanOptions options = new ScanOptions(100, Optional.of(new String[0])); - Schema schema = inferResultSchemaFromFactory(factory, options); - List datum = collectResultFromFactory(factory, options); - org.apache.avro.Schema expectedSchema = - org.apache.avro.Schema.createRecord(Collections.emptyList()); - - assertScanBatchesProduced(factory, options); - assertEquals(0, schema.getFields().size()); - assertEquals(1, datum.size()); - checkParquetReadResult( - schema, Collections.singletonList(new GenericRecordBuilder(expectedSchema).build()), datum); - - AutoCloseables.close(datum); - } - - @Test - public void testNullProjectSelectsAllColumns() throws Exception { - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a"); - - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - ScanOptions options = new ScanOptions(100, Optional.empty()); - Schema schema = inferResultSchemaFromFactory(factory, options); - List datum = collectResultFromFactory(factory, options); - - assertScanBatchesProduced(factory, options); - assertEquals(1, datum.size()); - assertEquals(2, schema.getFields().size()); - 
assertEquals("id", schema.getFields().get(0).getName()); - assertEquals("name", schema.getFields().get(1).getName()); - assertEquals(Types.MinorType.INT.getType(), schema.getFields().get(0).getType()); - assertEquals(Types.MinorType.VARCHAR.getType(), schema.getFields().get(1).getType()); - checkParquetReadResult(schema, writeSupport.getWrittenRecords(), datum); - - AutoCloseables.close(datum); - } - - @Test - public void testNoErrorWhenCloseAgain() throws Exception { - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a"); - - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - - assertDoesNotThrow( - () -> { - NativeDataset dataset = factory.finish(); - dataset.close(); - dataset.close(); - }); - - AutoCloseables.close(factory); - } - - @Test - public void testErrorThrownWhenScanBatchesAgain() throws Exception { - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a"); - - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - NativeDataset dataset = factory.finish(); - ScanOptions options = new ScanOptions(100); - NativeScanner scanner = dataset.newScan(options); - List datum = collectTaskData(scanner); - AutoCloseables.close(datum); - UnsupportedOperationException uoe = - assertThrows(UnsupportedOperationException.class, scanner::scanBatches); - assertEquals( - "NativeScanner can only be executed once. 
Create a new scanner instead", uoe.getMessage()); - - AutoCloseables.close(scanner, dataset, factory); - } - - @Test - public void testScanBatchesInOtherThread() throws Exception { - ExecutorService executor = Executors.newSingleThreadExecutor(); - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a"); - - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - NativeDataset dataset = factory.finish(); - ScanOptions options = new ScanOptions(100); - NativeScanner scanner = dataset.newScan(options); - List datum = executor.submit(() -> collectTaskData(scanner)).get(); - - AutoCloseables.close(datum); - AutoCloseables.close(scanner, dataset, factory); - } - - @Test - public void testErrorThrownWhenScanBatchesAfterScannerClose() throws Exception { - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a"); - - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - NativeDataset dataset = factory.finish(); - ScanOptions options = new ScanOptions(100); - NativeScanner scanner = dataset.newScan(options); - scanner.close(); - assertThrows(NativeInstanceReleasedException.class, scanner::scanBatches); - - AutoCloseables.close(factory); - } - - @Test - public void testErrorThrownWhenReadAfterNativeReaderClose() throws Exception { - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a"); - - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - NativeDataset dataset = factory.finish(); - ScanOptions options = new ScanOptions(100); - NativeScanner scanner = dataset.newScan(options); - 
ArrowReader reader = scanner.scanBatches(); - scanner.close(); - assertThrows(NativeInstanceReleasedException.class, reader::loadNextBatch); - - AutoCloseables.close(factory); - } - - @Test - public void testBaseArrowIpcRead() throws Exception { - File dataFile = new File(TMP, "datafile"); - Schema sourceSchema = - new Schema(Collections.singletonList(Field.nullable("ints", new ArrowType.Int(32, true)))); - try (VectorSchemaRoot root = VectorSchemaRoot.create(sourceSchema, rootAllocator()); - FileOutputStream sink = new FileOutputStream(dataFile); - ArrowFileWriter writer = - new ArrowFileWriter(root, /* provider= */ null, sink.getChannel())) { - IntVector ints = (IntVector) root.getVector(0); - ints.setSafe(0, 0); - ints.setSafe(1, 1024); - ints.setSafe(2, Integer.MAX_VALUE); - root.setRowCount(3); - writer.start(); - writer.writeBatch(); - writer.end(); - } - - String arrowDataURI = dataFile.toURI().toString(); - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), NativeMemoryPool.getDefault(), FileFormat.ARROW_IPC, arrowDataURI); - ScanOptions options = new ScanOptions(100); - Schema schema = inferResultSchemaFromFactory(factory, options); - List datum = collectResultFromFactory(factory, options); - - assertScanBatchesProduced(factory, options); - assertEquals(1, datum.size()); - assertEquals(1, schema.getFields().size()); - assertEquals("ints", schema.getFields().get(0).getName()); - - String expectedJsonUnordered = String.format("[[0],[1024],[%d]]", Integer.MAX_VALUE); - checkParquetReadResult(schema, expectedJsonUnordered, datum); - - AutoCloseables.close(datum); - AutoCloseables.close(factory); - } - - @Test - public void testBaseOrcRead() throws Exception { - String dataName = "test-orc"; - String basePath = TMP.getAbsolutePath(); - - TypeDescription orcSchema = TypeDescription.fromString("struct"); - Path path = new Path(basePath, dataName); - OrcWriteSupport.writeTempFile( - orcSchema, path, new Integer[] 
{Integer.MIN_VALUE, Integer.MAX_VALUE}); - - String orcDatasetUri = new File(basePath, dataName).toURI().toString(); - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), NativeMemoryPool.getDefault(), FileFormat.ORC, orcDatasetUri); - ScanOptions options = new ScanOptions(100); - Schema schema = inferResultSchemaFromFactory(factory, options); - List datum = collectResultFromFactory(factory, options); - - assertScanBatchesProduced(factory, options); - assertEquals(1, datum.size()); - assertEquals(1, schema.getFields().size()); - assertEquals("ints", schema.getFields().get(0).getName()); - - String expectedJsonUnordered = "[[2147483647], [-2147483648]]"; - checkParquetReadResult(schema, expectedJsonUnordered, datum); - - AutoCloseables.close(datum); - AutoCloseables.close(factory); - } - - @Test - public void testBaseCsvRead() throws Exception { - TextBasedWriteSupport writeSupport = - TextBasedWriteSupport.writeTempFile( - TMP, ".csv", "Name,Language", "Juno,Java", "Peter,Python", "Celin,C++"); - String expectedJsonUnordered = - "[[\"Juno\", \"Java\"], [\"Peter\", \"Python\"], [\"Celin\", \"C++\"]]"; - ScanOptions options = new ScanOptions(100); - try (FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.CSV, - writeSupport.getOutputURI())) { - List datum = collectResultFromFactory(factory, options); - Schema schema = inferResultSchemaFromFactory(factory, options); - - assertScanBatchesProduced(factory, options); - assertEquals(1, datum.size()); - assertEquals(2, schema.getFields().size()); - assertEquals("Name", schema.getFields().get(0).getName()); - - checkParquetReadResult(schema, expectedJsonUnordered, datum); - - AutoCloseables.close(datum); - } - } - - @Test - public void testBaseJsonRead() throws Exception { - TextBasedWriteSupport writeSupport = - TextBasedWriteSupport.writeTempFile( - TMP, - ".json", - "{\"Type\": \"Compiled\", \"Language\": 
\"Java\"}", - "{\"Type\": \"Interpreted\", \"Language\": \"Python\"}"); - String expectedJsonUnordered = "[[\"Compiled\", \"Java\"], " + "[\"Interpreted\", \"Python\"]]"; - ScanOptions options = new ScanOptions(100); - try (FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.JSON, - writeSupport.getOutputURI())) { - List datum = collectResultFromFactory(factory, options); - Schema schema = inferResultSchemaFromFactory(factory, options); - - assertScanBatchesProduced(factory, options); - assertEquals(1, datum.size()); - assertEquals(2, schema.getFields().size()); - assertEquals("Type", schema.getFields().get(0).getName()); - assertEquals("Language", schema.getFields().get(1).getName()); - - checkParquetReadResult(schema, expectedJsonUnordered, datum); - - AutoCloseables.close(datum); - } - } - - private void checkParquetReadResult( - Schema schema, String expectedJson, List actual) throws IOException { - final ObjectMapper json = new ObjectMapper(); - final Set expectedSet = json.readValue(expectedJson, Set.class); - final Set> actualSet = new HashSet<>(); - final int fieldCount = schema.getFields().size(); - try (VectorSchemaRoot vsr = VectorSchemaRoot.create(schema, rootAllocator())) { - VectorLoader loader = new VectorLoader(vsr); - for (ArrowRecordBatch batch : actual) { - loader.load(batch); - int batchRowCount = vsr.getRowCount(); - for (int i = 0; i < batchRowCount; i++) { - List row = new ArrayList<>(); - for (int j = 0; j < fieldCount; j++) { - Object object = vsr.getVector(j).getObject(i); - if (Primitives.isWrapperType(object.getClass())) { - row.add(object); - } else { - row.add(object.toString()); - } - } - actualSet.add(row); - } - } - } - assertEquals( - expectedSet, - actualSet, - "Mismatched data read from Parquet, actual: " + json.writeValueAsString(actualSet) + ";"); - } - - private void checkParquetReadResult( - Schema schema, List expected, List actual) { - 
assertEquals(expected.size(), actual.stream().mapToInt(ArrowRecordBatch::getLength).sum()); - final int fieldCount = schema.getFields().size(); - ArrayList expectedRemovable = new ArrayList<>(expected); - try (VectorSchemaRoot vsr = VectorSchemaRoot.create(schema, rootAllocator())) { - VectorLoader loader = new VectorLoader(vsr); - for (ArrowRecordBatch batch : actual) { - assertEquals(fieldCount, batch.getNodes().size()); - loader.load(batch); - int batchRowCount = vsr.getRowCount(); - for (int i = 0; i < fieldCount; i++) { - FieldVector vector = vsr.getVector(i); - for (int j = 0; j < batchRowCount; j++) { - Object object = vector.getObject(j); - Object expectedObject = expectedRemovable.get(j).get(i); - assertEquals(Objects.toString(expectedObject), Objects.toString(object)); - } - } - for (int i = 0; i < batchRowCount; i++) { - expectedRemovable.remove(0); - } - } - assertTrue(expectedRemovable.isEmpty()); - } - } - - private org.apache.avro.Schema truncateAvroSchema( - org.apache.avro.Schema schema, int from, int to) { - List fields = schema.getFields().subList(from, to); - return org.apache.avro.Schema.createRecord( - fields.stream() - .map( - f -> - new org.apache.avro.Schema.Field( - f.name(), f.schema(), f.doc(), f.defaultVal(), f.order())) - .collect(Collectors.toList())); - } -} diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDatasetFactory.java b/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDatasetFactory.java deleted file mode 100644 index 69d80c334169e..0000000000000 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDatasetFactory.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.file; - -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import org.apache.arrow.dataset.jni.NativeMemoryPool; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Test; - -public class TestFileSystemDatasetFactory { - - @Test - public void testErrorHandling() { - RuntimeException e = - assertThrows( - RuntimeException.class, - () -> { - new FileSystemDatasetFactory( - new RootAllocator(Long.MAX_VALUE), - NativeMemoryPool.getDefault(), - FileFormat.NONE, - "file:///NON_EXIST_FILE"); - }); - assertEquals("illegal file format id: -1", e.getMessage()); - } - - @Test - public void testCloseAgain() { - assertDoesNotThrow( - () -> { - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - new RootAllocator(Long.MAX_VALUE), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - "file:///NON_EXIST_FILE"); - factory.close(); - factory.close(); - }); - } -} diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestNativeDataset.java b/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestNativeDataset.java deleted file mode 100644 index 3a262c5b8580b..0000000000000 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestNativeDataset.java +++ 
/dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.jni; - -import static org.junit.jupiter.api.Assertions.assertNotNull; - -import org.apache.arrow.dataset.TestDataset; -import org.apache.arrow.dataset.scanner.ScanOptions; -import org.apache.arrow.dataset.scanner.Scanner; -import org.apache.arrow.dataset.source.Dataset; -import org.apache.arrow.dataset.source.DatasetFactory; - -public abstract class TestNativeDataset extends TestDataset { - protected void assertScanBatchesProduced(DatasetFactory factory, ScanOptions options) { - final Dataset dataset = factory.finish(); - final Scanner scanner = dataset.newScan(options); - assertNotNull(scanner.scanBatches()); - } -} diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestReservationListener.java b/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestReservationListener.java deleted file mode 100644 index 9fabc4a257fb3..0000000000000 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/jni/TestReservationListener.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.jni; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.File; -import java.util.List; -import java.util.concurrent.atomic.AtomicLong; -import org.apache.arrow.dataset.ParquetWriteSupport; -import org.apache.arrow.dataset.TestDataset; -import org.apache.arrow.dataset.file.FileFormat; -import org.apache.arrow.dataset.file.FileSystemDatasetFactory; -import org.apache.arrow.dataset.scanner.ScanOptions; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -public class TestReservationListener extends TestDataset { - - @TempDir public File TMP; - - public static final String AVRO_SCHEMA_USER = "user.avsc"; - - @Test - public void testDirectReservationListener() throws Exception { - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a"); - NativeMemoryPool pool = NativeMemoryPool.createListenable(DirectReservationListener.instance()); - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), 
pool, FileFormat.PARQUET, writeSupport.getOutputURI()); - ScanOptions options = new ScanOptions(100); - long initReservation = DirectReservationListener.instance().getCurrentDirectMemReservation(); - List datum = collectResultFromFactory(factory, options); - long reservation = DirectReservationListener.instance().getCurrentDirectMemReservation(); - AutoCloseables.close(datum); - AutoCloseables.close(pool); - long finalReservation = DirectReservationListener.instance().getCurrentDirectMemReservation(); - assertTrue(reservation >= initReservation); - assertEquals(initReservation, finalReservation); - } - - @Test - public void testCustomReservationListener() throws Exception { - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a"); - final AtomicLong reserved = new AtomicLong(0L); - ReservationListener listener = - new ReservationListener() { - @Override - public void reserve(long size) { - reserved.getAndAdd(size); - } - - @Override - public void unreserve(long size) { - reserved.getAndAdd(-size); - } - }; - NativeMemoryPool pool = NativeMemoryPool.createListenable(listener); - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), pool, FileFormat.PARQUET, writeSupport.getOutputURI()); - ScanOptions options = new ScanOptions(100); - long initReservation = reserved.get(); - List datum = collectResultFromFactory(factory, options); - long reservation = reserved.get(); - AutoCloseables.close(datum); - AutoCloseables.close(pool); - long finalReservation = reserved.get(); - assertTrue(reservation >= initReservation); - assertEquals(initReservation, finalReservation); - } - - @Test - @SuppressWarnings("UnnecessaryAsync") - public void testErrorThrownFromReservationListener() throws Exception { - final String errorMessage = "ERROR_MESSAGE"; - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a"); - final AtomicLong reserved = new AtomicLong(0L); - 
ReservationListener listener = - new ReservationListener() { - @Override - public void reserve(long size) { - throw new IllegalArgumentException(errorMessage); - } - - @Override - public void unreserve(long size) { - // no-op - } - }; - NativeMemoryPool pool = NativeMemoryPool.createListenable(listener); - FileSystemDatasetFactory factory = - new FileSystemDatasetFactory( - rootAllocator(), pool, FileFormat.PARQUET, writeSupport.getOutputURI()); - ScanOptions options = new ScanOptions(100); - long initReservation = reserved.get(); - assertThrows( - IllegalArgumentException.class, - () -> { - collectResultFromFactory(factory, options); - }, - errorMessage); - long reservation = reserved.get(); - AutoCloseables.close(pool); - long finalReservation = reserved.get(); - assertEquals(initReservation, reservation); - assertEquals(initReservation, finalReservation); - } -} diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java b/java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java deleted file mode 100644 index eec6570a639f2..0000000000000 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java +++ /dev/null @@ -1,640 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.dataset.substrait; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.File; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.Arrays; -import java.util.Base64; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; -import org.apache.arrow.dataset.ParquetWriteSupport; -import org.apache.arrow.dataset.TestDataset; -import org.apache.arrow.dataset.file.FileFormat; -import org.apache.arrow.dataset.file.FileSystemDatasetFactory; -import org.apache.arrow.dataset.jni.NativeMemoryPool; -import org.apache.arrow.dataset.scanner.ScanOptions; -import org.apache.arrow.dataset.scanner.Scanner; -import org.apache.arrow.dataset.source.Dataset; -import org.apache.arrow.dataset.source.DatasetFactory; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.ipc.ArrowReader; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.Text; -import org.hamcrest.collection.IsIterableContainingInOrder; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -public class TestAceroSubstraitConsumer extends TestDataset { - - @TempDir public File TMP; - - public static final String AVRO_SCHEMA_USER = "user.avsc"; - - @Test - public void testRunQueryLocalFiles() throws Exception { - // Query: - // SELECT id, name FROM Users - // Isthmus: - // ./isthmus-macOS-0.7.0 -c 
"CREATE TABLE USERS ( id INT NOT NULL, name VARCHAR(150));" "SELECT - // id, name FROM Users" - // VARCHAR(150) -> is mapping to -> {ARROW:extension:name=varchar, - // ARROW:extension:metadata=varchar{length:150}} - Map metadataName = new HashMap<>(); - metadataName.put("ARROW:extension:name", "varchar"); - metadataName.put("ARROW:extension:metadata", "varchar{length:150}"); - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("ID", new ArrowType.Int(32, true)), - new Field( - "NAME", new FieldType(true, new ArrowType.Utf8(), null, metadataName), null)), - Collections.emptyMap()); - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a", 11, "b", 21, "c"); - try (ArrowReader arrowReader = - new AceroSubstraitConsumer(rootAllocator()) - .runQuery( - new String( - Files.readAllBytes( - Paths.get( - TestAceroSubstraitConsumer.class - .getClassLoader() - .getResource("substrait/local_files_users.json") - .toURI())), - StandardCharsets.UTF_8) - .replace("FILENAME_PLACEHOLDER", writeSupport.getOutputURI()))) { - assertEquals(schema, arrowReader.getVectorSchemaRoot().getSchema()); - int rowcount = 0; - while (arrowReader.loadNextBatch()) { - rowcount += arrowReader.getVectorSchemaRoot().getRowCount(); - } - assertEquals(3, rowcount); - } - } - - @Test - public void testRunQueryNamedTable() throws Exception { - // Query: - // SELECT id, name FROM Users - // Isthmus: - // ./isthmus-macOS-0.7.0 -c "CREATE TABLE USERS ( id INT NOT NULL, name VARCHAR(150));" "SELECT - // id, name FROM Users" - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("ID", new ArrowType.Int(32, true)), - Field.nullable("NAME", new ArrowType.Utf8())), - Collections.emptyMap()); - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a", 11, "b", 21, "c"); - ScanOptions options = new ScanOptions(/*batchSize*/ 32768); - try (DatasetFactory datasetFactory = - new 
FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - Dataset dataset = datasetFactory.finish(); - Scanner scanner = dataset.newScan(options); - ArrowReader reader = scanner.scanBatches()) { - Map mapTableToArrowReader = new HashMap<>(); - mapTableToArrowReader.put("USERS", reader); - try (ArrowReader arrowReader = - new AceroSubstraitConsumer(rootAllocator()) - .runQuery( - new String( - Files.readAllBytes( - Paths.get( - TestAceroSubstraitConsumer.class - .getClassLoader() - .getResource("substrait/named_table_users.json") - .toURI())), - StandardCharsets.UTF_8), - mapTableToArrowReader)) { - assertEquals(schema, arrowReader.getVectorSchemaRoot().getSchema()); - assertEquals(arrowReader.getVectorSchemaRoot().getSchema(), schema); - int rowcount = 0; - while (arrowReader.loadNextBatch()) { - rowcount += arrowReader.getVectorSchemaRoot().getRowCount(); - } - assertEquals(3, rowcount); - } - } - } - - @Test - public void testRunQueryNamedTableWithException() throws Exception { - // Query: - // SELECT id, name FROM Users - // Isthmus: - // ./isthmus-macOS-0.7.0 -c "CREATE TABLE USERS ( id INT NOT NULL, name VARCHAR(150));" "SELECT - // id, name FROM Users" - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("ID", new ArrowType.Int(32, true)), - Field.nullable("NAME", new ArrowType.Utf8())), - Collections.emptyMap()); - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a", 11, "b", 21, "c"); - ScanOptions options = new ScanOptions(/*batchSize*/ 32768); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - Dataset dataset = datasetFactory.finish(); - Scanner scanner = dataset.newScan(options); - ArrowReader reader = scanner.scanBatches()) { - Map mapTableToArrowReader = new HashMap<>(); - 
mapTableToArrowReader.put("USERS_INVALID_MAP", reader); - assertThrows( - RuntimeException.class, - () -> { - try (ArrowReader arrowReader = - new AceroSubstraitConsumer(rootAllocator()) - .runQuery( - new String( - Files.readAllBytes( - Paths.get( - TestAceroSubstraitConsumer.class - .getClassLoader() - .getResource("substrait/named_table_users.json") - .toURI())), - StandardCharsets.UTF_8), - mapTableToArrowReader)) { - assertEquals(schema, arrowReader.getVectorSchemaRoot().getSchema()); - int rowcount = 0; - while (arrowReader.loadNextBatch()) { - rowcount += arrowReader.getVectorSchemaRoot().getRowCount(); - } - assertEquals(3, rowcount); - } - }); - } - } - - @Test - public void testRunBinaryQueryNamedTable() throws Exception { - // Query: - // SELECT id, name FROM Users - // Isthmus: - // ./isthmus-macOS-0.7.0 -c "CREATE TABLE USERS ( id INT NOT NULL, name VARCHAR(150));" "SELECT - // id, name FROM Users" - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("ID", new ArrowType.Int(32, true)), - Field.nullable("NAME", new ArrowType.Utf8())), - Collections.emptyMap()); - // Base64.getEncoder().encodeToString(plan.toByteArray()); - String binaryPlan = - "Gl8SXQpROk8KBhIECgICAxIvCi0KAgoAEh4KAklECgROQU1FEhIKBCoCEAEKC" - + "LIBBQiWARgBGAI6BwoFVVNFUlMaCBIGCgISACIAGgoSCAoEEgIIASIAEgJJRBIETkFNRQ=="; - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP, 1, "a", 11, "b", 21, "c"); - ScanOptions options = new ScanOptions(/*batchSize*/ 32768); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - Dataset dataset = datasetFactory.finish(); - Scanner scanner = dataset.newScan(options); - ArrowReader reader = scanner.scanBatches()) { - // map table to reader - Map mapTableToArrowReader = new HashMap<>(); - mapTableToArrowReader.put("USERS", reader); - // get binary plan - ByteBuffer 
substraitPlan = getByteBuffer(binaryPlan); - // run query - try (ArrowReader arrowReader = - new AceroSubstraitConsumer(rootAllocator()) - .runQuery(substraitPlan, mapTableToArrowReader)) { - assertEquals(schema, arrowReader.getVectorSchemaRoot().getSchema()); - int rowcount = 0; - while (arrowReader.loadNextBatch()) { - rowcount += arrowReader.getVectorSchemaRoot().getRowCount(); - } - assertEquals(3, rowcount); - } - } - } - - @Test - public void testRunExtendedExpressionsFilter() throws Exception { - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("id", new ArrowType.Int(32, true)), - Field.nullable("name", new ArrowType.Utf8())), - null); - // Substrait Extended Expression: Filter: - // Expression 01: WHERE ID < 20 - String base64EncodedSubstraitFilter = - "Ch4IARIaL2Z1bmN0aW9uc19jb21wYXJpc29uLnlhbWwSEhoQCAIQAhoKbHQ6YW55X2F" - + "ueRo3ChwaGggCGgQKAhABIggaBhIECgISACIGGgQKAigUGhdmaWx0ZXJfaWRfbG93ZXJfdGhhbl8yMCIaCgJJRAoETkFNRRIOCgQqAhA" - + "BCgRiAhABGAI="; - ByteBuffer substraitExpressionFilter = getByteBuffer(base64EncodedSubstraitFilter); - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile( - AVRO_SCHEMA_USER, - TMP, - 19, - "value_19", - 1, - "value_1", - 11, - "value_11", - 21, - "value_21", - 45, - "value_45"); - ScanOptions options = - new ScanOptions.Builder(/*batchSize*/ 32768) - .columns(Optional.empty()) - .substraitFilter(substraitExpressionFilter) - .build(); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - Dataset dataset = datasetFactory.finish(); - Scanner scanner = dataset.newScan(options); - ArrowReader reader = scanner.scanBatches()) { - assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields()); - int rowcount = 0; - while (reader.loadNextBatch()) { - rowcount += reader.getVectorSchemaRoot().getRowCount(); - final ValueIterableVector idVector = - 
(ValueIterableVector) reader.getVectorSchemaRoot().getVector("id"); - assertThat(idVector.getValueIterable(), IsIterableContainingInOrder.contains(19, 1, 11)); - final ValueIterableVector nameVector = - (ValueIterableVector) reader.getVectorSchemaRoot().getVector("name"); - assertThat( - nameVector.getValueIterable(), - IsIterableContainingInOrder.contains( - new Text("value_19"), new Text("value_1"), new Text("value_11"))); - } - assertEquals(3, rowcount); - } - } - - @Test - public void testRunExtendedExpressionsFilterWithProjectionsInsteadOfFilterException() - throws Exception { - // Substrait Extended Expression: Project New Column: - // Expression ADD: id + 2 - // Expression CONCAT: name + '-' + name - String base64EncodedSubstraitFilter = - "Ch4IARIaL2Z1bmN0aW9uc19hcml0aG1ldGljLnlhbWwSERoPCAEaC2FkZDppM" - + "zJfaTMyEhQaEggCEAEaDGNvbmNhdDp2Y2hhchoxChoaGBoEKgIQASIIGgYSBAoCEgAiBhoECgIoAhoTYWRkX3R3b190b19jb2x1" - + "bW5fYRpGCi0aKwgBGgRiAhABIgoaCBIGCgQSAggBIgkaBwoFYgMgLSAiChoIEgYKBBICCAEaFWNvbmNhdF9jb2x1bW5fYV9hbmR" - + "fYiIaCgJJRAoETkFNRRIOCgQqAhABCgRiAhABGAI="; - ByteBuffer substraitExpressionFilter = getByteBuffer(base64EncodedSubstraitFilter); - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile( - AVRO_SCHEMA_USER, - TMP, - 19, - "value_19", - 1, - "value_1", - 11, - "value_11", - 21, - "value_21", - 45, - "value_45"); - ScanOptions options = - new ScanOptions.Builder(/*batchSize*/ 32768) - .columns(Optional.empty()) - .substraitFilter(substraitExpressionFilter) - .build(); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - Dataset dataset = datasetFactory.finish()) { - Exception e = assertThrows(RuntimeException.class, () -> dataset.newScan(options)); - assertTrue( - e.getMessage().startsWith("There is no filter expression in the expression provided")); - } - } - - @Test - public void 
testRunExtendedExpressionsFilterWithEmptyFilterException() throws Exception { - String base64EncodedSubstraitFilter = ""; - ByteBuffer substraitExpressionFilter = getByteBuffer(base64EncodedSubstraitFilter); - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile( - AVRO_SCHEMA_USER, - TMP, - 19, - "value_19", - 1, - "value_1", - 11, - "value_11", - 21, - "value_21", - 45, - "value_45"); - ScanOptions options = - new ScanOptions.Builder(/*batchSize*/ 32768) - .columns(Optional.empty()) - .substraitFilter(substraitExpressionFilter) - .build(); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - Dataset dataset = datasetFactory.finish()) { - Exception e = assertThrows(RuntimeException.class, () -> dataset.newScan(options)); - assertTrue( - e.getMessage() - .contains("no anonymous struct type was provided to which names could be attached.")); - } - } - - @Test - public void testRunExtendedExpressionsProjection() throws Exception { - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("add_two_to_column_a", new ArrowType.Int(32, true)), - Field.nullable("concat_column_a_and_b", new ArrowType.Utf8())), - null); - // Substrait Extended Expression: Project New Column: - // Expression ADD: id + 2 - // Expression CONCAT: name + '-' + name - String binarySubstraitExpressionProject = - "Ch4IARIaL2Z1bmN0aW9uc19hcml0aG1ldGljLnlhbWwSERoPCAEaC2FkZDppM" - + "zJfaTMyEhQaEggCEAEaDGNvbmNhdDp2Y2hhchoxChoaGBoEKgIQASIIGgYSBAoCEgAiBhoECgIoAhoTYWRkX3R3b190b19jb2x1" - + "bW5fYRpGCi0aKwgBGgRiAhABIgoaCBIGCgQSAggBIgkaBwoFYgMgLSAiChoIEgYKBBICCAEaFWNvbmNhdF9jb2x1bW5fYV9hbmR" - + "fYiIaCgJJRAoETkFNRRIOCgQqAhABCgRiAhABGAI="; - ByteBuffer substraitExpressionProject = getByteBuffer(binarySubstraitExpressionProject); - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile( - AVRO_SCHEMA_USER, - TMP, - 19, - "value_19", - 1, 
- "value_1", - 11, - "value_11", - 21, - "value_21", - 45, - "value_45"); - ScanOptions options = - new ScanOptions.Builder(/*batchSize*/ 32768) - .columns(Optional.empty()) - .substraitProjection(substraitExpressionProject) - .build(); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - Dataset dataset = datasetFactory.finish(); - Scanner scanner = dataset.newScan(options); - ArrowReader reader = scanner.scanBatches()) { - assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields()); - int rowcount = 0; - while (reader.loadNextBatch()) { - final ValueIterableVector sumVector = - (ValueIterableVector) - reader.getVectorSchemaRoot().getVector("add_two_to_column_a"); - assertThat( - sumVector.getValueIterable(), IsIterableContainingInOrder.contains(21, 3, 13, 23, 47)); - final ValueIterableVector nameVector = - (ValueIterableVector) - reader.getVectorSchemaRoot().getVector("concat_column_a_and_b"); - assertThat( - nameVector.getValueIterable(), - IsIterableContainingInOrder.contains( - new Text("value_19 - value_19"), - new Text("value_1 - value_1"), - new Text("value_11 - value_11"), - new Text("value_21 - value_21"), - new Text("value_45 - value_45"))); - rowcount += reader.getVectorSchemaRoot().getRowCount(); - } - assertEquals(5, rowcount); - } - } - - @Test - public void testRunExtendedExpressionsProjectionWithFilterInsteadOfProjectionException() - throws Exception { - final Schema schema = - new Schema( - Arrays.asList(Field.nullable("filter_id_lower_than_20", new ArrowType.Bool())), null); - // Substrait Extended Expression: Filter: - // Expression 01: WHERE ID < 20 - String binarySubstraitExpressionFilter = - "Ch4IARIaL2Z1bmN0aW9uc19jb21wYXJpc29uLnlhbWwSEhoQCAIQAhoKbHQ6YW55X2F" - + "ueRo3ChwaGggCGgQKAhABIggaBhIECgISACIGGgQKAigUGhdmaWx0ZXJfaWRfbG93ZXJfdGhhbl8yMCIaCgJJRAoETkFNRRIOCgQqAhA" - + "BCgRiAhABGAI="; - 
ByteBuffer substraitExpressionFilter = getByteBuffer(binarySubstraitExpressionFilter); - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile( - AVRO_SCHEMA_USER, - TMP, - 19, - "value_19", - 1, - "value_1", - 11, - "value_11", - 21, - "value_21", - 45, - "value_45"); - ScanOptions options = - new ScanOptions.Builder(/*batchSize*/ 32768) - .columns(Optional.empty()) - .substraitProjection(substraitExpressionFilter) - .build(); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - Dataset dataset = datasetFactory.finish(); - Scanner scanner = dataset.newScan(options); - ArrowReader reader = scanner.scanBatches()) { - assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields()); - int rowcount = 0; - while (reader.loadNextBatch()) { - final ValueIterableVector booleanVector = - (ValueIterableVector) - reader.getVectorSchemaRoot().getVector("filter_id_lower_than_20"); - assertThat( - booleanVector.getValueIterable(), - IsIterableContainingInOrder.contains(true, true, true, false, false)); - rowcount += reader.getVectorSchemaRoot().getRowCount(); - } - assertEquals(5, rowcount); - } - } - - @Test - public void testRunExtendedExpressionsProjectionWithEmptyProjectionException() throws Exception { - String base64EncodedSubstraitFilter = ""; - ByteBuffer substraitExpressionProjection = getByteBuffer(base64EncodedSubstraitFilter); - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile( - AVRO_SCHEMA_USER, - TMP, - 19, - "value_19", - 1, - "value_1", - 11, - "value_11", - 21, - "value_21", - 45, - "value_45"); - ScanOptions options = - new ScanOptions.Builder(/*batchSize*/ 32768) - .columns(Optional.empty()) - .substraitProjection(substraitExpressionProjection) - .build(); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - rootAllocator(), - 
NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - Dataset dataset = datasetFactory.finish()) { - Exception e = assertThrows(RuntimeException.class, () -> dataset.newScan(options)); - assertTrue( - e.getMessage() - .contains("no anonymous struct type was provided to which names could be attached.")); - } - } - - @Test - public void testRunExtendedExpressionsProjectAndFilter() throws Exception { - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("add_two_to_column_a", new ArrowType.Int(32, true)), - Field.nullable("concat_column_a_and_b", new ArrowType.Utf8())), - null); - // Substrait Extended Expression: Project New Column: - // Expression ADD: id + 2 - // Expression CONCAT: name + '-' + name - String binarySubstraitExpressionProject = - "Ch4IARIaL2Z1bmN0aW9uc19hcml0aG1ldGljLnlhbWwSERoPCAEaC2FkZDppM" - + "zJfaTMyEhQaEggCEAEaDGNvbmNhdDp2Y2hhchoxChoaGBoEKgIQASIIGgYSBAoCEgAiBhoECgIoAhoTYWRkX3R3b190b19jb2x1" - + "bW5fYRpGCi0aKwgBGgRiAhABIgoaCBIGCgQSAggBIgkaBwoFYgMgLSAiChoIEgYKBBICCAEaFWNvbmNhdF9jb2x1bW5fYV9hbmR" - + "fYiIaCgJJRAoETkFNRRIOCgQqAhABCgRiAhABGAI="; - ByteBuffer substraitExpressionProject = getByteBuffer(binarySubstraitExpressionProject); - // Substrait Extended Expression: Filter: - // Expression 01: WHERE ID < 20 - String base64EncodedSubstraitFilter = - "Ch4IARIaL2Z1bmN0aW9uc19jb21wYXJpc29uLnlhbWwSEhoQCAIQAhoKbHQ6YW55X2F" - + "ueRo3ChwaGggCGgQKAhABIggaBhIECgISACIGGgQKAigUGhdmaWx0ZXJfaWRfbG93ZXJfdGhhbl8yMCIaCgJJRAoETkFNRRIOCgQqAhA" - + "BCgRiAhABGAI="; - ByteBuffer substraitExpressionFilter = getByteBuffer(base64EncodedSubstraitFilter); - ParquetWriteSupport writeSupport = - ParquetWriteSupport.writeTempFile( - AVRO_SCHEMA_USER, - TMP, - 19, - "value_19", - 1, - "value_1", - 11, - "value_11", - 21, - "value_21", - 45, - "value_45"); - ScanOptions options = - new ScanOptions.Builder(/*batchSize*/ 32768) - .columns(Optional.empty()) - .substraitProjection(substraitExpressionProject) - 
.substraitFilter(substraitExpressionFilter) - .build(); - try (DatasetFactory datasetFactory = - new FileSystemDatasetFactory( - rootAllocator(), - NativeMemoryPool.getDefault(), - FileFormat.PARQUET, - writeSupport.getOutputURI()); - Dataset dataset = datasetFactory.finish(); - Scanner scanner = dataset.newScan(options); - ArrowReader reader = scanner.scanBatches()) { - assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields()); - int rowcount = 0; - while (reader.loadNextBatch()) { - final ValueIterableVector sumVector = - (ValueIterableVector) - reader.getVectorSchemaRoot().getVector("add_two_to_column_a"); - assertThat(sumVector.getValueIterable(), IsIterableContainingInOrder.contains(21, 3, 13)); - final ValueIterableVector nameVector = - (ValueIterableVector) - reader.getVectorSchemaRoot().getVector("concat_column_a_and_b"); - assertThat( - nameVector.getValueIterable(), - IsIterableContainingInOrder.contains( - new Text("value_19 - value_19"), - new Text("value_1 - value_1"), - new Text("value_11 - value_11"))); - rowcount += reader.getVectorSchemaRoot().getRowCount(); - } - assertEquals(3, rowcount); - } - } - - private static ByteBuffer getByteBuffer(String base64EncodedSubstrait) { - byte[] decodedSubstrait = Base64.getDecoder().decode(base64EncodedSubstrait); - ByteBuffer substraitExpression = ByteBuffer.allocateDirect(decodedSubstrait.length); - substraitExpression.put(decodedSubstrait); - return substraitExpression; - } -} diff --git a/java/dataset/src/test/resources/avroschema/user.avsc b/java/dataset/src/test/resources/avroschema/user.avsc deleted file mode 100644 index 5a4635b6dce7d..0000000000000 --- a/java/dataset/src/test/resources/avroschema/user.avsc +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -{ - "namespace": "org.apache.arrow.dataset", - "type": "record", - "name": "Users", - "fields": [ - {"name": "id", "type": ["int", "null"]}, - {"name": "name", "type": ["string", "null"]} - ] -} diff --git a/java/dataset/src/test/resources/data/student.csv b/java/dataset/src/test/resources/data/student.csv deleted file mode 100644 index 3291946092156..0000000000000 --- a/java/dataset/src/test/resources/data/student.csv +++ /dev/null @@ -1,4 +0,0 @@ -Id;Name;Language -1;Juno;Java -2;Peter;Python -3;Celin;C++ diff --git a/java/dataset/src/test/resources/substrait/local_files_users.json b/java/dataset/src/test/resources/substrait/local_files_users.json deleted file mode 100644 index a2f5af1b3b80c..0000000000000 --- a/java/dataset/src/test/resources/substrait/local_files_users.json +++ /dev/null @@ -1,75 +0,0 @@ -{ - "extensionUris": [], - "extensions": [], - "relations": [{ - "root": { - "input": { - "project": { - "common": { - "emit": { - "outputMapping": [2, 3] - } - }, - "input": { - "read": { - "common": { - "direct": { - } - }, - "baseSchema": { - "names": ["ID", "NAME"], - "struct": { - "types": [{ - "i32": { - "typeVariationReference": 0, - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "varchar": { - "length": 150, - "typeVariationReference": 0, - "nullability": "NULLABILITY_NULLABLE" - } - }], - "typeVariationReference": 0, - "nullability": "NULLABILITY_REQUIRED" - } - 
}, - "local_files": { - "items": [ - { - "uri_file": "FILENAME_PLACEHOLDER", - "parquet": {} - } - ] - } - } - }, - "expressions": [{ - "selection": { - "directReference": { - "structField": { - "field": 0 - } - }, - "rootReference": { - } - } - }, { - "selection": { - "directReference": { - "structField": { - "field": 1 - } - }, - "rootReference": { - } - } - }] - } - }, - "names": ["ID", "NAME"] - } - }], - "expectedTypeUrls": [] -} \ No newline at end of file diff --git a/java/dataset/src/test/resources/substrait/named_table_users.json b/java/dataset/src/test/resources/substrait/named_table_users.json deleted file mode 100644 index 629eebd059776..0000000000000 --- a/java/dataset/src/test/resources/substrait/named_table_users.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "extensionUris": [], - "extensions": [], - "relations": [{ - "root": { - "input": { - "project": { - "common": { - "emit": { - "outputMapping": [2, 3] - } - }, - "input": { - "read": { - "common": { - "direct": { - } - }, - "baseSchema": { - "names": ["ID", "NAME"], - "struct": { - "types": [{ - "i32": { - "typeVariationReference": 0, - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "varchar": { - "length": 150, - "typeVariationReference": 0, - "nullability": "NULLABILITY_NULLABLE" - } - }], - "typeVariationReference": 0, - "nullability": "NULLABILITY_REQUIRED" - } - }, - "namedTable": { - "names": ["USERS"] - } - } - }, - "expressions": [{ - "selection": { - "directReference": { - "structField": { - "field": 0 - } - }, - "rootReference": { - } - } - }, { - "selection": { - "directReference": { - "structField": { - "field": 1 - } - }, - "rootReference": { - } - } - }] - } - }, - "names": ["ID", "NAME"] - } - }], - "expectedTypeUrls": [] -} \ No newline at end of file diff --git a/java/dev/checkstyle/checkstyle.xml b/java/dev/checkstyle/checkstyle.xml deleted file mode 100644 index 4b546ac56ea23..0000000000000 --- a/java/dev/checkstyle/checkstyle.xml +++ /dev/null @@ -1,235 +0,0 @@ - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/java/dev/checkstyle/suppressions.xml b/java/dev/checkstyle/suppressions.xml deleted file mode 100644 index e8669c54e61fd..0000000000000 --- a/java/dev/checkstyle/suppressions.xml +++ /dev/null @@ -1,47 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/java/dev/license/asf-java.license b/java/dev/license/asf-java.license deleted file mode 100644 index 3e7c6c26f5578..0000000000000 --- a/java/dev/license/asf-java.license +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ diff --git a/java/dev/license/asf-xml.license b/java/dev/license/asf-xml.license deleted file mode 100644 index b1abef4bf4b64..0000000000000 --- a/java/dev/license/asf-xml.license +++ /dev/null @@ -1,19 +0,0 @@ - - diff --git a/java/flight/flight-core/README.md b/java/flight/flight-core/README.md deleted file mode 100644 index 82b56f2e83b8a..0000000000000 --- a/java/flight/flight-core/README.md +++ /dev/null @@ -1,30 +0,0 @@ - - -# Arrow Flight Java Package - -Exposing Apache Arrow data on the wire. - -[Protocol Description Slides](https://www.slideshare.net/JacquesNadeau5/apache-arrow-flight-overview) - -[GRPC Protocol Definition](https://github.com/apache/arrow/blob/main/format/Flight.proto) - -## Example Usage - -See the [Arrow Cookbook](https://arrow.apache.org/cookbook/java/flight.html). diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml deleted file mode 100644 index 374f6fcda7e09..0000000000000 --- a/java/flight/flight-core/pom.xml +++ /dev/null @@ -1,205 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-flight - 19.0.0-SNAPSHOT - - - flight-core - jar - Arrow Flight Core - An RPC mechanism for transferring ValueVectors. 
- - - 1 - - --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - - - - org.apache.arrow - arrow-format - - - org.apache.arrow - arrow-vector - ${arrow.vector.classifier} - - - org.apache.arrow - arrow-memory-core - - - org.apache.arrow - arrow-memory-netty - runtime - - - io.grpc - grpc-netty - - - io.grpc - grpc-core - - - io.grpc - grpc-protobuf - - - io.netty - netty-tcnative-boringssl-static - - - io.netty - netty-buffer - - - io.netty - netty-handler - - - io.netty - netty-transport - - - com.google.guava - guava - - - io.grpc - grpc-stub - - - com.google.protobuf - protobuf-java - - - com.google.protobuf - protobuf-java-util - - - io.grpc - grpc-api - - - io.grpc - grpc-services - test - - - io.grpc - grpc-inprocess - test - - - - com.fasterxml.jackson.core - jackson-databind - - - org.slf4j - slf4j-api - - - javax.annotation - javax.annotation-api - - - org.immutables - value-annotations - - - - com.google.api.grpc - proto-google-common-protos - 2.49.0 - test - - - org.apache.arrow - arrow-vector - ${project.version} - tests - test-jar - test - - - - - - - maven-surefire-plugin - - --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - false - - ${project.basedir}/../../../testing/data - - - - - org.xolstice.maven.plugins - protobuf-maven-plugin - - - src - - compile - compile-custom - - - ${basedir}/../../../format/ - - - - test - - test-compile - test-compile-custom - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - - analyze - - analyze-only - - verify - - - io.netty:netty-tcnative-boringssl-static:* - - - - - - - - diff --git a/java/flight/flight-core/src/main/java/module-info.java b/java/flight/flight-core/src/main/java/module-info.java deleted file mode 100644 index 28dbb732c4713..0000000000000 --- 
a/java/flight/flight-core/src/main/java/module-info.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -module org.apache.arrow.flight.core { - exports org.apache.arrow.flight; - exports org.apache.arrow.flight.auth; - exports org.apache.arrow.flight.auth2; - exports org.apache.arrow.flight.client; - exports org.apache.arrow.flight.impl; - exports org.apache.arrow.flight.sql.impl; - - requires com.fasterxml.jackson.databind; - requires com.google.common; - requires com.google.errorprone.annotations; - requires com.google.protobuf; - requires com.google.protobuf.util; - requires io.grpc; - requires io.grpc.internal; - requires io.grpc.netty; - requires io.grpc.protobuf; - requires io.grpc.stub; - requires io.netty.buffer; - requires io.netty.common; - requires io.netty.handler; - requires io.netty.transport; - requires org.apache.arrow.format; - requires org.apache.arrow.memory.core; - requires org.apache.arrow.vector; - requires org.slf4j; -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Action.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Action.java deleted file mode 100644 index b1fece50000e0..0000000000000 --- 
a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Action.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import com.google.protobuf.ByteString; -import org.apache.arrow.flight.impl.Flight; - -/** - * An opaque action for the service to perform. - * - *

    This is a POJO wrapper around the message of the same name in Flight.proto. - */ -public class Action { - - private final String type; - private final byte[] body; - - public Action(String type) { - this(type, null); - } - - public Action(String type, byte[] body) { - this.type = type; - this.body = body == null ? new byte[0] : body; - } - - Action(Flight.Action action) { - this(action.getType(), action.getBody().toByteArray()); - } - - public String getType() { - return type; - } - - public byte[] getBody() { - return body; - } - - Flight.Action toProtocol() { - return Flight.Action.newBuilder() - .setType(getType()) - .setBody(ByteString.copyFrom(getBody())) - .build(); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ActionType.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ActionType.java deleted file mode 100644 index 46ef3ae5ca104..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ActionType.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import org.apache.arrow.flight.impl.Flight; - -/** POJO wrapper around protocol specifics for Flight actions. 
*/ -public class ActionType { - private final String type; - private final String description; - - /** - * Construct a new instance. - * - * @param type The type of action to perform - * @param description The description of the type. - */ - public ActionType(String type, String description) { - super(); - this.type = type; - this.description = description; - } - - /** Constructs a new instance from the corresponding protocol buffer object. */ - ActionType(Flight.ActionType type) { - this.type = type.getType(); - this.description = type.getDescription(); - } - - public String getType() { - return type; - } - - public String getDescription() { - return description; - } - - /** Converts the POJO to the corresponding protocol buffer type. */ - Flight.ActionType toProtocol() { - return Flight.ActionType.newBuilder().setType(type).setDescription(description).build(); - } - - @Override - public String toString() { - return "ActionType{" + "type='" + type + '\'' + ", description='" + description + '\'' + '}'; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ArrowMessage.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ArrowMessage.java deleted file mode 100644 index 9cefccb3fe66f..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ArrowMessage.java +++ /dev/null @@ -1,583 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Iterables; -import com.google.common.io.ByteStreams; -import com.google.protobuf.ByteString; -import com.google.protobuf.CodedInputStream; -import com.google.protobuf.CodedOutputStream; -import com.google.protobuf.WireFormat; -import io.grpc.Drainable; -import io.grpc.MethodDescriptor.Marshaller; -import io.grpc.protobuf.ProtoUtils; -import io.netty.buffer.ByteBuf; -import io.netty.buffer.ByteBufInputStream; -import io.netty.buffer.CompositeByteBuf; -import io.netty.buffer.Unpooled; -import io.netty.buffer.UnpooledByteBufAllocator; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.flight.grpc.AddWritableBuffer; -import org.apache.arrow.flight.grpc.GetReadableBuffer; -import org.apache.arrow.flight.impl.Flight.FlightData; -import org.apache.arrow.flight.impl.Flight.FlightDescriptor; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.ipc.message.IpcOption; -import 
org.apache.arrow.vector.ipc.message.MessageMetadataResult; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.MetadataVersion; -import org.apache.arrow.vector.types.pojo.Schema; - -/** The in-memory representation of FlightData used to manage a stream of Arrow messages. */ -class ArrowMessage implements AutoCloseable { - - // If true, deserialize Arrow data by giving Arrow a reference to the underlying gRPC buffer - // instead of copying the data. Defaults to true. - public static final boolean ENABLE_ZERO_COPY_READ; - // If true, serialize Arrow data by giving gRPC a reference to the underlying Arrow buffer - // instead of copying the data. Defaults to false. - public static final boolean ENABLE_ZERO_COPY_WRITE; - - static { - String zeroCopyReadFlag = System.getProperty("arrow.flight.enable_zero_copy_read"); - if (zeroCopyReadFlag == null) { - zeroCopyReadFlag = System.getenv("ARROW_FLIGHT_ENABLE_ZERO_COPY_READ"); - } - String zeroCopyWriteFlag = System.getProperty("arrow.flight.enable_zero_copy_write"); - if (zeroCopyWriteFlag == null) { - zeroCopyWriteFlag = System.getenv("ARROW_FLIGHT_ENABLE_ZERO_COPY_WRITE"); - } - ENABLE_ZERO_COPY_READ = !"false".equalsIgnoreCase(zeroCopyReadFlag); - ENABLE_ZERO_COPY_WRITE = "true".equalsIgnoreCase(zeroCopyWriteFlag); - } - - private static final int DESCRIPTOR_TAG = - (FlightData.FLIGHT_DESCRIPTOR_FIELD_NUMBER << 3) | WireFormat.WIRETYPE_LENGTH_DELIMITED; - private static final int BODY_TAG = - (FlightData.DATA_BODY_FIELD_NUMBER << 3) | WireFormat.WIRETYPE_LENGTH_DELIMITED; - private static final int HEADER_TAG = - (FlightData.DATA_HEADER_FIELD_NUMBER << 3) | WireFormat.WIRETYPE_LENGTH_DELIMITED; - private static final int APP_METADATA_TAG = - (FlightData.APP_METADATA_FIELD_NUMBER << 3) | WireFormat.WIRETYPE_LENGTH_DELIMITED; - - private static final Marshaller NO_BODY_MARSHALLER = - ProtoUtils.marshaller(FlightData.getDefaultInstance()); - - /** - * Get the 
application-specific metadata in this message. The ArrowMessage retains ownership of - * the buffer. - */ - public ArrowBuf getApplicationMetadata() { - return appMetadata; - } - - /** Types of messages that can be sent. */ - public enum HeaderType { - NONE, - SCHEMA, - DICTIONARY_BATCH, - RECORD_BATCH, - TENSOR; - - public static HeaderType getHeader(byte b) { - switch (b) { - case 0: - return NONE; - case 1: - return SCHEMA; - case 2: - return DICTIONARY_BATCH; - case 3: - return RECORD_BATCH; - case 4: - return TENSOR; - default: - throw new UnsupportedOperationException("unknown type: " + b); - } - } - } - - // Pre-allocated buffers for padding serialized ArrowMessages. - private static final List PADDING_BUFFERS = - Arrays.asList( - null, - Unpooled.copiedBuffer(new byte[] {0}), - Unpooled.copiedBuffer(new byte[] {0, 0}), - Unpooled.copiedBuffer(new byte[] {0, 0, 0}), - Unpooled.copiedBuffer(new byte[] {0, 0, 0, 0}), - Unpooled.copiedBuffer(new byte[] {0, 0, 0, 0, 0}), - Unpooled.copiedBuffer(new byte[] {0, 0, 0, 0, 0, 0}), - Unpooled.copiedBuffer(new byte[] {0, 0, 0, 0, 0, 0, 0})); - - private final IpcOption writeOption; - private final FlightDescriptor descriptor; - private final MessageMetadataResult message; - private final ArrowBuf appMetadata; - private final List bufs; - private final boolean tryZeroCopyWrite; - - public ArrowMessage(FlightDescriptor descriptor, Schema schema, IpcOption option) { - this.writeOption = option; - ByteBuffer serializedMessage = MessageSerializer.serializeMetadata(schema, writeOption); - this.message = - MessageMetadataResult.create(serializedMessage.slice(), serializedMessage.remaining()); - bufs = ImmutableList.of(); - this.descriptor = descriptor; - this.appMetadata = null; - this.tryZeroCopyWrite = false; - } - - /** - * Create an ArrowMessage from a record batch and app metadata. - * - * @param batch The record batch. - * @param appMetadata The app metadata. May be null. Takes ownership of the buffer otherwise. 
- * @param tryZeroCopy Whether to enable the zero-copy optimization. - */ - public ArrowMessage( - ArrowRecordBatch batch, ArrowBuf appMetadata, boolean tryZeroCopy, IpcOption option) { - this.writeOption = option; - ByteBuffer serializedMessage = MessageSerializer.serializeMetadata(batch, writeOption); - this.message = - MessageMetadataResult.create(serializedMessage.slice(), serializedMessage.remaining()); - this.bufs = ImmutableList.copyOf(batch.getBuffers()); - this.descriptor = null; - this.appMetadata = appMetadata; - this.tryZeroCopyWrite = tryZeroCopy; - } - - public ArrowMessage(ArrowDictionaryBatch batch, IpcOption option) { - this.writeOption = option; - ByteBuffer serializedMessage = MessageSerializer.serializeMetadata(batch, writeOption); - serializedMessage = serializedMessage.slice(); - this.message = MessageMetadataResult.create(serializedMessage, serializedMessage.remaining()); - // asInputStream will free the buffers implicitly, so increment the reference count - batch.getDictionary().getBuffers().forEach(buf -> buf.getReferenceManager().retain()); - this.bufs = ImmutableList.copyOf(batch.getDictionary().getBuffers()); - this.descriptor = null; - this.appMetadata = null; - this.tryZeroCopyWrite = false; - } - - /** - * Create an ArrowMessage containing only application metadata. - * - * @param appMetadata The application-provided metadata buffer. - */ - public ArrowMessage(ArrowBuf appMetadata) { - // No need to take IpcOption as it's not used to serialize this kind of message. - this.writeOption = IpcOption.DEFAULT; - this.message = null; - this.bufs = ImmutableList.of(); - this.descriptor = null; - this.appMetadata = appMetadata; - this.tryZeroCopyWrite = false; - } - - public ArrowMessage(FlightDescriptor descriptor) { - // No need to take IpcOption as it's not used to serialize this kind of message. 
- this.writeOption = IpcOption.DEFAULT; - this.message = null; - this.bufs = ImmutableList.of(); - this.descriptor = descriptor; - this.appMetadata = null; - this.tryZeroCopyWrite = false; - } - - private ArrowMessage( - FlightDescriptor descriptor, - MessageMetadataResult message, - ArrowBuf appMetadata, - ArrowBuf buf) { - // No need to take IpcOption as this is used for deserialized ArrowMessage coming from the wire. - this.writeOption = - message != null - ? - // avoid writing legacy ipc format by default - new IpcOption(false, MetadataVersion.fromFlatbufID(message.getMessage().version())) - : IpcOption.DEFAULT; - this.message = message; - this.descriptor = descriptor; - this.appMetadata = appMetadata; - this.bufs = buf == null ? ImmutableList.of() : ImmutableList.of(buf); - this.tryZeroCopyWrite = false; - } - - public MessageMetadataResult asSchemaMessage() { - return message; - } - - public FlightDescriptor getDescriptor() { - return descriptor; - } - - public HeaderType getMessageType() { - if (message == null) { - // Null message occurs for metadata-only messages (in DoExchange) - return HeaderType.NONE; - } - return HeaderType.getHeader(message.headerType()); - } - - public Schema asSchema() { - Preconditions.checkArgument(bufs.size() == 0); - Preconditions.checkArgument(getMessageType() == HeaderType.SCHEMA); - return MessageSerializer.deserializeSchema(message); - } - - public ArrowRecordBatch asRecordBatch() throws IOException { - Preconditions.checkArgument( - bufs.size() == 1, "A batch can only be consumed if it contains a single ArrowBuf."); - Preconditions.checkArgument(getMessageType() == HeaderType.RECORD_BATCH); - - ArrowBuf underlying = bufs.get(0); - - underlying.getReferenceManager().retain(); - return MessageSerializer.deserializeRecordBatch(message, underlying); - } - - public ArrowDictionaryBatch asDictionaryBatch() throws IOException { - Preconditions.checkArgument( - bufs.size() == 1, "A batch can only be consumed if it contains a single 
ArrowBuf."); - Preconditions.checkArgument(getMessageType() == HeaderType.DICTIONARY_BATCH); - ArrowBuf underlying = bufs.get(0); - // Retain a reference to keep the batch alive when the message is closed - underlying.getReferenceManager().retain(); - // Do not set drained - we still want to release our reference - return MessageSerializer.deserializeDictionaryBatch(message, underlying); - } - - public Iterable getBufs() { - return Iterables.unmodifiableIterable(bufs); - } - - private static ArrowMessage frame(BufferAllocator allocator, final InputStream stream) { - - try { - FlightDescriptor descriptor = null; - MessageMetadataResult header = null; - ArrowBuf body = null; - ArrowBuf appMetadata = null; - while (stream.available() > 0) { - int tag = readRawVarint32(stream); - switch (tag) { - case DESCRIPTOR_TAG: - { - int size = readRawVarint32(stream); - byte[] bytes = new byte[size]; - ByteStreams.readFully(stream, bytes); - descriptor = FlightDescriptor.parseFrom(bytes); - break; - } - case HEADER_TAG: - { - int size = readRawVarint32(stream); - byte[] bytes = new byte[size]; - ByteStreams.readFully(stream, bytes); - header = MessageMetadataResult.create(ByteBuffer.wrap(bytes), size); - break; - } - case APP_METADATA_TAG: - { - int size = readRawVarint32(stream); - appMetadata = allocator.buffer(size); - GetReadableBuffer.readIntoBuffer(stream, appMetadata, size, ENABLE_ZERO_COPY_READ); - break; - } - case BODY_TAG: - if (body != null) { - // only read last body. - body.getReferenceManager().release(); - body = null; - } - int size = readRawVarint32(stream); - body = allocator.buffer(size); - GetReadableBuffer.readIntoBuffer(stream, body, size, ENABLE_ZERO_COPY_READ); - break; - - default: - // ignore unknown fields. - } - } - // Protobuf implementations can omit empty fields, such as body; for some message types, like - // RecordBatch, - // this will fail later as we still expect an empty buffer. 
In those cases only, fill in an - // empty buffer here - - // in other cases, like Schema, having an unexpected empty buffer will also cause failures. - // We don't fill in defaults for fields like header, for which there is no reasonable default, - // or for appMetadata - // or descriptor, which are intended to be empty in some cases. - if (header != null) { - switch (HeaderType.getHeader(header.headerType())) { - case SCHEMA: - // Ignore 0-length buffers in case a Protobuf implementation wrote it out - if (body != null && body.capacity() == 0) { - body.close(); - body = null; - } - break; - case DICTIONARY_BATCH: - case RECORD_BATCH: - // A Protobuf implementation can skip 0-length bodies, so ensure we fill it in here - if (body == null) { - body = allocator.getEmpty(); - } - break; - case NONE: - case TENSOR: - default: - // Do nothing - break; - } - } - return new ArrowMessage(descriptor, header, appMetadata, body); - } catch (Exception ioe) { - throw new RuntimeException(ioe); - } - } - - private static int readRawVarint32(InputStream is) throws IOException { - int firstByte = is.read(); - return CodedInputStream.readRawVarint32(firstByte, is); - } - - /** - * Convert the ArrowMessage to an InputStream. - * - *

    Implicitly, this transfers ownership of the contained buffers to the InputStream. - * - * @return InputStream - */ - private InputStream asInputStream() { - if (message == null) { - // If we have no IPC message, it's a pure-metadata message - final FlightData.Builder builder = FlightData.newBuilder(); - if (descriptor != null) { - builder.setFlightDescriptor(descriptor); - } - if (appMetadata != null) { - builder.setAppMetadata(ByteString.copyFrom(appMetadata.nioBuffer())); - } - return NO_BODY_MARSHALLER.stream(builder.build()); - } - - try { - final ByteString bytes = - ByteString.copyFrom(message.getMessageBuffer(), message.bytesAfterMessage()); - - if (getMessageType() == HeaderType.SCHEMA) { - - final FlightData.Builder builder = FlightData.newBuilder().setDataHeader(bytes); - - if (descriptor != null) { - builder.setFlightDescriptor(descriptor); - } - - Preconditions.checkArgument(bufs.isEmpty()); - return NO_BODY_MARSHALLER.stream(builder.build()); - } - - Preconditions.checkArgument( - getMessageType() == HeaderType.RECORD_BATCH - || getMessageType() == HeaderType.DICTIONARY_BATCH); - // There may be no buffers in the case that we write only a null array - Preconditions.checkArgument( - descriptor == null, "Descriptor should only be included in the schema message."); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - CodedOutputStream cos = CodedOutputStream.newInstance(baos); - cos.writeBytes(FlightData.DATA_HEADER_FIELD_NUMBER, bytes); - - if (appMetadata != null && appMetadata.capacity() > 0) { - // Must call slice() as CodedOutputStream#writeByteBuffer writes -capacity- bytes, not - // -limit- bytes - cos.writeByteBuffer(FlightData.APP_METADATA_FIELD_NUMBER, appMetadata.nioBuffer().slice()); - } - - cos.writeTag(FlightData.DATA_BODY_FIELD_NUMBER, WireFormat.WIRETYPE_LENGTH_DELIMITED); - int size = 0; - List allBufs = new ArrayList<>(); - for (ArrowBuf b : bufs) { - // [ARROW-11066] This creates a Netty buffer whose refcnt is INDEPENDENT 
of the backing - // Arrow buffer. This is susceptible to use-after-free, so we subclass CompositeByteBuf - // below to tie the Arrow buffer refcnt to the Netty buffer refcnt - allBufs.add(Unpooled.wrappedBuffer(b.nioBuffer()).retain()); - size += (int) b.readableBytes(); - // [ARROW-4213] These buffers must be aligned to an 8-byte boundary in order to be readable - // from C++. - if (b.readableBytes() % 8 != 0) { - int paddingBytes = (int) (8 - (b.readableBytes() % 8)); - assert paddingBytes > 0 && paddingBytes < 8; - size += paddingBytes; - allBufs.add(PADDING_BUFFERS.get(paddingBytes).retain()); - } - } - // rawvarint is used for length definition. - cos.writeUInt32NoTag(size); - cos.flush(); - - ByteBuf initialBuf = Unpooled.buffer(baos.size()); - initialBuf.writeBytes(baos.toByteArray()); - final CompositeByteBuf bb; - final ImmutableList byteBufs = - ImmutableList.builder().add(initialBuf).addAll(allBufs).build(); - // See: https://github.com/apache/arrow/issues/40039 - // CompositeByteBuf requires us to pass maxNumComponents to constructor. - // This number will be used to decide when to stop adding new components as separate buffers - // and instead merge existing components into a new buffer by performing a memory copy. - // We want to avoind memory copies as much as possible so we want to set the limit that won't - // be reached. - // At a first glance it seems reasonable to set limit to byteBufs.size() + 1, - // because it will be enough to avoid merges of byteBufs that we pass to constructor. - // But later this buffer will be written to socket by Netty - // and DefaultHttp2ConnectionEncoder uses CoalescingBufferQueue to combine small buffers into - // one. - // Method CoalescingBufferQueue.compose will check if current buffer is already a - // CompositeByteBuf - // and if it's the case it will just add a new component to this buffer. 
- // But in out case if we set maxNumComponents=byteBufs.size() + 1 it will happen on the first - // attempt - // to write data to socket because header message is small and Netty will always try to - // compine it with the - // large CompositeByteBuf we're creating here. - // We never want additional memory copies so setting the limit to Integer.MAX_VALUE - final int maxNumComponents = Integer.MAX_VALUE; - if (tryZeroCopyWrite) { - bb = new ArrowBufRetainingCompositeByteBuf(maxNumComponents, byteBufs, bufs); - } else { - // Don't retain the buffers in the non-zero-copy path since we're copying them - bb = - new CompositeByteBuf( - UnpooledByteBufAllocator.DEFAULT, /* direct */ true, maxNumComponents, byteBufs); - } - return new DrainableByteBufInputStream(bb, tryZeroCopyWrite); - } catch (Exception ex) { - throw new RuntimeException("Unexpected IO Exception", ex); - } - } - - /** - * ARROW-11066: enable the zero-copy optimization and protect against use-after-free. - * - *

    When you send a message through gRPC, the following happens: 1. gRPC immediately serializes - * the message, eventually calling asInputStream above. 2. gRPC buffers the serialized message for - * sending. 3. Later, gRPC will actually write out the message. - * - *

    The problem with this is that when the zero-copy optimization is enabled, Flight - * "serializes" the message by handing gRPC references to Arrow data. That means we need a way to - * keep the Arrow buffers valid until gRPC actually writes them, else, we'll read invalid data or - * segfault. gRPC doesn't know anything about Arrow buffers, either. - * - *

    This class solves that issue by bridging Arrow and Netty/gRPC. We increment the refcnt on a - * set of Arrow backing buffers and decrement them once the Netty buffers are freed by gRPC. - */ - private static final class ArrowBufRetainingCompositeByteBuf extends CompositeByteBuf { - // Arrow buffers that back the Netty ByteBufs here; ByteBufs held by this class are - // either slices of one of the ArrowBufs or independently allocated. - final List backingBuffers; - boolean freed; - - ArrowBufRetainingCompositeByteBuf( - int maxNumComponents, Iterable buffers, List backingBuffers) { - super(UnpooledByteBufAllocator.DEFAULT, /* direct */ true, maxNumComponents, buffers); - this.backingBuffers = backingBuffers; - this.freed = false; - // N.B. the Netty superclass avoids enhanced-for to reduce GC pressure, so follow that here - for (int i = 0; i < backingBuffers.size(); i++) { - backingBuffers.get(i).getReferenceManager().retain(); - } - } - - @Override - protected void deallocate() { - super.deallocate(); - if (freed) { - return; - } - freed = true; - for (int i = 0; i < backingBuffers.size(); i++) { - backingBuffers.get(i).getReferenceManager().release(); - } - } - } - - private static class DrainableByteBufInputStream extends ByteBufInputStream implements Drainable { - - private final CompositeByteBuf buf; - private final boolean isZeroCopy; - - public DrainableByteBufInputStream(CompositeByteBuf buffer, boolean isZeroCopy) { - super(buffer, buffer.readableBytes(), true); - this.buf = buffer; - this.isZeroCopy = isZeroCopy; - } - - @Override - public int drainTo(OutputStream target) throws IOException { - int size = buf.readableBytes(); - AddWritableBuffer.add(buf, target, isZeroCopy); - return size; - } - - @Override - public void close() { - buf.release(); - } - } - - public static Marshaller createMarshaller(BufferAllocator allocator) { - return new ArrowMessageHolderMarshaller(allocator); - } - - private static class ArrowMessageHolderMarshaller implements 
Marshaller { - - private final BufferAllocator allocator; - - public ArrowMessageHolderMarshaller(BufferAllocator allocator) { - this.allocator = allocator; - } - - @Override - public InputStream stream(ArrowMessage value) { - return value.asInputStream(); - } - - @Override - public ArrowMessage parse(InputStream stream) { - return ArrowMessage.frame(allocator, stream); - } - } - - @Override - public void close() throws Exception { - AutoCloseables.close(Iterables.concat(bufs, Collections.singletonList(appMetadata))); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/AsyncPutListener.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/AsyncPutListener.java deleted file mode 100644 index 85dde4ed7c1c6..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/AsyncPutListener.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import org.apache.arrow.flight.grpc.StatusUtils; - -/** - * A handler for server-sent application metadata messages during a Flight DoPut operation. - * - *

    To handle messages, create an instance of this class overriding {@link #onNext(PutResult)}. - * The other methods should not be overridden. - */ -public class AsyncPutListener implements FlightClient.PutListener { - - private CompletableFuture completed; - - public AsyncPutListener() { - completed = new CompletableFuture<>(); - } - - /** - * Wait for the stream to finish on the server side. You must call this to be notified of any - * errors that may have happened during the upload. - */ - @Override - public final void getResult() { - try { - completed.get(); - } catch (ExecutionException e) { - throw StatusUtils.fromThrowable(e.getCause()); - } catch (InterruptedException e) { - throw StatusUtils.fromThrowable(e); - } - } - - @Override - public void onNext(PutResult val) {} - - @Override - public final void onError(Throwable t) { - completed.completeExceptionally(StatusUtils.fromThrowable(t)); - } - - @Override - public final void onCompleted() { - completed.complete(null); - } - - @Override - public boolean isCancelled() { - return completed.isDone(); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/BackpressureStrategy.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/BackpressureStrategy.java deleted file mode 100644 index 1409ea7003ed7..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/BackpressureStrategy.java +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import com.google.common.base.Preconditions; -import org.apache.arrow.vector.VectorSchemaRoot; - -/** - * Helper interface to dynamically handle backpressure when implementing FlightProducers. This must - * only be used in FlightProducer implementations that are non-blocking. - */ -public interface BackpressureStrategy { - /** The state of the client after a call to waitForListener. */ - enum WaitResult { - /** Listener is ready. */ - READY, - - /** Listener was cancelled by the client. */ - CANCELLED, - - /** Timed out waiting for the listener to change state. */ - TIMEOUT, - - /** Indicates that the wait was interrupted for a reason unrelated to the listener itself. */ - OTHER - } - - /** - * Set up operations to work against the given listener. - * - *

    This must be called exactly once and before any calls to {@link #waitForListener(long)} and - * {@link OutboundStreamListener#start(VectorSchemaRoot)} - * - * @param listener The listener this strategy applies to. - */ - void register(FlightProducer.ServerStreamListener listener); - - /** - * Waits for the listener to be ready or cancelled up to the given timeout. - * - * @param timeout The timeout in milliseconds. Infinite if timeout is <= 0. - * @return The result of the wait. - */ - WaitResult waitForListener(long timeout); - - /** - * A back pressure strategy that uses callbacks to notify when the client is ready or cancelled. - */ - class CallbackBackpressureStrategy implements BackpressureStrategy { - private final Object lock = new Object(); - private FlightProducer.ServerStreamListener listener; - - @Override - public void register(FlightProducer.ServerStreamListener listener) { - this.listener = listener; - listener.setOnReadyHandler(this::onReady); - listener.setOnCancelHandler(this::onCancel); - } - - @Override - public WaitResult waitForListener(long timeout) { - Preconditions.checkNotNull(listener); - long remainingTimeout = timeout; - final long startTime = System.currentTimeMillis(); - synchronized (lock) { - while (!listener.isReady() && !listener.isCancelled()) { - try { - lock.wait(remainingTimeout); - if (timeout != 0) { // If timeout was zero explicitly, we should never report timeout. 
- remainingTimeout = startTime + timeout - System.currentTimeMillis(); - if (remainingTimeout <= 0) { - return WaitResult.TIMEOUT; - } - } - if (!shouldContinueWaiting(listener, remainingTimeout)) { - return WaitResult.OTHER; - } - } catch (InterruptedException ex) { - Thread.currentThread().interrupt(); - return WaitResult.OTHER; - } - } - - if (listener.isReady()) { - return WaitResult.READY; - } else if (listener.isCancelled()) { - return WaitResult.CANCELLED; - } else if (System.currentTimeMillis() > startTime + timeout) { - return WaitResult.TIMEOUT; - } - throw new RuntimeException("Invalid state when waiting for listener."); - } - } - - /** - * Interrupt waiting on the listener to change state. - * - *

    This method can be used in conjunction with {@link - * #shouldContinueWaiting(FlightProducer.ServerStreamListener, long)} to allow FlightProducers - * to terminate streams internally and notify clients. - */ - public void interruptWait() { - synchronized (lock) { - lock.notifyAll(); - } - } - - /** - * Callback function to run to check if the listener should continue to be waited on if it - * leaves the waiting state without being cancelled, ready, or timed out. - * - *

    This method should be used to determine if the wait on the listener was interrupted - * explicitly using a call to {@link #interruptWait()} or if it was woken up due to a spurious - * wake. - */ - protected boolean shouldContinueWaiting( - FlightProducer.ServerStreamListener listener, long remainingTimeout) { - return true; - } - - /** Callback to execute when the listener becomes ready. */ - protected void readyCallback() {} - - /** Callback to execute when the listener is cancelled. */ - protected void cancelCallback() {} - - private void onReady() { - synchronized (lock) { - readyCallback(); - lock.notifyAll(); - } - } - - private void onCancel() { - synchronized (lock) { - cancelCallback(); - lock.notifyAll(); - } - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallHeaders.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallHeaders.java deleted file mode 100644 index f4f6486a3c5df..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallHeaders.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import java.util.Set; - -/** A set of metadata key value pairs for a call (request or response). */ -public interface CallHeaders { - /** Get the value of a metadata key. If multiple values are present, then get the last one. */ - String get(String key); - - /** Get the value of a metadata key. If multiple values are present, then get the last one. */ - byte[] getByte(String key); - - /** Get all values present for the given metadata key. */ - Iterable getAll(String key); - - /** Get all values present for the given metadata key. */ - Iterable getAllByte(String key); - - /** - * Insert a metadata pair with the given value. - * - *

    Duplicate metadata are permitted. - */ - void insert(String key, String value); - - /** - * Insert a metadata pair with the given value. - * - *

    Duplicate metadata are permitted. - */ - void insert(String key, byte[] value); - - /** Get a set of all the metadata keys. */ - Set keys(); - - /** Check whether the given metadata key is present. */ - boolean containsKey(String key); -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallInfo.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallInfo.java deleted file mode 100644 index 24ef2053f3b42..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallInfo.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -/** A description of a Flight call for middleware to inspect. 
*/ -public final class CallInfo { - private final FlightMethod method; - - public CallInfo(FlightMethod method) { - this.method = method; - } - - public FlightMethod method() { - return method; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOption.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOption.java deleted file mode 100644 index bb11ce79e50f7..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOption.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -/** Per-call RPC options. These are hints to the underlying RPC layer and may not be respected. */ -public interface CallOption {} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOptions.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOptions.java deleted file mode 100644 index b6e052d223678..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallOptions.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import io.grpc.stub.AbstractStub; -import java.util.concurrent.TimeUnit; - -/** Common call options. */ -public class CallOptions { - public static CallOption timeout(long duration, TimeUnit unit) { - return new Timeout(duration, unit); - } - - static > T wrapStub(T stub, CallOption[] options) { - for (CallOption option : options) { - if (option instanceof GrpcCallOption) { - stub = ((GrpcCallOption) option).wrapStub(stub); - } - } - return stub; - } - - private static class Timeout implements GrpcCallOption { - long timeout; - TimeUnit timeoutUnit; - - Timeout(long timeout, TimeUnit timeoutUnit) { - this.timeout = timeout; - this.timeoutUnit = timeoutUnit; - } - - @Override - public > T wrapStub(T stub) { - return stub.withDeadlineAfter(timeout, timeoutUnit); - } - } - - /** CallOptions specific to GRPC stubs. 
*/ - public interface GrpcCallOption extends CallOption { - > T wrapStub(T stub); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java deleted file mode 100644 index 809834f961d3e..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.util.Objects; -import org.apache.arrow.flight.FlightProducer.ServerStreamListener; -import org.apache.arrow.flight.FlightProducer.StreamListener; - -/** - * The result of a Flight RPC, consisting of a status code with an optional description and/or - * exception that led to the status. - * - *

    If raised or sent through {@link StreamListener#onError(Throwable)} or {@link - * ServerStreamListener#error(Throwable)}, the client call will raise the same error (a {@link - * FlightRuntimeException} with the same {@link FlightStatusCode} and description). The exception - * within, if present, will not be sent to the client. - */ -public class CallStatus { - - private final FlightStatusCode code; - private final Throwable cause; - private final String description; - private final ErrorFlightMetadata metadata; - - public static final CallStatus UNKNOWN = FlightStatusCode.UNKNOWN.toStatus(); - public static final CallStatus INTERNAL = FlightStatusCode.INTERNAL.toStatus(); - public static final CallStatus INVALID_ARGUMENT = FlightStatusCode.INVALID_ARGUMENT.toStatus(); - public static final CallStatus TIMED_OUT = FlightStatusCode.TIMED_OUT.toStatus(); - public static final CallStatus NOT_FOUND = FlightStatusCode.NOT_FOUND.toStatus(); - public static final CallStatus ALREADY_EXISTS = FlightStatusCode.ALREADY_EXISTS.toStatus(); - public static final CallStatus CANCELLED = FlightStatusCode.CANCELLED.toStatus(); - public static final CallStatus UNAUTHENTICATED = FlightStatusCode.UNAUTHENTICATED.toStatus(); - public static final CallStatus UNAUTHORIZED = FlightStatusCode.UNAUTHORIZED.toStatus(); - public static final CallStatus UNIMPLEMENTED = FlightStatusCode.UNIMPLEMENTED.toStatus(); - public static final CallStatus UNAVAILABLE = FlightStatusCode.UNAVAILABLE.toStatus(); - public static final CallStatus RESOURCE_EXHAUSTED = - FlightStatusCode.RESOURCE_EXHAUSTED.toStatus(); - - /** - * Create a new status. - * - * @param code The status code. - * @param cause An exception that resulted in this status (or null). - * @param description A description of the status (or null). 
- */ - public CallStatus( - FlightStatusCode code, Throwable cause, String description, ErrorFlightMetadata metadata) { - this.code = Objects.requireNonNull(code); - this.cause = cause; - this.description = description == null ? "" : description; - this.metadata = metadata == null ? new ErrorFlightMetadata() : metadata; - } - - /** - * Create a new status with no cause or description. - * - * @param code The status code. - */ - public CallStatus(FlightStatusCode code) { - this(code, /* no cause */ null, /* no description */ null, /* no metadata */ null); - } - - /** The status code describing the result of the RPC. */ - public FlightStatusCode code() { - return code; - } - - /** The exception that led to this result. May be null. */ - public Throwable cause() { - return cause; - } - - /** A description of the result. */ - public String description() { - return description; - } - - /** - * Metadata associated with the exception. - * - *

    May be null. - */ - public ErrorFlightMetadata metadata() { - return metadata; - } - - /** Return a copy of this status with an error message. */ - public CallStatus withDescription(String message) { - return new CallStatus(code, cause, message, metadata); - } - - /** - * Return a copy of this status with the given exception as the cause. This will not be sent over - * the wire. - */ - public CallStatus withCause(Throwable t) { - return new CallStatus(code, t, description, metadata); - } - - /** Return a copy of this status with associated exception metadata. */ - public CallStatus withMetadata(ErrorFlightMetadata metadata) { - return new CallStatus(code, cause, description, metadata); - } - - /** Convert the status to an equivalent exception. */ - public FlightRuntimeException toRuntimeException() { - return new FlightRuntimeException(this); - } - - @Override - public String toString() { - return "CallStatus{" - + "code=" - + code - + ", cause=" - + cause - + ", description='" - + description - + "', metadata='" - + metadata - + '\'' - + '}'; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CancelFlightInfoRequest.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CancelFlightInfoRequest.java deleted file mode 100644 index 041a765455797..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CancelFlightInfoRequest.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.ByteBuffer; -import java.util.Objects; -import org.apache.arrow.flight.impl.Flight; - -/** A request to cancel a FlightInfo. */ -public class CancelFlightInfoRequest { - private final FlightInfo info; - - public CancelFlightInfoRequest(FlightInfo info) { - this.info = Objects.requireNonNull(info); - } - - CancelFlightInfoRequest(Flight.CancelFlightInfoRequest proto) throws URISyntaxException { - this(new FlightInfo(proto.getInfo())); - } - - public FlightInfo getInfo() { - return info; - } - - Flight.CancelFlightInfoRequest toProtocol() { - Flight.CancelFlightInfoRequest.Builder b = Flight.CancelFlightInfoRequest.newBuilder(); - b.setInfo(info.toProtocol()); - return b.build(); - } - - /** - * Get the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing non-Flight services to still return Flight - * types. - */ - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - /** - * Parse the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing Flight clients to obtain stream info from - * non-Flight services. - * - * @param serialized The serialized form of the message, as returned by {@link #serialize()}. - * @return The deserialized message. - * @throws IOException if the serialized form is invalid. - */ - public static CancelFlightInfoRequest deserialize(ByteBuffer serialized) - throws IOException, URISyntaxException { - return new CancelFlightInfoRequest(Flight.CancelFlightInfoRequest.parseFrom(serialized)); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CancelFlightInfoResult.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CancelFlightInfoResult.java deleted file mode 100644 index 1282f8abe0c9b..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CancelFlightInfoResult.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Objects; -import org.apache.arrow.flight.impl.Flight; - -/** The result of cancelling a FlightInfo. 
*/ -public class CancelFlightInfoResult { - private final CancelStatus status; - - public CancelFlightInfoResult(CancelStatus status) { - this.status = status; - } - - CancelFlightInfoResult(Flight.CancelFlightInfoResult proto) { - switch (proto.getStatus()) { - case CANCEL_STATUS_UNSPECIFIED: - status = CancelStatus.UNSPECIFIED; - break; - case CANCEL_STATUS_CANCELLED: - status = CancelStatus.CANCELLED; - break; - case CANCEL_STATUS_CANCELLING: - status = CancelStatus.CANCELLING; - break; - case CANCEL_STATUS_NOT_CANCELLABLE: - status = CancelStatus.NOT_CANCELLABLE; - break; - default: - throw new IllegalArgumentException(""); - } - } - - public CancelStatus getStatus() { - return status; - } - - Flight.CancelFlightInfoResult toProtocol() { - Flight.CancelFlightInfoResult.Builder b = Flight.CancelFlightInfoResult.newBuilder(); - switch (status) { - case UNSPECIFIED: - b.setStatus(Flight.CancelStatus.CANCEL_STATUS_UNSPECIFIED); - break; - case CANCELLED: - b.setStatus(Flight.CancelStatus.CANCEL_STATUS_CANCELLED); - break; - case CANCELLING: - b.setStatus(Flight.CancelStatus.CANCEL_STATUS_CANCELLING); - break; - case NOT_CANCELLABLE: - b.setStatus(Flight.CancelStatus.CANCEL_STATUS_NOT_CANCELLABLE); - break; - default: - // Not possible - throw new AssertionError(); - } - return b.build(); - } - - /** - * Get the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing non-Flight services to still return Flight - * types. - */ - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - /** - * Parse the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing Flight clients to obtain stream info from - * non-Flight services. - * - * @param serialized The serialized form of the message, as returned by {@link #serialize()}. - * @return The deserialized message. - * @throws IOException if the serialized form is invalid. - */ - public static CancelFlightInfoResult deserialize(ByteBuffer serialized) throws IOException { - return new CancelFlightInfoResult(Flight.CancelFlightInfoResult.parseFrom(serialized)); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof CancelFlightInfoResult)) { - return false; - } - CancelFlightInfoResult that = (CancelFlightInfoResult) o; - return status == that.status; - } - - @Override - public int hashCode() { - return Objects.hash(status); - } - - @Override - public String toString() { - return "CancelFlightInfoResult{" + "status=" + status + '}'; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CancelStatus.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CancelStatus.java deleted file mode 100644 index 745b67db70596..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CancelStatus.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -/** The result of cancelling a FlightInfo. */ -public enum CancelStatus { - /** - * The cancellation status is unknown. Servers should avoid using this value (send a NOT_FOUND - * error if the requested query is not known). Clients can retry the request. - */ - UNSPECIFIED, - /** - * The cancellation request is complete. Subsequent requests with the same payload may return - * CANCELLED or a NOT_FOUND error. - */ - CANCELLED, - /** The cancellation request is in progress. The client may retry the cancellation request. */ - CANCELLING, - /** The query is not cancellable. The client should not retry the cancellation request. */ - NOT_CANCELLABLE, - ; -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CloseSessionRequest.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CloseSessionRequest.java deleted file mode 100644 index 7eea788d7c7e6..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CloseSessionRequest.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import java.io.IOException; -import java.nio.ByteBuffer; -import org.apache.arrow.flight.impl.Flight; - -/** A request to close/invalidate a server session context. */ -public class CloseSessionRequest { - public CloseSessionRequest() {} - - CloseSessionRequest(Flight.CloseSessionRequest proto) {} - - Flight.CloseSessionRequest toProtocol() { - return Flight.CloseSessionRequest.getDefaultInstance(); - } - - /** - * Get the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing non-Flight services to still return Flight - * types. - */ - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - /** - * Parse the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing Flight clients to obtain stream info from - * non-Flight services. - * - * @param serialized The serialized form of the message, as returned by {@link #serialize()}. - * @return The deserialized message. - * @throws IOException if the serialized form is invalid. - */ - public static CloseSessionRequest deserialize(ByteBuffer serialized) throws IOException { - return new CloseSessionRequest(Flight.CloseSessionRequest.parseFrom(serialized)); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CloseSessionResult.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CloseSessionResult.java deleted file mode 100644 index 73d7bc16a1a7d..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CloseSessionResult.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.io.IOException; -import java.nio.ByteBuffer; -import org.apache.arrow.flight.impl.Flight; - -/** The result of attempting to close/invalidate a server session context. */ -public class CloseSessionResult { - /** Close operation result status values. 
*/ - public enum Status { - /** - * The session close status is unknown. Servers should avoid using this value (send a NOT_FOUND - * error if the requested session is not known). Clients can retry the request. - */ - UNSPECIFIED, - /** The session close request is complete. */ - CLOSED, - /** The session close request is in progress. The client may retry the request. */ - CLOSING, - /** The session is not closeable. */ - NOT_CLOSABLE, - ; - - public static Status fromProtocol(Flight.CloseSessionResult.Status proto) { - return values()[proto.getNumber()]; - } - - public Flight.CloseSessionResult.Status toProtocol() { - return Flight.CloseSessionResult.Status.values()[ordinal()]; - } - } - - private final Status status; - - public CloseSessionResult(Status status) { - this.status = status; - } - - CloseSessionResult(Flight.CloseSessionResult proto) { - status = Status.fromProtocol(proto.getStatus()); - if (status == null) { - // Unreachable - throw new IllegalArgumentException(""); - } - } - - public Status getStatus() { - return status; - } - - Flight.CloseSessionResult toProtocol() { - Flight.CloseSessionResult.Builder b = Flight.CloseSessionResult.newBuilder(); - b.setStatus(status.toProtocol()); - return b.build(); - } - - /** - * Get the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing non-Flight services to still return Flight - * types. - */ - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - /** - * Parse the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing Flight clients to obtain stream info from - * non-Flight services. - * - * @param serialized The serialized form of the message, as returned by {@link #serialize()}. - * @return The deserialized message. - * @throws IOException if the serialized form is invalid. - */ - public static CloseSessionResult deserialize(ByteBuffer serialized) throws IOException { - return new CloseSessionResult(Flight.CloseSessionResult.parseFrom(serialized)); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Criteria.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Criteria.java deleted file mode 100644 index 7698b83f98b35..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Criteria.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import com.google.protobuf.ByteString; -import org.apache.arrow.flight.impl.Flight; - -/** - * An opaque object that can be used to filter a list of streams available from a server. - * - *

    This is a POJO wrapper around the protobuf Criteria message. - */ -public class Criteria { - - public static Criteria ALL = new Criteria((byte[]) null); - - private final byte[] bytes; - - public Criteria(byte[] bytes) { - this.bytes = bytes; - } - - Criteria(Flight.Criteria criteria) { - this.bytes = criteria.getExpression().toByteArray(); - } - - /** Get the contained filter criteria. */ - public byte[] getExpression() { - return bytes; - } - - Flight.Criteria asCriteria() { - Flight.Criteria.Builder b = Flight.Criteria.newBuilder(); - if (bytes != null) { - b.setExpression(ByteString.copyFrom(bytes)); - } - - return b.build(); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/DictionaryUtils.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/DictionaryUtils.java deleted file mode 100644 index cecc1b876ecd9..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/DictionaryUtils.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.function.Consumer; -import java.util.stream.Collectors; -import org.apache.arrow.flight.impl.Flight; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.DictionaryUtility; -import org.apache.arrow.vector.validate.MetadataV4UnionChecker; - -/** Utilities to work with dictionaries in Flight. */ -final class DictionaryUtils { - - private DictionaryUtils() { - throw new UnsupportedOperationException("Do not instantiate this class."); - } - - /** - * Generate all the necessary Flight messages to send a schema and associated dictionaries. - * - * @throws Exception if there was an error closing {@link ArrowMessage} objects. This is not - * generally expected. - */ - static Schema generateSchemaMessages( - final Schema originalSchema, - final FlightDescriptor descriptor, - final DictionaryProvider provider, - final IpcOption option, - final Consumer messageCallback) - throws Exception { - final Set dictionaryIds = new HashSet<>(); - final Schema schema = generateSchema(originalSchema, provider, dictionaryIds); - MetadataV4UnionChecker.checkForUnion(schema.getFields().iterator(), option.metadataVersion); - // Send the schema message - final Flight.FlightDescriptor protoDescriptor = - descriptor == null ? 
null : descriptor.toProtocol(); - try (final ArrowMessage message = new ArrowMessage(protoDescriptor, schema, option)) { - messageCallback.accept(message); - } - // Create and write dictionary batches - for (Long id : dictionaryIds) { - final Dictionary dictionary = provider.lookup(id); - final FieldVector vector = dictionary.getVector(); - final int count = vector.getValueCount(); - // Do NOT close this root, as it does not actually own the vector. - final VectorSchemaRoot dictRoot = - new VectorSchemaRoot( - Collections.singletonList(vector.getField()), - Collections.singletonList(vector), - count); - final VectorUnloader unloader = new VectorUnloader(dictRoot); - try (final ArrowDictionaryBatch dictionaryBatch = - new ArrowDictionaryBatch(id, unloader.getRecordBatch()); - final ArrowMessage message = new ArrowMessage(dictionaryBatch, option)) { - messageCallback.accept(message); - } - } - return schema; - } - - static void closeDictionaries(final Schema schema, final DictionaryProvider provider) - throws Exception { - // Close dictionaries - final Set dictionaryIds = new HashSet<>(); - schema - .getFields() - .forEach(field -> DictionaryUtility.toMessageFormat(field, provider, dictionaryIds)); - - final List dictionaryVectors = - dictionaryIds.stream() - .map(id -> (AutoCloseable) provider.lookup(id).getVector()) - .collect(Collectors.toList()); - AutoCloseables.close(dictionaryVectors); - } - - /** - * Generates the schema to send with flight messages. If the schema contains no field with a - * dictionary, it will return the schema as is. Otherwise, it will return a newly created a new - * schema after converting the fields. - * - * @param originalSchema the original schema. - * @param provider the dictionary provider. - * @param dictionaryIds dictionary IDs that are used. - * @return the schema to send with the flight messages. 
- */ - static Schema generateSchema( - final Schema originalSchema, final DictionaryProvider provider, Set dictionaryIds) { - // first determine if a new schema needs to be created. - boolean createSchema = false; - for (Field field : originalSchema.getFields()) { - if (DictionaryUtility.needConvertToMessageFormat(field)) { - createSchema = true; - break; - } - } - - if (!createSchema) { - return originalSchema; - } else { - final List fields = new ArrayList<>(originalSchema.getFields().size()); - for (final Field field : originalSchema.getFields()) { - fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIds)); - } - return new Schema(fields, originalSchema.getCustomMetadata()); - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ErrorFlightMetadata.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ErrorFlightMetadata.java deleted file mode 100644 index 8f6c0513ac75a..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ErrorFlightMetadata.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import com.google.common.collect.Iterables; -import com.google.common.collect.LinkedListMultimap; -import com.google.common.collect.Multimap; -import java.nio.charset.StandardCharsets; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; - -/** metadata container specific to the binary metadata held in the grpc trailer. */ -public class ErrorFlightMetadata implements CallHeaders { - private final Multimap metadata = LinkedListMultimap.create(); - - public ErrorFlightMetadata() {} - - @Override - public String get(String key) { - return new String(getByte(key), StandardCharsets.US_ASCII); - } - - @Override - public byte[] getByte(String key) { - return Iterables.getLast(metadata.get(key)); - } - - @Override - public Iterable getAll(String key) { - return StreamSupport.stream(getAllByte(key).spliterator(), false) - .map(b -> new String(b, StandardCharsets.US_ASCII)) - .collect(Collectors.toList()); - } - - @Override - public Iterable getAllByte(String key) { - return metadata.get(key); - } - - @Override - public void insert(String key, String value) { - metadata.put(key, value.getBytes(StandardCharsets.UTF_8)); - } - - @Override - public void insert(String key, byte[] value) { - metadata.put(key, value); - } - - @Override - public Set keys() { - return metadata.keySet(); - } - - @Override - public boolean containsKey(String key) { - return metadata.containsKey(key); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightBindingService.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightBindingService.java deleted file mode 100644 index b68f3aa86caf3..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightBindingService.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import com.google.common.collect.ImmutableSet; -import io.grpc.BindableService; -import io.grpc.MethodDescriptor; -import io.grpc.MethodDescriptor.MethodType; -import io.grpc.ServerMethodDefinition; -import io.grpc.ServerServiceDefinition; -import io.grpc.ServiceDescriptor; -import io.grpc.protobuf.ProtoUtils; -import io.grpc.stub.ServerCalls; -import io.grpc.stub.StreamObserver; -import java.util.Set; -import java.util.concurrent.ExecutorService; -import org.apache.arrow.flight.auth.ServerAuthHandler; -import org.apache.arrow.flight.impl.Flight; -import org.apache.arrow.flight.impl.Flight.PutResult; -import org.apache.arrow.flight.impl.FlightServiceGrpc; -import org.apache.arrow.memory.BufferAllocator; - -/** Extends the basic flight service to override some methods for more efficient implementations. 
*/ -class FlightBindingService implements BindableService { - - private static final String DO_GET = - MethodDescriptor.generateFullMethodName(FlightConstants.SERVICE, "DoGet"); - private static final String DO_PUT = - MethodDescriptor.generateFullMethodName(FlightConstants.SERVICE, "DoPut"); - private static final String DO_EXCHANGE = - MethodDescriptor.generateFullMethodName(FlightConstants.SERVICE, "DoExchange"); - private static final Set OVERRIDE_METHODS = ImmutableSet.of(DO_GET, DO_PUT, DO_EXCHANGE); - - private final FlightService delegate; - private final BufferAllocator allocator; - - public FlightBindingService( - BufferAllocator allocator, - FlightProducer producer, - ServerAuthHandler authHandler, - ExecutorService executor) { - this.allocator = allocator; - this.delegate = new FlightService(allocator, producer, authHandler, executor); - } - - public static MethodDescriptor getDoGetDescriptor( - BufferAllocator allocator) { - return MethodDescriptor.newBuilder() - .setType(io.grpc.MethodDescriptor.MethodType.SERVER_STREAMING) - .setFullMethodName(DO_GET) - .setSampledToLocalTracing(false) - .setRequestMarshaller(ProtoUtils.marshaller(Flight.Ticket.getDefaultInstance())) - .setResponseMarshaller(ArrowMessage.createMarshaller(allocator)) - .setSchemaDescriptor(FlightServiceGrpc.getDoGetMethod().getSchemaDescriptor()) - .build(); - } - - public static MethodDescriptor getDoPutDescriptor( - BufferAllocator allocator) { - return MethodDescriptor.newBuilder() - .setType(MethodType.BIDI_STREAMING) - .setFullMethodName(DO_PUT) - .setSampledToLocalTracing(false) - .setRequestMarshaller(ArrowMessage.createMarshaller(allocator)) - .setResponseMarshaller(ProtoUtils.marshaller(Flight.PutResult.getDefaultInstance())) - .setSchemaDescriptor(FlightServiceGrpc.getDoPutMethod().getSchemaDescriptor()) - .build(); - } - - public static MethodDescriptor getDoExchangeDescriptor( - BufferAllocator allocator) { - return MethodDescriptor.newBuilder() - 
.setType(MethodType.BIDI_STREAMING) - .setFullMethodName(DO_EXCHANGE) - .setSampledToLocalTracing(false) - .setRequestMarshaller(ArrowMessage.createMarshaller(allocator)) - .setResponseMarshaller(ArrowMessage.createMarshaller(allocator)) - .setSchemaDescriptor(FlightServiceGrpc.getDoExchangeMethod().getSchemaDescriptor()) - .build(); - } - - @Override - public ServerServiceDefinition bindService() { - final ServerServiceDefinition baseDefinition = delegate.bindService(); - - final MethodDescriptor doGetDescriptor = - getDoGetDescriptor(allocator); - final MethodDescriptor doPutDescriptor = - getDoPutDescriptor(allocator); - final MethodDescriptor doExchangeDescriptor = - getDoExchangeDescriptor(allocator); - - // Make sure we preserve SchemaDescriptor fields on methods so that gRPC reflection still works. - final ServiceDescriptor.Builder serviceDescriptorBuilder = - ServiceDescriptor.newBuilder(FlightConstants.SERVICE) - .setSchemaDescriptor(baseDefinition.getServiceDescriptor().getSchemaDescriptor()); - serviceDescriptorBuilder.addMethod(doGetDescriptor); - serviceDescriptorBuilder.addMethod(doPutDescriptor); - serviceDescriptorBuilder.addMethod(doExchangeDescriptor); - for (MethodDescriptor definition : baseDefinition.getServiceDescriptor().getMethods()) { - if (OVERRIDE_METHODS.contains(definition.getFullMethodName())) { - continue; - } - - serviceDescriptorBuilder.addMethod(definition); - } - - final ServiceDescriptor serviceDescriptor = serviceDescriptorBuilder.build(); - ServerServiceDefinition.Builder serviceBuilder = - ServerServiceDefinition.builder(serviceDescriptor); - serviceBuilder.addMethod( - doGetDescriptor, ServerCalls.asyncServerStreamingCall(new DoGetMethod(delegate))); - serviceBuilder.addMethod( - doPutDescriptor, ServerCalls.asyncBidiStreamingCall(new DoPutMethod(delegate))); - serviceBuilder.addMethod( - doExchangeDescriptor, ServerCalls.asyncBidiStreamingCall(new DoExchangeMethod(delegate))); - - // copy over not-overridden methods. 
- for (ServerMethodDefinition definition : baseDefinition.getMethods()) { - if (OVERRIDE_METHODS.contains(definition.getMethodDescriptor().getFullMethodName())) { - continue; - } - - serviceBuilder.addMethod(definition); - } - - return serviceBuilder.build(); - } - - private static class DoGetMethod - implements ServerCalls.ServerStreamingMethod { - - private final FlightService delegate; - - public DoGetMethod(FlightService delegate) { - this.delegate = delegate; - } - - @Override - public void invoke(Flight.Ticket request, StreamObserver responseObserver) { - delegate.doGetCustom(request, responseObserver); - } - } - - private static class DoPutMethod - implements ServerCalls.BidiStreamingMethod { - private final FlightService delegate; - - public DoPutMethod(FlightService delegate) { - this.delegate = delegate; - } - - @Override - public StreamObserver invoke(StreamObserver responseObserver) { - return delegate.doPutCustom(responseObserver); - } - } - - private static class DoExchangeMethod - implements ServerCalls.BidiStreamingMethod { - private final FlightService delegate; - - public DoExchangeMethod(FlightService delegate) { - this.delegate = delegate; - } - - @Override - public StreamObserver invoke(StreamObserver responseObserver) { - return delegate.doExchangeCustom(responseObserver); - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightCallHeaders.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightCallHeaders.java deleted file mode 100644 index b7b1fdcacc2a4..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightCallHeaders.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import com.google.common.base.Preconditions; -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.Iterables; -import com.google.common.collect.Multimap; -import io.grpc.Metadata; -import java.nio.charset.StandardCharsets; -import java.util.Collection; -import java.util.Set; -import java.util.stream.Collectors; - -/** An implementation of the Flight headers interface for headers. 
*/ -public class FlightCallHeaders implements CallHeaders { - private final Multimap keysAndValues; - - public FlightCallHeaders() { - this.keysAndValues = ArrayListMultimap.create(); - } - - @Override - public String get(String key) { - final Collection values = this.keysAndValues.get(key); - if (values.isEmpty()) { - return null; - } - - if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) { - return new String((byte[]) Iterables.get(values, 0), StandardCharsets.UTF_8); - } - - return (String) Iterables.get(values, 0); - } - - @Override - public byte[] getByte(String key) { - final Collection values = this.keysAndValues.get(key); - if (values.isEmpty()) { - return null; - } - - if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) { - return (byte[]) Iterables.get(values, 0); - } - - return ((String) Iterables.get(values, 0)).getBytes(StandardCharsets.UTF_8); - } - - @Override - public Iterable getAll(String key) { - if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) { - return this.keysAndValues.get(key).stream() - .map(o -> new String((byte[]) o, StandardCharsets.UTF_8)) - .collect(Collectors.toList()); - } - return (Collection) (Collection) this.keysAndValues.get(key); - } - - @Override - public Iterable getAllByte(String key) { - if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) { - return (Collection) (Collection) this.keysAndValues.get(key); - } - return this.keysAndValues.get(key).stream() - .map(o -> ((String) o).getBytes(StandardCharsets.UTF_8)) - .collect(Collectors.toList()); - } - - @Override - public void insert(String key, String value) { - this.keysAndValues.put(key, value); - } - - @Override - public void insert(String key, byte[] value) { - Preconditions.checkArgument( - key.endsWith("-bin"), "Binary header is named %s. 
It must end with %s", key, "-bin"); - Preconditions.checkArgument(key.length() > "-bin".length(), "empty key name"); - - this.keysAndValues.put(key, value); - } - - @Override - public Set keys() { - return this.keysAndValues.keySet(); - } - - @Override - public boolean containsKey(String key) { - return this.keysAndValues.containsKey(key); - } - - @Override - public String toString() { - return this.keysAndValues.toString(); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java deleted file mode 100644 index a15c3049aa6ad..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClient.java +++ /dev/null @@ -1,956 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import io.grpc.Channel; -import io.grpc.ClientCall; -import io.grpc.ClientInterceptor; -import io.grpc.ClientInterceptors; -import io.grpc.ManagedChannel; -import io.grpc.MethodDescriptor; -import io.grpc.StatusRuntimeException; -import io.grpc.netty.GrpcSslContexts; -import io.grpc.netty.NettyChannelBuilder; -import io.grpc.stub.ClientCallStreamObserver; -import io.grpc.stub.ClientCalls; -import io.grpc.stub.ClientResponseObserver; -import io.grpc.stub.StreamObserver; -import io.netty.channel.EventLoopGroup; -import io.netty.channel.ServerChannel; -import io.netty.handler.ssl.SslContextBuilder; -import io.netty.handler.ssl.util.InsecureTrustManagerFactory; -import java.io.IOException; -import java.io.InputStream; -import java.lang.reflect.InvocationTargetException; -import java.net.URISyntaxException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Optional; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.function.BooleanSupplier; -import javax.net.ssl.SSLException; -import org.apache.arrow.flight.FlightProducer.StreamListener; -import org.apache.arrow.flight.auth.BasicClientAuthHandler; -import org.apache.arrow.flight.auth.ClientAuthHandler; -import org.apache.arrow.flight.auth.ClientAuthInterceptor; -import org.apache.arrow.flight.auth.ClientAuthWrapper; -import org.apache.arrow.flight.auth2.BasicAuthCredentialWriter; -import org.apache.arrow.flight.auth2.ClientBearerHeaderHandler; -import org.apache.arrow.flight.auth2.ClientHandshakeWrapper; -import org.apache.arrow.flight.auth2.ClientIncomingAuthHeaderMiddleware; -import org.apache.arrow.flight.grpc.ClientInterceptorAdapter; -import org.apache.arrow.flight.grpc.CredentialCallOption; -import org.apache.arrow.flight.grpc.StatusUtils; -import org.apache.arrow.flight.impl.Flight; -import org.apache.arrow.flight.impl.Flight.Empty; -import 
org.apache.arrow.flight.impl.FlightServiceGrpc; -import org.apache.arrow.flight.impl.FlightServiceGrpc.FlightServiceBlockingStub; -import org.apache.arrow.flight.impl.FlightServiceGrpc.FlightServiceStub; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider; - -/** Client for Flight services. */ -public class FlightClient implements AutoCloseable { - private static final int PENDING_REQUESTS = 5; - /** - * The maximum number of trace events to keep on the gRPC Channel. This value disables channel - * tracing. - */ - private static final int MAX_CHANNEL_TRACE_EVENTS = 0; - - private final BufferAllocator allocator; - private final ManagedChannel channel; - - private final FlightServiceBlockingStub blockingStub; - private final FlightServiceStub asyncStub; - private final ClientAuthInterceptor authInterceptor = new ClientAuthInterceptor(); - private final MethodDescriptor doGetDescriptor; - private final MethodDescriptor doPutDescriptor; - private final MethodDescriptor doExchangeDescriptor; - private final List middleware; - - /** Create a Flight client from an allocator and a gRPC channel. 
*/ - FlightClient( - BufferAllocator incomingAllocator, - ManagedChannel channel, - List middleware) { - this.allocator = incomingAllocator.newChildAllocator("flight-client", 0, Long.MAX_VALUE); - this.channel = channel; - this.middleware = middleware; - - final ClientInterceptor[] interceptors; - interceptors = - new ClientInterceptor[] {authInterceptor, new ClientInterceptorAdapter(middleware)}; - - // Create a channel with interceptors pre-applied for DoGet and DoPut - Channel interceptedChannel = ClientInterceptors.intercept(channel, interceptors); - - blockingStub = FlightServiceGrpc.newBlockingStub(interceptedChannel); - asyncStub = FlightServiceGrpc.newStub(interceptedChannel); - doGetDescriptor = FlightBindingService.getDoGetDescriptor(allocator); - doPutDescriptor = FlightBindingService.getDoPutDescriptor(allocator); - doExchangeDescriptor = FlightBindingService.getDoExchangeDescriptor(allocator); - } - - /** - * Get a list of available flights. - * - * @param criteria Criteria for selecting flights - * @param options RPC-layer hints for the call. - * @return FlightInfo Iterable - */ - public Iterable listFlights(Criteria criteria, CallOption... options) { - final Iterator flights; - try { - flights = CallOptions.wrapStub(blockingStub, options).listFlights(criteria.asCriteria()); - } catch (StatusRuntimeException sre) { - throw StatusUtils.fromGrpcRuntimeException(sre); - } - return () -> - StatusUtils.wrapIterator( - flights, - t -> { - try { - return new FlightInfo(t); - } catch (URISyntaxException e) { - // We don't expect this will happen for conforming Flight implementations. For - // instance, a Java server - // itself wouldn't be able to construct an invalid Location. - throw new RuntimeException(e); - } - }); - } - - /** - * Lists actions available on the Flight service. - * - * @param options RPC-layer hints for the call. - */ - public Iterable listActions(CallOption... 
options) { - final Iterator actions; - try { - actions = CallOptions.wrapStub(blockingStub, options).listActions(Empty.getDefaultInstance()); - } catch (StatusRuntimeException sre) { - throw StatusUtils.fromGrpcRuntimeException(sre); - } - return () -> StatusUtils.wrapIterator(actions, ActionType::new); - } - - /** - * Performs an action on the Flight service. - * - * @param action The action to perform. - * @param options RPC-layer hints for this call. - * @return An iterator of results. - */ - public Iterator doAction(Action action, CallOption... options) { - return StatusUtils.wrapIterator( - CallOptions.wrapStub(blockingStub, options).doAction(action.toProtocol()), Result::new); - } - - /** Authenticates with a username and password. */ - public void authenticateBasic(String username, String password) { - BasicClientAuthHandler basicClient = new BasicClientAuthHandler(username, password); - authenticate(basicClient); - } - - /** - * Authenticates against the Flight service. - * - * @param options RPC-layer hints for this call. - * @param handler The auth mechanism to use. - */ - public void authenticate(ClientAuthHandler handler, CallOption... options) { - Preconditions.checkArgument(!authInterceptor.hasAuthHandler(), "Auth already completed."); - ClientAuthWrapper.doClientAuth(handler, CallOptions.wrapStub(asyncStub, options)); - authInterceptor.setAuthHandler(handler); - } - - /** - * Authenticates with a username and password. - * - * @param username the username. - * @param password the password. - * @return a CredentialCallOption containing a bearer token if the server emitted one, or empty if - * no bearer token was returned. This can be used in subsequent API calls. 
- */ - public Optional authenticateBasicToken(String username, String password) { - final ClientIncomingAuthHeaderMiddleware.Factory clientAuthMiddleware = - new ClientIncomingAuthHeaderMiddleware.Factory(new ClientBearerHeaderHandler()); - middleware.add(clientAuthMiddleware); - handshake(new CredentialCallOption(new BasicAuthCredentialWriter(username, password))); - - return Optional.ofNullable(clientAuthMiddleware.getCredentialCallOption()); - } - - /** - * Executes the handshake against the Flight service. - * - * @param options RPC-layer hints for this call. - */ - public void handshake(CallOption... options) { - ClientHandshakeWrapper.doClientHandshake(CallOptions.wrapStub(asyncStub, options)); - } - - /** - * Create or append a descriptor with another stream. - * - * @param descriptor FlightDescriptor the descriptor for the data - * @param root VectorSchemaRoot the root containing data - * @param metadataListener A handler for metadata messages from the server. This will be passed - * buffers that will be freed after {@link StreamListener#onNext(Object)} is called! - * @param options RPC-layer hints for this call. - * @return ClientStreamListener an interface to control uploading data - */ - public ClientStreamListener startPut( - FlightDescriptor descriptor, - VectorSchemaRoot root, - PutListener metadataListener, - CallOption... options) { - return startPut(descriptor, root, new MapDictionaryProvider(), metadataListener, options); - } - - /** - * Create or append a descriptor with another stream. - * - * @param descriptor FlightDescriptor the descriptor for the data - * @param root VectorSchemaRoot the root containing data - * @param metadataListener A handler for metadata messages from the server. - * @param options RPC-layer hints for this call. - * @return ClientStreamListener an interface to control uploading data. {@link - * ClientStreamListener#start(VectorSchemaRoot, DictionaryProvider)} will already have been - * called. 
- */ - public ClientStreamListener startPut( - FlightDescriptor descriptor, - VectorSchemaRoot root, - DictionaryProvider provider, - PutListener metadataListener, - CallOption... options) { - Preconditions.checkNotNull(root, "root must not be null"); - Preconditions.checkNotNull(provider, "provider must not be null"); - final ClientStreamListener writer = startPut(descriptor, metadataListener, options); - writer.start(root, provider); - return writer; - } - - /** - * Create or append a descriptor with another stream. - * - * @param descriptor FlightDescriptor the descriptor for the data - * @param metadataListener A handler for metadata messages from the server. - * @param options RPC-layer hints for this call. - * @return ClientStreamListener an interface to control uploading data. {@link - * ClientStreamListener#start(VectorSchemaRoot, DictionaryProvider)} will NOT already have - * been called. - */ - public ClientStreamListener startPut( - FlightDescriptor descriptor, PutListener metadataListener, CallOption... options) { - Preconditions.checkNotNull(descriptor, "descriptor must not be null"); - Preconditions.checkNotNull(metadataListener, "metadataListener must not be null"); - - try { - final ClientCall call = - asyncStubNewCall(doPutDescriptor, options); - final SetStreamObserver resultObserver = new SetStreamObserver(allocator, metadataListener); - ClientCallStreamObserver observer = - (ClientCallStreamObserver) - ClientCalls.asyncBidiStreamingCall(call, resultObserver); - return new PutObserver( - descriptor, observer, metadataListener::isCancelled, metadataListener::getResult); - } catch (StatusRuntimeException sre) { - throw StatusUtils.fromGrpcRuntimeException(sre); - } - } - - /** - * Get info on a stream. - * - * @param descriptor The descriptor for the stream. - * @param options RPC-layer hints for this call. - */ - public FlightInfo getInfo(FlightDescriptor descriptor, CallOption... 
options) { - try { - return new FlightInfo( - CallOptions.wrapStub(blockingStub, options).getFlightInfo(descriptor.toProtocol())); - } catch (URISyntaxException e) { - // We don't expect this will happen for conforming Flight implementations. For instance, a - // Java server - // itself wouldn't be able to construct an invalid Location. - throw new RuntimeException(e); - } catch (StatusRuntimeException sre) { - throw StatusUtils.fromGrpcRuntimeException(sre); - } - } - - /** - * Start or get info on execution of a long-running query. - * - * @param descriptor The descriptor for the stream. - * @param options RPC-layer hints for this call. - * @return Metadata about execution. - */ - public PollInfo pollInfo(FlightDescriptor descriptor, CallOption... options) { - try { - return new PollInfo( - CallOptions.wrapStub(blockingStub, options).pollFlightInfo(descriptor.toProtocol())); - } catch (URISyntaxException e) { - throw new RuntimeException(e); - } catch (StatusRuntimeException sre) { - throw StatusUtils.fromGrpcRuntimeException(sre); - } - } - - /** - * Get schema for a stream. - * - * @param descriptor The descriptor for the stream. - * @param options RPC-layer hints for this call. - */ - public SchemaResult getSchema(FlightDescriptor descriptor, CallOption... options) { - try { - return SchemaResult.fromProtocol( - CallOptions.wrapStub(blockingStub, options).getSchema(descriptor.toProtocol())); - } catch (StatusRuntimeException sre) { - throw StatusUtils.fromGrpcRuntimeException(sre); - } - } - - /** - * Retrieve a stream from the server. - * - * @param ticket The ticket granting access to the data stream. - * @param options RPC-layer hints for this call. - */ - public FlightStream getStream(Ticket ticket, CallOption... 
options) { - final ClientCall call = asyncStubNewCall(doGetDescriptor, options); - FlightStream stream = - new FlightStream( - allocator, - PENDING_REQUESTS, - (String message, Throwable cause) -> call.cancel(message, cause), - (count) -> call.request(count)); - - final StreamObserver delegate = stream.asObserver(); - ClientResponseObserver clientResponseObserver = - new ClientResponseObserver() { - - @Override - public void beforeStart( - ClientCallStreamObserver requestStream) { - requestStream.disableAutoInboundFlowControl(); - } - - @Override - public void onNext(ArrowMessage value) { - delegate.onNext(value); - } - - @Override - public void onError(Throwable t) { - delegate.onError(StatusUtils.toGrpcException(t)); - } - - @Override - public void onCompleted() { - delegate.onCompleted(); - } - }; - - ClientCalls.asyncServerStreamingCall(call, ticket.toProtocol(), clientResponseObserver); - return stream; - } - - /** - * Initiate a bidirectional data exchange with the server. - * - * @param descriptor A descriptor for the data stream. - * @param options RPC call options. - * @return A pair of a readable stream and a writable stream. - */ - public ExchangeReaderWriter doExchange(FlightDescriptor descriptor, CallOption... 
options) { - Preconditions.checkNotNull(descriptor, "descriptor must not be null"); - - try { - final ClientCall call = - asyncStubNewCall(doExchangeDescriptor, options); - final FlightStream stream = - new FlightStream(allocator, PENDING_REQUESTS, call::cancel, call::request); - final ClientCallStreamObserver observer = - (ClientCallStreamObserver) - ClientCalls.asyncBidiStreamingCall(call, stream.asObserver()); - final ClientStreamListener writer = - new PutObserver( - descriptor, - observer, - stream.cancelled::isDone, - () -> { - try { - stream.completed.get(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw CallStatus.INTERNAL - .withDescription("Client error: interrupted while completing call") - .withCause(e) - .toRuntimeException(); - } catch (ExecutionException e) { - throw CallStatus.INTERNAL - .withDescription("Client error: internal while completing call") - .withCause(e) - .toRuntimeException(); - } - }); - // Send the descriptor to start. - try (final ArrowMessage message = new ArrowMessage(descriptor.toProtocol())) { - observer.onNext(message); - } catch (Exception e) { - throw CallStatus.INTERNAL - .withCause(e) - .withDescription("Could not write descriptor " + descriptor) - .toRuntimeException(); - } - return new ExchangeReaderWriter(stream, writer); - } catch (StatusRuntimeException sre) { - throw StatusUtils.fromGrpcRuntimeException(sre); - } - } - - /** A pair of a reader and a writer for a DoExchange call. */ - public static class ExchangeReaderWriter implements AutoCloseable { - private final FlightStream reader; - private final ClientStreamListener writer; - - ExchangeReaderWriter(FlightStream reader, ClientStreamListener writer) { - this.reader = reader; - this.writer = writer; - } - - /** Get the reader for the call. */ - public FlightStream getReader() { - return reader; - } - - /** Get the writer for the call. 
*/ - public ClientStreamListener getWriter() { - return writer; - } - - /** - * Make sure stream is drained. You must call this to be notified of any errors that may have - * happened after the exchange is complete. This should be called after - * `getWriter().completed()` and instead of `getWriter().getResult()`. - */ - public void getResult() { - // After exchange is complete, make sure stream is drained to propagate errors through reader - while (reader.next()) {} - } - - /** Shut down the streams in this call. */ - @Override - public void close() throws Exception { - reader.close(); - } - } - - /** A stream observer for Flight.PutResult. */ - private static class SetStreamObserver implements StreamObserver { - private final BufferAllocator allocator; - private final StreamListener listener; - - SetStreamObserver(BufferAllocator allocator, StreamListener listener) { - super(); - this.allocator = allocator; - this.listener = listener == null ? NoOpStreamListener.getInstance() : listener; - } - - @Override - public void onNext(Flight.PutResult value) { - try (final PutResult message = PutResult.fromProtocol(allocator, value)) { - listener.onNext(message); - } - } - - @Override - public void onError(Throwable t) { - listener.onError(StatusUtils.fromThrowable(t)); - } - - @Override - public void onCompleted() { - listener.onCompleted(); - } - } - - /** The implementation of a {@link ClientStreamListener} for writing data to a Flight server. */ - static class PutObserver extends OutboundStreamListenerImpl implements ClientStreamListener { - private final BooleanSupplier isCancelled; - private final Runnable getResult; - - /** - * Create a new client stream listener. - * - * @param descriptor The descriptor for the stream. - * @param observer The write-side gRPC StreamObserver. - * @param isCancelled A flag to check if the call has been cancelled. - * @param getResult A flag that blocks until the overall call completes. 
- */ - PutObserver( - FlightDescriptor descriptor, - ClientCallStreamObserver observer, - BooleanSupplier isCancelled, - Runnable getResult) { - super(descriptor, observer); - Preconditions.checkNotNull(descriptor, "descriptor must be provided"); - Preconditions.checkNotNull(isCancelled, "isCancelled must be provided"); - Preconditions.checkNotNull(getResult, "getResult must be provided"); - this.isCancelled = isCancelled; - this.getResult = getResult; - this.unloader = null; - } - - @Override - protected void waitUntilStreamReady() { - // Check isCancelled as well to avoid inadvertently blocking forever - // (so long as PutListener properly implements it) - while (!responseObserver.isReady() && !isCancelled.getAsBoolean()) { - /* busy wait */ - } - } - - @Override - public void getResult() { - getResult.run(); - } - } - - /** - * Cancel execution of a distributed query. - * - * @param request The query to cancel. - * @param options Call options. - * @return The server response. - */ - public CancelFlightInfoResult cancelFlightInfo( - CancelFlightInfoRequest request, CallOption... options) { - Action action = - new Action(FlightConstants.CANCEL_FLIGHT_INFO.getType(), request.serialize().array()); - Iterator results = doAction(action, options); - if (!results.hasNext()) { - throw CallStatus.INTERNAL - .withDescription("Server did not return a response") - .toRuntimeException(); - } - - CancelFlightInfoResult result; - try { - result = CancelFlightInfoResult.deserialize(ByteBuffer.wrap(results.next().getBody())); - } catch (IOException e) { - throw CallStatus.INTERNAL - .withDescription("Failed to parse server response: " + e) - .withCause(e) - .toRuntimeException(); - } - results.forEachRemaining((ignored) -> {}); - return result; - } - - /** - * Request the server to extend the lifetime of a query result set. - * - * @param request The result set partition. - * @param options Call options. - * @return The new endpoint with an updated expiration time. 
- */ - public FlightEndpoint renewFlightEndpoint( - RenewFlightEndpointRequest request, CallOption... options) { - Action action = - new Action(FlightConstants.RENEW_FLIGHT_ENDPOINT.getType(), request.serialize().array()); - Iterator results = doAction(action, options); - if (!results.hasNext()) { - throw CallStatus.INTERNAL - .withDescription("Server did not return a response") - .toRuntimeException(); - } - - FlightEndpoint result; - try { - result = FlightEndpoint.deserialize(ByteBuffer.wrap(results.next().getBody())); - } catch (IOException | URISyntaxException e) { - throw CallStatus.INTERNAL - .withDescription("Failed to parse server response: " + e) - .withCause(e) - .toRuntimeException(); - } - results.forEachRemaining((ignored) -> {}); - return result; - } - - /** - * Set server session option(s) by name/value. - * - *

    Sessions are generally persisted via HTTP cookies. - * - * @param request The session options to set on the server. - * @param options Call options. - * @return The result containing per-value error statuses, if any. - */ - public SetSessionOptionsResult setSessionOptions( - SetSessionOptionsRequest request, CallOption... options) { - Action action = - new Action(FlightConstants.SET_SESSION_OPTIONS.getType(), request.serialize().array()); - Iterator results = doAction(action, options); - if (!results.hasNext()) { - throw CallStatus.INTERNAL - .withDescription("Server did not return a response") - .toRuntimeException(); - } - - SetSessionOptionsResult result; - try { - result = SetSessionOptionsResult.deserialize(ByteBuffer.wrap(results.next().getBody())); - } catch (IOException e) { - throw CallStatus.INTERNAL - .withDescription("Failed to parse server response: " + e) - .withCause(e) - .toRuntimeException(); - } - results.forEachRemaining((ignored) -> {}); - return result; - } - - /** - * Get the current server session options. - * - *

    The session is generally accessed via an HTTP cookie. - * - * @param request The (empty) GetSessionOptionsRequest. - * @param options Call options. - * @return The result containing the set of session options configured on the server. - */ - public GetSessionOptionsResult getSessionOptions( - GetSessionOptionsRequest request, CallOption... options) { - Action action = - new Action(FlightConstants.GET_SESSION_OPTIONS.getType(), request.serialize().array()); - Iterator results = doAction(action, options); - if (!results.hasNext()) { - throw CallStatus.INTERNAL - .withDescription("Server did not return a response") - .toRuntimeException(); - } - - GetSessionOptionsResult result; - try { - result = GetSessionOptionsResult.deserialize(ByteBuffer.wrap(results.next().getBody())); - } catch (IOException e) { - throw CallStatus.INTERNAL - .withDescription("Failed to parse server response: " + e) - .withCause(e) - .toRuntimeException(); - } - results.forEachRemaining((ignored) -> {}); - return result; - } - - /** - * Close/invalidate the current server session. - * - *

    The session is generally accessed via an HTTP cookie. - * - * @param request The (empty) CloseSessionRequest. - * @param options Call options. - * @return The result containing the status of the close operation. - */ - public CloseSessionResult closeSession(CloseSessionRequest request, CallOption... options) { - Action action = - new Action(FlightConstants.CLOSE_SESSION.getType(), request.serialize().array()); - Iterator results = doAction(action, options); - if (!results.hasNext()) { - throw CallStatus.INTERNAL - .withDescription("Server did not return a response") - .toRuntimeException(); - } - - CloseSessionResult result; - try { - result = CloseSessionResult.deserialize(ByteBuffer.wrap(results.next().getBody())); - } catch (IOException e) { - throw CallStatus.INTERNAL - .withDescription("Failed to parse server response: " + e) - .withCause(e) - .toRuntimeException(); - } - results.forEachRemaining((ignored) -> {}); - return result; - } - - /** Interface for writers to an Arrow data stream. */ - public interface ClientStreamListener extends OutboundStreamListener { - - /** - * Wait for the stream to finish on the server side. You must call this to be notified of any - * errors that may have happened during the upload. - */ - void getResult(); - } - - /** - * A handler for server-sent application metadata messages during a Flight DoPut operation. - * - *

    Generally, instead of implementing this yourself, you should use {@link AsyncPutListener} or - * {@link SyncPutListener}. - */ - public interface PutListener extends StreamListener { - - /** - * Wait for the stream to finish on the server side. You must call this to be notified of any - * errors that may have happened during the upload. - */ - void getResult(); - - /** - * Called when a message from the server is received. - * - * @param val The application metadata. This buffer will be reclaimed once onNext returns; you - * must retain a reference to use it outside this method. - */ - @Override - void onNext(PutResult val); - - /** - * Check if the call has been cancelled. - * - *

    By default, this always returns false. Implementations should provide an appropriate - * implementation, as otherwise, a DoPut operation may inadvertently block forever. - */ - default boolean isCancelled() { - return false; - } - } - - /** Shut down this client. */ - @Override - public void close() throws InterruptedException { - channel.shutdown().awaitTermination(5, TimeUnit.SECONDS); - allocator.close(); - } - - /** Create a builder for a Flight client. */ - public static Builder builder() { - return new Builder(); - } - - /** - * Create a builder for a Flight client. - * - * @param allocator The allocator to use for the client. - * @param location The location to connect to. - */ - public static Builder builder(BufferAllocator allocator, Location location) { - return new Builder(allocator, location); - } - - /** A builder for Flight clients. */ - public static final class Builder { - private BufferAllocator allocator; - private Location location; - private boolean forceTls = false; - private int maxInboundMessageSize = FlightServer.MAX_GRPC_MESSAGE_SIZE; - private InputStream trustedCertificates = null; - private InputStream clientCertificate = null; - private InputStream clientKey = null; - private String overrideHostname = null; - private List middleware = new ArrayList<>(); - private boolean verifyServer = true; - - private Builder() {} - - private Builder(BufferAllocator allocator, Location location) { - this.allocator = Preconditions.checkNotNull(allocator); - this.location = Preconditions.checkNotNull(location); - } - - /** Force the client to connect over TLS. */ - public Builder useTls() { - this.forceTls = true; - return this; - } - - /** Override the hostname checked for TLS. Use with caution in production. */ - public Builder overrideHostname(final String hostname) { - this.overrideHostname = hostname; - return this; - } - - /** Set the maximum inbound message size. 
*/ - public Builder maxInboundMessageSize(int maxSize) { - Preconditions.checkArgument(maxSize > 0); - this.maxInboundMessageSize = maxSize; - return this; - } - - /** Set the trusted TLS certificates. */ - public Builder trustedCertificates(final InputStream stream) { - this.trustedCertificates = Preconditions.checkNotNull(stream); - return this; - } - - /** Set the trusted TLS certificates. */ - public Builder clientCertificate( - final InputStream clientCertificate, final InputStream clientKey) { - Preconditions.checkNotNull(clientKey); - this.clientCertificate = Preconditions.checkNotNull(clientCertificate); - this.clientKey = Preconditions.checkNotNull(clientKey); - return this; - } - - public Builder allocator(BufferAllocator allocator) { - this.allocator = Preconditions.checkNotNull(allocator); - return this; - } - - public Builder location(Location location) { - this.location = Preconditions.checkNotNull(location); - return this; - } - - public Builder intercept(FlightClientMiddleware.Factory factory) { - middleware.add(factory); - return this; - } - - public Builder verifyServer(boolean verifyServer) { - this.verifyServer = verifyServer; - return this; - } - - /** Create the client from this builder. 
*/ - public FlightClient build() { - final NettyChannelBuilder builder; - - switch (location.getUri().getScheme()) { - case LocationSchemes.GRPC: - case LocationSchemes.GRPC_INSECURE: - case LocationSchemes.GRPC_TLS: - { - builder = NettyChannelBuilder.forAddress(location.toSocketAddress()); - break; - } - case LocationSchemes.GRPC_DOMAIN_SOCKET: - { - // The implementation is platform-specific, so we have to find the classes at runtime - builder = NettyChannelBuilder.forAddress(location.toSocketAddress()); - try { - try { - // Linux - builder.channelType( - Class.forName("io.netty.channel.epoll.EpollDomainSocketChannel") - .asSubclass(ServerChannel.class)); - final EventLoopGroup elg = - Class.forName("io.netty.channel.epoll.EpollEventLoopGroup") - .asSubclass(EventLoopGroup.class) - .getDeclaredConstructor() - .newInstance(); - builder.eventLoopGroup(elg); - } catch (ClassNotFoundException e) { - // BSD - builder.channelType( - Class.forName("io.netty.channel.kqueue.KQueueDomainSocketChannel") - .asSubclass(ServerChannel.class)); - final EventLoopGroup elg = - Class.forName("io.netty.channel.kqueue.KQueueEventLoopGroup") - .asSubclass(EventLoopGroup.class) - .getDeclaredConstructor() - .newInstance(); - builder.eventLoopGroup(elg); - } - } catch (ClassNotFoundException - | InstantiationException - | IllegalAccessException - | NoSuchMethodException - | InvocationTargetException e) { - throw new UnsupportedOperationException( - "Could not find suitable Netty native transport implementation for domain socket address."); - } - break; - } - default: - throw new IllegalArgumentException( - "Scheme is not supported: " + location.getUri().getScheme()); - } - - if (this.forceTls || LocationSchemes.GRPC_TLS.equals(location.getUri().getScheme())) { - builder.useTransportSecurity(); - - final boolean hasTrustedCerts = this.trustedCertificates != null; - final boolean hasKeyCertPair = this.clientCertificate != null && this.clientKey != null; - if (!this.verifyServer && 
(hasTrustedCerts || hasKeyCertPair)) { - throw new IllegalArgumentException( - "FlightClient has been configured to disable server verification, " - + "but certificate options have been specified."); - } - - final SslContextBuilder sslContextBuilder = GrpcSslContexts.forClient(); - - if (!this.verifyServer) { - sslContextBuilder.trustManager(InsecureTrustManagerFactory.INSTANCE); - } else if (this.trustedCertificates != null - || this.clientCertificate != null - || this.clientKey != null) { - if (this.trustedCertificates != null) { - sslContextBuilder.trustManager(this.trustedCertificates); - } - if (this.clientCertificate != null && this.clientKey != null) { - sslContextBuilder.keyManager(this.clientCertificate, this.clientKey); - } - } - try { - builder.sslContext(sslContextBuilder.build()); - } catch (SSLException e) { - throw new RuntimeException(e); - } - - if (this.overrideHostname != null) { - builder.overrideAuthority(this.overrideHostname); - } - } else { - builder.usePlaintext(); - } - - builder - .maxTraceEvents(MAX_CHANNEL_TRACE_EVENTS) - .maxInboundMessageSize(maxInboundMessageSize) - .maxInboundMetadataSize(maxInboundMessageSize); - return new FlightClient(allocator, builder.build(), middleware); - } - } - - /** - * Helper method to create a call from the asyncStub, method descriptor, and list of calling - * options. - */ - private ClientCall asyncStubNewCall( - MethodDescriptor descriptor, CallOption... 
options) { - FlightServiceStub wrappedStub = CallOptions.wrapStub(asyncStub, options); - return wrappedStub.getChannel().newCall(descriptor, wrappedStub.getCallOptions()); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClientMiddleware.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClientMiddleware.java deleted file mode 100644 index 238d4f29fe9ee..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightClientMiddleware.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -/** - * Client-side middleware for Flight. - * - *

    Middleware are instantiated per-call and should store state in the middleware instance. - */ -public interface FlightClientMiddleware { - /** A callback used before request headers are sent. The headers may be manipulated. */ - void onBeforeSendingHeaders(CallHeaders outgoingHeaders); - - /** A callback called after response headers are received. The headers may be manipulated. */ - void onHeadersReceived(CallHeaders incomingHeaders); - - /** A callback called after the call completes. */ - void onCallCompleted(CallStatus status); - - /** A factory for client middleware instances. */ - interface Factory { - /** - * Create a new middleware instance for the given call. - * - * @throws FlightRuntimeException if the middleware wants to reject the call with the given - * status - */ - FlightClientMiddleware onCallStarted(CallInfo info); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightConstants.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightConstants.java deleted file mode 100644 index 6b89c794d6ca7..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightConstants.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -/** String constants relevant to flight implementations. */ -public interface FlightConstants { - - String SERVICE = "arrow.flight.protocol.FlightService"; - - FlightServerMiddleware.Key HEADER_KEY = - FlightServerMiddleware.Key.of("org.apache.arrow.flight.ServerHeaderMiddleware"); - - ActionType CANCEL_FLIGHT_INFO = - new ActionType( - "CancelFlightInfo", - "Explicitly cancel a running FlightInfo.\n" - + "Request Message: CancelFlightInfoRequest\n" - + "Response Message: CancelFlightInfoResult"); - ActionType RENEW_FLIGHT_ENDPOINT = - new ActionType( - "RenewFlightEndpoint", - "Extend expiration time of the given FlightEndpoint.\n" - + "Request Message: RenewFlightEndpointRequest\n" - + "Response Message: Renewed FlightEndpoint"); - - ActionType SET_SESSION_OPTIONS = - new ActionType( - "SetSessionOptions", - "Set client session options by name/value pairs.\n" - + "Request Message: SetSessionOptionsRequest\n" - + "Response Message: SetSessionOptionsResult"); - - ActionType GET_SESSION_OPTIONS = - new ActionType( - "GetSessionOptions", - "Get current client session options\n" - + "Request Message: GetSessionOptionsRequest\n" - + "Response Message: GetSessionOptionsResult"); - ActionType CLOSE_SESSION = - new ActionType( - "CloseSession", - "Explicitly close/invalidate the cookie-specified client session.\n" - + "Request Message: CloseSessionRequest\n" - + "Response Message: CloseSessionResult"); -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightDescriptor.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightDescriptor.java deleted file mode 100644 index cc8bc87c145b5..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightDescriptor.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under 
one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableList; -import com.google.protobuf.ByteString; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.flight.impl.Flight; -import org.apache.arrow.flight.impl.Flight.FlightDescriptor.DescriptorType; -import org.apache.arrow.util.Preconditions; - -/** - * An identifier for a particular set of data. This can either be an opaque command that generates - * the data or a static "path" to the data. This is a POJO wrapper around the protobuf message with - * the same name. - */ -public class FlightDescriptor { - - private boolean isCmd; - private List path; - private byte[] cmd; - - private FlightDescriptor(boolean isCmd, List path, byte[] cmd) { - super(); - this.isCmd = isCmd; - this.path = path; - this.cmd = cmd; - } - - public static FlightDescriptor command(byte[] cmd) { - return new FlightDescriptor(true, null, cmd); - } - - public static FlightDescriptor path(Iterable path) { - return new FlightDescriptor(false, ImmutableList.copyOf(path), null); - } - - public static FlightDescriptor path(String... 
path) { - return new FlightDescriptor(false, ImmutableList.copyOf(path), null); - } - - FlightDescriptor(Flight.FlightDescriptor descriptor) { - if (descriptor.getType() == DescriptorType.CMD) { - isCmd = true; - cmd = descriptor.getCmd().toByteArray(); - } else if (descriptor.getType() == DescriptorType.PATH) { - isCmd = false; - path = descriptor.getPathList(); - } else { - throw new UnsupportedOperationException(); - } - } - - public boolean isCommand() { - return isCmd; - } - - public List getPath() { - Preconditions.checkArgument(!isCmd); - return path; - } - - public byte[] getCommand() { - Preconditions.checkArgument(isCmd); - return cmd; - } - - Flight.FlightDescriptor toProtocol() { - Flight.FlightDescriptor.Builder b = Flight.FlightDescriptor.newBuilder(); - - if (isCmd) { - return b.setType(DescriptorType.CMD).setCmd(ByteString.copyFrom(cmd)).build(); - } - return b.setType(DescriptorType.PATH).addAllPath(path).build(); - } - - /** - * Get the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing non-Flight services to still return Flight - * types. - */ - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - /** - * Parse the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing Flight clients to obtain stream info from - * non-Flight services. - * - * @param serialized The serialized form of the FlightDescriptor, as returned by {@link - * #serialize()}. - * @return The deserialized FlightDescriptor. - * @throws IOException if the serialized form is invalid. - */ - public static FlightDescriptor deserialize(ByteBuffer serialized) throws IOException { - return new FlightDescriptor(Flight.FlightDescriptor.parseFrom(serialized)); - } - - @Override - public String toString() { - if (isCmd) { - return toHex(cmd); - } else { - return Joiner.on('.').join(path); - } - } - - private String toHex(byte[] bytes) { - StringBuilder sb = new StringBuilder(); - for (byte b : bytes) { - sb.append(String.format("%02X ", b)); - } - return sb.toString(); - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((cmd == null) ? 0 : Arrays.hashCode(cmd)); - result = prime * result + (isCmd ? 1231 : 1237); - result = prime * result + ((path == null) ? 
0 : path.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof FlightDescriptor)) { - return false; - } - FlightDescriptor other = (FlightDescriptor) obj; - if (cmd == null) { - if (other.cmd != null) { - return false; - } - } else if (!Arrays.equals(cmd, other.cmd)) { - return false; - } - if (isCmd != other.isCmd) { - return false; - } - if (path == null) { - if (other.path != null) { - return false; - } - } else if (!path.equals(other.path)) { - return false; - } - return true; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightEndpoint.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightEndpoint.java deleted file mode 100644 index c8e472a050616..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightEndpoint.java +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import com.google.protobuf.ByteString; -import com.google.protobuf.Timestamp; -import com.google.protobuf.util.Timestamps; -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.ByteBuffer; -import java.time.Instant; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Base64; -import java.util.Collections; -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import org.apache.arrow.flight.impl.Flight; - -/** POJO to convert to/from the underlying protobuf FlightEndpoint. */ -public class FlightEndpoint { - private final List locations; - private final Ticket ticket; - private final Instant expirationTime; - private final byte[] appMetadata; - - /** - * Constructs a new endpoint with no expiration time. - * - * @param ticket A ticket that describe the key of a data stream. - * @param locations The possible locations the stream can be retrieved from. - */ - public FlightEndpoint(Ticket ticket, Location... locations) { - this(ticket, /*expirationTime*/ null, locations); - } - - /** - * Constructs a new endpoint with an expiration time. - * - * @param ticket A ticket that describe the key of a data stream. - * @param expirationTime (optional) When this endpoint expires. - * @param locations The possible locations the stream can be retrieved from. - */ - public FlightEndpoint(Ticket ticket, Instant expirationTime, Location... locations) { - this( - ticket, - expirationTime, - null, - Collections.unmodifiableList(new ArrayList<>(Arrays.asList(locations)))); - } - - /** Private constructor with all parameters. Should only be called by Builder. 
*/ - private FlightEndpoint( - Ticket ticket, Instant expirationTime, byte[] appMetadata, List locations) { - Objects.requireNonNull(ticket); - this.locations = locations; - this.expirationTime = expirationTime; - this.ticket = ticket; - this.appMetadata = appMetadata; - } - - /** Constructs from the protocol buffer representation. */ - FlightEndpoint(Flight.FlightEndpoint flt) throws URISyntaxException { - this.locations = new ArrayList<>(); - for (final Flight.Location location : flt.getLocationList()) { - this.locations.add(new Location(location.getUri())); - } - if (flt.hasExpirationTime()) { - this.expirationTime = - Instant.ofEpochSecond( - flt.getExpirationTime().getSeconds(), Timestamps.toNanos(flt.getExpirationTime())); - } else { - this.expirationTime = null; - } - this.appMetadata = (flt.getAppMetadata().isEmpty() ? null : flt.getAppMetadata().toByteArray()); - this.ticket = new Ticket(flt.getTicket()); - } - - public List getLocations() { - return locations; - } - - public Ticket getTicket() { - return ticket; - } - - public Optional getExpirationTime() { - return Optional.ofNullable(expirationTime); - } - - public byte[] getAppMetadata() { - return appMetadata; - } - - /** Converts to the protocol buffer representation. */ - Flight.FlightEndpoint toProtocol() { - Flight.FlightEndpoint.Builder b = - Flight.FlightEndpoint.newBuilder().setTicket(ticket.toProtocol()); - - for (Location l : locations) { - b.addLocation(l.toProtocol()); - } - - if (expirationTime != null) { - b.setExpirationTime( - Timestamp.newBuilder() - .setSeconds(expirationTime.getEpochSecond()) - .setNanos(expirationTime.getNano()) - .build()); - } - - if (appMetadata != null) { - b.setAppMetadata(ByteString.copyFrom(appMetadata)); - } - - return b.build(); - } - - /** - * Get the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing non-Flight services to still return Flight - * types. - */ - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - /** - * Parse the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing Flight clients to obtain stream info from - * non-Flight services. - * - * @param serialized The serialized form of the message, as returned by {@link #serialize()}. - * @return The deserialized message. - * @throws IOException if the serialized form is invalid. - * @throws URISyntaxException if the serialized form contains an unsupported URI format. - */ - public static FlightEndpoint deserialize(ByteBuffer serialized) - throws IOException, URISyntaxException { - return new FlightEndpoint(Flight.FlightEndpoint.parseFrom(serialized)); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof FlightEndpoint)) { - return false; - } - FlightEndpoint that = (FlightEndpoint) o; - return locations.equals(that.locations) - && ticket.equals(that.ticket) - && Objects.equals(expirationTime, that.expirationTime) - && Arrays.equals(appMetadata, that.appMetadata); - } - - @Override - public int hashCode() { - return Objects.hash(locations, ticket, expirationTime, Arrays.hashCode(appMetadata)); - } - - @Override - public String toString() { - return "FlightEndpoint{" - + "locations=" - + locations - + ", ticket=" - + ticket - + ", expirationTime=" - + (expirationTime == null ? "(none)" : expirationTime.toString()) - + ", appMetadata=" - + (appMetadata == null ? "(none)" : Base64.getEncoder().encodeToString(appMetadata)) - + '}'; - } - - /** - * Create a builder for FlightEndpoint. - * - * @param ticket A ticket that describe the key of a data stream. - * @param locations The possible locations the stream can be retrieved from. - */ - public static Builder builder(Ticket ticket, Location... locations) { - return new Builder(ticket, locations); - } - - /** Builder for FlightEndpoint. 
*/ - public static final class Builder { - private final Ticket ticket; - private final List locations; - private Instant expirationTime = null; - private byte[] appMetadata = null; - - private Builder(Ticket ticket, Location... locations) { - this.ticket = ticket; - this.locations = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(locations))); - } - - /** - * Set expiration time for the endpoint. Default is null, which means don't expire. - * - * @param expirationTime (optional) When this endpoint expires. - */ - public Builder setExpirationTime(Instant expirationTime) { - this.expirationTime = expirationTime; - return this; - } - - /** - * Set the app metadata to send along with the flight. Default is null; - * - * @param appMetadata Metadata to send along with the flight - */ - public Builder setAppMetadata(byte[] appMetadata) { - this.appMetadata = appMetadata; - return this; - } - - /** Build FlightEndpoint object. */ - public FlightEndpoint build() { - return new FlightEndpoint(ticket, expirationTime, appMetadata, locations); - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java deleted file mode 100644 index 13e4f2f21503a..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightGrpcUtils.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import io.grpc.BindableService; -import io.grpc.CallOptions; -import io.grpc.ClientCall; -import io.grpc.ConnectivityState; -import io.grpc.ManagedChannel; -import io.grpc.MethodDescriptor; -import java.util.Collections; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.flight.auth.ServerAuthHandler; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.VisibleForTesting; - -/** Exposes Flight GRPC service & client. */ -public class FlightGrpcUtils { - /** Proxy class for ManagedChannel that makes closure a no-op. */ - @VisibleForTesting - static class NonClosingProxyManagedChannel extends ManagedChannel { - private final ManagedChannel channel; - private boolean isShutdown; - - NonClosingProxyManagedChannel(ManagedChannel channel) { - this.channel = channel; - this.isShutdown = channel.isShutdown(); - } - - @Override - public ManagedChannel shutdown() { - isShutdown = true; - return this; - } - - @Override - public boolean isShutdown() { - if (this.channel.isShutdown()) { - // If the underlying channel is shut down, ensure we're updated to match. 
- shutdown(); - } - return isShutdown; - } - - @Override - public boolean isTerminated() { - return this.isShutdown(); - } - - @Override - public ManagedChannel shutdownNow() { - return shutdown(); - } - - @Override - public boolean awaitTermination(long l, TimeUnit timeUnit) { - // Don't actually await termination, since it'll be a no-op, so simply return whether or not - // the channel has been shut down already. - return this.isShutdown(); - } - - @Override - public ClientCall newCall( - MethodDescriptor methodDescriptor, CallOptions callOptions) { - if (this.isShutdown()) { - throw new IllegalStateException("Channel has been shut down."); - } - - return this.channel.newCall(methodDescriptor, callOptions); - } - - @Override - public String authority() { - return this.channel.authority(); - } - - @Override - public ConnectivityState getState(boolean requestConnection) { - if (this.isShutdown()) { - return ConnectivityState.SHUTDOWN; - } - - return this.channel.getState(requestConnection); - } - - @Override - public void notifyWhenStateChanged(ConnectivityState source, Runnable callback) { - // The proxy has no insight into the underlying channel state changes, so we'll have to leak - // the abstraction - // a bit here and simply pass to the underlying channel, even though it will never transition - // to shutdown via - // the proxy. This should be fine, since it's mainly targeted at the FlightClient and there's - // no getter for - // the channel. - this.channel.notifyWhenStateChanged(source, callback); - } - - @Override - public void resetConnectBackoff() { - this.channel.resetConnectBackoff(); - } - - @Override - public void enterIdle() { - this.channel.enterIdle(); - } - } - - private FlightGrpcUtils() {} - - /** - * Creates a Flight service. 
- * - * @param allocator Memory allocator - * @param producer Specifies the service api - * @param authHandler Authentication handler - * @param executor Executor service - * @return FlightBindingService - */ - public static BindableService createFlightService( - BufferAllocator allocator, - FlightProducer producer, - ServerAuthHandler authHandler, - ExecutorService executor) { - return new FlightBindingService(allocator, producer, authHandler, executor); - } - - /** - * Creates a Flight client. - * - * @param incomingAllocator Memory allocator - * @param channel provides a connection to a gRPC server. - */ - public static FlightClient createFlightClient( - BufferAllocator incomingAllocator, ManagedChannel channel) { - return new FlightClient(incomingAllocator, channel, Collections.emptyList()); - } - - /** - * Creates a Flight client. - * - * @param incomingAllocator Memory allocator - * @param channel provides a connection to a gRPC server. Will not be closed on closure of the - * returned FlightClient. - */ - public static FlightClient createFlightClientWithSharedChannel( - BufferAllocator incomingAllocator, ManagedChannel channel) { - return new FlightClient( - incomingAllocator, new NonClosingProxyManagedChannel(channel), Collections.emptyList()); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightInfo.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightInfo.java deleted file mode 100644 index 88926ddba0def..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightInfo.java +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream; -import com.google.protobuf.ByteString; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.ByteBuffer; -import java.nio.channels.Channels; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Base64; -import java.util.Collections; -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.stream.Collectors; -import org.apache.arrow.flight.impl.Flight; -import org.apache.arrow.vector.ipc.ReadChannel; -import org.apache.arrow.vector.ipc.WriteChannel; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.validate.MetadataV4UnionChecker; - -/** A POJO representation of a FlightInfo, metadata associated with a set of data records. */ -public class FlightInfo { - private final Schema schema; - private final FlightDescriptor descriptor; - private final List endpoints; - private final long bytes; - private final long records; - private final boolean ordered; - private final IpcOption option; - private final byte[] appMetadata; - - /** - * Constructs a new instance. 
- * - * @param schema The schema of the Flight - * @param descriptor An identifier for the Flight. - * @param endpoints A list of endpoints that have the flight available. - * @param bytes The number of bytes in the flight - * @param records The number of records in the flight. - */ - public FlightInfo( - Schema schema, - FlightDescriptor descriptor, - List endpoints, - long bytes, - long records) { - this(schema, descriptor, endpoints, bytes, records, /*ordered*/ false, IpcOption.DEFAULT); - } - - /** - * Constructs a new instance. - * - * @param schema The schema of the Flight - * @param descriptor An identifier for the Flight. - * @param endpoints A list of endpoints that have the flight available. - * @param bytes The number of bytes in the flight - * @param records The number of records in the flight. - * @param option IPC write options. - */ - public FlightInfo( - Schema schema, - FlightDescriptor descriptor, - List endpoints, - long bytes, - long records, - IpcOption option) { - this(schema, descriptor, endpoints, bytes, records, /*ordered*/ false, option); - } - - /** - * Constructs a new instance. - * - * @param schema The schema of the Flight - * @param descriptor An identifier for the Flight. - * @param endpoints A list of endpoints that have the flight available. - * @param bytes The number of bytes in the flight - * @param records The number of records in the flight. - * @param ordered Whether the endpoints in this flight are ordered. - * @param option IPC write options. - */ - public FlightInfo( - Schema schema, - FlightDescriptor descriptor, - List endpoints, - long bytes, - long records, - boolean ordered, - IpcOption option) { - this(schema, descriptor, endpoints, bytes, records, ordered, option, null); - } - - /** - * Constructs a new instance. - * - * @param schema The schema of the Flight - * @param descriptor An identifier for the Flight. - * @param endpoints A list of endpoints that have the flight available. 
- * @param bytes The number of bytes in the flight - * @param records The number of records in the flight. - * @param ordered Whether the endpoints in this flight are ordered. - * @param option IPC write options. - * @param appMetadata Metadata to send along with the flight - */ - public FlightInfo( - Schema schema, - FlightDescriptor descriptor, - List endpoints, - long bytes, - long records, - boolean ordered, - IpcOption option, - byte[] appMetadata) { - Objects.requireNonNull(descriptor); - Objects.requireNonNull(endpoints); - if (schema != null) { - MetadataV4UnionChecker.checkForUnion(schema.getFields().iterator(), option.metadataVersion); - } - this.schema = schema; - this.descriptor = descriptor; - this.endpoints = endpoints; - this.bytes = bytes; - this.records = records; - this.ordered = ordered; - this.option = option; - this.appMetadata = appMetadata; - } - - /** Constructs from the protocol buffer representation. */ - FlightInfo(Flight.FlightInfo pbFlightInfo) throws URISyntaxException { - try { - final ByteBuffer schemaBuf = pbFlightInfo.getSchema().asReadOnlyByteBuffer(); - schema = - pbFlightInfo.getSchema().size() > 0 - ? MessageSerializer.deserializeSchema( - new ReadChannel(Channels.newChannel(new ByteBufferBackedInputStream(schemaBuf)))) - : null; - } catch (IOException e) { - throw new RuntimeException(e); - } - descriptor = new FlightDescriptor(pbFlightInfo.getFlightDescriptor()); - endpoints = new ArrayList<>(); - for (final Flight.FlightEndpoint endpoint : pbFlightInfo.getEndpointList()) { - endpoints.add(new FlightEndpoint(endpoint)); - } - bytes = pbFlightInfo.getTotalBytes(); - records = pbFlightInfo.getTotalRecords(); - ordered = pbFlightInfo.getOrdered(); - appMetadata = - (pbFlightInfo.getAppMetadata().size() == 0 - ? 
null - : pbFlightInfo.getAppMetadata().toByteArray()); - option = IpcOption.DEFAULT; - } - - public Optional getSchemaOptional() { - return Optional.ofNullable(schema); - } - - /** - * Returns the schema, or an empty schema if no schema is present. - * - * @deprecated Deprecated. Use {@link #getSchemaOptional()} instead. - */ - @Deprecated - public Schema getSchema() { - return schema != null ? schema : new Schema(Collections.emptyList()); - } - - public long getBytes() { - return bytes; - } - - public long getRecords() { - return records; - } - - public FlightDescriptor getDescriptor() { - return descriptor; - } - - public List getEndpoints() { - return endpoints; - } - - public boolean getOrdered() { - return ordered; - } - - public byte[] getAppMetadata() { - return appMetadata; - } - - /** Converts to the protocol buffer representation. */ - Flight.FlightInfo toProtocol() { - Flight.FlightInfo.Builder builder = - Flight.FlightInfo.newBuilder() - .addAllEndpoint( - endpoints.stream().map(t -> t.toProtocol()).collect(Collectors.toList())) - .setFlightDescriptor(descriptor.toProtocol()) - .setTotalBytes(FlightInfo.this.bytes) - .setTotalRecords(records) - .setOrdered(ordered); - if (schema != null) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - MessageSerializer.serialize(new WriteChannel(Channels.newChannel(baos)), schema, option); - builder.setSchema(ByteString.copyFrom(baos.toByteArray())); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - if (appMetadata != null) { - builder.setAppMetadata(ByteString.copyFrom(appMetadata)); - } - return builder.build(); - } - - /** - * Get the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing non-Flight services to still return Flight - * types. - */ - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - /** - * Parse the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing Flight clients to obtain stream info from - * non-Flight services. - * - * @param serialized The serialized form of the FlightInfo, as returned by {@link #serialize()}. - * @return The deserialized FlightInfo. - * @throws IOException if the serialized form is invalid. - * @throws URISyntaxException if the serialized form contains an unsupported URI format. - */ - public static FlightInfo deserialize(ByteBuffer serialized) - throws IOException, URISyntaxException { - return new FlightInfo(Flight.FlightInfo.parseFrom(serialized)); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof FlightInfo)) { - return false; - } - FlightInfo that = (FlightInfo) o; - return bytes == that.bytes - && records == that.records - && schema.equals(that.schema) - && descriptor.equals(that.descriptor) - && endpoints.equals(that.endpoints) - && ordered == that.ordered - && Arrays.equals(appMetadata, that.appMetadata); - } - - @Override - public int hashCode() { - return Objects.hash( - schema, descriptor, endpoints, bytes, records, ordered, Arrays.hashCode(appMetadata)); - } - - @Override - public String toString() { - return "FlightInfo{" - + "schema=" - + schema - + ", descriptor=" - + descriptor - + ", endpoints=" - + endpoints - + ", bytes=" - + bytes - + ", records=" - + records - + ", ordered=" - + ordered - + ", appMetadata=" - + (appMetadata == null ? "(none)" : Base64.getEncoder().encodeToString(appMetadata)) - + '}'; - } - - /** - * Create a builder for FlightInfo. - * - * @param schema The schema of the Flight - * @param descriptor An identifier for the Flight. - * @param endpoints A list of endpoints that have the flight available. - */ - public static Builder builder( - Schema schema, FlightDescriptor descriptor, List endpoints) { - return new Builder(schema, descriptor, endpoints); - } - - /** Builder for FlightInfo. 
*/ - public static final class Builder { - private final Schema schema; - private final FlightDescriptor descriptor; - private final List endpoints; - private long bytes = -1; - private long records = -1; - private boolean ordered = false; - private IpcOption option = IpcOption.DEFAULT; - private byte[] appMetadata = null; - - private Builder(Schema schema, FlightDescriptor descriptor, List endpoints) { - this.schema = schema; - this.descriptor = descriptor; - this.endpoints = endpoints; - } - - /** - * Set the number of bytes for the flight. Default to -1 for unknown. - * - * @param bytes The number of bytes in the flight - */ - public Builder setBytes(long bytes) { - this.bytes = bytes; - return this; - } - - /** - * Set the number of records for the flight. Default to -1 for unknown. - * - * @param records The number of records in the flight. - */ - public Builder setRecords(long records) { - this.records = records; - return this; - } - - /** - * Set whether the flight endpoints are ordered. Default is false. - * - * @param ordered Whether the endpoints in this flight are ordered. - */ - public Builder setOrdered(boolean ordered) { - this.ordered = ordered; - return this; - } - - /** - * Set IPC write options. Default is IpcOption.DEFAULT - * - * @param option IPC write options. - */ - public Builder setOption(IpcOption option) { - this.option = option; - return this; - } - - /** - * Set the app metadata to send along with the flight. Default is null. - * - * @param appMetadata Metadata to send along with the flight - */ - public Builder setAppMetadata(byte[] appMetadata) { - this.appMetadata = appMetadata; - return this; - } - - /** Build FlightInfo object. 
*/ - public FlightInfo build() { - return new FlightInfo( - schema, descriptor, endpoints, bytes, records, ordered, option, appMetadata); - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightMethod.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightMethod.java deleted file mode 100644 index c7ca0bb4a84a6..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightMethod.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import org.apache.arrow.flight.impl.FlightServiceGrpc; - -/** All the RPC methods available in Flight. */ -public enum FlightMethod { - HANDSHAKE, - LIST_FLIGHTS, - GET_FLIGHT_INFO, - GET_SCHEMA, - DO_GET, - DO_PUT, - DO_ACTION, - LIST_ACTIONS, - DO_EXCHANGE, - POLL_FLIGHT_INFO, - ; - - /** - * Convert a method name string into a {@link FlightMethod}. - * - * @throws IllegalArgumentException if the method name is not valid. 
- */ - public static FlightMethod fromProtocol(final String methodName) { - if (FlightServiceGrpc.getHandshakeMethod().getFullMethodName().equals(methodName)) { - return HANDSHAKE; - } else if (FlightServiceGrpc.getListFlightsMethod().getFullMethodName().equals(methodName)) { - return LIST_FLIGHTS; - } else if (FlightServiceGrpc.getGetFlightInfoMethod().getFullMethodName().equals(methodName)) { - return GET_FLIGHT_INFO; - } else if (FlightServiceGrpc.getGetSchemaMethod().getFullMethodName().equals(methodName)) { - return GET_SCHEMA; - } else if (FlightServiceGrpc.getDoGetMethod().getFullMethodName().equals(methodName)) { - return DO_GET; - } else if (FlightServiceGrpc.getDoPutMethod().getFullMethodName().equals(methodName)) { - return DO_PUT; - } else if (FlightServiceGrpc.getDoActionMethod().getFullMethodName().equals(methodName)) { - return DO_ACTION; - } else if (FlightServiceGrpc.getListActionsMethod().getFullMethodName().equals(methodName)) { - return LIST_ACTIONS; - } else if (FlightServiceGrpc.getDoExchangeMethod().getFullMethodName().equals(methodName)) { - return DO_EXCHANGE; - } else if (FlightServiceGrpc.getPollFlightInfoMethod().getFullMethodName().equals(methodName)) { - return POLL_FLIGHT_INFO; - } - throw new IllegalArgumentException("Not a Flight method name in gRPC: " + methodName); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightProducer.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightProducer.java deleted file mode 100644 index 1662636075780..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightProducer.java +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.util.Map; - -/** API to Implement an Arrow Flight producer. */ -public interface FlightProducer { - - /** - * Return data for a stream. - * - * @param context Per-call context. - * @param ticket The application-defined ticket identifying this stream. - * @param listener An interface for sending data back to the client. - */ - void getStream(CallContext context, Ticket ticket, ServerStreamListener listener); - - /** - * List available data streams on this service. - * - * @param context Per-call context. - * @param criteria Application-defined criteria for filtering streams. - * @param listener An interface for sending data back to the client. - */ - void listFlights(CallContext context, Criteria criteria, StreamListener listener); - - /** - * Get information about a particular data stream. - * - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about the stream. - */ - FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor); - - /** - * Begin or get an update on execution of a long-running query. - * - *

    If the descriptor would begin a query, the server should return a response immediately to - * not block the client. Otherwise, the server should not return an update until progress is made - * to not spam the client with inactionable updates. - * - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about execution. - */ - default PollInfo pollFlightInfo(CallContext context, FlightDescriptor descriptor) { - FlightInfo info = getFlightInfo(context, descriptor); - return new PollInfo(info, null, null, null); - } - - /** - * Get schema for a particular data stream. - * - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Schema for the stream. - */ - default SchemaResult getSchema(CallContext context, FlightDescriptor descriptor) { - FlightInfo info = getFlightInfo(context, descriptor); - return new SchemaResult( - info.getSchemaOptional() - .orElseThrow( - () -> - CallStatus.INVALID_ARGUMENT - .withDescription("No schema is present in FlightInfo") - .toRuntimeException())); - } - - /** - * Accept uploaded data for a particular stream. - * - * @param context Per-call context. - * @param flightStream The data stream being uploaded. - */ - Runnable acceptPut( - CallContext context, FlightStream flightStream, StreamListener ackStream); - - /** - * This method is used to perform a bidirectional data exchange between a client and a server. - * - * @param context Per-call context. - * @param reader The FlightStream from which data is read. - * @param writer The ServerStreamListener to which data is written. - * @throws RuntimeException if the method is not implemented. - */ - default void doExchange(CallContext context, FlightStream reader, ServerStreamListener writer) { - throw CallStatus.UNIMPLEMENTED - .withDescription("DoExchange is unimplemented") - .toRuntimeException(); - } - - /** - * Generic handler for application-defined RPCs. 
- * - * @param context Per-call context. - * @param action Client-supplied parameters. - * @param listener A stream of responses. - */ - void doAction(CallContext context, Action action, StreamListener listener); - - /** - * List available application-defined RPCs. - * - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - void listActions(CallContext context, StreamListener listener); - - /** An interface for sending Arrow data back to a client. */ - interface ServerStreamListener extends OutboundStreamListener { - - /** Check whether the call has been cancelled. If so, stop sending data. */ - boolean isCancelled(); - - /** - * Set a callback for when the client cancels a call, i.e. {@link #isCancelled()} has become - * true. - * - *

    Note that this callback may only be called some time after {@link #isCancelled()} becomes - * true, and may never be called if all executor threads on the server are busy, or the RPC - * method body is implemented in a blocking fashion. - */ - void setOnCancelHandler(Runnable handler); - } - - /** - * Callbacks for pushing objects to a receiver. - * - * @param Type of the values in the stream. - */ - interface StreamListener { - - /** Send the next value to the client. */ - void onNext(T val); - - /** - * Indicate an error to the client. - * - *

    Terminates the stream; do not call {@link #onCompleted()}. - */ - void onError(Throwable t); - - /** Indicate that the transmission is finished. */ - void onCompleted(); - } - - /** Call-specific context. */ - interface CallContext { - /** The identity of the authenticated peer. May be the empty string if unknown. */ - String peerIdentity(); - - /** Whether the call has been cancelled by the client. */ - boolean isCancelled(); - - /** - * Get the middleware instance of the given type for this call. - * - *

    Returns null if not found. - */ - T getMiddleware(FlightServerMiddleware.Key key); - - /** Get an immutable map of middleware for this call. */ - Map, FlightServerMiddleware> getMiddleware(); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightRuntimeException.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightRuntimeException.java deleted file mode 100644 index d002dd4867f8c..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightRuntimeException.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -/** - * An exception raised from a Flight RPC. - * - *

    In service implementations, raising an instance of this exception will provide clients with a - * more detailed message and error code. - */ -public class FlightRuntimeException extends RuntimeException { - private final CallStatus status; - - /** Create a new exception from the given status. */ - FlightRuntimeException(CallStatus status) { - super(status.description(), status.cause()); - this.status = status; - } - - public CallStatus status() { - return status; - } - - @Override - public String toString() { - String s = getClass().getName(); - return String.format("%s: %s: %s", s, status.code(), status.description()); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java deleted file mode 100644 index ac761457f57fd..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServer.java +++ /dev/null @@ -1,556 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import io.grpc.Server; -import io.grpc.ServerInterceptors; -import io.grpc.netty.GrpcSslContexts; -import io.grpc.netty.NettyServerBuilder; -import io.netty.channel.EventLoopGroup; -import io.netty.channel.ServerChannel; -import io.netty.handler.ssl.ClientAuth; -import io.netty.handler.ssl.SslContext; -import io.netty.handler.ssl.SslContextBuilder; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.lang.reflect.InvocationTargetException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.function.Consumer; -import javax.net.ssl.SSLException; -import org.apache.arrow.flight.auth.ServerAuthHandler; -import org.apache.arrow.flight.auth.ServerAuthInterceptor; -import org.apache.arrow.flight.auth2.Auth2Constants; -import org.apache.arrow.flight.auth2.CallHeaderAuthenticator; -import org.apache.arrow.flight.auth2.ServerCallHeaderAuthMiddleware; -import org.apache.arrow.flight.grpc.ServerBackpressureThresholdInterceptor; -import org.apache.arrow.flight.grpc.ServerInterceptorAdapter; -import org.apache.arrow.flight.grpc.ServerInterceptorAdapter.KeyFactory; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.util.VisibleForTesting; - -/** - * Generic server of flight data that is customized via construction with delegate classes for the - * actual logic. The server currently uses GRPC as its transport mechanism. 
- */ -public class FlightServer implements AutoCloseable { - - private static final org.slf4j.Logger logger = - org.slf4j.LoggerFactory.getLogger(FlightServer.class); - - private final Location location; - private final Server server; - // The executor used by the gRPC server. We don't use it here, but we do need to clean it up with - // the server. - // May be null, if a user-supplied executor was provided (as we do not want to clean that up) - @VisibleForTesting final ExecutorService grpcExecutor; - - /** The maximum size of an individual gRPC message. This effectively disables the limit. */ - static final int MAX_GRPC_MESSAGE_SIZE = Integer.MAX_VALUE; - - /** The default number of bytes that can be queued on an output stream before blocking. */ - public static final int DEFAULT_BACKPRESSURE_THRESHOLD = 10 * 1024 * 1024; // 10MB - - /** Create a new instance from a gRPC server. For internal use only. */ - private FlightServer(Location location, Server server, ExecutorService grpcExecutor) { - this.location = location; - this.server = server; - this.grpcExecutor = grpcExecutor; - } - - /** Start the server. */ - public FlightServer start() throws IOException { - server.start(); - return this; - } - - /** Get the port the server is running on (if applicable). */ - public int getPort() { - return server.getPort(); - } - - /** Get the location for this server. */ - public Location getLocation() { - if (location.getUri().getPort() == 0) { - // If the server was bound to port 0, replace the port in the location with the real port. - final URI uri = location.getUri(); - try { - return new Location( - new URI( - uri.getScheme(), - uri.getUserInfo(), - uri.getHost(), - getPort(), - uri.getPath(), - uri.getQuery(), - uri.getFragment())); - } catch (URISyntaxException e) { - // We don't expect this to happen - throw new RuntimeException(e); - } - } - return location; - } - - /** Block until the server shuts down. 
*/ - public void awaitTermination() throws InterruptedException { - server.awaitTermination(); - } - - /** Request that the server shut down. */ - public void shutdown() { - server.shutdown(); - if (grpcExecutor != null) { - grpcExecutor.shutdown(); - } - } - - /** - * Wait for the server to shut down with a timeout. - * - * @return true if the server shut down successfully. - */ - public boolean awaitTermination(final long timeout, final TimeUnit unit) - throws InterruptedException { - return server.awaitTermination(timeout, unit); - } - - /** Shutdown the server, waits for up to 6 seconds for successful shutdown before returning. */ - @Override - public void close() throws InterruptedException { - shutdown(); - final boolean terminated = awaitTermination(3000, TimeUnit.MILLISECONDS); - if (terminated) { - logger.debug("Server was terminated within 3s"); - return; - } - - // get more aggressive in termination. - server.shutdownNow(); - - int count = 0; - while (!server.isTerminated() && count < 30) { - count++; - logger.debug("Waiting for termination"); - Thread.sleep(100); - } - - if (!server.isTerminated()) { - logger.warn("Couldn't shutdown server, resources likely will be leaked."); - } - } - - /** Create a builder for a Flight server. */ - public static Builder builder() { - return new Builder(); - } - - /** Create a builder for a Flight server. */ - public static Builder builder( - BufferAllocator allocator, Location location, FlightProducer producer) { - return new Builder(allocator, location, producer); - } - - /** A builder for Flight servers. 
*/ - public static final class Builder { - private BufferAllocator allocator; - private Location location; - private FlightProducer producer; - private final Map builderOptions; - private ServerAuthHandler authHandler = ServerAuthHandler.NO_OP; - private CallHeaderAuthenticator headerAuthenticator = CallHeaderAuthenticator.NO_OP; - private ExecutorService executor = null; - private int maxInboundMessageSize = MAX_GRPC_MESSAGE_SIZE; - private int maxHeaderListSize = MAX_GRPC_MESSAGE_SIZE; - private int backpressureThreshold = DEFAULT_BACKPRESSURE_THRESHOLD; - private InputStream certChain; - private InputStream key; - private InputStream mTlsCACert; - private SslContext sslContext; - private final List> interceptors; - // Keep track of inserted interceptors - private final Set interceptorKeys; - - Builder() { - builderOptions = new HashMap<>(); - interceptors = new ArrayList<>(); - interceptorKeys = new HashSet<>(); - } - - Builder(BufferAllocator allocator, Location location, FlightProducer producer) { - this(); - this.allocator = Preconditions.checkNotNull(allocator); - this.location = Preconditions.checkNotNull(location); - this.producer = Preconditions.checkNotNull(producer); - } - - /** Create the server for this builder. */ - public FlightServer build() { - // Add the auth middleware if applicable. 
- if (headerAuthenticator != CallHeaderAuthenticator.NO_OP) { - this.middleware( - FlightServerMiddleware.Key.of(Auth2Constants.AUTHORIZATION_HEADER), - new ServerCallHeaderAuthMiddleware.Factory(headerAuthenticator)); - } - - this.middleware(FlightConstants.HEADER_KEY, new ServerHeaderMiddleware.Factory()); - - final NettyServerBuilder builder; - switch (location.getUri().getScheme()) { - case LocationSchemes.GRPC_DOMAIN_SOCKET: - { - // The implementation is platform-specific, so we have to find the classes at runtime - builder = NettyServerBuilder.forAddress(location.toSocketAddress()); - try { - try { - // Linux - builder.channelType( - Class.forName("io.netty.channel.epoll.EpollServerDomainSocketChannel") - .asSubclass(ServerChannel.class)); - final EventLoopGroup elg = - Class.forName("io.netty.channel.epoll.EpollEventLoopGroup") - .asSubclass(EventLoopGroup.class) - .getConstructor() - .newInstance(); - builder.bossEventLoopGroup(elg).workerEventLoopGroup(elg); - } catch (ClassNotFoundException e) { - // BSD - builder.channelType( - Class.forName("io.netty.channel.kqueue.KQueueServerDomainSocketChannel") - .asSubclass(ServerChannel.class)); - final EventLoopGroup elg = - Class.forName("io.netty.channel.kqueue.KQueueEventLoopGroup") - .asSubclass(EventLoopGroup.class) - .getConstructor() - .newInstance(); - builder.bossEventLoopGroup(elg).workerEventLoopGroup(elg); - } - } catch (ClassNotFoundException - | InstantiationException - | IllegalAccessException - | NoSuchMethodException - | InvocationTargetException e) { - throw new UnsupportedOperationException( - "Could not find suitable Netty native transport implementation for domain socket address."); - } - break; - } - case LocationSchemes.GRPC: - case LocationSchemes.GRPC_INSECURE: - { - builder = NettyServerBuilder.forAddress(location.toSocketAddress()); - break; - } - case LocationSchemes.GRPC_TLS: - { - if (certChain == null) { - throw new IllegalArgumentException( - "Must provide a certificate and key to 
serve gRPC over TLS"); - } - builder = NettyServerBuilder.forAddress(location.toSocketAddress()); - break; - } - default: - throw new IllegalArgumentException( - "Scheme is not supported: " + location.getUri().getScheme()); - } - - if (certChain != null) { - SslContextBuilder sslContextBuilder = GrpcSslContexts.forServer(certChain, key); - - if (mTlsCACert != null) { - sslContextBuilder.clientAuth(ClientAuth.REQUIRE).trustManager(mTlsCACert); - } - try { - sslContext = sslContextBuilder.build(); - } catch (SSLException e) { - throw new RuntimeException(e); - } finally { - closeMTlsCACert(); - closeCertChain(); - closeKey(); - } - - builder.sslContext(sslContext); - } - - // Share one executor between the gRPC service, DoPut, and Handshake - final ExecutorService exec; - // We only want to have FlightServer close the gRPC executor if we created it here. We should - // not close - // user-supplied executors. - final ExecutorService grpcExecutor; - if (executor != null) { - exec = executor; - grpcExecutor = null; - } else { - exec = - Executors.newCachedThreadPool( - // Name threads for better debuggability - new ThreadFactoryBuilder() - .setNameFormat("flight-server-default-executor-%d") - .build()); - grpcExecutor = exec; - } - - final FlightBindingService flightService = - new FlightBindingService(allocator, producer, authHandler, exec); - builder - .executor(exec) - .maxInboundMessageSize(maxInboundMessageSize) - .maxInboundMetadataSize(maxHeaderListSize) - .addService( - ServerInterceptors.intercept( - flightService, - new ServerBackpressureThresholdInterceptor(backpressureThreshold), - new ServerAuthInterceptor(authHandler))); - - // Allow hooking into the gRPC builder. This is not guaranteed to be available on all Arrow - // versions or - // Flight implementations. 
- builderOptions.computeIfPresent( - "grpc.builderConsumer", - (key, builderConsumer) -> { - final Consumer consumer = - (Consumer) builderConsumer; - consumer.accept(builder); - return null; - }); - - // Allow explicitly setting some Netty-specific options - builderOptions.computeIfPresent( - "netty.channelType", - (key, channelType) -> { - builder.channelType((Class) channelType); - return null; - }); - builderOptions.computeIfPresent( - "netty.bossEventLoopGroup", - (key, elg) -> { - builder.bossEventLoopGroup((EventLoopGroup) elg); - return null; - }); - builderOptions.computeIfPresent( - "netty.workerEventLoopGroup", - (key, elg) -> { - builder.workerEventLoopGroup((EventLoopGroup) elg); - return null; - }); - - builder.intercept(new ServerInterceptorAdapter(interceptors)); - return new FlightServer(location, builder.build(), grpcExecutor); - } - - public Builder setMaxHeaderListSize(int maxHeaderListSize) { - this.maxHeaderListSize = maxHeaderListSize; - return this; - } - - /** - * Set the maximum size of a message. Defaults to "unlimited", depending on the underlying - * transport. - */ - public Builder maxInboundMessageSize(int maxMessageSize) { - this.maxInboundMessageSize = maxMessageSize; - return this; - } - - /** - * Set the number of bytes that may be queued on a server output stream before writes are - * blocked. - */ - public Builder backpressureThreshold(int backpressureThreshold) { - Preconditions.checkArgument(backpressureThreshold > 0); - this.backpressureThreshold = backpressureThreshold; - return this; - } - - /** - * A small utility function to ensure that InputStream attributes. are closed if they are not - * null - * - * @param stream The InputStream to close (if it is not null). - */ - private void closeInputStreamIfNotNull(InputStream stream) { - if (stream != null) { - try { - stream.close(); - } catch (IOException expected) { - // stream closes gracefully, doesn't expect an exception. 
- } - } - } - - /** - * A small utility function to ensure that the certChain attribute is closed if it is not null. - * It then sets the attribute to null. - */ - private void closeCertChain() { - closeInputStreamIfNotNull(certChain); - certChain = null; - } - - /** - * A small utility function to ensure that the key attribute is closed if it is not null. It - * then sets the attribute to null. - */ - private void closeKey() { - closeInputStreamIfNotNull(key); - key = null; - } - - /** - * A small utility function to ensure that the mTlsCACert attribute is closed if it is not null. - * It then sets the attribute to null. - */ - private void closeMTlsCACert() { - closeInputStreamIfNotNull(mTlsCACert); - mTlsCACert = null; - } - - /** - * Enable TLS on the server. - * - * @param certChain The certificate chain to use. - * @param key The private key to use. - */ - public Builder useTls(final File certChain, final File key) throws IOException { - closeCertChain(); - this.certChain = new FileInputStream(certChain); - - closeKey(); - this.key = new FileInputStream(key); - - return this; - } - - /** - * Enable Client Verification via mTLS on the server. - * - * @param mTlsCACert The CA certificate to use for verifying clients. - */ - public Builder useMTlsClientVerification(final File mTlsCACert) throws IOException { - closeMTlsCACert(); - this.mTlsCACert = new FileInputStream(mTlsCACert); - return this; - } - - /** - * Enable TLS on the server. - * - * @param certChain The certificate chain to use. - * @param key The private key to use. - */ - public Builder useTls(final InputStream certChain, final InputStream key) throws IOException { - closeCertChain(); - this.certChain = certChain; - - closeKey(); - this.key = key; - - return this; - } - - /** - * Enable mTLS on the server. - * - * @param mTlsCACert The CA certificate to use for verifying clients. 
- */ - public Builder useMTlsClientVerification(final InputStream mTlsCACert) throws IOException { - closeMTlsCACert(); - this.mTlsCACert = mTlsCACert; - return this; - } - - /** - * Set the executor used by the server. - * - *

    Flight will NOT take ownership of the executor. The application must clean it up if one is - * provided. (If not provided, Flight will use a default executor which it will clean up.) - */ - public Builder executor(ExecutorService executor) { - this.executor = executor; - return this; - } - - /** Set the authentication handler. */ - public Builder authHandler(ServerAuthHandler authHandler) { - this.authHandler = authHandler; - return this; - } - - /** Set the header-based authentication mechanism. */ - public Builder headerAuthenticator(CallHeaderAuthenticator headerAuthenticator) { - this.headerAuthenticator = headerAuthenticator; - return this; - } - - /** Provide a transport-specific option. Not guaranteed to have any effect. */ - public Builder transportHint(final String key, Object option) { - builderOptions.put(key, option); - return this; - } - - /** - * Add a Flight middleware component to inspect and modify requests to this service. - * - * @param key An identifier for this middleware component. Service implementations can retrieve - * the middleware instance for the current call using {@link - * org.apache.arrow.flight.FlightProducer.CallContext}. - * @param factory A factory for the middleware. - * @param The middleware type. 
- * @throws IllegalArgumentException if the key already exists - */ - public Builder middleware( - final FlightServerMiddleware.Key key, final FlightServerMiddleware.Factory factory) { - if (interceptorKeys.contains(key.key)) { - throw new IllegalArgumentException("Key already exists: " + key.key); - } - interceptors.add(new KeyFactory<>(key, factory)); - interceptorKeys.add(key.key); - return this; - } - - public Builder allocator(BufferAllocator allocator) { - this.allocator = Preconditions.checkNotNull(allocator); - return this; - } - - public Builder location(Location location) { - this.location = Preconditions.checkNotNull(location); - return this; - } - - public Builder producer(FlightProducer producer) { - this.producer = Preconditions.checkNotNull(producer); - return this; - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServerMiddleware.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServerMiddleware.java deleted file mode 100644 index b16df2c6cc363..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightServerMiddleware.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import java.util.Objects; - -/** - * Server-side middleware for Flight calls. - * - *

    Middleware are instantiated per-call. - * - *

    Methods are not guaranteed to be called on any particular thread, relative to the thread that - * Flight requests are executed on. Do not depend on thread-local storage; instead, use state on the - * middleware instance. Service implementations may communicate with middleware implementations - * through {@link org.apache.arrow.flight.FlightProducer.CallContext#getMiddleware(Key)}. Methods on - * the middleware instance are non-reentrant, that is, a particular RPC will not make multiple - * concurrent calls to methods on a single middleware instance. However, methods on the factory - * instance are expected to be thread-safe, and if the factory instance returns the same middleware - * object more than once, then that middleware object must be thread-safe. - */ -public interface FlightServerMiddleware { - - /** - * A factory for Flight server middleware. - * - * @param The middleware type. - */ - interface Factory { - /** - * A callback for when the call starts. - * - * @param info Details about the call. - * @param incomingHeaders A mutable set of request headers. - * @param context Context about the current request. - * @throws FlightRuntimeException if the middleware wants to reject the call with the given - * status - */ - T onCallStarted(CallInfo info, CallHeaders incomingHeaders, RequestContext context); - } - - /** - * A key for Flight server middleware. On a server, middleware instances are identified by this - * key. - * - *

    Keys use reference equality, so instances should be shared. - * - * @param The middleware class stored in this key. This provides a compile-time check when - * retrieving instances. - */ - class Key { - final String key; - - Key(String key) { - this.key = Objects.requireNonNull(key, "Key must not be null."); - } - - /** Create a new key for the given type. */ - public static Key of(String key) { - return new Key<>(key); - } - } - - /** - * Callback for when the underlying transport is about to send response headers. - * - * @param outgoingHeaders A mutable set of response headers. These can be manipulated to send - * different headers to the client. - */ - void onBeforeSendingHeaders(CallHeaders outgoingHeaders); - - /** - * Callback for when the underlying transport has completed a call. - * - * @param status Whether the call completed successfully or not. - */ - void onCallCompleted(CallStatus status); - - /** - * Callback for when an RPC method implementation throws an uncaught exception. - * - *

    May be called multiple times, and may be called before or after {@link - * #onCallCompleted(CallStatus)}. Generally, an uncaught exception will end the call with a error - * {@link CallStatus}, and will be reported to {@link #onCallCompleted(CallStatus)}, but not - * necessarily this method. - * - * @param err The exception that was thrown. - */ - void onCallErrored(Throwable err); -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightService.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightService.java deleted file mode 100644 index 9f130463c0fab..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightService.java +++ /dev/null @@ -1,471 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import com.google.common.base.Strings; -import io.grpc.stub.ServerCallStreamObserver; -import io.grpc.stub.StreamObserver; -import java.util.Collections; -import java.util.Map; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.function.BooleanSupplier; -import java.util.function.Consumer; -import org.apache.arrow.flight.FlightProducer.ServerStreamListener; -import org.apache.arrow.flight.auth.AuthConstants; -import org.apache.arrow.flight.auth.ServerAuthHandler; -import org.apache.arrow.flight.auth.ServerAuthWrapper; -import org.apache.arrow.flight.auth2.Auth2Constants; -import org.apache.arrow.flight.grpc.ContextPropagatingExecutorService; -import org.apache.arrow.flight.grpc.RequestContextAdapter; -import org.apache.arrow.flight.grpc.ServerInterceptorAdapter; -import org.apache.arrow.flight.grpc.StatusUtils; -import org.apache.arrow.flight.impl.Flight; -import org.apache.arrow.flight.impl.FlightServiceGrpc.FlightServiceImplBase; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** GRPC service implementation for a flight server. 
*/ -class FlightService extends FlightServiceImplBase { - - private static final Logger logger = LoggerFactory.getLogger(FlightService.class); - private static final int PENDING_REQUESTS = 5; - - private final BufferAllocator allocator; - private final FlightProducer producer; - private final ServerAuthHandler authHandler; - private final ExecutorService executors; - - FlightService( - BufferAllocator allocator, - FlightProducer producer, - ServerAuthHandler authHandler, - ExecutorService executors) { - this.allocator = allocator; - this.producer = producer; - this.authHandler = authHandler; - this.executors = new ContextPropagatingExecutorService(executors); - } - - private CallContext makeContext(ServerCallStreamObserver responseObserver) { - // Try to get the peer identity from middleware first (using the auth2 interfaces). - final RequestContext context = RequestContextAdapter.REQUEST_CONTEXT_KEY.get(); - String peerIdentity = null; - if (context != null) { - peerIdentity = context.get(Auth2Constants.PEER_IDENTITY_KEY); - } - - if (Strings.isNullOrEmpty(peerIdentity)) { - // Try the legacy auth interface, which defaults to empty string. - peerIdentity = AuthConstants.PEER_IDENTITY_KEY.get(); - } - - return new CallContext(peerIdentity, responseObserver::isCancelled); - } - - @Override - public StreamObserver handshake( - StreamObserver responseObserver) { - // This method is not meaningful with the auth2 interfaces. Authentication would already - // have happened by header/middleware with the auth2 classes. 
- return ServerAuthWrapper.wrapHandshake(authHandler, responseObserver, executors); - } - - @Override - public void listFlights( - Flight.Criteria criteria, StreamObserver responseObserver) { - final StreamPipe listener = - StreamPipe.wrap( - responseObserver, FlightInfo::toProtocol, this::handleExceptionWithMiddleware); - try { - final CallContext context = makeContext((ServerCallStreamObserver) responseObserver); - producer.listFlights(context, new Criteria(criteria), listener); - } catch (Exception ex) { - listener.onError(ex); - } - // Do NOT call StreamPipe#onCompleted, as the FlightProducer implementation may be asynchronous - } - - public void doGetCustom( - Flight.Ticket ticket, StreamObserver responseObserverSimple) { - final ServerCallStreamObserver responseObserver = - (ServerCallStreamObserver) responseObserverSimple; - - final GetListener listener = - new GetListener(responseObserver, this::handleExceptionWithMiddleware); - try { - producer.getStream(makeContext(responseObserver), new Ticket(ticket), listener); - } catch (Exception ex) { - listener.error(ex); - } - // Do NOT call GetListener#completed, as the implementation of getStream may be asynchronous - } - - @Override - public void doAction(Flight.Action request, StreamObserver responseObserver) { - final StreamPipe listener = - StreamPipe.wrap(responseObserver, Result::toProtocol, this::handleExceptionWithMiddleware); - try { - final CallContext context = makeContext((ServerCallStreamObserver) responseObserver); - producer.doAction(context, new Action(request), listener); - } catch (Exception ex) { - listener.onError(ex); - } - // Do NOT call StreamPipe#onCompleted, as the FlightProducer implementation may be asynchronous - } - - @Override - public void listActions( - Flight.Empty request, StreamObserver responseObserver) { - final StreamPipe listener = - StreamPipe.wrap( - responseObserver, ActionType::toProtocol, this::handleExceptionWithMiddleware); - try { - final CallContext context = 
makeContext((ServerCallStreamObserver) responseObserver); - producer.listActions(context, listener); - } catch (Exception ex) { - listener.onError(ex); - } - // Do NOT call StreamPipe#onCompleted, as the FlightProducer implementation may be asynchronous - } - - private static class GetListener extends OutboundStreamListenerImpl - implements ServerStreamListener { - private final ServerCallStreamObserver serverCallResponseObserver; - private final Consumer errorHandler; - private Runnable onCancelHandler = null; - private Runnable onReadyHandler = null; - private boolean completed; - - public GetListener( - ServerCallStreamObserver responseObserver, Consumer errorHandler) { - super(null, responseObserver); - this.errorHandler = errorHandler; - this.completed = false; - this.serverCallResponseObserver = responseObserver; - this.serverCallResponseObserver.setOnCancelHandler(this::onCancel); - this.serverCallResponseObserver.setOnReadyHandler(this::onReady); - this.serverCallResponseObserver.disableAutoInboundFlowControl(); - } - - private void onCancel() { - logger.debug("Stream cancelled by client."); - if (onCancelHandler != null) { - onCancelHandler.run(); - } - } - - private void onReady() { - if (onReadyHandler != null) { - onReadyHandler.run(); - } - } - - @Override - public void setOnCancelHandler(Runnable handler) { - this.onCancelHandler = handler; - } - - @Override - public void setOnReadyHandler(Runnable handler) { - this.onReadyHandler = handler; - } - - @Override - public boolean isCancelled() { - return serverCallResponseObserver.isCancelled(); - } - - @Override - protected void waitUntilStreamReady() { - // Don't do anything - service implementations are expected to manage backpressure themselves - } - - @Override - public void error(Throwable ex) { - if (!completed) { - completed = true; - super.error(ex); - } else { - errorHandler.accept(ex); - } - } - - @Override - public void completed() { - if (!completed) { - completed = true; - super.completed(); 
- } else { - errorHandler.accept(new IllegalStateException("Tried to complete already-completed call")); - } - } - } - - public StreamObserver doPutCustom( - final StreamObserver responseObserverSimple) { - ServerCallStreamObserver responseObserver = - (ServerCallStreamObserver) responseObserverSimple; - responseObserver.disableAutoInboundFlowControl(); - responseObserver.request(1); - - final StreamPipe ackStream = - StreamPipe.wrap( - responseObserver, PutResult::toProtocol, this::handleExceptionWithMiddleware); - final FlightStream fs = - new FlightStream( - allocator, - PENDING_REQUESTS, - /* server-upload streams are not cancellable */ null, - responseObserver::request); - // When the ackStream is completed, the FlightStream will be closed with it - ackStream.setAutoCloseable(fs); - final StreamObserver observer = fs.asObserver(); - Future unused = - executors.submit( - () -> { - try { - producer.acceptPut(makeContext(responseObserver), fs, ackStream).run(); - } catch (Throwable ex) { - ackStream.onError(ex); - } finally { - // ARROW-6136: Close the stream if and only if acceptPut hasn't closed it itself - // We don't do this for other streams since the implementation may be asynchronous - ackStream.ensureCompleted(); - } - }); - - return observer; - } - - @Override - public void getFlightInfo( - Flight.FlightDescriptor request, StreamObserver responseObserver) { - final FlightInfo info; - try { - info = - producer.getFlightInfo( - makeContext((ServerCallStreamObserver) responseObserver), - new FlightDescriptor(request)); - } catch (Exception ex) { - // Don't capture exceptions from onNext or onCompleted with this block - because then we can't - // call onError - responseObserver.onError(StatusUtils.toGrpcException(ex)); - return; - } - responseObserver.onNext(info.toProtocol()); - responseObserver.onCompleted(); - } - - @Override - public void pollFlightInfo( - Flight.FlightDescriptor request, StreamObserver responseObserver) { - final PollInfo info; - try { 
- info = - producer.pollFlightInfo( - makeContext((ServerCallStreamObserver) responseObserver), - new FlightDescriptor(request)); - } catch (Exception ex) { - // Don't capture exceptions from onNext or onCompleted with this block - because then we can't - // call onError - responseObserver.onError(StatusUtils.toGrpcException(ex)); - return; - } - responseObserver.onNext(info.toProtocol()); - responseObserver.onCompleted(); - } - - /** Broadcast the given exception to all registered middleware. */ - private void handleExceptionWithMiddleware(Throwable t) { - final Map, FlightServerMiddleware> middleware = - ServerInterceptorAdapter.SERVER_MIDDLEWARE_KEY.get(); - if (middleware == null || middleware.isEmpty()) { - logger.error("Uncaught exception in Flight method body", t); - return; - } - middleware.forEach((k, v) -> v.onCallErrored(t)); - } - - @Override - public void getSchema( - Flight.FlightDescriptor request, StreamObserver responseObserver) { - try { - SchemaResult result = - producer.getSchema( - makeContext((ServerCallStreamObserver) responseObserver), - new FlightDescriptor(request)); - responseObserver.onNext(result.toProtocol()); - responseObserver.onCompleted(); - } catch (Exception ex) { - responseObserver.onError(StatusUtils.toGrpcException(ex)); - } - } - - /** Ensures that other resources are cleaned up when the service finishes its call. */ - private static class ExchangeListener extends GetListener { - - private AutoCloseable resource; - private boolean closed = false; - private Runnable onCancelHandler = null; - - public ExchangeListener( - ServerCallStreamObserver responseObserver, Consumer errorHandler) { - super(responseObserver, errorHandler); - this.resource = null; - super.setOnCancelHandler( - () -> { - try { - if (onCancelHandler != null) { - onCancelHandler.run(); - } - } finally { - cleanup(); - } - }); - } - - private void cleanup() { - if (closed) { - // Prevent double-free. 
gRPC will call the OnCancelHandler even on a normal call end, which - // means that - // we'll double-free without this guard. - return; - } - closed = true; - try { - AutoCloseables.close(resource); - } catch (Exception e) { - throw CallStatus.INTERNAL - .withCause(e) - .withDescription("Server internal error cleaning up resources") - .toRuntimeException(); - } - } - - @Override - public void error(Throwable ex) { - try { - this.cleanup(); - } finally { - super.error(ex); - } - } - - @Override - public void completed() { - try { - this.cleanup(); - } finally { - super.completed(); - } - } - - @Override - public void setOnCancelHandler(Runnable handler) { - onCancelHandler = handler; - } - } - - public StreamObserver doExchangeCustom( - StreamObserver responseObserverSimple) { - final ServerCallStreamObserver responseObserver = - (ServerCallStreamObserver) responseObserverSimple; - final ExchangeListener listener = - new ExchangeListener(responseObserver, this::handleExceptionWithMiddleware); - final FlightStream fs = - new FlightStream( - allocator, - PENDING_REQUESTS, - /* server-upload streams are not cancellable */ null, - responseObserver::request); - // When service completes the call, this cleans up the FlightStream - listener.resource = fs; - responseObserver.disableAutoInboundFlowControl(); - responseObserver.request(1); - final StreamObserver observer = fs.asObserver(); - try { - Future unused = - executors.submit( - () -> { - try { - producer.doExchange(makeContext(responseObserver), fs, listener); - } catch (Exception ex) { - listener.error(ex); - } - // We do not clean up or close anything here, to allow long-running asynchronous - // implementations. - // It is the service's responsibility to call completed() or error(), which will - // then clean up the FlightStream. - }); - } catch (Exception ex) { - listener.error(ex); - } - return observer; - } - - /** Call context for the service. 
*/ - static class CallContext implements FlightProducer.CallContext { - - private final String peerIdentity; - private final BooleanSupplier isCancelled; - - CallContext(final String peerIdentity, BooleanSupplier isCancelled) { - this.peerIdentity = peerIdentity; - this.isCancelled = isCancelled; - } - - @Override - public String peerIdentity() { - return peerIdentity; - } - - @Override - public boolean isCancelled() { - return this.isCancelled.getAsBoolean(); - } - - @Override - public T getMiddleware(FlightServerMiddleware.Key key) { - final Map, FlightServerMiddleware> middleware = - ServerInterceptorAdapter.SERVER_MIDDLEWARE_KEY.get(); - if (middleware == null) { - return null; - } - final FlightServerMiddleware m = middleware.get(key); - if (m == null) { - return null; - } - @SuppressWarnings("unchecked") - final T result = (T) m; - return result; - } - - @Override - public Map, FlightServerMiddleware> getMiddleware() { - final Map, FlightServerMiddleware> middleware = - ServerInterceptorAdapter.SERVER_MIDDLEWARE_KEY.get(); - if (middleware == null) { - return Collections.emptyMap(); - } - // This is an unmodifiable map - return middleware; - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java deleted file mode 100644 index b2720e319424f..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -/** A status code describing the result of a Flight call. */ -public enum FlightStatusCode { - /** The call completed successfully. Generally clients will not see this, but middleware may. */ - OK, - /** - * An unknown error occurred. This may also be the result of an implementation error on the - * server-side; by default, unhandled server exceptions result in this code. - */ - UNKNOWN, - /** An internal/implementation error occurred. */ - INTERNAL, - /** One or more of the given arguments was invalid. */ - INVALID_ARGUMENT, - /** The operation timed out. */ - TIMED_OUT, - /** The operation describes a resource that does not exist. */ - NOT_FOUND, - /** The operation creates a resource that already exists. */ - ALREADY_EXISTS, - /** The operation was cancelled. */ - CANCELLED, - /** The client was not authenticated. */ - UNAUTHENTICATED, - /** The client did not have permission to make the call. */ - UNAUTHORIZED, - /** The requested operation is not implemented. */ - UNIMPLEMENTED, - /** - * The server cannot currently handle the request. This should be used for retriable requests, - * i.e. the server should send this code only if it has not done any work. - */ - UNAVAILABLE, - /** - * Some resource has been exhausted, perhaps a per-user quota, or perhaps the entire file system - * is out of space. (see: https://grpc.github.io/grpc/core/md_doc_statuscodes.html) - */ - RESOURCE_EXHAUSTED; - - /** Create a blank {@link CallStatus} with this code. 
*/ - public CallStatus toStatus() { - return new CallStatus(this); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java deleted file mode 100644 index 15cfd6ba8547a..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStream.java +++ /dev/null @@ -1,535 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import com.google.common.util.concurrent.SettableFuture; -import io.grpc.stub.StreamObserver; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.atomic.AtomicInteger; -import org.apache.arrow.flight.ArrowMessage.HeaderType; -import org.apache.arrow.flight.grpc.StatusUtils; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.VisibleForTesting; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.MetadataVersion; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.DictionaryUtility; -import org.apache.arrow.vector.validate.MetadataV4UnionChecker; - -/** An adaptor between protobuf streams and flight data streams. 
*/ -public class FlightStream implements AutoCloseable { - // Use AutoCloseable sentinel objects to simplify logic in #close - private final AutoCloseable DONE = - new AutoCloseable() { - @Override - public void close() throws Exception {} - }; - private final AutoCloseable DONE_EX = - new AutoCloseable() { - @Override - public void close() throws Exception {} - }; - - private final BufferAllocator allocator; - private final Cancellable cancellable; - private final LinkedBlockingQueue queue = new LinkedBlockingQueue<>(); - private final SettableFuture root = SettableFuture.create(); - private final SettableFuture descriptor = SettableFuture.create(); - private final int pendingTarget; - private final Requestor requestor; - // The completion flags. - // This flag is only updated as the user iterates through the data, i.e. it tracks whether the - // user has read all the - // data and closed the stream - final CompletableFuture completed; - // This flag is immediately updated when gRPC signals that the server has ended the call. This is - // used to make sure - // we don't block forever trying to write to a server that has rejected a call. - final CompletableFuture cancelled; - - private final AtomicInteger pending = new AtomicInteger(); - private volatile VectorSchemaRoot fulfilledRoot; - private DictionaryProvider.MapDictionaryProvider dictionaries; - private volatile VectorLoader loader; - private volatile Throwable ex; - private volatile ArrowBuf applicationMetadata = null; - @VisibleForTesting volatile MetadataVersion metadataVersion = null; - - /** - * Constructs a new instance. - * - * @param allocator The allocator to use for creating/reallocating buffers for Vectors. - * @param pendingTarget Target number of messages to receive. - * @param cancellable Used to cancel mid-stream requests. - * @param requestor A callback to determine how many pending items there are. 
- */ - public FlightStream( - BufferAllocator allocator, int pendingTarget, Cancellable cancellable, Requestor requestor) { - Objects.requireNonNull(allocator); - Objects.requireNonNull(requestor); - this.allocator = allocator; - this.pendingTarget = pendingTarget; - this.cancellable = cancellable; - this.requestor = requestor; - this.dictionaries = new DictionaryProvider.MapDictionaryProvider(); - this.completed = new CompletableFuture<>(); - this.cancelled = new CompletableFuture<>(); - } - - /** Get the schema for this stream. Blocks until the schema is available. */ - public Schema getSchema() { - return getRoot().getSchema(); - } - - /** - * Get the provider for dictionaries in this stream. - * - *

    Does NOT retain a reference to the underlying dictionaries. Dictionaries may be updated as - * the stream is read. This method is intended for stream processing, where the application code - * will not retain references to values after the stream is closed. - * - * @throws IllegalStateException if {@link #takeDictionaryOwnership()} was called - * @see #takeDictionaryOwnership() - */ - public DictionaryProvider getDictionaryProvider() { - if (dictionaries == null) { - throw new IllegalStateException("Dictionary ownership was claimed by the application."); - } - return dictionaries; - } - - /** - * Get an owned reference to the dictionaries in this stream. Should be called after finishing - * reading the stream, but before closing. - * - *

    If called, the client is responsible for closing the dictionaries in this provider. Can only - * be called once. - * - * @return The dictionary provider for the stream. - * @throws IllegalStateException if called more than once. - */ - public DictionaryProvider takeDictionaryOwnership() { - if (dictionaries == null) { - throw new IllegalStateException("Dictionary ownership was claimed by the application."); - } - // Swap out the provider so it is not closed - final DictionaryProvider provider = dictionaries; - dictionaries = null; - return provider; - } - - /** - * Get the descriptor for this stream. Only applicable on the server side of a DoPut operation. - * Will block until the client sends the descriptor. - */ - public FlightDescriptor getDescriptor() { - // This blocks until the first message from the client is received. - try { - return descriptor.get(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw CallStatus.INTERNAL.withCause(e).withDescription("Interrupted").toRuntimeException(); - } catch (ExecutionException e) { - throw CallStatus.INTERNAL - .withCause(e) - .withDescription("Error getting descriptor") - .toRuntimeException(); - } - } - - /** - * Closes the stream (freeing any existing resources). - * - *

    If the stream isn't complete and is cancellable, this method will cancel and drain the - * stream first. - */ - @Override - public void close() throws Exception { - final List closeables = new ArrayList<>(); - Throwable suppressor = null; - if (cancellable != null) { - // Client-side stream. Cancel the call, to help ensure gRPC doesn't deliver a message after - // close() ends. - // On the server side, we can't rely on draining the stream , because this gRPC bug means the - // completion callback - // may never run https://github.com/grpc/grpc-java/issues/5882 - try { - synchronized (cancellable) { - if (!cancelled.isDone()) { - // Only cancel if the call is not done on the gRPC side - cancellable.cancel("Stream closed before end", /* no exception to report */ null); - } - } - // Drain the stream without the lock (as next() implicitly needs the lock) - while (next()) {} - } catch (FlightRuntimeException e) { - suppressor = e; - } - } - // Perform these operations under a lock. This way the observer can't enqueue new messages while - // we're in the - // middle of cleanup. This should only be a concern for server-side streams since client-side - // streams are drained - // by the lambda above. 
- synchronized (completed) { - try { - if (fulfilledRoot != null) { - closeables.add(fulfilledRoot); - } - closeables.add(applicationMetadata); - closeables.addAll(queue); - if (dictionaries != null) { - dictionaries - .getDictionaryIds() - .forEach(id -> closeables.add(dictionaries.lookup(id).getVector())); - } - if (suppressor != null) { - AutoCloseables.close(suppressor, closeables); - } else { - AutoCloseables.close(closeables); - } - // Remove any metadata after closing to prevent negative refcnt - applicationMetadata = null; - } finally { - // The value of this CompletableFuture is meaningless, only whether it's completed (or has - // an exception) - // No-op if already complete - completed.complete(null); - } - } - } - - /** - * Blocking request to load next item into list. - * - * @return Whether or not more data was found. - */ - public boolean next() { - try { - if (completed.isDone() && queue.isEmpty()) { - return false; - } - - pending.decrementAndGet(); - requestOutstanding(); - - Object data = queue.take(); - if (DONE == data) { - queue.put(DONE); - // Other code ignores the value of this CompletableFuture, only whether it's completed (or - // has an exception) - completed.complete(null); - return false; - } else if (DONE_EX == data) { - queue.put(DONE_EX); - if (ex instanceof Exception) { - throw (Exception) ex; - } else { - throw new Exception(ex); - } - } else { - try (ArrowMessage msg = ((ArrowMessage) data)) { - if (msg.getMessageType() == HeaderType.NONE) { - updateMetadata(msg); - // We received a message without data, so erase any leftover data - if (fulfilledRoot != null) { - fulfilledRoot.clear(); - } - } else if (msg.getMessageType() == HeaderType.RECORD_BATCH) { - checkMetadataVersion(msg); - // Ensure we have the root - root.get().clear(); - try (ArrowRecordBatch arb = msg.asRecordBatch()) { - loader.load(arb); - } - updateMetadata(msg); - } else if (msg.getMessageType() == HeaderType.DICTIONARY_BATCH) { - checkMetadataVersion(msg); - // 
Ensure we have the root - root.get().clear(); - try (ArrowDictionaryBatch arb = msg.asDictionaryBatch()) { - final long id = arb.getDictionaryId(); - if (dictionaries == null) { - throw new IllegalStateException( - "Dictionary ownership was claimed by the application."); - } - final Dictionary dictionary = dictionaries.lookup(id); - if (dictionary == null) { - throw new IllegalArgumentException("Dictionary not defined in schema: ID " + id); - } - - final FieldVector vector = dictionary.getVector(); - final VectorSchemaRoot dictionaryRoot = - new VectorSchemaRoot( - Collections.singletonList(vector.getField()), - Collections.singletonList(vector), - 0); - final VectorLoader dictionaryLoader = new VectorLoader(dictionaryRoot); - dictionaryLoader.load(arb.getDictionary()); - } - return next(); - } else { - throw new UnsupportedOperationException( - "Message type is unsupported: " + msg.getMessageType()); - } - return true; - } - } - } catch (RuntimeException e) { - throw e; - } catch (ExecutionException e) { - throw StatusUtils.fromThrowable(e.getCause()); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - /** Update our metadata reference with a new one from this message. */ - private void updateMetadata(ArrowMessage msg) { - if (this.applicationMetadata != null) { - this.applicationMetadata.close(); - } - this.applicationMetadata = msg.getApplicationMetadata(); - if (this.applicationMetadata != null) { - this.applicationMetadata.getReferenceManager().retain(); - } - } - - /** Ensure the Arrow metadata version doesn't change mid-stream. 
*/ - private void checkMetadataVersion(ArrowMessage msg) { - if (msg.asSchemaMessage() == null) { - return; - } - MetadataVersion receivedVersion = - MetadataVersion.fromFlatbufID(msg.asSchemaMessage().getMessage().version()); - if (this.metadataVersion != receivedVersion) { - throw new IllegalStateException( - "Metadata version mismatch: stream started as " - + this.metadataVersion - + " but got message with version " - + receivedVersion); - } - } - - /** - * Get the current vector data from the stream. - * - *

    The data in the root may change at any time. Clients should NOT modify the root, but instead - * unload the data into their own root. - * - * @throws FlightRuntimeException if there was an error reading the schema from the stream. - */ - public VectorSchemaRoot getRoot() { - try { - return root.get(); - } catch (InterruptedException e) { - throw CallStatus.INTERNAL.withCause(e).toRuntimeException(); - } catch (ExecutionException e) { - throw StatusUtils.fromThrowable(e.getCause()); - } - } - - /** - * Check if there is a root (i.e. whether the other end has started sending data). - * - *

    Updated by calls to {@link #next()}. - * - * @return true if and only if the other end has started sending data. - */ - public boolean hasRoot() { - return root.isDone(); - } - - /** - * Get the most recent metadata sent from the server. This may be cleared by calls to {@link - * #next()} if the server sends a message without metadata. This does NOT take ownership of the - * buffer - call retain() to create a reference if you need the buffer after a call to {@link - * #next()}. - * - * @return the application metadata. May be null. - */ - public ArrowBuf getLatestMetadata() { - return applicationMetadata; - } - - private synchronized void requestOutstanding() { - if (pending.get() < pendingTarget) { - requestor.request(pendingTarget - pending.get()); - pending.set(pendingTarget); - } - } - - private class Observer implements StreamObserver { - - Observer() { - super(); - } - - /** Helper to add an item to the queue under the appropriate lock. */ - private void enqueue(AutoCloseable message) { - synchronized (completed) { - if (completed.isDone()) { - // The stream is already closed (RPC ended), discard the message - AutoCloseables.closeNoChecked(message); - } else { - queue.add(message); - } - } - } - - @Override - public void onNext(ArrowMessage msg) { - // Operations here have to be under a lock so that we don't add a message to the queue while - // in the middle of - // close(). - requestOutstanding(); - switch (msg.getMessageType()) { - case NONE: - { - // No IPC message - pure metadata or descriptor - if (msg.getDescriptor() != null) { - descriptor.set(new FlightDescriptor(msg.getDescriptor())); - } - if (msg.getApplicationMetadata() != null) { - enqueue(msg); - } - break; - } - case SCHEMA: - { - Schema schema = msg.asSchema(); - - // if there is app metadata in the schema message, make sure - // that we don't leak it. 
- ArrowBuf meta = msg.getApplicationMetadata(); - if (meta != null) { - meta.close(); - } - - final List fields = new ArrayList<>(); - final Map dictionaryMap = new HashMap<>(); - for (final Field originalField : schema.getFields()) { - final Field updatedField = - DictionaryUtility.toMemoryFormat(originalField, allocator, dictionaryMap); - fields.add(updatedField); - } - for (final Map.Entry entry : dictionaryMap.entrySet()) { - dictionaries.put(entry.getValue()); - } - schema = new Schema(fields, schema.getCustomMetadata()); - metadataVersion = - MetadataVersion.fromFlatbufID(msg.asSchemaMessage().getMessage().version()); - try { - MetadataV4UnionChecker.checkRead(schema, metadataVersion); - } catch (IOException e) { - ex = e; - enqueue(DONE_EX); - break; - } - - synchronized (completed) { - if (!completed.isDone()) { - fulfilledRoot = VectorSchemaRoot.create(schema, allocator); - loader = new VectorLoader(fulfilledRoot); - if (msg.getDescriptor() != null) { - descriptor.set(new FlightDescriptor(msg.getDescriptor())); - } - root.set(fulfilledRoot); - } - } - break; - } - case RECORD_BATCH: - case DICTIONARY_BATCH: - enqueue(msg); - break; - case TENSOR: - default: - ex = - new UnsupportedOperationException( - "Unable to handle message of type: " + msg.getMessageType()); - enqueue(DONE_EX); - } - } - - @Override - public void onError(Throwable t) { - ex = StatusUtils.fromThrowable(t); - queue.add(DONE_EX); - cancelled.complete(null); - root.setException(ex); - } - - @Override - public void onCompleted() { - // Depends on gRPC calling onNext and onCompleted non-concurrently - cancelled.complete(null); - queue.add(DONE); - } - } - - /** - * Cancels sending the stream to a client. - * - *

    Callers should drain the stream (with {@link #next()}) to ensure all messages sent before - * cancellation are received and to wait for the underlying transport to acknowledge cancellation. - */ - public void cancel(String message, Throwable exception) { - if (cancellable == null) { - throw new UnsupportedOperationException( - "Streams cannot be cancelled that are produced by client. " - + "Instead, server should reject incoming messages."); - } - cancellable.cancel(message, exception); - // Do not mark the stream as completed, as gRPC may still be delivering messages. - } - - StreamObserver asObserver() { - return new Observer(); - } - - /** Provides a callback to cancel a process that is in progress. */ - @FunctionalInterface - public interface Cancellable { - void cancel(String message, Throwable exception); - } - - /** Provides a interface to request more items from a stream producer. */ - @FunctionalInterface - public interface Requestor { - /** Requests count more messages from the instance of this object. */ - void request(int count); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/GetSessionOptionsRequest.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/GetSessionOptionsRequest.java deleted file mode 100644 index d516ba5df4980..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/GetSessionOptionsRequest.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.io.IOException; -import java.nio.ByteBuffer; -import org.apache.arrow.flight.impl.Flight; - -/** A request to get current session options. */ -public class GetSessionOptionsRequest { - public GetSessionOptionsRequest() {} - - GetSessionOptionsRequest(Flight.GetSessionOptionsRequest proto) {} - - Flight.GetSessionOptionsRequest toProtocol() { - return Flight.GetSessionOptionsRequest.getDefaultInstance(); - } - - /** - * Get the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing non-Flight services to still return Flight - * types. - */ - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - /** - * Parse the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing Flight clients to obtain stream info from - * non-Flight services. - * - * @param serialized The serialized form of the message, as returned by {@link #serialize()}. - * @return The deserialized message. - * @throws IOException if the serialized form is invalid. - */ - public static GetSessionOptionsRequest deserialize(ByteBuffer serialized) throws IOException { - return new GetSessionOptionsRequest(Flight.GetSessionOptionsRequest.parseFrom(serialized)); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/GetSessionOptionsResult.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/GetSessionOptionsResult.java deleted file mode 100644 index 618b8c700801a..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/GetSessionOptionsResult.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.stream.Collectors; -import org.apache.arrow.flight.impl.Flight; - -/** A request to view the currently-set options for the current server session. */ -public class GetSessionOptionsResult { - private final Map sessionOptions; - - public GetSessionOptionsResult(Map sessionOptions) { - this.sessionOptions = Collections.unmodifiableMap(new HashMap(sessionOptions)); - } - - GetSessionOptionsResult(Flight.GetSessionOptionsResult proto) { - sessionOptions = - Collections.unmodifiableMap( - proto.getSessionOptionsMap().entrySet().stream() - .collect( - Collectors.toMap( - Map.Entry::getKey, - (e) -> SessionOptionValueFactory.makeSessionOptionValue(e.getValue())))); - } - - /** - * Get the session options map contained in the request. - * - * @return An immutable view of the session options map. - */ - public Map getSessionOptions() { - return sessionOptions; - } - - Flight.GetSessionOptionsResult toProtocol() { - Flight.GetSessionOptionsResult.Builder b = Flight.GetSessionOptionsResult.newBuilder(); - b.putAllSessionOptions( - sessionOptions.entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, (e) -> e.getValue().toProtocol()))); - return b.build(); - } - - /** - * Get the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing non-Flight services to still return Flight - * types. - */ - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - /** - * Parse the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing Flight clients to obtain stream info from - * non-Flight services. - * - * @param serialized The serialized form of the message, as returned by {@link #serialize()}. - * @return The deserialized message. - * @throws IOException if the serialized form is invalid. - */ - public static GetSessionOptionsResult deserialize(ByteBuffer serialized) throws IOException { - return new GetSessionOptionsResult(Flight.GetSessionOptionsResult.parseFrom(serialized)); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/HeaderCallOption.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/HeaderCallOption.java deleted file mode 100644 index 34824bf05d1a5..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/HeaderCallOption.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import io.grpc.Metadata; -import io.grpc.stub.AbstractStub; -import io.grpc.stub.MetadataUtils; - -/** Method option for supplying headers to method calls. 
*/ -public class HeaderCallOption implements CallOptions.GrpcCallOption { - private final Metadata propertiesMetadata = new Metadata(); - - /** - * Header property constructor. - * - * @param headers the headers that should be sent across. If a header is a string, it should only - * be valid ASCII characters. Binary headers should end in "-bin". - */ - public HeaderCallOption(CallHeaders headers) { - for (String key : headers.keys()) { - if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) { - final Metadata.Key metaKey = Metadata.Key.of(key, Metadata.BINARY_BYTE_MARSHALLER); - headers.getAllByte(key).forEach(v -> propertiesMetadata.put(metaKey, v)); - } else { - final Metadata.Key metaKey = Metadata.Key.of(key, Metadata.ASCII_STRING_MARSHALLER); - headers.getAll(key).forEach(v -> propertiesMetadata.put(metaKey, v)); - } - } - } - - @Override - public > T wrapStub(T stub) { - return stub.withInterceptors(MetadataUtils.newAttachHeadersInterceptor(propertiesMetadata)); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Location.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Location.java deleted file mode 100644 index d76fbb47b3929..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Location.java +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.lang.reflect.InvocationTargetException; -import java.net.InetSocketAddress; -import java.net.SocketAddress; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.Objects; -import org.apache.arrow.flight.impl.Flight; - -/** A URI where a Flight stream is available. */ -public class Location { - private final URI uri; - - /** - * Constructs a new instance. - * - * @param uri the URI of the Flight service - * @throws IllegalArgumentException if the URI scheme is unsupported - */ - public Location(String uri) throws URISyntaxException { - this(new URI(uri)); - } - - /** - * Construct a new instance from an existing URI. - * - * @param uri the URI of the Flight service - */ - public Location(URI uri) { - super(); - Objects.requireNonNull(uri); - this.uri = uri; - } - - public URI getUri() { - return uri; - } - - /** - * Helper method to turn this Location into a SocketAddress. - * - * @return null if could not be converted - */ - public SocketAddress toSocketAddress() { - switch (uri.getScheme()) { - case LocationSchemes.GRPC: - case LocationSchemes.GRPC_TLS: - case LocationSchemes.GRPC_INSECURE: - { - return new InetSocketAddress(uri.getHost(), uri.getPort()); - } - - case LocationSchemes.GRPC_DOMAIN_SOCKET: - { - try { - // This dependency is not available on non-Unix platforms. 
- return Class.forName("io.netty.channel.unix.DomainSocketAddress") - .asSubclass(SocketAddress.class) - .getConstructor(String.class) - .newInstance(uri.getPath()); - } catch (InstantiationException - | ClassNotFoundException - | InvocationTargetException - | NoSuchMethodException - | IllegalAccessException e) { - return null; - } - } - - default: - { - return null; - } - } - } - - /** Convert this Location into its protocol-level representation. */ - Flight.Location toProtocol() { - return Flight.Location.newBuilder().setUri(uri.toString()).build(); - } - - /** - * Construct a special URI to indicate to clients that they may fetch data by reusing an existing - * connection to a Flight RPC server. - */ - public static Location reuseConnection() { - try { - return new Location(new URI(LocationSchemes.REUSE_CONNECTION, "", "", "", null)); - } catch (URISyntaxException e) { - // This should never happen. - throw new IllegalArgumentException(e); - } - } - - /** - * Construct a URI for a Flight+gRPC server without transport security. - * - * @throws IllegalArgumentException if the constructed URI is invalid. - */ - public static Location forGrpcInsecure(String host, int port) { - try { - return new Location( - new URI(LocationSchemes.GRPC_INSECURE, null, host, port, null, null, null)); - } catch (URISyntaxException e) { - throw new IllegalArgumentException(e); - } - } - - /** - * Construct a URI for a Flight+gRPC server with transport security. - * - * @throws IllegalArgumentException if the constructed URI is invalid. - */ - public static Location forGrpcTls(String host, int port) { - try { - return new Location(new URI(LocationSchemes.GRPC_TLS, null, host, port, null, null, null)); - } catch (URISyntaxException e) { - throw new IllegalArgumentException(e); - } - } - - /** - * Construct a URI for a Flight+gRPC server over a Unix domain socket. - * - * @throws IllegalArgumentException if the constructed URI is invalid. 
- */ - public static Location forGrpcDomainSocket(String path) { - try { - return new Location(new URI(LocationSchemes.GRPC_DOMAIN_SOCKET, null, path, null)); - } catch (URISyntaxException e) { - throw new IllegalArgumentException(e); - } - } - - @Override - public String toString() { - return "Location{" + "uri=" + uri + '}'; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof Location)) { - return false; - } - Location location = (Location) o; - return uri.equals(location.uri); - } - - @Override - public int hashCode() { - return Objects.hash(uri); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/LocationSchemes.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/LocationSchemes.java deleted file mode 100644 index 0c340452be2ba..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/LocationSchemes.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -/** Constants representing well-known URI schemes for Flight services. 
*/ -public final class LocationSchemes { - public static final String GRPC = "grpc"; - public static final String GRPC_INSECURE = "grpc+tcp"; - public static final String GRPC_DOMAIN_SOCKET = "grpc+unix"; - public static final String GRPC_TLS = "grpc+tls"; - public static final String REUSE_CONNECTION = "arrow-flight-reuse-connection"; - - private LocationSchemes() { - throw new AssertionError("Do not instantiate this class."); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpFlightProducer.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpFlightProducer.java deleted file mode 100644 index 6d968e174b338..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpFlightProducer.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -/** A {@link FlightProducer} that throws on all operations. 
*/ -public class NoOpFlightProducer implements FlightProducer { - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - listener.error( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public void listFlights( - CallContext context, Criteria criteria, StreamListener listener) { - listener.onError( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public Runnable acceptPut( - CallContext context, FlightStream flightStream, StreamListener ackStream) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public void doAction(CallContext context, Action action, StreamListener listener) { - listener.onError( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public void listActions(CallContext context, StreamListener listener) { - listener.onError( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpSessionOptionValueVisitor.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpSessionOptionValueVisitor.java deleted file mode 100644 index 34695139107eb..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpSessionOptionValueVisitor.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -/** - * A helper to facilitate easier anonymous subclass declaration. - * - *

    Implementations need only override callbacks for types they wish to do something with. - * - * @param Return type of the visit operation. - */ -public class NoOpSessionOptionValueVisitor implements SessionOptionValueVisitor { - /** A callback to handle SessionOptionValue containing a String. */ - public T visit(String value) { - return null; - } - - /** A callback to handle SessionOptionValue containing a boolean. */ - public T visit(boolean value) { - return null; - } - - /** A callback to handle SessionOptionValue containing a long. */ - public T visit(long value) { - return null; - } - - /** A callback to handle SessionOptionValue containing a double. */ - public T visit(double value) { - return null; - } - - /** A callback to handle SessionOptionValue containing an array of String. */ - public T visit(String[] value) { - return null; - } - - /** - * A callback to handle SessionOptionValue containing no value. - * - *

    By convention, an attempt to set a valueless SessionOptionValue should attempt to unset or - * clear the named option value on the server. - */ - public T visit(Void value) { - return null; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpStreamListener.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpStreamListener.java deleted file mode 100644 index 87081bbe389df..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/NoOpStreamListener.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import org.apache.arrow.flight.FlightProducer.StreamListener; - -/** - * A {@link StreamListener} that does nothing for all callbacks. - * - * @param The type of the callback object. - */ -public class NoOpStreamListener implements StreamListener { - private static NoOpStreamListener INSTANCE = new NoOpStreamListener(); - - /** Ignores the value received. */ - @Override - public void onNext(T val) {} - - /** Ignores the error received. */ - @Override - public void onError(Throwable t) {} - - /** Ignores the stream completion event. 
*/ - @Override - public void onCompleted() {} - - @SuppressWarnings("unchecked") - public static StreamListener getInstance() { - // Safe because we never use T - return (StreamListener) INSTANCE; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java deleted file mode 100644 index 94b01b2bfc545..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.IpcOption; - -/** An interface for writing data to a peer, client or server. */ -public interface OutboundStreamListener { - - /** - * A hint indicating whether the client is ready to receive data without excessive buffering. - * - *

    Writers should poll this flag before sending data to respect backpressure from the client - * and avoid sending data faster than the client can handle. Ignoring this flag may mean that the - * server will start consuming excessive amounts of memory, as it may buffer messages in memory. - */ - boolean isReady(); - - /** - * Set a callback for when the listener is ready for new calls to putNext(), i.e. {@link - * #isReady()} has become true. - * - *

    Note that this callback may only be called some time after {@link #isReady()} becomes true, - * and may never be called if all executor threads on the server are busy, or the RPC method body - * is implemented in a blocking fashion. Note that isReady() must still be checked after the - * callback is run as it may have been run spuriously. - */ - default void setOnReadyHandler(Runnable handler) { - throw new UnsupportedOperationException("Not yet implemented."); - } - - /** - * Start sending data, using the schema of the given {@link VectorSchemaRoot}. - * - *

    This method must be called before all others, except {@link #putMetadata(ArrowBuf)}. - */ - default void start(VectorSchemaRoot root) { - start(root, null, IpcOption.DEFAULT); - } - - /** - * Start sending data, using the schema of the given {@link VectorSchemaRoot}. - * - *

    This method must be called before all others, except {@link #putMetadata(ArrowBuf)}. - */ - default void start(VectorSchemaRoot root, DictionaryProvider dictionaries) { - start(root, dictionaries, IpcOption.DEFAULT); - } - - /** - * Start sending data, using the schema of the given {@link VectorSchemaRoot}. - * - *

    This method must be called before all others, except {@link #putMetadata(ArrowBuf)}. - */ - void start(VectorSchemaRoot root, DictionaryProvider dictionaries, IpcOption option); - - /** - * Send the current contents of the associated {@link VectorSchemaRoot}. - * - *

    This will not necessarily block until the message is actually sent; it may buffer messages - * in memory. Use {@link #isReady()} to check if there is backpressure and avoid excessive - * buffering. - */ - void putNext(); - - /** - * Send the current contents of the associated {@link VectorSchemaRoot} alongside - * application-defined metadata. - * - * @param metadata The metadata to send. Ownership of the buffer is transferred to the Flight - * implementation. - */ - void putNext(ArrowBuf metadata); - - /** - * Send a pure metadata message without any associated data. - * - *

    This may be called without starting the stream. - */ - void putMetadata(ArrowBuf metadata); - - /** - * Indicate an error to the client. Terminates the stream; do not call {@link #completed()} - * afterwards. - */ - void error(Throwable ex); - - /** Indicate that transmission is finished. */ - void completed(); - - /** - * Toggle whether to use the zero-copy write optimization. - * - *

    By default or when disabled, Arrow may copy data into a buffer for the underlying - * implementation to send. When enabled, Arrow will instead try to directly enqueue the Arrow - * buffer for sending. Not all implementations support this optimization, so even if enabled, you - * may not see a difference. - * - *

    In this mode, buffers must not be reused after they are written with {@link #putNext()}. For - * example, you would have to call {@link VectorSchemaRoot#allocateNew()} after every call to - * {@link #putNext()}. Hence, this is not enabled by default. - * - *

    The default value can be toggled globally by setting the JVM property - * arrow.flight.enable_zero_copy_write or the environment variable - * ARROW_FLIGHT_ENABLE_ZERO_COPY_WRITE. - */ - default void setUseZeroCopy(boolean enabled) {} -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListenerImpl.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListenerImpl.java deleted file mode 100644 index a1bde3a848e42..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListenerImpl.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import io.grpc.stub.CallStreamObserver; -import org.apache.arrow.flight.grpc.StatusUtils; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.IpcOption; - -/** A base class for writing Arrow data to a Flight stream. 
*/ -abstract class OutboundStreamListenerImpl implements OutboundStreamListener { - private final FlightDescriptor descriptor; // nullable - protected final CallStreamObserver responseObserver; - protected volatile VectorUnloader unloader; // null until stream started - protected IpcOption option; // null until stream started - protected boolean tryZeroCopy = ArrowMessage.ENABLE_ZERO_COPY_WRITE; - - OutboundStreamListenerImpl( - FlightDescriptor descriptor, CallStreamObserver responseObserver) { - Preconditions.checkNotNull(responseObserver, "responseObserver must be provided"); - this.descriptor = descriptor; - this.responseObserver = responseObserver; - this.unloader = null; - } - - @Override - public boolean isReady() { - return responseObserver.isReady(); - } - - @Override - public void setOnReadyHandler(Runnable handler) { - responseObserver.setOnReadyHandler(handler); - } - - @Override - public void start(VectorSchemaRoot root, DictionaryProvider dictionaries, IpcOption option) { - this.option = option; - try { - DictionaryUtils.generateSchemaMessages( - root.getSchema(), descriptor, dictionaries, option, responseObserver::onNext); - } catch (RuntimeException e) { - // Propagate runtime exceptions, like those raised when trying to write unions with V4 - // metadata - throw e; - } catch (Exception e) { - // Only happens if closing buffers somehow fails - indicates application is an unknown state - // so propagate - // the exception - throw new RuntimeException("Could not generate and send all schema messages", e); - } - // We include the null count and align buffers to be compatible with Flight/C++ - unloader = new VectorUnloader(root, /* includeNullCount */ true, /* alignBuffers */ true); - } - - @Override - public void putNext() { - putNext(null); - } - - /** - * Busy-wait until the stream is ready. - * - *

    This is overridable as client/server have different behavior. - */ - protected abstract void waitUntilStreamReady(); - - @Override - public void putNext(ArrowBuf metadata) { - if (unloader == null) { - throw CallStatus.INTERNAL - .withDescription("Stream was not started, call start()") - .toRuntimeException(); - } - - waitUntilStreamReady(); - // close is a no-op if the message has been written to gRPC, otherwise frees the associated - // buffers - // in some code paths (e.g. if the call is cancelled), gRPC does not write the message, so we - // need to clean up - // ourselves. Normally, writing the ArrowMessage will transfer ownership of the data to - // gRPC/Netty. - try (final ArrowMessage message = - new ArrowMessage(unloader.getRecordBatch(), metadata, tryZeroCopy, option)) { - responseObserver.onNext(message); - } catch (Exception e) { - // This exception comes from ArrowMessage#close, not responseObserver#onNext. - // Generally this should not happen - ArrowMessage's implementation only closes non-throwing - // things. - // The user can't reasonably do anything about this, but if something does throw, we shouldn't - // let - // execution continue since other state (e.g. allocators) may be in an odd state. 
- throw new RuntimeException("Could not free ArrowMessage", e); - } - } - - @Override - public void putMetadata(ArrowBuf metadata) { - waitUntilStreamReady(); - try (final ArrowMessage message = new ArrowMessage(metadata)) { - responseObserver.onNext(message); - } catch (Exception e) { - throw StatusUtils.fromThrowable(e); - } - } - - @Override - public void error(Throwable ex) { - responseObserver.onError(StatusUtils.toGrpcException(ex)); - } - - @Override - public void completed() { - responseObserver.onCompleted(); - } - - @Override - public void setUseZeroCopy(boolean enabled) { - tryZeroCopy = enabled; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/PollInfo.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/PollInfo.java deleted file mode 100644 index 906bb5385241b..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/PollInfo.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import com.google.protobuf.Timestamp; -import com.google.protobuf.util.Timestamps; -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.ByteBuffer; -import java.time.Instant; -import java.util.Objects; -import java.util.Optional; -import org.apache.arrow.flight.impl.Flight; - -/** A POJO representation of the execution of a long-running query. */ -public class PollInfo { - private final FlightInfo flightInfo; - private final FlightDescriptor flightDescriptor; - private final Double progress; - private final Instant expirationTime; - - /** - * Create a new PollInfo. - * - * @param flightInfo The FlightInfo (must not be null). - * @param flightDescriptor The descriptor used to poll for more information; null if and only if - * query is finished. - * @param progress Optional progress info in [0.0, 1.0]. - * @param expirationTime An expiration time, after which the server may no longer recognize the - * descriptor. - */ - public PollInfo( - FlightInfo flightInfo, - FlightDescriptor flightDescriptor, - Double progress, - Instant expirationTime) { - this.flightInfo = Objects.requireNonNull(flightInfo); - this.flightDescriptor = flightDescriptor; - this.progress = progress; - this.expirationTime = expirationTime; - } - - PollInfo(Flight.PollInfo flt) throws URISyntaxException { - this.flightInfo = new FlightInfo(flt.getInfo()); - this.flightDescriptor = - flt.hasFlightDescriptor() ? new FlightDescriptor(flt.getFlightDescriptor()) : null; - this.progress = flt.hasProgress() ? flt.getProgress() : null; - this.expirationTime = - flt.hasExpirationTime() - ? Instant.ofEpochSecond( - flt.getExpirationTime().getSeconds(), Timestamps.toNanos(flt.getExpirationTime())) - : null; - } - - /** - * The FlightInfo describing the result set of the execution of a query. - * - *

    This is always present and always contains all endpoints for the query execution so far, not - * just new endpoints that completed execution since the last call to {@link - * FlightClient#pollInfo(FlightDescriptor, CallOption...)}. - */ - public FlightInfo getFlightInfo() { - return flightInfo; - } - - /** - * The FlightDescriptor that should be used to get further updates on this query. - * - *

    It is present if and only if the query is still running. If present, it should be passed to - * {@link FlightClient#pollInfo(FlightDescriptor, CallOption...)} to get an update. - */ - public Optional getFlightDescriptor() { - return Optional.ofNullable(flightDescriptor); - } - - /** - * The progress of the query. - * - *

    If present, should be a value in [0.0, 1.0]. It is not necessarily monotonic or - * non-decreasing. - */ - public Optional getProgress() { - return Optional.ofNullable(progress); - } - - /** - * The expiration time of the query execution. - * - *

    After this passes, the server may not recognize the descriptor anymore and the client will - * not be able to track the query anymore. - */ - public Optional getExpirationTime() { - return Optional.ofNullable(expirationTime); - } - - Flight.PollInfo toProtocol() { - Flight.PollInfo.Builder b = Flight.PollInfo.newBuilder(); - b.setInfo(flightInfo.toProtocol()); - if (flightDescriptor != null) { - b.setFlightDescriptor(flightDescriptor.toProtocol()); - } - if (progress != null) { - b.setProgress(progress); - } - if (expirationTime != null) { - b.setExpirationTime( - Timestamp.newBuilder() - .setSeconds(expirationTime.getEpochSecond()) - .setNanos(expirationTime.getNano()) - .build()); - } - return b.build(); - } - - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - public static PollInfo deserialize(ByteBuffer serialized) throws IOException, URISyntaxException { - return new PollInfo(Flight.PollInfo.parseFrom(serialized)); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - - if (!(o instanceof PollInfo)) { - return false; - } - PollInfo pollInfo = (PollInfo) o; - return Objects.equals(getFlightInfo(), pollInfo.getFlightInfo()) - && Objects.equals(getFlightDescriptor(), pollInfo.getFlightDescriptor()) - && Objects.equals(getProgress(), pollInfo.getProgress()) - && Objects.equals(getExpirationTime(), pollInfo.getExpirationTime()); - } - - @Override - public int hashCode() { - return Objects.hash(getFlightInfo(), getFlightDescriptor(), getProgress(), getExpirationTime()); - } - - @Override - public String toString() { - return "PollInfo{" - + "flightInfo=" - + flightInfo - + ", flightDescriptor=" - + flightDescriptor - + ", progress=" - + progress - + ", expirationTime=" - + expirationTime - + '}'; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/PutResult.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/PutResult.java deleted 
file mode 100644 index c0bf6549339d7..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/PutResult.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import com.google.protobuf.ByteString; -import org.apache.arrow.flight.impl.Flight; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReferenceManager; - -/** - * A message from the server during a DoPut operation. - * - *

    This object owns an {@link ArrowBuf} and should be closed when you are done with it. - */ -public class PutResult implements AutoCloseable { - - private ArrowBuf applicationMetadata; - - private PutResult(ArrowBuf metadata) { - applicationMetadata = metadata; - } - - /** - * Create a PutResult with application-specific metadata. - * - *

    This method assumes ownership of the {@link ArrowBuf}. - */ - public static PutResult metadata(ArrowBuf metadata) { - if (metadata == null) { - return empty(); - } - return new PutResult(metadata); - } - - /** Create an empty PutResult. */ - public static PutResult empty() { - return new PutResult(null); - } - - /** - * Get the metadata in this message. May be null. - * - *

    Ownership of the {@link ArrowBuf} is retained by this object. Call {@link - * ReferenceManager#retain()} to preserve a reference. - */ - public ArrowBuf getApplicationMetadata() { - return applicationMetadata; - } - - Flight.PutResult toProtocol() { - if (applicationMetadata == null) { - return Flight.PutResult.getDefaultInstance(); - } - return Flight.PutResult.newBuilder() - .setAppMetadata(ByteString.copyFrom(applicationMetadata.nioBuffer())) - .build(); - } - - /** - * Construct a PutResult from a Protobuf message. - * - * @param allocator The allocator to use for allocating application metadata memory. The result - * object owns the allocated buffer, if any. - * @param message The gRPC/Protobuf message. - */ - static PutResult fromProtocol(BufferAllocator allocator, Flight.PutResult message) { - final ArrowBuf buf = allocator.buffer(message.getAppMetadata().size()); - message - .getAppMetadata() - .asReadOnlyByteBufferList() - .forEach( - bb -> { - buf.setBytes(buf.writerIndex(), bb); - buf.writerIndex(buf.writerIndex() + bb.limit()); - }); - return new PutResult(buf); - } - - @Override - public void close() { - if (applicationMetadata != null) { - applicationMetadata.close(); - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/RenewFlightEndpointRequest.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/RenewFlightEndpointRequest.java deleted file mode 100644 index f6c02a167bd85..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/RenewFlightEndpointRequest.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.ByteBuffer; -import java.util.Objects; -import org.apache.arrow.flight.impl.Flight; - -/** A request to extend the expiration time of a FlightEndpoint. */ -public class RenewFlightEndpointRequest { - private final FlightEndpoint endpoint; - - public RenewFlightEndpointRequest(FlightEndpoint endpoint) { - this.endpoint = Objects.requireNonNull(endpoint); - } - - RenewFlightEndpointRequest(Flight.RenewFlightEndpointRequest proto) throws URISyntaxException { - this(new FlightEndpoint(proto.getEndpoint())); - } - - public FlightEndpoint getFlightEndpoint() { - return endpoint; - } - - Flight.RenewFlightEndpointRequest toProtocol() { - Flight.RenewFlightEndpointRequest.Builder b = Flight.RenewFlightEndpointRequest.newBuilder(); - b.setEndpoint(endpoint.toProtocol()); - return b.build(); - } - - /** - * Get the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing non-Flight services to still return Flight - * types. - */ - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - /** - * Parse the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing Flight clients to obtain stream info from - * non-Flight services. - * - * @param serialized The serialized form of the message, as returned by {@link #serialize()}. - * @return The deserialized message. - * @throws IOException if the serialized form is invalid. - */ - public static RenewFlightEndpointRequest deserialize(ByteBuffer serialized) - throws IOException, URISyntaxException { - return new RenewFlightEndpointRequest(Flight.RenewFlightEndpointRequest.parseFrom(serialized)); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/RequestContext.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/RequestContext.java deleted file mode 100644 index e453e2f2642be..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/RequestContext.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.util.Set; - -/** Tracks variables about the current request. */ -public interface RequestContext { - /** - * Register a variable and a value. - * - * @param key the variable name. - * @param value the value. 
- */ - void put(String key, String value); - - /** - * Retrieve a registered variable. - * - * @param key the variable name. - * @return the value, or null if not found. - */ - String get(String key); - - /** - * Retrieves the keys that have been registered to this context. - * - * @return the keys used in this context. - */ - Set keySet(); - - /** - * Deletes a registered variable. - * - * @return the value associated with the deleted variable, or null if the key doesn't exist. - */ - String remove(String key); -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Result.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Result.java deleted file mode 100644 index 143951f8bc196..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Result.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import com.google.protobuf.ByteString; -import org.apache.arrow.flight.impl.Flight; - -/** - * Opaque result returned after executing an action. - * - *

    POJO wrapper around the Flight protocol buffer message sharing the same name. - */ -public class Result { - - private final byte[] body; - - public Result(byte[] body) { - this.body = body; - } - - Result(Flight.Result result) { - this.body = result.getBody().toByteArray(); - } - - public byte[] getBody() { - return body; - } - - Flight.Result toProtocol() { - return Flight.Result.newBuilder().setBody(ByteString.copyFrom(body)).build(); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SchemaResult.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SchemaResult.java deleted file mode 100644 index ee837dad11061..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SchemaResult.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream; -import com.google.common.collect.ImmutableList; -import com.google.protobuf.ByteString; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.Channels; -import java.util.Objects; -import org.apache.arrow.flight.impl.Flight; -import org.apache.arrow.vector.ipc.ReadChannel; -import org.apache.arrow.vector.ipc.WriteChannel; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.validate.MetadataV4UnionChecker; - -/** - * Opaque result returned after executing a getSchema request. - * - *

    POJO wrapper around the Flight protocol buffer message sharing the same name. - */ -public class SchemaResult { - - private final Schema schema; - private final IpcOption option; - - public SchemaResult(Schema schema) { - this(schema, IpcOption.DEFAULT); - } - - /** Create a schema result with specific IPC options for serialization. */ - public SchemaResult(Schema schema, IpcOption option) { - Objects.requireNonNull(schema); - MetadataV4UnionChecker.checkForUnion(schema.getFields().iterator(), option.metadataVersion); - this.schema = schema; - this.option = option; - } - - public Schema getSchema() { - return schema; - } - - /** Converts to the protocol buffer representation. */ - Flight.SchemaResult toProtocol() { - // Encode schema in a Message payload - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - MessageSerializer.serialize(new WriteChannel(Channels.newChannel(baos)), schema, option); - } catch (IOException e) { - throw new RuntimeException(e); - } - return Flight.SchemaResult.newBuilder() - .setSchema(ByteString.copyFrom(baos.toByteArray())) - .build(); - } - - /** Converts from the protocol buffer representation. */ - static SchemaResult fromProtocol(Flight.SchemaResult pbSchemaResult) { - try { - final ByteBuffer schemaBuf = pbSchemaResult.getSchema().asReadOnlyByteBuffer(); - Schema schema = - pbSchemaResult.getSchema().size() > 0 - ? 
MessageSerializer.deserializeSchema( - new ReadChannel(Channels.newChannel(new ByteBufferBackedInputStream(schemaBuf)))) - : new Schema(ImmutableList.of()); - return new SchemaResult(schema); - } catch (IOException e) { - throw new RuntimeException(e); - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerHeaderMiddleware.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerHeaderMiddleware.java deleted file mode 100644 index 23c1f46d793b0..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerHeaderMiddleware.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -/** Middleware that's used to extract and pass headers to the server during requests. */ -public class ServerHeaderMiddleware implements FlightServerMiddleware { - /** Factory for accessing ServerHeaderMiddleware. */ - public static class Factory implements FlightServerMiddleware.Factory { - /** Construct a factory for receiving call headers. 
*/ - public Factory() {} - - @Override - public ServerHeaderMiddleware onCallStarted( - CallInfo callInfo, CallHeaders incomingHeaders, RequestContext context) { - return new ServerHeaderMiddleware(incomingHeaders); - } - } - - private final CallHeaders headers; - - private ServerHeaderMiddleware(CallHeaders incomingHeaders) { - this.headers = incomingHeaders; - } - - /** Retrieve the headers for this call. */ - public CallHeaders headers() { - return headers; - } - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {} - - @Override - public void onCallCompleted(CallStatus status) {} - - @Override - public void onCallErrored(Throwable err) {} -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java deleted file mode 100644 index 47fd6f1366272..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; - -/** Middleware for handling Flight SQL Sessions including session cookie handling. */ -public class ServerSessionMiddleware implements FlightServerMiddleware { - Factory factory; - boolean existingSession; - private Session session; - private String closedSessionId = null; - - public static final String sessionCookieName = "arrow_flight_session_id"; - - /** Factory for managing and accessing ServerSessionMiddleware. */ - public static class Factory implements FlightServerMiddleware.Factory { - private final ConcurrentMap sessionStore = new ConcurrentHashMap<>(); - private final Callable idGenerator; - - /** - * Construct a factory for ServerSessionMiddleware. - * - *

    Factory manages and accesses persistent sessions based on HTTP cookies. - * - * @param idGenerator A Callable returning unique session id Strings. - */ - public Factory(Callable idGenerator) { - this.idGenerator = idGenerator; - } - - private synchronized Session createNewSession() { - String id; - try { - id = idGenerator.call(); - } catch (Exception ignored) { - // Most impls aren't going to throw so don't make caller handle a nonexistent checked - // exception - throw CallStatus.INTERNAL.withDescription("Session creation error").toRuntimeException(); - } - - Session newSession = new Session(id); - if (sessionStore.putIfAbsent(id, newSession) != null) { - // Collision, should never happen - throw CallStatus.INTERNAL.withDescription("Session creation error").toRuntimeException(); - } - return newSession; - } - - private void closeSession(String id) { - if (sessionStore.remove(id) == null) { - throw CallStatus.NOT_FOUND - .withDescription("Session id '" + id + "' not found.") - .toRuntimeException(); - } - } - - @Override - public ServerSessionMiddleware onCallStarted( - CallInfo callInfo, CallHeaders incomingHeaders, RequestContext context) { - String sessionId = null; - - final Iterable it = incomingHeaders.getAll("cookie"); - if (it != null) { - findIdCookie: - for (final String headerValue : it) { - for (final String cookie : headerValue.split(" ;")) { - final String[] cookiePair = cookie.split("="); - if (cookiePair.length != 2) { - // Soft failure: Ignore invalid cookie list field - break; - } - - if (sessionCookieName.equals(cookiePair[0]) && cookiePair[1].length() > 0) { - sessionId = cookiePair[1]; - break findIdCookie; - } - } - } - } - - if (sessionId == null) { - // No session cookie, create middleware instance without session. 
- return new ServerSessionMiddleware(this, incomingHeaders, null); - } - - Session session = sessionStore.get(sessionId); - // Cookie provided by caller, but invalid - if (session == null) { - // Can't soft-fail/proceed here, clients will get unexpected behaviour without options they - // thought were set. - throw CallStatus.NOT_FOUND - .withDescription("Invalid " + sessionCookieName + " cookie.") - .toRuntimeException(); - } - - return new ServerSessionMiddleware(this, incomingHeaders, session); - } - } - - /** A thread-safe container for named SessionOptionValues. */ - public static class Session { - public final String id; - private ConcurrentMap sessionData = - new ConcurrentHashMap(); - - /** - * Construct a new Session with the given id. - * - * @param id The Session's id string, which is used as the session cookie value. - */ - private Session(String id) { - this.id = id; - } - - /** Get session option by name, or null if it does not exist. */ - public SessionOptionValue getSessionOption(String name) { - return sessionData.get(name); - } - - /** Get an immutable copy of the session options map. */ - public Map getSessionOptions() { - return Collections.unmodifiableMap(new HashMap(sessionData)); - } - - /** Set session option by name to given value. */ - public void setSessionOption(String name, SessionOptionValue value) { - sessionData.put(name, value); - } - - /** Idempotently remove name from this session. */ - public void eraseSessionOption(String name) { - sessionData.remove(name); - } - } - - private final CallHeaders headers; - - private ServerSessionMiddleware( - ServerSessionMiddleware.Factory factory, CallHeaders incomingHeaders, Session session) { - this.factory = factory; - headers = incomingHeaders; - this.session = session; - existingSession = (session != null); - } - - /** - * Check if there is an open session associated with this call. - * - * @return True iff there is an open session associated with this call. 
- */ - public boolean hasSession() { - return session != null; - } - - /** - * Get the existing or new session value map for this call. - * - * @return The session option value map, or null in case of an id generation collision. - */ - public synchronized Session getSession() { - if (session == null) { - session = factory.createNewSession(); - } - - return session; - } - - /** - * Close the current session. - * - *

    It is an error to call this without a valid session specified via cookie or equivalent. - */ - public synchronized void closeSession() { - if (session == null) { - throw CallStatus.NOT_FOUND - .withDescription("No session found for the current call.") - .toRuntimeException(); - } - factory.closeSession(session.id); - closedSessionId = session.id; - session = null; - } - - public CallHeaders getCallHeaders() { - return headers; - } - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) { - if (!existingSession && session != null) { - outgoingHeaders.insert("set-cookie", sessionCookieName + "=" + session.id); - } - if (closedSessionId != null) { - outgoingHeaders.insert( - "set-cookie", sessionCookieName + "=" + closedSessionId + "; Max-Age=0"); - } - } - - @Override - public void onCallCompleted(CallStatus status) {} - - @Override - public void onCallErrored(Throwable err) {} -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SessionOptionValue.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SessionOptionValue.java deleted file mode 100644 index 5f342501fb0ad..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SessionOptionValue.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.util.Arrays; -import org.apache.arrow.flight.impl.Flight; - -/** A union-like container interface for supported session option value types. */ -public abstract class SessionOptionValue { - SessionOptionValue() {} - - /** Value access via a caller-provided visitor/functor. */ - public abstract T acceptVisitor(SessionOptionValueVisitor v); - - Flight.SessionOptionValue toProtocol() { - Flight.SessionOptionValue.Builder b = Flight.SessionOptionValue.newBuilder(); - SessionOptionValueToProtocolVisitor visitor = new SessionOptionValueToProtocolVisitor(b); - this.acceptVisitor(visitor); - return b.build(); - } - - /** Check whether the SessionOptionValue is empty/valueless. */ - public boolean isEmpty() { - return false; - } - - private class SessionOptionValueToProtocolVisitor implements SessionOptionValueVisitor { - final Flight.SessionOptionValue.Builder b; - - SessionOptionValueToProtocolVisitor(Flight.SessionOptionValue.Builder b) { - this.b = b; - } - - @Override - public Void visit(String value) { - b.setStringValue(value); - return null; - } - - @Override - public Void visit(boolean value) { - b.setBoolValue(value); - return null; - } - - @Override - public Void visit(long value) { - b.setInt64Value(value); - return null; - } - - @Override - public Void visit(double value) { - b.setDoubleValue(value); - return null; - } - - @Override - public Void visit(String[] value) { - Flight.SessionOptionValue.StringListValue.Builder pbSLVBuilder = - Flight.SessionOptionValue.StringListValue.newBuilder(); - pbSLVBuilder.addAllValues(Arrays.asList(value)); - b.setStringListValue(pbSLVBuilder.build()); - return null; - } - - @Override - public Void visit(Void ignored) { - b.clearOptionValue(); - return null; - } - } -} diff --git 
a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SessionOptionValueFactory.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SessionOptionValueFactory.java deleted file mode 100644 index aff7dffddd487..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SessionOptionValueFactory.java +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import org.apache.arrow.flight.impl.Flight; - -/** Abstract factory for concrete SessionOptionValue instances. 
*/ -public class SessionOptionValueFactory { - public static SessionOptionValue makeSessionOptionValue(String value) { - return new SessionOptionValueString(value); - } - - public static SessionOptionValue makeSessionOptionValue(boolean value) { - return new SessionOptionValueBoolean(value); - } - - public static SessionOptionValue makeSessionOptionValue(long value) { - return new SessionOptionValueLong(value); - } - - public static SessionOptionValue makeSessionOptionValue(double value) { - return new SessionOptionValueDouble(value); - } - - public static SessionOptionValue makeSessionOptionValue(String[] value) { - return new SessionOptionValueStringList(value); - } - - public static SessionOptionValue makeEmptySessionOptionValue() { - return new SessionOptionValueEmpty(); - } - - /** Construct a SessionOptionValue from its Protobuf object representation. */ - public static SessionOptionValue makeSessionOptionValue(Flight.SessionOptionValue proto) { - switch (proto.getOptionValueCase()) { - case STRING_VALUE: - return new SessionOptionValueString(proto.getStringValue()); - case BOOL_VALUE: - return new SessionOptionValueBoolean(proto.getBoolValue()); - case INT64_VALUE: - return new SessionOptionValueLong(proto.getInt64Value()); - case DOUBLE_VALUE: - return new SessionOptionValueDouble(proto.getDoubleValue()); - case STRING_LIST_VALUE: - // Using ByteString::toByteArray() here otherwise we still somehow get `ByteArray`s with - // broken .equals(String) - return new SessionOptionValueStringList( - proto.getStringListValue().getValuesList().asByteStringList().stream() - .map((e) -> new String(e.toByteArray(), StandardCharsets.UTF_8)) - .toArray(String[]::new)); - case OPTIONVALUE_NOT_SET: - return new SessionOptionValueEmpty(); - default: - // Unreachable - throw new IllegalArgumentException(""); - } - } - - private static class SessionOptionValueString extends SessionOptionValue { - private final String value; - - SessionOptionValueString(String value) { - 
this.value = value; - } - - @Override - public T acceptVisitor(SessionOptionValueVisitor v) { - return v.visit(value); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - SessionOptionValueString that = (SessionOptionValueString) o; - return value.equals(that.value); - } - - @Override - public int hashCode() { - return value.hashCode(); - } - - @Override - public String toString() { - return '"' + value + '"'; - } - } - - private static class SessionOptionValueBoolean extends SessionOptionValue { - private final boolean value; - - SessionOptionValueBoolean(boolean value) { - this.value = value; - } - - @Override - public T acceptVisitor(SessionOptionValueVisitor v) { - return v.visit(value); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - SessionOptionValueBoolean that = (SessionOptionValueBoolean) o; - return value == that.value; - } - - @Override - public int hashCode() { - return Boolean.hashCode(value); - } - - @Override - public String toString() { - return String.valueOf(value); - } - } - - private static class SessionOptionValueLong extends SessionOptionValue { - private final long value; - - SessionOptionValueLong(long value) { - this.value = value; - } - - @Override - public T acceptVisitor(SessionOptionValueVisitor v) { - return v.visit(value); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - SessionOptionValueLong that = (SessionOptionValueLong) o; - return value == that.value; - } - - @Override - public int hashCode() { - return Long.hashCode(value); - } - - @Override - public String toString() { - return String.valueOf(value); - } - } - - private static class SessionOptionValueDouble extends SessionOptionValue { - 
private final double value; - - SessionOptionValueDouble(double value) { - this.value = value; - } - - @Override - public T acceptVisitor(SessionOptionValueVisitor v) { - return v.visit(value); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - SessionOptionValueDouble that = (SessionOptionValueDouble) o; - return value == that.value; - } - - @Override - public int hashCode() { - return Double.hashCode(value); - } - - @Override - public String toString() { - return String.valueOf(value); - } - } - - private static class SessionOptionValueStringList extends SessionOptionValue { - private final String[] value; - - SessionOptionValueStringList(String[] value) { - this.value = value.clone(); - } - - @Override - public T acceptVisitor(SessionOptionValueVisitor v) { - return v.visit(value); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - SessionOptionValueStringList that = (SessionOptionValueStringList) o; - return Arrays.deepEquals(value, that.value); - } - - @Override - public int hashCode() { - return Arrays.deepHashCode(value); - } - - @Override - public String toString() { - if (value.length == 0) { - return "[]"; - } - return "[\"" + String.join("\", \"", value) + "\"]"; - } - } - - private static class SessionOptionValueEmpty extends SessionOptionValue { - @Override - public T acceptVisitor(SessionOptionValueVisitor v) { - return v.visit((Void) null); - } - - @Override - public boolean isEmpty() { - return true; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - return true; - } - - @Override - public int hashCode() { - return SessionOptionValueEmpty.class.hashCode(); - } - - @Override - public String toString() { - return ""; - } 
- } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SessionOptionValueVisitor.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SessionOptionValueVisitor.java deleted file mode 100644 index 324219f2eae51..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SessionOptionValueVisitor.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -/** - * A visitor interface to access SessionOptionValue's contained value. - * - * @param Return type of the visit operation. - */ -public interface SessionOptionValueVisitor { - /** A callback to handle SessionOptionValue containing a String. */ - T visit(String value); - - /** A callback to handle SessionOptionValue containing a boolean. */ - T visit(boolean value); - - /** A callback to handle SessionOptionValue containing a long. */ - T visit(long value); - - /** A callback to handle SessionOptionValue containing a double. */ - T visit(double value); - - /** A callback to handle SessionOptionValue containing an array of String. */ - T visit(String[] value); - - /** - * A callback to handle SessionOptionValue containing no value. - * - *

    By convention, an attempt to set a valueless SessionOptionValue should attempt to unset or - * clear the named option value on the server. - */ - T visit(Void value); -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SetSessionOptionsRequest.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SetSessionOptionsRequest.java deleted file mode 100644 index 2bd5718eb5070..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SetSessionOptionsRequest.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.stream.Collectors; -import org.apache.arrow.flight.impl.Flight; - -/** A request to set option(s) in an existing or implicitly-created server session. 
*/ -public class SetSessionOptionsRequest { - private final Map sessionOptions; - - public SetSessionOptionsRequest(Map sessionOptions) { - this.sessionOptions = - Collections.unmodifiableMap(new HashMap(sessionOptions)); - } - - SetSessionOptionsRequest(Flight.SetSessionOptionsRequest proto) { - sessionOptions = - Collections.unmodifiableMap( - proto.getSessionOptionsMap().entrySet().stream() - .collect( - Collectors.toMap( - Map.Entry::getKey, - (e) -> SessionOptionValueFactory.makeSessionOptionValue(e.getValue())))); - } - - /** - * Get the session option map from the request. - * - * @return An immutable view of the session options map. - */ - public Map getSessionOptions() { - return Collections.unmodifiableMap(sessionOptions); - } - - Flight.SetSessionOptionsRequest toProtocol() { - Flight.SetSessionOptionsRequest.Builder b = Flight.SetSessionOptionsRequest.newBuilder(); - b.putAllSessionOptions( - sessionOptions.entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, (e) -> e.getValue().toProtocol()))); - return b.build(); - } - - /** - * Get the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing non-Flight services to still return Flight - * types. - */ - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - /** - * Parse the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing Flight clients to obtain stream info from - * non-Flight services. - * - * @param serialized The serialized form of the message, as returned by {@link #serialize()}. - * @return The deserialized message. - * @throws IOException if the serialized form is invalid. - */ - public static SetSessionOptionsRequest deserialize(ByteBuffer serialized) throws IOException { - return new SetSessionOptionsRequest(Flight.SetSessionOptionsRequest.parseFrom(serialized)); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SetSessionOptionsResult.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SetSessionOptionsResult.java deleted file mode 100644 index 8ea953fda3cd0..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SetSessionOptionsResult.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.stream.Collectors; -import org.apache.arrow.flight.impl.Flight; - -/** The result of attempting to set a set of session options. */ -public class SetSessionOptionsResult { - /** Error status value for per-option errors. */ - public enum ErrorValue { - /** - * The status of setting the option is unknown. Servers should avoid using this value (send a - * NOT_FOUND error if the requested session is not known). Clients can retry the request. - */ - UNSPECIFIED, - /** The given session option name is invalid. */ - INVALID_NAME, - /** The session option value or type is invalid. */ - INVALID_VALUE, - /** The session option cannot be set. */ - ERROR, - ; - - static ErrorValue fromProtocol(Flight.SetSessionOptionsResult.ErrorValue s) { - return values()[s.getNumber()]; - } - - Flight.SetSessionOptionsResult.ErrorValue toProtocol() { - return Flight.SetSessionOptionsResult.ErrorValue.values()[ordinal()]; - } - } - - /** Per-option extensible error response container. 
*/ - public static class Error { - public ErrorValue value; - - public Error(ErrorValue value) { - this.value = value; - } - - Error(Flight.SetSessionOptionsResult.Error e) { - value = ErrorValue.fromProtocol(e.getValue()); - } - - Flight.SetSessionOptionsResult.Error toProtocol() { - Flight.SetSessionOptionsResult.Error.Builder b = - Flight.SetSessionOptionsResult.Error.newBuilder(); - b.setValue(value.toProtocol()); - return b.build(); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - Error that = (Error) o; - return value == that.value; - } - - @Override - public int hashCode() { - return value.hashCode(); - } - } - - private final Map errors; - - public SetSessionOptionsResult(Map errors) { - this.errors = Collections.unmodifiableMap(new HashMap(errors)); - } - - SetSessionOptionsResult(Flight.SetSessionOptionsResult proto) { - errors = - Collections.unmodifiableMap( - proto.getErrors().entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, (e) -> new Error(e.getValue())))); - } - - /** Report whether the error map has nonzero length. */ - public boolean hasErrors() { - return errors.size() > 0; - } - - /** - * Get the error status map from the result object. - * - * @return An immutable view of the error status map. - */ - public Map getErrors() { - return errors; - } - - Flight.SetSessionOptionsResult toProtocol() { - Flight.SetSessionOptionsResult.Builder b = Flight.SetSessionOptionsResult.newBuilder(); - b.putAllErrors( - errors.entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, (e) -> e.getValue().toProtocol()))); - return b.build(); - } - - /** - * Get the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing non-Flight services to still return Flight - * types. - */ - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - /** - * Parse the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing Flight clients to obtain stream info from - * non-Flight services. - * - * @param serialized The serialized form of the message, as returned by {@link #serialize()}. - * @return The deserialized message. - * @throws IOException if the serialized form is invalid. - */ - public static SetSessionOptionsResult deserialize(ByteBuffer serialized) throws IOException { - return new SetSessionOptionsResult(Flight.SetSessionOptionsResult.parseFrom(serialized)); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/StreamPipe.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/StreamPipe.java deleted file mode 100644 index 3e8d097da6751..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/StreamPipe.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import io.grpc.stub.StreamObserver; -import java.util.function.Consumer; -import java.util.function.Function; -import org.apache.arrow.flight.FlightProducer.StreamListener; -import org.apache.arrow.flight.grpc.StatusUtils; -import org.apache.arrow.util.AutoCloseables; - -/** - * Shim listener to avoid exposing GRPC internals. - * - * @param From Type - * @param To Type - */ -class StreamPipe implements StreamListener { - - private final StreamObserver delegate; - private final Function mapFunction; - private final Consumer errorHandler; - private AutoCloseable resource; - private boolean closed = false; - - /** - * Wrap the given gRPC StreamObserver with a transformation function. - * - * @param delegate The {@link StreamObserver} to wrap. - * @param func The transformation function. - * @param errorHandler A handler for uncaught exceptions (e.g. if something tries to double-close - * this stream). - * @param The source type. - * @param The output type. - * @return A wrapped listener. - */ - public static StreamPipe wrap( - StreamObserver delegate, Function func, Consumer errorHandler) { - return new StreamPipe<>(delegate, func, errorHandler); - } - - public StreamPipe( - StreamObserver delegate, Function func, Consumer errorHandler) { - super(); - this.delegate = delegate; - this.mapFunction = func; - this.errorHandler = errorHandler; - this.resource = null; - } - - /** Set an AutoCloseable resource to be cleaned up when the gRPC observer is to be completed. 
*/ - void setAutoCloseable(AutoCloseable ac) { - resource = ac; - } - - @Override - public void onNext(FROM val) { - delegate.onNext(mapFunction.apply(val)); - } - - @Override - public void onError(Throwable t) { - if (closed) { - errorHandler.accept(t); - return; - } - try { - AutoCloseables.close(resource); - } catch (Exception e) { - errorHandler.accept(e); - } finally { - // Set closed to true in case onError throws, so that we don't try to close again - closed = true; - delegate.onError(StatusUtils.toGrpcException(t)); - } - } - - @Override - public void onCompleted() { - if (closed) { - errorHandler.accept(new IllegalStateException("Tried to complete already-completed call")); - return; - } - try { - AutoCloseables.close(resource); - } catch (Exception e) { - errorHandler.accept(e); - } finally { - // Set closed to true in case onCompleted throws, so that we don't try to close again - closed = true; - delegate.onCompleted(); - } - } - - /** Ensure this stream has been completed. */ - void ensureCompleted() { - if (!closed) { - onCompleted(); - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SyncPutListener.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SyncPutListener.java deleted file mode 100644 index 0dd460f1fd02f..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/SyncPutListener.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.flight.grpc.StatusUtils; -import org.apache.arrow.memory.ArrowBuf; - -/** - * A listener for server-sent application metadata messages during a Flight DoPut. This class wraps - * the messages in a synchronous interface. - */ -public final class SyncPutListener implements FlightClient.PutListener, AutoCloseable { - - private final LinkedBlockingQueue queue; - private final CompletableFuture completed; - private static final Object DONE = new Object(); - private static final Object DONE_WITH_EXCEPTION = new Object(); - - public SyncPutListener() { - queue = new LinkedBlockingQueue<>(); - completed = new CompletableFuture<>(); - } - - private PutResult unwrap(Object queueItem) throws InterruptedException, ExecutionException { - if (queueItem == DONE) { - queue.put(queueItem); - return null; - } else if (queueItem == DONE_WITH_EXCEPTION) { - queue.put(queueItem); - completed.get(); - } - return (PutResult) queueItem; - } - - /** - * Get the next message from the server, blocking until it is available. - * - * @return The next message, or null if the server is done sending messages. The caller assumes - * ownership of the metadata and must remember to close it. - * @throws InterruptedException if interrupted while waiting. 
- * @throws ExecutionException if the server sent an error, or if there was an internal error. - */ - public PutResult read() throws InterruptedException, ExecutionException { - return unwrap(queue.take()); - } - - /** - * Get the next message from the server, blocking for the specified amount of time until it is - * available. - * - * @return The next message, or null if the server is done sending messages or no message arrived - * before the timeout. The caller assumes ownership of the metadata and must remember to close - * it. - * @throws InterruptedException if interrupted while waiting. - * @throws ExecutionException if the server sent an error, or if there was an internal error. - */ - public PutResult poll(long timeout, TimeUnit unit) - throws InterruptedException, ExecutionException { - return unwrap(queue.poll(timeout, unit)); - } - - @Override - public void getResult() { - try { - completed.get(); - } catch (ExecutionException e) { - throw StatusUtils.fromThrowable(e.getCause()); - } catch (InterruptedException e) { - throw StatusUtils.fromThrowable(e); - } - } - - @Override - public void onNext(PutResult val) { - final ArrowBuf metadata = val.getApplicationMetadata(); - metadata.getReferenceManager().retain(); - queue.add(PutResult.metadata(metadata)); - } - - @Override - public void onError(Throwable t) { - completed.completeExceptionally(StatusUtils.fromThrowable(t)); - queue.add(DONE_WITH_EXCEPTION); - } - - @Override - public void onCompleted() { - completed.complete(null); - queue.add(DONE); - } - - @Override - public void close() { - queue.forEach( - o -> { - if (o instanceof PutResult) { - ((PutResult) o).close(); - } - }); - } - - @Override - public boolean isCancelled() { - return completed.isDone(); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Ticket.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Ticket.java deleted file mode 100644 index 4a7fcda99f7b1..0000000000000 --- 
a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/Ticket.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import com.google.protobuf.ByteString; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.apache.arrow.flight.impl.Flight; - -/** Endpoint for a particular stream. */ -public class Ticket { - private final byte[] bytes; - - public Ticket(byte[] bytes) { - super(); - this.bytes = bytes; - } - - public byte[] getBytes() { - return bytes; - } - - Ticket(org.apache.arrow.flight.impl.Flight.Ticket ticket) { - this.bytes = ticket.getTicket().toByteArray(); - } - - Flight.Ticket toProtocol() { - return Flight.Ticket.newBuilder().setTicket(ByteString.copyFrom(bytes)).build(); - } - - /** - * Get the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing non-Flight services to still return Flight - * types. - */ - public ByteBuffer serialize() { - return ByteBuffer.wrap(toProtocol().toByteArray()); - } - - /** - * Parse the serialized form of this protocol message. - * - *

    Intended to help interoperability by allowing Flight clients to obtain stream info from - * non-Flight services. - * - * @param serialized The serialized form of the Ticket, as returned by {@link #serialize()}. - * @return The deserialized Ticket. - * @throws IOException if the serialized form is invalid. - */ - public static Ticket deserialize(ByteBuffer serialized) throws IOException { - return new Ticket(Flight.Ticket.parseFrom(serialized)); - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + Arrays.hashCode(bytes); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof Ticket)) { - return false; - } - Ticket other = (Ticket) obj; - if (!Arrays.equals(bytes, other.bytes)) { - return false; - } - return true; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java deleted file mode 100644 index 1a775f33ed6e9..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/AuthConstants.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth; - -import io.grpc.Context; -import io.grpc.Metadata; -import io.grpc.Metadata.BinaryMarshaller; -import io.grpc.MethodDescriptor; -import org.apache.arrow.flight.FlightConstants; - -/** Constants used in authorization of flight connections. */ -public final class AuthConstants { - - public static final String HANDSHAKE_DESCRIPTOR_NAME = - MethodDescriptor.generateFullMethodName(FlightConstants.SERVICE, "Handshake"); - public static final String TOKEN_NAME = "Auth-Token-bin"; - public static final Metadata.Key TOKEN_KEY = - Metadata.Key.of( - TOKEN_NAME, - new BinaryMarshaller() { - - @Override - public byte[] toBytes(byte[] value) { - return value; - } - - @Override - public byte[] parseBytes(byte[] serialized) { - return serialized; - } - }); - - public static final Context.Key PEER_IDENTITY_KEY = - Context.keyWithDefault("arrow-flight-peer-identity", ""); - - private AuthConstants() {} -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicClientAuthHandler.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicClientAuthHandler.java deleted file mode 100644 index a0fdf059bdce0..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicClientAuthHandler.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth; - -import java.util.Iterator; -import org.apache.arrow.flight.impl.Flight.BasicAuth; - -/** A client auth handler that supports username and password. */ -public class BasicClientAuthHandler implements ClientAuthHandler { - - private final String name; - private final String password; - private byte[] token = null; - - public BasicClientAuthHandler(String name, String password) { - this.name = name; - this.password = password; - } - - @Override - public void authenticate(ClientAuthSender outgoing, Iterator incoming) { - BasicAuth.Builder builder = BasicAuth.newBuilder(); - if (name != null) { - builder.setUsername(name); - } - - if (password != null) { - builder.setPassword(password); - } - - outgoing.send(builder.build().toByteArray()); - this.token = incoming.next(); - } - - @Override - public byte[] getCallToken() { - return token; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicServerAuthHandler.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicServerAuthHandler.java deleted file mode 100644 index d69039c48c034..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/BasicServerAuthHandler.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth; - -import com.google.protobuf.InvalidProtocolBufferException; -import java.util.Iterator; -import java.util.Optional; -import org.apache.arrow.flight.impl.Flight.BasicAuth; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** A ServerAuthHandler for username/password authentication. */ -public class BasicServerAuthHandler implements ServerAuthHandler { - - private static final Logger logger = LoggerFactory.getLogger(BasicServerAuthHandler.class); - private final BasicAuthValidator authValidator; - - public BasicServerAuthHandler(BasicAuthValidator authValidator) { - super(); - this.authValidator = authValidator; - } - - /** Interface that this handler delegates for determining if credentials are valid. 
*/ - public interface BasicAuthValidator { - - byte[] getToken(String username, String password) throws Exception; - - Optional isValid(byte[] token); - } - - @Override - public boolean authenticate(ServerAuthSender outgoing, Iterator incoming) { - byte[] bytes = incoming.next(); - try { - BasicAuth auth = BasicAuth.parseFrom(bytes); - byte[] token = authValidator.getToken(auth.getUsername(), auth.getPassword()); - outgoing.send(token); - return true; - } catch (InvalidProtocolBufferException e) { - logger.debug("Failure parsing auth message.", e); - } catch (Exception e) { - logger.debug("Unknown error during authorization.", e); - } - - return false; - } - - @Override - public Optional isValid(byte[] token) { - return authValidator.isValid(token); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthHandler.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthHandler.java deleted file mode 100644 index 3ca94006d8dbf..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthHandler.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.auth; - -import java.util.Iterator; -import org.apache.arrow.flight.FlightClient; - -/** - * Implement authentication for Flight on the client side. - * - * @deprecated As of 14.0.0. This implements a stateful "login" flow that does not play well with - * distributed or stateless systems. It will not be removed, but should not be used. Instead see - * {@link FlightClient#authenticateBasicToken(String, String)}. - */ -@Deprecated -public interface ClientAuthHandler { - /** - * Handle the initial handshake with the server. - * - * @param outgoing A channel to send data to the server. - * @param incoming An iterator of incoming data from the server. - */ - void authenticate(ClientAuthSender outgoing, Iterator incoming); - - /** Get the per-call authentication token. */ - byte[] getCallToken(); - - /** A communication channel to the server during initial connection. */ - interface ClientAuthSender { - - /** Send the server a message. */ - void send(byte[] payload); - - /** Signal an error to the server and abort the authentication attempt. */ - void onError(Throwable cause); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthInterceptor.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthInterceptor.java deleted file mode 100644 index 3cdddeddb7982..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthInterceptor.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth; - -import io.grpc.CallOptions; -import io.grpc.Channel; -import io.grpc.ClientCall; -import io.grpc.ClientInterceptor; -import io.grpc.ForwardingClientCall.SimpleForwardingClientCall; -import io.grpc.Metadata; -import io.grpc.MethodDescriptor; - -/** GRPC client intercepter that handles authentication with the server. */ -public class ClientAuthInterceptor implements ClientInterceptor { - private volatile ClientAuthHandler authHandler = null; - - public void setAuthHandler(ClientAuthHandler authHandler) { - this.authHandler = authHandler; - } - - public ClientAuthInterceptor() {} - - public boolean hasAuthHandler() { - return authHandler != null; - } - - @Override - public ClientCall interceptCall( - MethodDescriptor methodDescriptor, CallOptions callOptions, Channel next) { - ClientCall call = next.newCall(methodDescriptor, callOptions); - - // once we have an auth header, add that to the calls. 
- if (authHandler != null) { - call = new HeaderAttachingClientCall<>(call); - } - - return call; - } - - private final class HeaderAttachingClientCall - extends SimpleForwardingClientCall { - - private HeaderAttachingClientCall(ClientCall call) { - super(call); - } - - @Override - public void start(Listener responseListener, Metadata headers) { - final Metadata authHeaders = new Metadata(); - authHeaders.put(AuthConstants.TOKEN_KEY, authHandler.getCallToken()); - headers.merge(authHeaders); - super.start(responseListener, headers); - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthWrapper.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthWrapper.java deleted file mode 100644 index dd62f7756e79b..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ClientAuthWrapper.java +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.auth; - -import com.google.protobuf.ByteString; -import io.grpc.StatusRuntimeException; -import io.grpc.stub.StreamObserver; -import java.util.Iterator; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.LinkedBlockingQueue; -import org.apache.arrow.flight.auth.ClientAuthHandler.ClientAuthSender; -import org.apache.arrow.flight.grpc.StatusUtils; -import org.apache.arrow.flight.impl.Flight.HandshakeRequest; -import org.apache.arrow.flight.impl.Flight.HandshakeResponse; -import org.apache.arrow.flight.impl.FlightServiceGrpc.FlightServiceStub; - -/** Utility class for performing authorization over using a GRPC stub. */ -public class ClientAuthWrapper { - - /** - * Do client auth for a client. The stub will be authenticated after this method returns. - * - * @param authHandler The handler to use. - * @param stub The service stub. - */ - public static void doClientAuth(ClientAuthHandler authHandler, FlightServiceStub stub) { - AuthObserver observer = new AuthObserver(); - try { - observer.responseObserver = stub.handshake(observer); - authHandler.authenticate(observer.sender, observer.iter); - if (!observer.sender.errored) { - observer.responseObserver.onCompleted(); - } - } catch (StatusRuntimeException sre) { - throw StatusUtils.fromGrpcRuntimeException(sre); - } - try { - if (!observer.completed.get()) { - // TODO: ARROW-5681 - throw new RuntimeException("Unauthenticated"); - } - } catch (InterruptedException e) { - throw new RuntimeException(e); - } catch (ExecutionException e) { - throw StatusUtils.fromThrowable(e.getCause()); - } - } - - private static class AuthObserver implements StreamObserver { - - private volatile StreamObserver responseObserver; - private final LinkedBlockingQueue messages = new LinkedBlockingQueue<>(); - private final AuthSender sender = new AuthSender(); - private CompletableFuture completed; - - public AuthObserver() { - 
super(); - completed = new CompletableFuture<>(); - } - - @Override - public void onNext(HandshakeResponse value) { - ByteString payload = value.getPayload(); - if (payload != null) { - messages.add(payload.toByteArray()); - } - } - - private Iterator iter = - new Iterator() { - - @Override - public byte[] next() { - while (!completed.isDone() || !messages.isEmpty()) { - byte[] bytes = messages.poll(); - if (bytes == null) { - // busy wait. - continue; - } else { - return bytes; - } - } - - if (completed.isCompletedExceptionally()) { - // Preserve prior exception behavior - // TODO: with ARROW-5681, throw an appropriate Flight exception if gRPC raised an - // exception - try { - completed.get(); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } catch (ExecutionException e) { - if (e.getCause() instanceof StatusRuntimeException) { - throw (StatusRuntimeException) e.getCause(); - } - throw new RuntimeException(e); - } - } - - throw new IllegalStateException( - "You attempted to retrieve messages after there were none."); - } - - @Override - public boolean hasNext() { - return !messages.isEmpty(); - } - }; - - @Override - public void onError(Throwable t) { - completed.completeExceptionally(t); - } - - private class AuthSender implements ClientAuthSender { - - private boolean errored = false; - - @Override - public void send(byte[] payload) { - try { - responseObserver.onNext( - HandshakeRequest.newBuilder().setPayload(ByteString.copyFrom(payload)).build()); - } catch (StatusRuntimeException sre) { - throw StatusUtils.fromGrpcRuntimeException(sre); - } - } - - @Override - public void onError(Throwable cause) { - this.errored = true; - responseObserver.onError(StatusUtils.toGrpcException(cause)); - } - } - - @Override - public void onCompleted() { - completed.complete(true); - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthHandler.java 
b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthHandler.java deleted file mode 100644 index 39fbcf15efba0..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthHandler.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth; - -import java.util.Iterator; -import java.util.Optional; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.auth2.CallHeaderAuthenticator; - -/** - * Interface for Server side authentication handlers. - * - * @deprecated As of 14.0.0. This implements a stateful "login" flow that does not play well with - * distributed or stateless systems. It will not be removed, but should not be used. Instead, - * see {@link FlightServer.Builder#headerAuthenticator(CallHeaderAuthenticator)} and {@link - * CallHeaderAuthenticator}. - */ -@Deprecated -public interface ServerAuthHandler { - - /** - * Validate the client token provided on each call. - * - * @return An empty optional if the client is not authenticated; the peer identity otherwise (may - * be the empty string). 
- */ - Optional isValid(byte[] token); - - /** - * Handle the initial handshake with the client. - * - * @param outgoing A writer to send messages to the client. - * @param incoming An iterator of messages from the client. - * @return true if client is authenticated, false otherwise. - */ - boolean authenticate(ServerAuthSender outgoing, Iterator incoming); - - /** - * Interface for a server implementations to send back authentication messages back to the client. - */ - interface ServerAuthSender { - - void send(byte[] payload); - - void onError(Throwable cause); - } - - /** An auth handler that does nothing. */ - ServerAuthHandler NO_OP = - new ServerAuthHandler() { - - @Override - public Optional isValid(byte[] token) { - return Optional.of(""); - } - - @Override - public boolean authenticate(ServerAuthSender outgoing, Iterator incoming) { - return true; - } - }; -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthInterceptor.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthInterceptor.java deleted file mode 100644 index c52eaa444bc4b..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthInterceptor.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth; - -import io.grpc.Context; -import io.grpc.Contexts; -import io.grpc.Metadata; -import io.grpc.ServerCall; -import io.grpc.ServerCall.Listener; -import io.grpc.ServerCallHandler; -import io.grpc.ServerInterceptor; -import io.grpc.Status; -import io.grpc.StatusRuntimeException; -import java.util.Optional; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.flight.grpc.StatusUtils; - -/** GRPC Interceptor for performing authentication. */ -public class ServerAuthInterceptor implements ServerInterceptor { - - private final ServerAuthHandler authHandler; - - public ServerAuthInterceptor(ServerAuthHandler authHandler) { - this.authHandler = authHandler; - } - - @Override - public Listener interceptCall( - ServerCall call, Metadata headers, ServerCallHandler next) { - if (!call.getMethodDescriptor() - .getFullMethodName() - .equals(AuthConstants.HANDSHAKE_DESCRIPTOR_NAME)) { - final Optional peerIdentity; - - // Allow customizing the response code by throwing FlightRuntimeException - try { - peerIdentity = isValid(headers); - } catch (FlightRuntimeException e) { - final Status grpcStatus = StatusUtils.toGrpcStatus(e.status()); - call.close(grpcStatus, new Metadata()); - return new NoopServerCallListener<>(); - } catch (StatusRuntimeException e) { - Metadata trailers = e.getTrailers(); - call.close(e.getStatus(), trailers == null ? 
new Metadata() : trailers); - return new NoopServerCallListener<>(); - } - - if (!peerIdentity.isPresent()) { - // Send back a description along with the status code - call.close( - Status.UNAUTHENTICATED.withDescription( - "Unauthenticated (invalid or missing auth token)"), - new Metadata()); - return new NoopServerCallListener<>(); - } - return Contexts.interceptCall( - Context.current().withValue(AuthConstants.PEER_IDENTITY_KEY, peerIdentity.get()), - call, - headers, - next); - } - - return next.startCall(call, headers); - } - - private Optional isValid(Metadata headers) { - byte[] token = headers.get(AuthConstants.TOKEN_KEY); - return authHandler.isValid(token); - } - - private static class NoopServerCallListener extends ServerCall.Listener {} -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java deleted file mode 100644 index 879a93a73b5b6..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.auth; - -import com.google.protobuf.ByteString; -import io.grpc.stub.StreamObserver; -import java.util.Iterator; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.concurrent.LinkedBlockingQueue; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.auth.ServerAuthHandler.ServerAuthSender; -import org.apache.arrow.flight.grpc.StatusUtils; -import org.apache.arrow.flight.impl.Flight.HandshakeRequest; -import org.apache.arrow.flight.impl.Flight.HandshakeResponse; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Contains utility methods for integrating authorization into a GRPC stream. */ -public class ServerAuthWrapper { - private static final Logger LOGGER = LoggerFactory.getLogger(ServerAuthWrapper.class); - - /** - * Wrap the auth handler for handshake purposes. - * - * @param authHandler Authentication handler - * @param responseObserver Observer for handshake response - * @param executors ExecutorService - * @return AuthObserver - */ - public static StreamObserver wrapHandshake( - ServerAuthHandler authHandler, - StreamObserver responseObserver, - ExecutorService executors) { - - // stream started. 
- AuthObserver observer = new AuthObserver(responseObserver); - final Runnable r = - () -> { - try { - if (authHandler.authenticate(observer.sender, observer.iter)) { - responseObserver.onCompleted(); - return; - } - - responseObserver.onError( - StatusUtils.toGrpcException(CallStatus.UNAUTHENTICATED.toRuntimeException())); - } catch (Exception ex) { - LOGGER.error("Error during authentication", ex); - responseObserver.onError(StatusUtils.toGrpcException(ex)); - } - }; - observer.future = executors.submit(r); - return observer; - } - - private static class AuthObserver implements StreamObserver { - - private final StreamObserver responseObserver; - private volatile Future future; - private volatile boolean completed = false; - private final LinkedBlockingQueue messages = new LinkedBlockingQueue<>(); - private final AuthSender sender = new AuthSender(); - - public AuthObserver(StreamObserver responseObserver) { - super(); - this.responseObserver = responseObserver; - } - - @Override - public void onNext(HandshakeRequest value) { - ByteString payload = value.getPayload(); - if (payload != null) { - messages.add(payload.toByteArray()); - } - } - - private Iterator iter = - new Iterator() { - - @Override - public byte[] next() { - while (!completed || !messages.isEmpty()) { - byte[] bytes = messages.poll(); - if (bytes == null) { - // busy wait. 
- continue; - } - return bytes; - } - throw new IllegalStateException("Requesting more messages than client sent."); - } - - @Override - public boolean hasNext() { - return !messages.isEmpty(); - } - }; - - @Override - public void onError(Throwable t) { - completed = true; - while (future == null) { - /* busy wait */ - } - future.cancel(true); - } - - @Override - public void onCompleted() { - completed = true; - } - - private class AuthSender implements ServerAuthSender { - - @Override - public void send(byte[] payload) { - responseObserver.onNext( - HandshakeResponse.newBuilder().setPayload(ByteString.copyFrom(payload)).build()); - } - - @Override - public void onError(Throwable cause) { - responseObserver.onError(StatusUtils.toGrpcException(cause)); - } - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/Auth2Constants.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/Auth2Constants.java deleted file mode 100644 index 5736210a61aad..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/Auth2Constants.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.auth2; - -/** Constants used in authorization of flight connections. */ -public final class Auth2Constants { - public static final String PEER_IDENTITY_KEY = "arrow-flight-peer-identity"; - public static final String BEARER_PREFIX = "Bearer "; - public static final String BASIC_PREFIX = "Basic "; - public static final String AUTHORIZATION_HEADER = "Authorization"; - - private Auth2Constants() {} -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/AuthUtilities.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/AuthUtilities.java deleted file mode 100644 index 1c64ac7d60d72..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/AuthUtilities.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth2; - -import org.apache.arrow.flight.CallHeaders; - -/** Utility class for completing the auth process. */ -public final class AuthUtilities { - private AuthUtilities() {} - - /** - * Helper method for retrieving a value from the Authorization header. - * - * @param headers The headers to inspect. 
- * @param valuePrefix The prefix within the value portion of the header to extract away. - * @return The header value. - */ - public static String getValueFromAuthHeader(CallHeaders headers, String valuePrefix) { - final String authHeaderValue = headers.get(Auth2Constants.AUTHORIZATION_HEADER); - if (authHeaderValue != null) { - if (authHeaderValue.regionMatches(true, 0, valuePrefix, 0, valuePrefix.length())) { - return authHeaderValue.substring(valuePrefix.length()); - } - } - return null; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicAuthCredentialWriter.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicAuthCredentialWriter.java deleted file mode 100644 index ab8fc96fb79d6..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicAuthCredentialWriter.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth2; - -import java.nio.charset.StandardCharsets; -import java.util.Base64; -import java.util.function.Consumer; -import org.apache.arrow.flight.CallHeaders; - -/** Client credentials that use a username and password. 
*/ -public final class BasicAuthCredentialWriter implements Consumer { - - private final String name; - private final String password; - - public BasicAuthCredentialWriter(String name, String password) { - this.name = name; - this.password = password; - } - - @Override - public void accept(CallHeaders outputHeaders) { - outputHeaders.insert( - Auth2Constants.AUTHORIZATION_HEADER, - Auth2Constants.BASIC_PREFIX - + Base64.getEncoder() - .encodeToString( - String.format("%s:%s", name, password).getBytes(StandardCharsets.UTF_8))); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicCallHeaderAuthenticator.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicCallHeaderAuthenticator.java deleted file mode 100644 index 44fce5ec3df23..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BasicCallHeaderAuthenticator.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.auth2; - -import java.io.UnsupportedEncodingException; -import java.nio.charset.StandardCharsets; -import java.util.Base64; -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightRuntimeException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** A ServerAuthHandler for username/password authentication. */ -public class BasicCallHeaderAuthenticator implements CallHeaderAuthenticator { - - private static final Logger logger = LoggerFactory.getLogger(BasicCallHeaderAuthenticator.class); - - private final CredentialValidator authValidator; - - public BasicCallHeaderAuthenticator(CredentialValidator authValidator) { - this.authValidator = authValidator; - } - - @Override - public AuthResult authenticate(CallHeaders incomingHeaders) { - try { - final String authEncoded = - AuthUtilities.getValueFromAuthHeader(incomingHeaders, Auth2Constants.BASIC_PREFIX); - if (authEncoded == null) { - throw CallStatus.UNAUTHENTICATED.toRuntimeException(); - } - // The value has the format Base64(:) - final String authDecoded = - new String(Base64.getDecoder().decode(authEncoded), StandardCharsets.UTF_8); - final int colonPos = authDecoded.indexOf(':'); - if (colonPos == -1) { - throw CallStatus.UNAUTHENTICATED.toRuntimeException(); - } - - final String user = authDecoded.substring(0, colonPos); - final String password = authDecoded.substring(colonPos + 1); - return authValidator.validate(user, password); - } catch (UnsupportedEncodingException ex) { - // Note: Intentionally discarding the exception cause when reporting back to the client for - // security purposes. 
- logger.error("Authentication failed due to missing encoding.", ex); - throw CallStatus.INTERNAL.toRuntimeException(); - } catch (FlightRuntimeException ex) { - throw ex; - } catch (Exception ex) { - // Note: Intentionally discarding the exception cause when reporting back to the client for - // security purposes. - logger.error("Authentication failed.", ex); - throw CallStatus.UNAUTHENTICATED.toRuntimeException(); - } - } - - /** Interface that this handler delegates to for validating the incoming headers. */ - public interface CredentialValidator { - /** - * Validate the supplied credentials (username/password) and return the peer identity. - * - * @param username The username to validate. - * @param password The password to validate. - * @return The peer identity if the supplied credentials are valid. - * @throws Exception If the supplied credentials are not valid. - */ - AuthResult validate(String username, String password) throws Exception; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerCredentialWriter.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerCredentialWriter.java deleted file mode 100644 index c213d64fe7d0d..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerCredentialWriter.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth2; - -import java.util.function.Consumer; -import org.apache.arrow.flight.CallHeaders; - -/** Client credentials that use a bearer token. */ -public final class BearerCredentialWriter implements Consumer { - - private final String bearer; - - public BearerCredentialWriter(String bearer) { - this.bearer = bearer; - } - - @Override - public void accept(CallHeaders outputHeaders) { - outputHeaders.insert( - Auth2Constants.AUTHORIZATION_HEADER, Auth2Constants.BEARER_PREFIX + bearer); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerTokenAuthenticator.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerTokenAuthenticator.java deleted file mode 100644 index 8d58d359d7774..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/BearerTokenAuthenticator.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth2; - -import org.apache.arrow.flight.CallHeaders; - -/** - * Partial implementation of {@link CallHeaderAuthenticator} for bearer-token based authentication. - */ -public abstract class BearerTokenAuthenticator implements CallHeaderAuthenticator { - - final CallHeaderAuthenticator initialAuthenticator; - - public BearerTokenAuthenticator(CallHeaderAuthenticator initialAuthenticator) { - this.initialAuthenticator = initialAuthenticator; - } - - @Override - public AuthResult authenticate(CallHeaders incomingHeaders) { - // Check if headers contain a bearer token and if so, validate the token. - final String bearerToken = - AuthUtilities.getValueFromAuthHeader(incomingHeaders, Auth2Constants.BEARER_PREFIX); - if (bearerToken != null) { - return validateBearer(bearerToken); - } - - // Delegate to the basic auth handler to do the validation. - final CallHeaderAuthenticator.AuthResult result = - initialAuthenticator.authenticate(incomingHeaders); - return getAuthResultWithBearerToken(result); - } - - /** - * Callback to run when the initial authenticator succeeds. - * - * @param authResult A successful initial authentication result. - * @return an alternate AuthResult based on the original AuthResult that will write a bearer token - * to output headers. - */ - protected abstract AuthResult getAuthResultWithBearerToken(AuthResult authResult); - - /** - * Validate the bearer token. - * - * @param bearerToken The bearer token to validate. - * @return A successful AuthResult if validation succeeded. 
- */ - protected abstract AuthResult validateBearer(String bearerToken); -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/CallHeaderAuthenticator.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/CallHeaderAuthenticator.java deleted file mode 100644 index 97382bffaf1cf..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/CallHeaderAuthenticator.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth2; - -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.FlightRuntimeException; - -/** - * Interface for Server side authentication handlers. - * - *

    A CallHeaderAuthenticator is used by {@link ServerCallHeaderAuthMiddleware} to validate - * headers sent by a Flight client for authentication purposes. The headers validated do not - * necessarily have to be Authorization headers. - * - *

    The workflow is that the FlightServer will intercept headers on a request, validate the - * headers, and either send back an UNAUTHENTICATED error, or succeed and potentially send back - * additional headers to the client. - * - *

    Implementations of CallHeaderAuthenticator should take care not to provide leak confidential - * details (such as indicating if usernames are valid or not) for security reasons when reporting - * errors back to clients. - * - *

    Example CallHeaderAuthenticators provided include: The {@link BasicCallHeaderAuthenticator} - * will authenticate basic HTTP credentials. - * - *

    The {@link BearerTokenAuthenticator} will authenticate basic HTTP credentials initially, then - * also send back a bearer token that the client can use for subsequent requests. The {@link - * GeneratedBearerTokenAuthenticator} will provide internally generated bearer tokens and maintain a - * cache of them. - */ -public interface CallHeaderAuthenticator { - - /** - * Encapsulates the result of the {@link CallHeaderAuthenticator} analysis of headers. - * - *

    This includes the identity of the incoming user and any outbound headers to send as a - * response to the client. - */ - interface AuthResult { - /** - * The peer identity that was determined by the handshake process based on the authentication - * credentials supplied by the client. - * - * @return The peer identity. - */ - String getPeerIdentity(); - - /** - * Appends a header to the outgoing call headers. - * - * @param outgoingHeaders The outgoing headers. - */ - default void appendToOutgoingHeaders(CallHeaders outgoingHeaders) {} - } - - /** - * Validate the auth headers sent by the client. - * - * @param incomingHeaders The incoming headers to authenticate. - * @return an auth result containing a peer identity and optionally a bearer token. - * @throws FlightRuntimeException with CallStatus.UNAUTHENTICATED if credentials were not supplied - * or if credentials were supplied but were not valid. - */ - AuthResult authenticate(CallHeaders incomingHeaders); - - /** An auth handler that does nothing. */ - CallHeaderAuthenticator NO_OP = - new CallHeaderAuthenticator() { - @Override - public AuthResult authenticate(CallHeaders incomingHeaders) { - return () -> ""; - } - }; -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientBearerHeaderHandler.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientBearerHeaderHandler.java deleted file mode 100644 index 7a5e110be4e80..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientBearerHeaderHandler.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth2; - -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.grpc.CredentialCallOption; - -/** A client header handler that parses the incoming headers for a bearer token. */ -public class ClientBearerHeaderHandler implements ClientHeaderHandler { - - @Override - public CredentialCallOption getCredentialCallOptionFromIncomingHeaders( - CallHeaders incomingHeaders) { - final String bearerValue = - AuthUtilities.getValueFromAuthHeader(incomingHeaders, Auth2Constants.BEARER_PREFIX); - if (bearerValue != null) { - return new CredentialCallOption(new BearerCredentialWriter(bearerValue)); - } - return null; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHandshakeWrapper.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHandshakeWrapper.java deleted file mode 100644 index 1ce7d6896b08b..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHandshakeWrapper.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth2; - -import io.grpc.StatusRuntimeException; -import io.grpc.stub.StreamObserver; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.flight.grpc.StatusUtils; -import org.apache.arrow.flight.impl.Flight.HandshakeRequest; -import org.apache.arrow.flight.impl.Flight.HandshakeResponse; -import org.apache.arrow.flight.impl.FlightServiceGrpc.FlightServiceStub; - -/** Utility class for executing a handshake with a FlightServer. */ -public class ClientHandshakeWrapper { - private static final org.slf4j.Logger logger = - org.slf4j.LoggerFactory.getLogger(ClientHandshakeWrapper.class); - - /** - * Do handshake for a client. The stub will be authenticated after this method returns. - * - * @param stub The service stub. 
- */ - public static void doClientHandshake(FlightServiceStub stub) { - final HandshakeObserver observer = new HandshakeObserver(); - try { - observer.requestObserver = stub.handshake(observer); - observer.requestObserver.onNext(HandshakeRequest.newBuilder().build()); - observer.requestObserver.onCompleted(); - try { - if (!observer.completed.get()) { - // TODO: ARROW-5681 - throw CallStatus.UNAUTHENTICATED.toRuntimeException(); - } - } catch (InterruptedException ex) { - Thread.currentThread().interrupt(); - throw ex; - } catch (ExecutionException ex) { - final FlightRuntimeException wrappedException = StatusUtils.fromThrowable(ex.getCause()); - logger.error("Failed on completing future", wrappedException); - throw wrappedException; - } - } catch (StatusRuntimeException sre) { - logger.error("Failed with SRE", sre); - throw StatusUtils.fromGrpcRuntimeException(sre); - } catch (Throwable ex) { - logger.error("Failed with unknown", ex); - if (ex instanceof FlightRuntimeException) { - throw (FlightRuntimeException) ex; - } - throw StatusUtils.fromThrowable(ex); - } - } - - private static class HandshakeObserver implements StreamObserver { - - private volatile StreamObserver requestObserver; - private final CompletableFuture completed; - - public HandshakeObserver() { - super(); - completed = new CompletableFuture<>(); - } - - @Override - public void onNext(HandshakeResponse value) {} - - @Override - public void onError(Throwable t) { - completed.completeExceptionally(t); - } - - @Override - public void onCompleted() { - completed.complete(true); - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHeaderHandler.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHeaderHandler.java deleted file mode 100644 index 2640af1290a76..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHeaderHandler.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the 
Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth2; - -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.grpc.CredentialCallOption; - -/** Interface for client side header parsing and conversion to CredentialCallOption. */ -public interface ClientHeaderHandler { - /** - * Parses the incoming headers and converts them into a CredentialCallOption. - * - * @param incomingHeaders Incoming headers to parse. - * @return An instance of CredentialCallOption. - */ - CredentialCallOption getCredentialCallOptionFromIncomingHeaders(CallHeaders incomingHeaders); - - /** An client header handler that does nothing. 
*/ - ClientHeaderHandler NO_OP = - new ClientHeaderHandler() { - @Override - public CredentialCallOption getCredentialCallOptionFromIncomingHeaders( - CallHeaders incomingHeaders) { - return null; - } - }; -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientIncomingAuthHeaderMiddleware.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientIncomingAuthHeaderMiddleware.java deleted file mode 100644 index 162f646cd7a0f..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientIncomingAuthHeaderMiddleware.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth2; - -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.CallInfo; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightClientMiddleware; -import org.apache.arrow.flight.grpc.CredentialCallOption; - -/** Middleware for capturing bearer tokens sent back from the Flight server. */ -public class ClientIncomingAuthHeaderMiddleware implements FlightClientMiddleware { - private final Factory factory; - - /** Factory used within FlightClient. 
*/ - public static class Factory implements FlightClientMiddleware.Factory { - private final ClientHeaderHandler headerHandler; - private CredentialCallOption credentialCallOption = null; - - /** - * Construct a factory with the given header handler. - * - * @param headerHandler The header handler that will be used for handling incoming headers from - * the flight server. - */ - public Factory(ClientHeaderHandler headerHandler) { - this.headerHandler = headerHandler; - } - - @Override - public FlightClientMiddleware onCallStarted(CallInfo info) { - return new ClientIncomingAuthHeaderMiddleware(this); - } - - void setCredentialCallOption(CredentialCallOption callOption) { - this.credentialCallOption = callOption; - } - - public CredentialCallOption getCredentialCallOption() { - return credentialCallOption; - } - } - - private ClientIncomingAuthHeaderMiddleware(Factory factory) { - this.factory = factory; - } - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {} - - @Override - public void onHeadersReceived(CallHeaders incomingHeaders) { - factory.setCredentialCallOption( - factory.headerHandler.getCredentialCallOptionFromIncomingHeaders(incomingHeaders)); - } - - @Override - public void onCallCompleted(CallStatus status) {} -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/GeneratedBearerTokenAuthenticator.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/GeneratedBearerTokenAuthenticator.java deleted file mode 100644 index c64577746f0ed..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/GeneratedBearerTokenAuthenticator.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth2; - -import com.google.common.base.Strings; -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; -import io.grpc.Metadata; -import java.nio.ByteBuffer; -import java.util.Base64; -import java.util.UUID; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.grpc.MetadataAdapter; - -/** Generates and caches bearer tokens from user credentials. */ -public class GeneratedBearerTokenAuthenticator extends BearerTokenAuthenticator { - private final Cache bearerToIdentityCache; - - /** - * Generate bearer tokens for the given basic call authenticator. - * - * @param authenticator The authenticator to initial validate inputs with. - */ - public GeneratedBearerTokenAuthenticator(CallHeaderAuthenticator authenticator) { - this(authenticator, CacheBuilder.newBuilder().expireAfterAccess(2, TimeUnit.HOURS)); - } - - /** - * Generate bearer tokens for the given basic call authenticator. - * - * @param authenticator The authenticator to initial validate inputs with. - * @param timeoutMinutes The time before tokens expire after being accessed. 
- */ - public GeneratedBearerTokenAuthenticator( - CallHeaderAuthenticator authenticator, int timeoutMinutes) { - this( - authenticator, - CacheBuilder.newBuilder().expireAfterAccess(timeoutMinutes, TimeUnit.MINUTES)); - } - - /** - * Generate bearer tokens for the given basic call authenticator. - * - * @param authenticator The authenticator to initial validate inputs with. - * @param cacheBuilder The configuration of the cache of bearer tokens. - */ - public GeneratedBearerTokenAuthenticator( - CallHeaderAuthenticator authenticator, CacheBuilder cacheBuilder) { - super(authenticator); - bearerToIdentityCache = cacheBuilder.build(); - } - - @Override - protected AuthResult validateBearer(String bearerToken) { - final String peerIdentity = bearerToIdentityCache.getIfPresent(bearerToken); - if (peerIdentity == null) { - throw CallStatus.UNAUTHENTICATED.toRuntimeException(); - } - - return new AuthResult() { - @Override - public String getPeerIdentity() { - return peerIdentity; - } - - @Override - public void appendToOutgoingHeaders(CallHeaders outgoingHeaders) { - if (null - == AuthUtilities.getValueFromAuthHeader( - outgoingHeaders, Auth2Constants.BEARER_PREFIX)) { - outgoingHeaders.insert( - Auth2Constants.AUTHORIZATION_HEADER, Auth2Constants.BEARER_PREFIX + bearerToken); - } - } - }; - } - - @Override - protected AuthResult getAuthResultWithBearerToken(AuthResult authResult) { - // We generate a dummy header and call appendToOutgoingHeaders with it. - // We then inspect the dummy header and parse the bearer token if present in the header - // and generate a new bearer token if a bearer token is not present in the header. 
- final CallHeaders dummyHeaders = new MetadataAdapter(new Metadata()); - authResult.appendToOutgoingHeaders(dummyHeaders); - String bearerToken = - AuthUtilities.getValueFromAuthHeader(dummyHeaders, Auth2Constants.BEARER_PREFIX); - final AuthResult authResultWithBearerToken; - if (Strings.isNullOrEmpty(bearerToken)) { - // Generate a new bearer token and return an AuthResult that can write it. - final UUID uuid = UUID.randomUUID(); - final ByteBuffer byteBuffer = ByteBuffer.wrap(new byte[16]); - byteBuffer.putLong(uuid.getMostSignificantBits()); - byteBuffer.putLong(uuid.getLeastSignificantBits()); - final String newToken = Base64.getEncoder().encodeToString(byteBuffer.array()); - bearerToken = newToken; - authResultWithBearerToken = - new AuthResult() { - @Override - public String getPeerIdentity() { - return authResult.getPeerIdentity(); - } - - @Override - public void appendToOutgoingHeaders(CallHeaders outgoingHeaders) { - authResult.appendToOutgoingHeaders(outgoingHeaders); - outgoingHeaders.insert( - Auth2Constants.AUTHORIZATION_HEADER, Auth2Constants.BEARER_PREFIX + newToken); - } - }; - } else { - // Use the bearer token supplied by the original auth result. - authResultWithBearerToken = authResult; - } - bearerToIdentityCache.put(bearerToken, authResult.getPeerIdentity()); - return authResultWithBearerToken; - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ServerCallHeaderAuthMiddleware.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ServerCallHeaderAuthMiddleware.java deleted file mode 100644 index bc47f0d8538cf..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ServerCallHeaderAuthMiddleware.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth2; - -import static org.apache.arrow.flight.auth2.CallHeaderAuthenticator.AuthResult; - -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.CallInfo; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightServerMiddleware; -import org.apache.arrow.flight.RequestContext; - -/** - * Middleware that's used to validate credentials during the handshake and verify the bearer token - * in subsequent requests. - */ -public class ServerCallHeaderAuthMiddleware implements FlightServerMiddleware { - /** Factory for accessing ServerAuthMiddleware. */ - public static class Factory - implements FlightServerMiddleware.Factory { - private final CallHeaderAuthenticator authHandler; - - /** - * Construct a factory with the given auth handler. - * - * @param authHandler The auth handler what will be used for authenticating requests. 
- */ - public Factory(CallHeaderAuthenticator authHandler) { - this.authHandler = authHandler; - } - - @Override - public ServerCallHeaderAuthMiddleware onCallStarted( - CallInfo callInfo, CallHeaders incomingHeaders, RequestContext context) { - final AuthResult result = authHandler.authenticate(incomingHeaders); - context.put(Auth2Constants.PEER_IDENTITY_KEY, result.getPeerIdentity()); - return new ServerCallHeaderAuthMiddleware(result); - } - } - - private final AuthResult authResult; - - public ServerCallHeaderAuthMiddleware(AuthResult authResult) { - this.authResult = authResult; - } - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) { - authResult.appendToOutgoingHeaders(outgoingHeaders); - } - - @Override - public void onCallCompleted(CallStatus status) {} - - @Override - public void onCallErrored(Throwable err) {} -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/client/ClientCookieMiddleware.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/client/ClientCookieMiddleware.java deleted file mode 100644 index e5eb934001773..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/client/ClientCookieMiddleware.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.client; - -import java.net.HttpCookie; -import java.util.List; -import java.util.Locale; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.stream.Collectors; -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.CallInfo; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightClientMiddleware; -import org.apache.arrow.util.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A client middleware for receiving and sending cookie information. Note that this class will not - * persist permanent cookies beyond the lifetime of this session. - * - *

    This middleware will automatically remove cookies that have expired. Note: Negative - * max-age values currently do not get marked as expired due to a JDK issue. Use max-age=0 to - * explicitly remove an existing cookie. - */ -public class ClientCookieMiddleware implements FlightClientMiddleware { - private static final Logger LOGGER = LoggerFactory.getLogger(ClientCookieMiddleware.class); - - private static final String SET_COOKIE_HEADER = "Set-Cookie"; - private static final String COOKIE_HEADER = "Cookie"; - - private final Factory factory; - - @VisibleForTesting - ClientCookieMiddleware(Factory factory) { - this.factory = factory; - } - - /** Factory used within FlightClient. */ - public static class Factory implements FlightClientMiddleware.Factory { - // Use a map to track the most recent version of a cookie from the server. - // Note that cookie names are case-sensitive (but header names aren't). - private ConcurrentMap cookies = new ConcurrentHashMap<>(); - - @Override - public ClientCookieMiddleware onCallStarted(CallInfo info) { - return new ClientCookieMiddleware(this); - } - - private void updateCookies(Iterable newCookieHeaderValues) { - // Note: Intentionally overwrite existing cookie values. - // A cookie defined once will continue to be used in all subsequent - // requests on the client instance. The server can send the same cookie again - // with a different value and the client will use the new value in future requests. - // The server can also update a cookie to have an Expiry in the past or negative age - // to signal that the client should stop using the cookie immediately. 
- newCookieHeaderValues.forEach( - headerValue -> { - try { - final List parsedCookies = HttpCookie.parse(headerValue); - parsedCookies.forEach( - parsedCookie -> { - final String cookieNameLc = parsedCookie.getName().toLowerCase(Locale.ENGLISH); - if (parsedCookie.hasExpired()) { - cookies.remove(cookieNameLc); - } else { - cookies.put(parsedCookie.getName().toLowerCase(Locale.ENGLISH), parsedCookie); - } - }); - } catch (IllegalArgumentException ex) { - LOGGER.warn( - "Skipping incorrectly formatted Set-Cookie header with value '{}'.", headerValue); - } - }); - } - } - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) { - final String cookieValue = getValidCookiesAsString(); - if (!cookieValue.isEmpty()) { - outgoingHeaders.insert(COOKIE_HEADER, cookieValue); - } - } - - @Override - public void onHeadersReceived(CallHeaders incomingHeaders) { - final Iterable setCookieHeaders = incomingHeaders.getAll(SET_COOKIE_HEADER); - if (setCookieHeaders != null) { - factory.updateCookies(setCookieHeaders); - } - } - - @Override - public void onCallCompleted(CallStatus status) {} - - /** Discards expired cookies and returns the valid cookies as a String delimited by ';'. */ - @VisibleForTesting - String getValidCookiesAsString() { - // Discard expired cookies. 
- factory.cookies.entrySet().removeIf(cookieEntry -> cookieEntry.getValue().hasExpired()); - - // Cookie header value format: - // [=; = cookie.getValue().toString()) - .collect(Collectors.joining("; ")); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/AddWritableBuffer.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/AddWritableBuffer.java deleted file mode 100644 index 6415bd1a2537a..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/AddWritableBuffer.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.grpc; - -import io.netty.buffer.ByteBuf; -import java.io.IOException; -import java.io.OutputStream; -import java.lang.reflect.Constructor; -import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.List; - -/** - * Allow a user to add a ByteBuf based InputStream directly into GRPC WritableBuffer to avoid an - * extra copy. This could be solved in GRPC by adding a ByteBufListable interface on InputStream and - * letting BufferChainOutputStream take advantage of it. 
- */ -public class AddWritableBuffer { - - private static final Constructor bufConstruct; - private static final Field bufferList; - private static final Field current; - private static final Method listAdd; - private static final Class bufChainOut; - - static { - Constructor tmpConstruct = null; - Field tmpBufferList = null; - Field tmpCurrent = null; - Class tmpBufChainOut = null; - Method tmpListAdd = null; - - try { - Class nwb = Class.forName("io.grpc.netty.NettyWritableBuffer"); - - Constructor tmpConstruct2 = nwb.getDeclaredConstructor(ByteBuf.class); - tmpConstruct2.setAccessible(true); - - Class tmpBufChainOut2 = - Class.forName("io.grpc.internal.MessageFramer$BufferChainOutputStream"); - - Field tmpBufferList2 = tmpBufChainOut2.getDeclaredField("bufferList"); - tmpBufferList2.setAccessible(true); - - Field tmpCurrent2 = tmpBufChainOut2.getDeclaredField("current"); - tmpCurrent2.setAccessible(true); - - Method tmpListAdd2 = List.class.getDeclaredMethod("add", Object.class); - - // output fields last. - tmpConstruct = tmpConstruct2; - tmpBufferList = tmpBufferList2; - tmpCurrent = tmpCurrent2; - tmpListAdd = tmpListAdd2; - tmpBufChainOut = tmpBufChainOut2; - - } catch (Exception ex) { - new RuntimeException("Failed to initialize AddWritableBuffer, falling back to slow path", ex) - .printStackTrace(); - } - - bufConstruct = tmpConstruct; - bufferList = tmpBufferList; - current = tmpCurrent; - listAdd = tmpListAdd; - bufChainOut = tmpBufChainOut; - } - - /** - * Add the provided ByteBuf to the gRPC BufferChainOutputStream if possible, else copy the buffer - * to the stream. - * - * @param buf The buffer to add. - * @param stream The Candidate OutputStream to add to. - * @param tryZeroCopy If true, try to zero-copy append the buffer to the stream. This may not - * succeed. - * @return True if buffer was zero-copy added to the stream. False if the buffer was copied. 
- * @throws IOException if the fast path is not enabled and there was an error copying the buffer - * to the stream. - */ - public static boolean add(ByteBuf buf, OutputStream stream, boolean tryZeroCopy) - throws IOException { - if (!tryZeroCopy || !tryAddBuffer(buf, stream)) { - buf.getBytes(0, stream, buf.readableBytes()); - return false; - } - return true; - } - - private static boolean tryAddBuffer(ByteBuf buf, OutputStream stream) throws IOException { - - if (bufChainOut == null) { - return false; - } - - if (!stream.getClass().equals(bufChainOut)) { - return false; - } - - try { - if (current.get(stream) != null) { - return false; - } - - buf.retain(); - Object obj = bufConstruct.newInstance(buf); - Object list = bufferList.get(stream); - listAdd.invoke(list, obj); - current.set(stream, obj); - return true; - } catch (IllegalAccessException - | IllegalArgumentException - | InvocationTargetException - | InstantiationException e) { - e.printStackTrace(); - return false; - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CallCredentialAdapter.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CallCredentialAdapter.java deleted file mode 100644 index f33e9b2f9469f..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CallCredentialAdapter.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.grpc; - -import io.grpc.CallCredentials; -import io.grpc.Metadata; -import java.util.concurrent.Executor; -import java.util.function.Consumer; -import org.apache.arrow.flight.CallHeaders; - -/** Adapter class to utilize a CredentialWriter to implement Grpc CallCredentials. */ -public class CallCredentialAdapter extends CallCredentials { - - private final Consumer credentialWriter; - - public CallCredentialAdapter(Consumer credentialWriter) { - this.credentialWriter = credentialWriter; - } - - @Override - public void applyRequestMetadata( - RequestInfo requestInfo, Executor executor, MetadataApplier metadataApplier) { - executor.execute( - () -> { - final Metadata headers = new Metadata(); - credentialWriter.accept(new MetadataAdapter(headers)); - metadataApplier.apply(headers); - }); - } - - @Override - public void thisUsesUnstableApi() { - // Mandatory to override this to acknowledge that CallCredentials is Experimental. - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ClientInterceptorAdapter.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ClientInterceptorAdapter.java deleted file mode 100644 index ae29fe8f770fb..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ClientInterceptorAdapter.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.grpc; - -import io.grpc.CallOptions; -import io.grpc.Channel; -import io.grpc.ClientCall; -import io.grpc.ClientInterceptor; -import io.grpc.ForwardingClientCall.SimpleForwardingClientCall; -import io.grpc.ForwardingClientCallListener.SimpleForwardingClientCallListener; -import io.grpc.Metadata; -import io.grpc.MethodDescriptor; -import io.grpc.Status; -import io.grpc.StatusRuntimeException; -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.flight.CallInfo; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightClientMiddleware; -import org.apache.arrow.flight.FlightMethod; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.flight.FlightStatusCode; - -/** - * An adapter between Flight client middleware and gRPC interceptors. - * - *

    This is implemented as a single gRPC interceptor that runs all Flight client middleware - * sequentially. - */ -public class ClientInterceptorAdapter implements ClientInterceptor { - - private final List factories; - - public ClientInterceptorAdapter(List factories) { - this.factories = factories; - } - - @Override - public ClientCall interceptCall( - MethodDescriptor method, CallOptions callOptions, Channel next) { - final List middleware = new ArrayList<>(); - final CallInfo info = new CallInfo(FlightMethod.fromProtocol(method.getFullMethodName())); - - try { - for (final FlightClientMiddleware.Factory factory : factories) { - middleware.add(factory.onCallStarted(info)); - } - } catch (FlightRuntimeException e) { - // Explicitly propagate - throw e; - } catch (StatusRuntimeException e) { - throw StatusUtils.fromGrpcRuntimeException(e); - } catch (RuntimeException e) { - throw StatusUtils.fromThrowable(e); - } - return new FlightClientCall<>(next.newCall(method, callOptions), middleware); - } - - /** - * The ClientCallListener which hooks into the gRPC request cycle and actually runs middleware at - * certain points. 
- */ - private static class FlightClientCallListener - extends SimpleForwardingClientCallListener { - - private final List middleware; - boolean receivedHeaders; - - public FlightClientCallListener( - ClientCall.Listener responseListener, List middleware) { - super(responseListener); - this.middleware = middleware; - receivedHeaders = false; - } - - @Override - public void onHeaders(Metadata headers) { - receivedHeaders = true; - final MetadataAdapter adapter = new MetadataAdapter(headers); - try { - middleware.forEach(m -> m.onHeadersReceived(adapter)); - } finally { - // Make sure to always call the gRPC callback to avoid interrupting the gRPC request cycle - super.onHeaders(headers); - } - } - - @Override - public void onClose(Status status, Metadata trailers) { - try { - if (!receivedHeaders) { - // gRPC doesn't always send response headers if the call errors or completes immediately, - // but instead - // consolidates them with the trailers. If we never got headers, assume this happened and - // run the header - // callback with the trailers. - final MetadataAdapter adapter = new MetadataAdapter(trailers); - middleware.forEach(m -> m.onHeadersReceived(adapter)); - } - final CallStatus flightStatus = StatusUtils.fromGrpcStatusAndTrailers(status, trailers); - middleware.forEach(m -> m.onCallCompleted(flightStatus)); - } finally { - // Make sure to always call the gRPC callback to avoid interrupting the gRPC request cycle - super.onClose(status, trailers); - } - } - } - - /** - * The gRPC ClientCall which hooks into the gRPC request cycle and injects our ClientCallListener. 
- */ - private static class FlightClientCall - extends SimpleForwardingClientCall { - - private final List middleware; - - public FlightClientCall( - ClientCall clientCall, List middleware) { - super(clientCall); - this.middleware = middleware; - } - - @Override - public void start(Listener responseListener, Metadata headers) { - final MetadataAdapter metadataAdapter = new MetadataAdapter(headers); - middleware.forEach(m -> m.onBeforeSendingHeaders(metadataAdapter)); - - super.start(new FlightClientCallListener<>(responseListener, middleware), headers); - } - - @Override - public void cancel(String message, Throwable cause) { - final CallStatus flightStatus = - new CallStatus(FlightStatusCode.CANCELLED, cause, message, null); - middleware.forEach(m -> m.onCallCompleted(flightStatus)); - super.cancel(message, cause); - } - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ContextPropagatingExecutorService.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ContextPropagatingExecutorService.java deleted file mode 100644 index d0187bbec846a..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ContextPropagatingExecutorService.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.grpc; - -import io.grpc.Context; -import java.util.Collection; -import java.util.List; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; -import java.util.stream.Collectors; - -/** - * An {@link ExecutorService} that propagates the {@link Context}. - * - *

    Context is used to propagate per-call state, like the authenticated user, between threads (as - * gRPC makes no guarantees about what thread things execute on). This wrapper makes it easy to - * preserve this when using an Executor. The Context itself is immutable, so it is thread-safe. - */ -public class ContextPropagatingExecutorService implements ExecutorService { - - private final ExecutorService delegate; - - public ContextPropagatingExecutorService(ExecutorService delegate) { - this.delegate = delegate; - } - - // These are just delegate methods. - - @Override - public void shutdown() { - delegate.shutdown(); - } - - @Override - public List shutdownNow() { - return delegate.shutdownNow(); - } - - @Override - public boolean isShutdown() { - return delegate.isShutdown(); - } - - @Override - public boolean isTerminated() { - return delegate.isTerminated(); - } - - @Override - public boolean awaitTermination(long timeout, TimeUnit unit) throws InterruptedException { - return delegate.awaitTermination(timeout, unit); - } - - // These are delegate methods that wrap the submitted task in the current gRPC Context. 
- - @Override - public Future submit(Callable task) { - return delegate.submit(Context.current().wrap(task)); - } - - @Override - public Future submit(Runnable task, T result) { - return delegate.submit(Context.current().wrap(task), result); - } - - @Override - public Future submit(Runnable task) { - return delegate.submit(Context.current().wrap(task)); - } - - @Override - public List> invokeAll(Collection> tasks) - throws InterruptedException { - return delegate.invokeAll( - tasks.stream().map(Context.current()::wrap).collect(Collectors.toList())); - } - - @Override - public List> invokeAll( - Collection> tasks, long timeout, TimeUnit unit) - throws InterruptedException { - return delegate.invokeAll( - tasks.stream().map(Context.current()::wrap).collect(Collectors.toList()), timeout, unit); - } - - @Override - public T invokeAny(Collection> tasks) - throws InterruptedException, ExecutionException { - return delegate.invokeAny( - tasks.stream().map(Context.current()::wrap).collect(Collectors.toList())); - } - - @Override - public T invokeAny(Collection> tasks, long timeout, TimeUnit unit) - throws InterruptedException, ExecutionException, TimeoutException { - return delegate.invokeAny( - tasks.stream().map(Context.current()::wrap).collect(Collectors.toList()), timeout, unit); - } - - @Override - public void execute(Runnable command) { - delegate.execute(Context.current().wrap(command)); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CredentialCallOption.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CredentialCallOption.java deleted file mode 100644 index 11a7670849e33..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/CredentialCallOption.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.grpc; - -import io.grpc.stub.AbstractStub; -import java.util.function.Consumer; -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.CallOptions; - -/** Method option for supplying credentials to method calls. */ -public class CredentialCallOption implements CallOptions.GrpcCallOption { - private final Consumer credentialWriter; - - public CredentialCallOption(Consumer credentialWriter) { - this.credentialWriter = credentialWriter; - } - - @Override - public > T wrapStub(T stub) { - return stub.withCallCredentials(new CallCredentialAdapter(credentialWriter)); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/GetReadableBuffer.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/GetReadableBuffer.java deleted file mode 100644 index 45c32a86c6a78..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/GetReadableBuffer.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.grpc; - -import com.google.common.base.Throwables; -import com.google.common.io.ByteStreams; -import io.grpc.internal.ReadableBuffer; -import java.io.IOException; -import java.io.InputStream; -import java.lang.reflect.Field; -import org.apache.arrow.memory.ArrowBuf; - -/** - * Enable access to ReadableBuffer directly to copy data from a BufferInputStream into a target - * ByteBuffer/ByteBuf. - * - *

    This could be solved by BufferInputStream exposing Drainable. - */ -public class GetReadableBuffer { - - private static final Field READABLE_BUFFER; - private static final Class BUFFER_INPUT_STREAM; - - static { - Field tmpField = null; - Class tmpClazz = null; - try { - Class clazz = Class.forName("io.grpc.internal.ReadableBuffers$BufferInputStream"); - - Field f = clazz.getDeclaredField("buffer"); - f.setAccessible(true); - // don't set until we've gotten past all exception cases. - tmpField = f; - tmpClazz = clazz; - } catch (Exception e) { - new RuntimeException("Failed to initialize GetReadableBuffer, falling back to slow path", e) - .printStackTrace(); - } - READABLE_BUFFER = tmpField; - BUFFER_INPUT_STREAM = tmpClazz; - } - - /** - * Extracts the ReadableBuffer for the given input stream. - * - * @param is Must be an instance of io.grpc.internal.ReadableBuffers$BufferInputStream or null - * will be returned. - */ - public static ReadableBuffer getReadableBuffer(InputStream is) { - - if (BUFFER_INPUT_STREAM == null || !is.getClass().equals(BUFFER_INPUT_STREAM)) { - return null; - } - - try { - return (ReadableBuffer) READABLE_BUFFER.get(is); - } catch (Exception ex) { - throw Throwables.propagate(ex); - } - } - - /** - * Helper method to read a gRPC-provided InputStream into an ArrowBuf. - * - * @param stream The stream to read from. Should be an instance of {@link #BUFFER_INPUT_STREAM}. - * @param buf The buffer to read into. - * @param size The number of bytes to read. - * @param fastPath Whether to enable the fast path (i.e. detect whether the stream is a {@link - * #BUFFER_INPUT_STREAM}). - * @throws IOException if there is an error reading form the stream - */ - public static void readIntoBuffer( - final InputStream stream, final ArrowBuf buf, final int size, final boolean fastPath) - throws IOException { - ReadableBuffer readableBuffer = fastPath ? 
getReadableBuffer(stream) : null; - if (readableBuffer != null) { - readableBuffer.readBytes(buf.nioBuffer(0, size)); - } else { - byte[] heapBytes = new byte[size]; - ByteStreams.readFully(stream, heapBytes); - buf.writeBytes(heapBytes); - } - buf.writerIndex(size); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/MetadataAdapter.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/MetadataAdapter.java deleted file mode 100644 index a1de16ede6502..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/MetadataAdapter.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.grpc; - -import io.grpc.Metadata; -import java.nio.charset.StandardCharsets; -import java.util.HashSet; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; -import org.apache.arrow.flight.CallHeaders; - -/** - * A mutable adapter between the gRPC Metadata object and the Flight headers interface. - * - *

    This allows us to present the headers (metadata) from gRPC without copying to/from our own - * object. - */ -public class MetadataAdapter implements CallHeaders { - - private final Metadata metadata; - - public MetadataAdapter(Metadata metadata) { - this.metadata = metadata; - } - - @Override - public String get(String key) { - return this.metadata.get(Metadata.Key.of(key, Metadata.ASCII_STRING_MARSHALLER)); - } - - @Override - public byte[] getByte(String key) { - if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) { - return this.metadata.get(Metadata.Key.of(key, Metadata.BINARY_BYTE_MARSHALLER)); - } - return get(key).getBytes(StandardCharsets.UTF_8); - } - - @Override - public Iterable getAll(String key) { - return this.metadata.getAll(Metadata.Key.of(key, Metadata.ASCII_STRING_MARSHALLER)); - } - - @Override - public Iterable getAllByte(String key) { - if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) { - return this.metadata.getAll(Metadata.Key.of(key, Metadata.BINARY_BYTE_MARSHALLER)); - } - return StreamSupport.stream(getAll(key).spliterator(), false) - .map(String::getBytes) - .collect(Collectors.toList()); - } - - @Override - public void insert(String key, String value) { - this.metadata.put(Metadata.Key.of(key, Metadata.ASCII_STRING_MARSHALLER), value); - } - - @Override - public void insert(String key, byte[] value) { - this.metadata.put(Metadata.Key.of(key, Metadata.BINARY_BYTE_MARSHALLER), value); - } - - @Override - public Set keys() { - return new HashSet<>(this.metadata.keys()); - } - - @Override - public boolean containsKey(String key) { - if (key.endsWith("-bin")) { - final Metadata.Key grpcKey = Metadata.Key.of(key, Metadata.BINARY_BYTE_MARSHALLER); - return this.metadata.containsKey(grpcKey); - } - final Metadata.Key grpcKey = Metadata.Key.of(key, Metadata.ASCII_STRING_MARSHALLER); - return this.metadata.containsKey(grpcKey); - } - - @Override - public String toString() { - return this.metadata.toString(); - } -} diff --git 
a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/RequestContextAdapter.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/RequestContextAdapter.java deleted file mode 100644 index 978a5b9ac528e..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/RequestContextAdapter.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.grpc; - -import io.grpc.Context; -import java.util.HashMap; -import java.util.Set; -import org.apache.arrow.flight.RequestContext; - -/** Adapter for holding key value pairs. 
*/ -public class RequestContextAdapter implements RequestContext { - public static final Context.Key REQUEST_CONTEXT_KEY = - Context.key("arrow-flight-request-context"); - private final HashMap map = new HashMap<>(); - - @Override - public void put(String key, String value) { - if (map.putIfAbsent(key, value) != null) { - throw new IllegalArgumentException( - "Duplicate write to a RequestContext at key " + key + " not allowed."); - } - } - - @Override - public String get(String key) { - return map.get(key); - } - - @Override - public Set keySet() { - return map.keySet(); - } - - @Override - public String remove(String key) { - return map.remove(key); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ServerBackpressureThresholdInterceptor.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ServerBackpressureThresholdInterceptor.java deleted file mode 100644 index ca0d40e1abfda..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ServerBackpressureThresholdInterceptor.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.grpc; - -import io.grpc.Metadata; -import io.grpc.ServerCall; -import io.grpc.ServerCallHandler; -import io.grpc.ServerInterceptor; - -/** - * An interceptor for specifying the number of bytes that can be queued before a call with an output - * stream gets blocked by backpressure. - */ -public class ServerBackpressureThresholdInterceptor implements ServerInterceptor { - - private final int numBytes; - - public ServerBackpressureThresholdInterceptor(int numBytes) { - this.numBytes = numBytes; - } - - @Override - public ServerCall.Listener interceptCall( - ServerCall call, Metadata headers, ServerCallHandler next) { - call.setOnReadyThreshold(numBytes); - return next.startCall(call, headers); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ServerInterceptorAdapter.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ServerInterceptorAdapter.java deleted file mode 100644 index e7a79b9f7b685..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/ServerInterceptorAdapter.java +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.grpc; - -import io.grpc.Context; -import io.grpc.Contexts; -import io.grpc.ForwardingServerCall.SimpleForwardingServerCall; -import io.grpc.Metadata; -import io.grpc.ServerCall; -import io.grpc.ServerCall.Listener; -import io.grpc.ServerCallHandler; -import io.grpc.ServerInterceptor; -import io.grpc.Status; -import java.util.ArrayList; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import org.apache.arrow.flight.CallInfo; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightConstants; -import org.apache.arrow.flight.FlightMethod; -import org.apache.arrow.flight.FlightProducer.CallContext; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.flight.FlightServerMiddleware; -import org.apache.arrow.flight.FlightServerMiddleware.Key; - -/** - * An adapter between Flight middleware and a gRPC interceptor. - * - *

    This is implemented as a single gRPC interceptor that runs all Flight server middleware - * sequentially. Flight middleware instances are stored in the gRPC Context so their state is - * accessible later. - */ -public class ServerInterceptorAdapter implements ServerInterceptor { - - /** - * A combination of a middleware Key and factory. - * - * @param The middleware type. - */ - public static class KeyFactory { - - private final FlightServerMiddleware.Key key; - private final FlightServerMiddleware.Factory factory; - - public KeyFactory( - FlightServerMiddleware.Key key, FlightServerMiddleware.Factory factory) { - this.key = key; - this.factory = factory; - } - } - - /** - * The {@link Context.Key} that stores the Flight middleware active for a particular call. - * - *

    Applications should not use this directly. Instead, see {@link - * CallContext#getMiddleware(Key)}. - */ - public static final Context.Key, FlightServerMiddleware>> - SERVER_MIDDLEWARE_KEY = Context.key("arrow.flight.server_middleware"); - - private final List> factories; - - public ServerInterceptorAdapter(List> factories) { - this.factories = factories; - } - - @Override - public Listener interceptCall( - ServerCall call, Metadata headers, ServerCallHandler next) { - if (!FlightConstants.SERVICE.equals(call.getMethodDescriptor().getServiceName())) { - return Contexts.interceptCall(Context.current(), call, headers, next); - } - - final CallInfo info = - new CallInfo(FlightMethod.fromProtocol(call.getMethodDescriptor().getFullMethodName())); - final List middleware = new ArrayList<>(); - // Use LinkedHashMap to preserve insertion order - final Map, FlightServerMiddleware> middlewareMap = - new LinkedHashMap<>(); - final MetadataAdapter headerAdapter = new MetadataAdapter(headers); - final RequestContextAdapter requestContextAdapter = new RequestContextAdapter(); - for (final KeyFactory factory : factories) { - final FlightServerMiddleware m; - try { - m = factory.factory.onCallStarted(info, headerAdapter, requestContextAdapter); - } catch (FlightRuntimeException e) { - // Cancel call - call.close(StatusUtils.toGrpcStatus(e.status()), new Metadata()); - return new Listener() {}; - } - middleware.add(m); - middlewareMap.put(factory.key, m); - } - - // Inject the middleware into the context so RPC method implementations can communicate with - // middleware instances - final Context contextWithMiddlewareAndRequestsOptions = - Context.current() - .withValue(SERVER_MIDDLEWARE_KEY, Collections.unmodifiableMap(middlewareMap)) - .withValue(RequestContextAdapter.REQUEST_CONTEXT_KEY, requestContextAdapter); - - final SimpleForwardingServerCall forwardingServerCall = - new SimpleForwardingServerCall(call) { - boolean sentHeaders = false; - - @Override - public void 
sendHeaders(Metadata headers) { - sentHeaders = true; - try { - final MetadataAdapter headerAdapter = new MetadataAdapter(headers); - middleware.forEach(m -> m.onBeforeSendingHeaders(headerAdapter)); - } finally { - // Make sure to always call the gRPC callback to avoid interrupting the gRPC request - // cycle - super.sendHeaders(headers); - } - } - - @Override - public void close(Status status, Metadata trailers) { - try { - if (!sentHeaders) { - // gRPC doesn't always send response headers if the call errors or completes - // immediately - final MetadataAdapter headerAdapter = new MetadataAdapter(trailers); - middleware.forEach(m -> m.onBeforeSendingHeaders(headerAdapter)); - } - } finally { - // Make sure to always call the gRPC callback to avoid interrupting the gRPC request - // cycle - super.close(status, trailers); - } - - final CallStatus flightStatus = StatusUtils.fromGrpcStatus(status); - middleware.forEach(m -> m.onCallCompleted(flightStatus)); - } - }; - return Contexts.interceptCall( - contextWithMiddlewareAndRequestsOptions, forwardingServerCall, headers, next); - } -} diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java deleted file mode 100644 index 1dd724477d6e7..0000000000000 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.grpc; - -import io.grpc.InternalMetadata; -import io.grpc.Metadata; -import io.grpc.Status; -import io.grpc.Status.Code; -import io.grpc.StatusException; -import io.grpc.StatusRuntimeException; -import java.nio.charset.StandardCharsets; -import java.util.Iterator; -import java.util.Objects; -import java.util.function.Function; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.ErrorFlightMetadata; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.flight.FlightStatusCode; - -/** - * Utilities to adapt gRPC and Flight status objects. - * - *

    NOT A PUBLIC CLASS, interface is not guaranteed to remain stable. - */ -public class StatusUtils { - - private StatusUtils() { - throw new AssertionError("Do not instantiate this class."); - } - - /** Convert from a Flight status code to a gRPC status code. */ - public static Status.Code toGrpcStatusCode(FlightStatusCode code) { - switch (code) { - case OK: - return Code.OK; - case UNKNOWN: - return Code.UNKNOWN; - case INTERNAL: - return Code.INTERNAL; - case INVALID_ARGUMENT: - return Code.INVALID_ARGUMENT; - case TIMED_OUT: - return Code.DEADLINE_EXCEEDED; - case NOT_FOUND: - return Code.NOT_FOUND; - case ALREADY_EXISTS: - return Code.ALREADY_EXISTS; - case CANCELLED: - return Code.CANCELLED; - case UNAUTHENTICATED: - return Code.UNAUTHENTICATED; - case UNAUTHORIZED: - return Code.PERMISSION_DENIED; - case UNIMPLEMENTED: - return Code.UNIMPLEMENTED; - case UNAVAILABLE: - return Code.UNAVAILABLE; - case RESOURCE_EXHAUSTED: - return Code.RESOURCE_EXHAUSTED; - default: - return Code.UNKNOWN; - } - } - - /** Convert from a gRPC status code to a Flight status code. 
*/ - public static FlightStatusCode fromGrpcStatusCode(Status.Code code) { - switch (code) { - case OK: - return FlightStatusCode.OK; - case CANCELLED: - return FlightStatusCode.CANCELLED; - case UNKNOWN: - return FlightStatusCode.UNKNOWN; - case INVALID_ARGUMENT: - return FlightStatusCode.INVALID_ARGUMENT; - case DEADLINE_EXCEEDED: - return FlightStatusCode.TIMED_OUT; - case NOT_FOUND: - return FlightStatusCode.NOT_FOUND; - case ALREADY_EXISTS: - return FlightStatusCode.ALREADY_EXISTS; - case PERMISSION_DENIED: - return FlightStatusCode.UNAUTHORIZED; - case RESOURCE_EXHAUSTED: - return FlightStatusCode.RESOURCE_EXHAUSTED; - case FAILED_PRECONDITION: - return FlightStatusCode.INVALID_ARGUMENT; - case ABORTED: - return FlightStatusCode.INTERNAL; - case OUT_OF_RANGE: - return FlightStatusCode.INVALID_ARGUMENT; - case UNIMPLEMENTED: - return FlightStatusCode.UNIMPLEMENTED; - case INTERNAL: - return FlightStatusCode.INTERNAL; - case UNAVAILABLE: - return FlightStatusCode.UNAVAILABLE; - case DATA_LOSS: - return FlightStatusCode.INTERNAL; - case UNAUTHENTICATED: - return FlightStatusCode.UNAUTHENTICATED; - default: - return FlightStatusCode.UNKNOWN; - } - } - - /** Create Metadata Key for binary metadata. */ - static Metadata.Key keyOfBinary(String name) { - return Metadata.Key.of(name, Metadata.BINARY_BYTE_MARSHALLER); - } - - /** Create Metadata Key for ascii metadata. */ - static Metadata.Key keyOfAscii(String name) { - // Use InternalMetadata for keys that start with ":", e.g. ":status". See ARROW-14014. - return InternalMetadata.keyOf(name, Metadata.ASCII_STRING_MARSHALLER); - } - - /** Convert from a gRPC Status & trailers to a Flight status. */ - public static CallStatus fromGrpcStatusAndTrailers(Status status, Metadata trailers) { - // gRPC may not always have trailers - this happens when the server internally generates an - // error, which is rare, - // but can happen. - final ErrorFlightMetadata errorMetadata = trailers == null ? 
null : parseTrailers(trailers); - return new CallStatus( - fromGrpcStatusCode(status.getCode()), - status.getCause(), - status.getDescription(), - errorMetadata); - } - - /** Convert from a gRPC status to a Flight status. */ - public static CallStatus fromGrpcStatus(Status status) { - return new CallStatus( - fromGrpcStatusCode(status.getCode()), status.getCause(), status.getDescription(), null); - } - - /** Convert from a Flight status to a gRPC status. */ - public static Status toGrpcStatus(CallStatus status) { - return toGrpcStatusCode(status.code()) - .toStatus() - .withDescription(status.description()) - .withCause(status.cause()); - } - - /** Convert from a gRPC exception to a Flight exception. */ - public static FlightRuntimeException fromGrpcRuntimeException(StatusRuntimeException sre) { - return fromGrpcStatusAndTrailers(sre.getStatus(), sre.getTrailers()).toRuntimeException(); - } - - /** Convert gRPC trailers into Flight error metadata. */ - private static ErrorFlightMetadata parseTrailers(Metadata trailers) { - ErrorFlightMetadata metadata = new ErrorFlightMetadata(); - for (String key : trailers.keys()) { - if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) { - metadata.insert(key, trailers.get(keyOfBinary(key))); - } else { - metadata.insert( - key, - Objects.requireNonNull(trailers.get(keyOfAscii(key))).getBytes(StandardCharsets.UTF_8)); - } - } - return metadata; - } - - /** Convert arbitrary exceptions to a {@link FlightRuntimeException}. */ - public static FlightRuntimeException fromThrowable(Throwable t) { - if (t instanceof StatusRuntimeException) { - return fromGrpcRuntimeException((StatusRuntimeException) t); - } else if (t instanceof FlightRuntimeException) { - return (FlightRuntimeException) t; - } - return CallStatus.UNKNOWN.withCause(t).withDescription(t.getMessage()).toRuntimeException(); - } - - /** - * Convert arbitrary exceptions to a {@link StatusRuntimeException} or {@link StatusException}. - * - *

    Such exceptions can be passed to {@link io.grpc.stub.StreamObserver#onError(Throwable)} and - * will give the client a reasonable error message. - */ - public static Throwable toGrpcException(Throwable ex) { - if (ex instanceof StatusRuntimeException) { - return ex; - } else if (ex instanceof StatusException) { - return ex; - } else if (ex instanceof FlightRuntimeException) { - final FlightRuntimeException fre = (FlightRuntimeException) ex; - if (fre.status().metadata() != null) { - Metadata trailers = toGrpcMetadata(fre.status().metadata()); - return new StatusRuntimeException(toGrpcStatus(fre.status()), trailers); - } - return toGrpcStatus(fre.status()).asRuntimeException(); - } - return Status.INTERNAL - .withCause(ex) - .withDescription("There was an error servicing your request.") - .asRuntimeException(); - } - - private static Metadata toGrpcMetadata(ErrorFlightMetadata metadata) { - final Metadata trailers = new Metadata(); - for (final String key : metadata.keys()) { - if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) { - trailers.put(keyOfBinary(key), metadata.getByte(key)); - } else { - trailers.put(keyOfAscii(key), metadata.get(key)); - } - } - return trailers; - } - - /** - * Maps a transformation function to the elements of an iterator, while wrapping exceptions in - * {@link FlightRuntimeException}. 
- */ - public static Iterator wrapIterator( - Iterator fromIterator, Function transformer) { - Objects.requireNonNull(fromIterator); - Objects.requireNonNull(transformer); - return new Iterator() { - @Override - public boolean hasNext() { - try { - return fromIterator.hasNext(); - } catch (StatusRuntimeException e) { - throw fromGrpcRuntimeException(e); - } - } - - @Override - public TO next() { - try { - return transformer.apply(fromIterator.next()); - } catch (StatusRuntimeException e) { - throw fromGrpcRuntimeException(e); - } - } - }; - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/FlightTestUtil.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/FlightTestUtil.java deleted file mode 100644 index 767687a51785e..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/FlightTestUtil.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.io.File; -import java.lang.reflect.InvocationTargetException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.vector.test.util.ArrowTestDataUtil; -import org.junit.jupiter.api.function.Executable; - -/** Utility methods and constants for testing flight servers. */ -public class FlightTestUtil { - - public static final String LOCALHOST = "localhost"; - - static Path getFlightTestDataRoot() { - return ArrowTestDataUtil.getTestDataRoot().resolve("flight"); - } - - static Path exampleTlsRootCert() { - return getFlightTestDataRoot().resolve("root-ca.pem"); - } - - static List exampleTlsCerts() { - final Path root = getFlightTestDataRoot(); - final Path cert0Pem = root.resolve("cert0.pem"); - if (!Files.exists(cert0Pem)) { - throw new RuntimeException( - cert0Pem - + " doesn't exist. 
Make sure submodules are initialized (see https://arrow.apache.org/docs/dev/developers/java/building.html#building)"); - } - return Arrays.asList( - new CertKeyPair(cert0Pem.toFile(), root.resolve("cert0.pkcs1").toFile()), - new CertKeyPair(root.resolve("cert1.pem").toFile(), root.resolve("cert1.pkcs1").toFile())); - } - - static boolean isEpollAvailable() { - try { - Class epoll = Class.forName("io.netty.channel.epoll.Epoll"); - return (Boolean) epoll.getMethod("isAvailable").invoke(null); - } catch (ClassNotFoundException - | NoSuchMethodException - | IllegalAccessException - | InvocationTargetException e) { - return false; - } - } - - static boolean isKqueueAvailable() { - try { - Class kqueue = Class.forName("io.netty.channel.kqueue.KQueue"); - return (Boolean) kqueue.getMethod("isAvailable").invoke(null); - } catch (ClassNotFoundException - | NoSuchMethodException - | IllegalAccessException - | InvocationTargetException e) { - return false; - } - } - - static boolean isNativeTransportAvailable() { - return isEpollAvailable() || isKqueueAvailable(); - } - - /** - * Assert that the given runnable fails with a Flight exception of the given code. - * - * @param code The expected Flight status code. - * @param r The code to run. - * @return The thrown status. 
- */ - public static CallStatus assertCode(FlightStatusCode code, Executable r) { - final FlightRuntimeException ex = assertThrows(FlightRuntimeException.class, r); - assertEquals(code, ex.status().code()); - return ex.status(); - } - - public static class CertKeyPair { - - public final File cert; - public final File key; - - public CertKeyPair(File cert, File key) { - this.cert = cert; - this.key = key; - } - } - - private FlightTestUtil() {} -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestActionType.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestActionType.java deleted file mode 100644 index 03a7835d55ce9..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestActionType.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.junit.jupiter.api.Test; - -public class TestActionType { - @Test - void testActionType() { - final String type = "MyType"; - final String description = "My Description"; - final ActionType actionType = new ActionType(type, description); - assertEquals(type, actionType.getType()); - assertEquals(description, actionType.getDescription()); - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestApplicationMetadata.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestApplicationMetadata.java deleted file mode 100644 index d8b1f6f05f880..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestApplicationMetadata.java +++ /dev/null @@ -1,353 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Collections; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.function.BiConsumer; -import org.apache.arrow.flight.FlightClient.PutListener; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -/** Tests for application-specific metadata support in Flight. */ -public class TestApplicationMetadata { - - // The command used to trigger the test for ARROW-6136. - private static final byte[] COMMAND_ARROW_6136 = "ARROW-6136".getBytes(StandardCharsets.UTF_8); - // The expected error message. - private static final String MESSAGE_ARROW_6136 = "The stream should not be double-closed."; - - /** Ensure that a client can read the metadata sent from the server. */ - @Test - // This test is consistently flaky on CI, unfortunately. 
- @Disabled - public void retrieveMetadata() { - test( - (allocator, client) -> { - try (final FlightStream stream = client.getStream(new Ticket(new byte[0]))) { - byte i = 0; - while (stream.next()) { - final IntVector vector = (IntVector) stream.getRoot().getVector("a"); - assertEquals(1, vector.getValueCount()); - assertEquals(10, vector.get(0)); - assertEquals(i, stream.getLatestMetadata().getByte(0)); - i++; - } - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - } - - /** - * ARROW-6136: make sure that the Flight implementation doesn't double-close the server-to-client - * stream. - */ - @Test - public void arrow6136() { - final Schema schema = new Schema(Collections.emptyList()); - test( - (allocator, client) -> { - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final FlightDescriptor descriptor = FlightDescriptor.command(COMMAND_ARROW_6136); - - final PutListener listener = new SyncPutListener(); - final FlightClient.ClientStreamListener writer = - client.startPut(descriptor, root, listener); - // Must attempt to retrieve the result to get any server-side errors. - final CallStatus status = - FlightTestUtil.assertCode(FlightStatusCode.INTERNAL, writer::getResult); - assertEquals(MESSAGE_ARROW_6136, status.description()); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - } - - /** Ensure that a client can send metadata to the server. 
*/ - @Test - @Disabled - public void uploadMetadataAsync() { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true)))); - test( - (allocator, client) -> { - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final FlightDescriptor descriptor = FlightDescriptor.path("test"); - - final PutListener listener = - new AsyncPutListener() { - int counter = 0; - - @Override - public void onNext(PutResult val) { - assertNotNull(val); - assertEquals(counter, val.getApplicationMetadata().getByte(0)); - counter++; - } - }; - final FlightClient.ClientStreamListener writer = - client.startPut(descriptor, root, listener); - - root.allocateNew(); - for (byte i = 0; i < 10; i++) { - final IntVector vector = (IntVector) root.getVector("a"); - final ArrowBuf metadata = allocator.buffer(1); - metadata.writeByte(i); - vector.set(0, 10); - vector.setValueCount(1); - root.setRowCount(1); - writer.putNext(metadata); - } - writer.completed(); - // Must attempt to retrieve the result to get any server-side errors. - writer.getResult(); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - } - - /** Ensure that a client can send metadata to the server. Uses the synchronous API. 
*/ - @Test - @Disabled - public void uploadMetadataSync() { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true)))); - test( - (allocator, client) -> { - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator); - final SyncPutListener listener = new SyncPutListener()) { - final FlightDescriptor descriptor = FlightDescriptor.path("test"); - final FlightClient.ClientStreamListener writer = - client.startPut(descriptor, root, listener); - - root.allocateNew(); - for (byte i = 0; i < 10; i++) { - final IntVector vector = (IntVector) root.getVector("a"); - final ArrowBuf metadata = allocator.buffer(1); - metadata.writeByte(i); - vector.set(0, 10); - vector.setValueCount(1); - root.setRowCount(1); - writer.putNext(metadata); - try (final PutResult message = listener.poll(5000, TimeUnit.SECONDS)) { - assertNotNull(message); - assertEquals(i, message.getApplicationMetadata().getByte(0)); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); - } - } - writer.completed(); - // Must attempt to retrieve the result to get any server-side errors. - writer.getResult(); - } - }); - } - - /** Make sure that a {@link SyncPutListener} properly reclaims memory if ignored. 
*/ - @Test - @Disabled - public void syncMemoryReclaimed() { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true)))); - test( - (allocator, client) -> { - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator); - final SyncPutListener listener = new SyncPutListener()) { - final FlightDescriptor descriptor = FlightDescriptor.path("test"); - final FlightClient.ClientStreamListener writer = - client.startPut(descriptor, root, listener); - - root.allocateNew(); - for (byte i = 0; i < 10; i++) { - final IntVector vector = (IntVector) root.getVector("a"); - final ArrowBuf metadata = allocator.buffer(1); - metadata.writeByte(i); - vector.set(0, 10); - vector.setValueCount(1); - root.setRowCount(1); - writer.putNext(metadata); - } - writer.completed(); - // Must attempt to retrieve the result to get any server-side errors. - writer.getResult(); - } - }); - } - - /** - * ARROW-9221: Flight copies metadata from the byte buffer of a Protobuf ByteString, which is in - * big-endian by default, thus mangling metadata. 
- */ - @Test - public void testMetadataEndianness() throws Exception { - try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final BufferAllocator serverAllocator = - allocator.newChildAllocator("flight-server", 0, Long.MAX_VALUE); - final FlightServer server = - FlightServer.builder( - serverAllocator, - forGrpcInsecure(LOCALHOST, 0), - new EndianFlightProducer(serverAllocator)) - .build() - .start(); - final FlightClient client = FlightClient.builder(allocator, server.getLocation()).build()) { - final Schema schema = new Schema(Collections.emptyList()); - final FlightDescriptor descriptor = FlightDescriptor.command(new byte[0]); - try (final SyncPutListener reader = new SyncPutListener(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final FlightClient.ClientStreamListener writer = client.startPut(descriptor, root, reader); - writer.completed(); - try (final PutResult metadata = reader.read()) { - assertEquals(16, metadata.getApplicationMetadata().readableBytes()); - byte[] bytes = new byte[16]; - metadata.getApplicationMetadata().readBytes(bytes); - assertArrayEquals(EndianFlightProducer.EXPECTED_BYTES, bytes); - } - writer.getResult(); - } - } - } - - private void test(BiConsumer fun) { - try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final FlightServer s = - FlightServer.builder( - allocator, forGrpcInsecure(LOCALHOST, 0), new MetadataFlightProducer(allocator)) - .build() - .start(); - final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) { - fun.accept(allocator, client); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - /** A FlightProducer that always produces a fixed data stream with metadata on the side. 
*/ - private static class MetadataFlightProducer extends NoOpFlightProducer { - - private final BufferAllocator allocator; - - public MetadataFlightProducer(BufferAllocator allocator) { - this.allocator = allocator; - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true)))); - try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - root.allocateNew(); - listener.start(root); - for (byte i = 0; i < 10; i++) { - final IntVector vector = (IntVector) root.getVector("a"); - vector.set(0, 10); - vector.setValueCount(1); - root.setRowCount(1); - final ArrowBuf metadata = allocator.buffer(1); - metadata.writeByte(i); - listener.putNext(metadata); - } - listener.completed(); - } - } - - @Override - public Runnable acceptPut( - CallContext context, FlightStream stream, StreamListener ackStream) { - return () -> { - // Wait for the descriptor to be sent - stream.getRoot(); - if (stream.getDescriptor().isCommand() - && Arrays.equals(stream.getDescriptor().getCommand(), COMMAND_ARROW_6136)) { - // ARROW-6136: Try closing the stream - ackStream.onError( - CallStatus.INTERNAL.withDescription(MESSAGE_ARROW_6136).toRuntimeException()); - return; - } - try { - byte current = 0; - while (stream.next()) { - final ArrowBuf metadata = stream.getLatestMetadata(); - if (current != metadata.getByte(0)) { - ackStream.onError( - CallStatus.INVALID_ARGUMENT - .withDescription( - String.format( - "Metadata does not match expected value; got %d but expected %d.", - metadata.getByte(0), current)) - .toRuntimeException()); - return; - } - ackStream.onNext(PutResult.metadata(metadata)); - current++; - } - if (current != 10) { - throw CallStatus.INVALID_ARGUMENT - .withDescription("Wrong number of messages sent.") - .toRuntimeException(); - } - } catch (Exception e) { - throw 
CallStatus.INTERNAL.withCause(e).withDescription(e.toString()).toRuntimeException(); - } - }; - } - } - - private static class EndianFlightProducer extends NoOpFlightProducer { - static final byte[] EXPECTED_BYTES = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - private final BufferAllocator allocator; - - private EndianFlightProducer(BufferAllocator allocator) { - this.allocator = allocator; - } - - @Override - public Runnable acceptPut( - CallContext context, FlightStream flightStream, StreamListener ackStream) { - return () -> { - while (flightStream.next()) { - // Ignore any data - } - - try (final ArrowBuf buf = allocator.buffer(16)) { - buf.writeBytes(EXPECTED_BYTES); - ackStream.onNext(PutResult.metadata(buf)); - } - ackStream.onCompleted(); - }; - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestAuth.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestAuth.java deleted file mode 100644 index bd582e29a4332..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestAuth.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.util.Iterator; -import java.util.Optional; -import org.apache.arrow.flight.auth.ClientAuthHandler; -import org.apache.arrow.flight.auth.ServerAuthHandler; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Test; - -public class TestAuth { - - /** An auth handler that does not send messages should not block the server forever. */ - @Test - public void noMessages() throws Exception { - assertThrows( - RuntimeException.class, - () -> { - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final FlightServer s = - FlightServer.builder( - allocator, forGrpcInsecure(LOCALHOST, 0), new NoOpFlightProducer()) - .authHandler(new OneshotAuthHandler()) - .build() - .start(); - final FlightClient client = - FlightClient.builder(allocator, s.getLocation()).build()) { - client.authenticate( - new ClientAuthHandler() { - @Override - public void authenticate(ClientAuthSender outgoing, Iterator incoming) {} - - @Override - public byte[] getCallToken() { - return new byte[0]; - } - }); - } - }); - } - - /** An auth handler that sends an error should not block the server forever. 
*/ - @Test - public void clientError() throws Exception { - assertThrows( - RuntimeException.class, - () -> { - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final FlightServer s = - FlightServer.builder( - allocator, forGrpcInsecure(LOCALHOST, 0), new NoOpFlightProducer()) - .authHandler(new OneshotAuthHandler()) - .build() - .start(); - final FlightClient client = - FlightClient.builder(allocator, s.getLocation()).build()) { - client.authenticate( - new ClientAuthHandler() { - @Override - public void authenticate(ClientAuthSender outgoing, Iterator incoming) { - outgoing.send(new byte[0]); - // Ensure the server-side runs - incoming.next(); - outgoing.onError(new RuntimeException("test")); - } - - @Override - public byte[] getCallToken() { - return new byte[0]; - } - }); - } - }); - } - - private static class OneshotAuthHandler implements ServerAuthHandler { - - @Override - public Optional isValid(byte[] token) { - return Optional.of("test"); - } - - @Override - public boolean authenticate(ServerAuthSender outgoing, Iterator incoming) { - incoming.next(); - outgoing.send(new byte[0]); - return false; - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBackPressure.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBackPressure.java deleted file mode 100644 index 52891dd9e7175..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBackPressure.java +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import com.google.common.collect.ImmutableList; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.atomic.AtomicLong; -import java.util.function.Function; -import org.apache.arrow.flight.perf.PerformanceTestServer; -import org.apache.arrow.flight.perf.TestPerf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -public class TestBackPressure { - - private static final int BATCH_SIZE = 4095; - - /** Make sure that failing to consume one stream doesn't block other streams. */ - @Disabled - @Test - public void ensureIndependentSteams() throws Exception { - ensureIndependentSteams((b) -> (location -> new PerformanceTestServer(b, location))); - } - - /** Make sure that failing to consume one stream doesn't block other streams. 
*/ - @Disabled - @Test - public void ensureIndependentSteamsWithCallbacks() throws Exception { - ensureIndependentSteams( - (b) -> - (location -> - new PerformanceTestServer( - b, location, new BackpressureStrategy.CallbackBackpressureStrategy(), true))); - } - - /** Test to make sure stream doesn't go faster than the consumer is consuming. */ - @Disabled - @Test - public void ensureWaitUntilProceed() throws Exception { - ensureWaitUntilProceed(new PollingBackpressureStrategy(), false); - } - - /** - * Test to make sure stream doesn't go faster than the consumer is consuming using a - * callback-based backpressure strategy. - */ - @Disabled - @Test - public void ensureWaitUntilProceedWithCallbacks() throws Exception { - ensureWaitUntilProceed(new RecordingCallbackBackpressureStrategy(), true); - } - - /** Make sure that failing to consume one stream doesn't block other streams. */ - private static void ensureIndependentSteams( - Function> serverConstructor) - throws Exception { - try (final BufferAllocator a = new RootAllocator(Long.MAX_VALUE); - final PerformanceTestServer server = - serverConstructor.apply(a).apply(forGrpcInsecure(LOCALHOST, 0)).start(); - final FlightClient client = FlightClient.builder(a, server.getLocation()).build()) { - try (FlightStream fs1 = - client.getStream( - client - .getInfo(TestPerf.getPerfFlightDescriptor(110L * BATCH_SIZE, BATCH_SIZE, 1)) - .getEndpoints() - .get(0) - .getTicket())) { - consume(fs1, 10); - - // stop consuming fs1 but make sure we can consume a large amount of fs2. - try (FlightStream fs2 = - client.getStream( - client - .getInfo(TestPerf.getPerfFlightDescriptor(200L * BATCH_SIZE, BATCH_SIZE, 1)) - .getEndpoints() - .get(0) - .getTicket())) { - consume(fs2, 100); - - consume(fs1, 100); - consume(fs2, 100); - - consume(fs1); - consume(fs2); - } - } - } - } - - /** Make sure that a stream doesn't go faster than the consumer is consuming. 
*/ - private static void ensureWaitUntilProceed( - SleepTimeRecordingBackpressureStrategy bpStrategy, boolean isNonBlocking) throws Exception { - // request some values. - final long wait = 3000; - final long epsilon = 1000; - - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { - - final FlightProducer producer = - new NoOpFlightProducer() { - - @Override - public void getStream( - CallContext context, Ticket ticket, ServerStreamListener listener) { - bpStrategy.register(listener); - final Runnable loadData = - () -> { - int batches = 0; - final Schema pojoSchema = - new Schema( - ImmutableList.of(Field.nullable("a", MinorType.BIGINT.getType()))); - try (VectorSchemaRoot root = VectorSchemaRoot.create(pojoSchema, allocator)) { - listener.start(root); - while (true) { - bpStrategy.waitForListener(0); - if (batches > 100) { - root.clear(); - listener.completed(); - return; - } - - root.allocateNew(); - root.setRowCount(4095); - listener.putNext(); - batches++; - } - } - }; - - if (!isNonBlocking) { - loadData.run(); - } else { - final ExecutorService service = Executors.newSingleThreadExecutor(); - Future unused = service.submit(loadData); - service.shutdown(); - } - } - }; - - try (BufferAllocator serverAllocator = - allocator.newChildAllocator("server", 0, Long.MAX_VALUE); - FlightServer server = - FlightServer.builder(serverAllocator, forGrpcInsecure(LOCALHOST, 0), producer) - .build() - .start(); - BufferAllocator clientAllocator = - allocator.newChildAllocator("client", 0, Long.MAX_VALUE); - FlightClient client = - FlightClient.builder(clientAllocator, server.getLocation()).build(); - FlightStream stream = client.getStream(new Ticket(new byte[1]))) { - VectorSchemaRoot root = stream.getRoot(); - root.clear(); - Thread.sleep(wait); - while (stream.next()) { - root.clear(); - } - long expected = wait - epsilon; - assertTrue( - bpStrategy.getSleepTime() > expected, - String.format( - "Expected a sleep of at least %dms but only slept for %d", - 
expected, bpStrategy.getSleepTime())); - } - } - } - - private static void consume(FlightStream stream) { - VectorSchemaRoot root = stream.getRoot(); - while (stream.next()) { - root.clear(); - } - } - - private static void consume(FlightStream stream, int batches) { - VectorSchemaRoot root = stream.getRoot(); - while (batches > 0 && stream.next()) { - root.clear(); - batches--; - } - } - - private interface SleepTimeRecordingBackpressureStrategy extends BackpressureStrategy { - /** - * Returns the total time spent waiting on the listener to be ready. - * - * @return the total time spent waiting on the listener to be ready. - */ - long getSleepTime(); - } - - /** - * Implementation of a backpressure strategy that polls on isReady and records amount of time - * spent in Thread.sleep(). - */ - private static class PollingBackpressureStrategy - implements SleepTimeRecordingBackpressureStrategy { - private final AtomicLong sleepTime = new AtomicLong(0); - private FlightProducer.ServerStreamListener listener; - - @Override - public long getSleepTime() { - return sleepTime.get(); - } - - @Override - public void register(FlightProducer.ServerStreamListener listener) { - this.listener = listener; - } - - @Override - public WaitResult waitForListener(long timeout) { - while (!listener.isReady()) { - try { - Thread.sleep(1); - sleepTime.addAndGet(1L); - } catch (InterruptedException expected) { - // it is expected and no action needed - } - } - return WaitResult.READY; - } - } - - /** - * Implementation of a backpressure strategy that uses callbacks to detect changes in client - * readiness state and records spent time waiting. 
- */ - private static class RecordingCallbackBackpressureStrategy - extends BackpressureStrategy.CallbackBackpressureStrategy - implements SleepTimeRecordingBackpressureStrategy { - private final AtomicLong sleepTime = new AtomicLong(0); - - @Override - public long getSleepTime() { - return sleepTime.get(); - } - - @Override - public WaitResult waitForListener(long timeout) { - final long startTime = System.currentTimeMillis(); - final WaitResult result = super.waitForListener(timeout); - sleepTime.addAndGet(System.currentTimeMillis() - startTime); - return result; - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBasicOperation.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBasicOperation.java deleted file mode 100644 index 5e818e6f5d02e..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestBasicOperation.java +++ /dev/null @@ -1,686 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import com.google.common.base.Charsets; -import com.google.protobuf.ByteString; -import io.grpc.MethodDescriptor; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.net.InetSocketAddress; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.channels.Channels; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.function.BiConsumer; -import java.util.function.Consumer; -import org.apache.arrow.flight.FlightClient.ClientStreamListener; -import org.apache.arrow.flight.impl.Flight; -import org.apache.arrow.flight.impl.Flight.FlightDescriptor.DescriptorType; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.ipc.WriteChannel; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.ipc.message.IpcOption; -import 
org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.DisabledOnOs; -import org.junit.jupiter.api.condition.OS; - -/** Test the operations of a basic flight service. */ -public class TestBasicOperation { - - @Test - public void fastPathDefaults() { - assertTrue(ArrowMessage.ENABLE_ZERO_COPY_READ); - assertFalse(ArrowMessage.ENABLE_ZERO_COPY_WRITE); - } - - @Test - public void fallbackLocation() { - assertEquals( - "arrow-flight-reuse-connection://?", Location.reuseConnection().getUri().toString()); - } - - /** ARROW-6017: we should be able to construct locations for unknown schemes. */ - @Test - public void unknownScheme() throws URISyntaxException { - final Location location = new Location("s3://unknown"); - assertEquals("s3", location.getUri().getScheme()); - } - - @Test - public void unknownSchemeRemote() throws Exception { - test( - c -> { - try { - final FlightInfo info = c.getInfo(FlightDescriptor.path("test")); - assertEquals( - new URI("https://example.com"), - info.getEndpoints().get(0).getLocations().get(0).getUri()); - } catch (URISyntaxException e) { - throw new RuntimeException(e); - } - }); - } - - @Test - public void roundTripTicket() throws Exception { - final Ticket ticket = new Ticket(new byte[] {0, 1, 2, 3, 4, 5}); - assertEquals(ticket, Ticket.deserialize(ticket.serialize())); - } - - @Test - public void roundTripInfo() throws Exception { - final Map metadata = new HashMap<>(); - metadata.put("foo", "bar"); - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("a", new ArrowType.Int(32, true)), - Field.nullable("b", new ArrowType.FixedSizeBinary(32))), - metadata); - final FlightInfo info1 = - FlightInfo.builder(schema, FlightDescriptor.path(), Collections.emptyList()) - 
.setAppMetadata("foo".getBytes(StandardCharsets.UTF_8)) - .build(); - final FlightInfo info2 = - new FlightInfo( - schema, - FlightDescriptor.command(new byte[2]), - Collections.singletonList( - FlightEndpoint.builder( - new Ticket(new byte[10]), Location.forGrpcDomainSocket("/tmp/test.sock")) - .setAppMetadata("bar".getBytes(StandardCharsets.UTF_8)) - .build()), - 200, - 500); - final FlightInfo info3 = - new FlightInfo( - schema, - FlightDescriptor.path("a", "b"), - Arrays.asList( - new FlightEndpoint( - new Ticket(new byte[10]), Location.forGrpcDomainSocket("/tmp/test.sock")), - new FlightEndpoint( - new Ticket(new byte[10]), - Location.forGrpcDomainSocket("/tmp/test.sock"), - forGrpcInsecure("localhost", 50051))), - 200, - 500); - final FlightInfo info4 = - new FlightInfo( - schema, - FlightDescriptor.path("a", "b"), - Arrays.asList( - new FlightEndpoint( - new Ticket(new byte[10]), Location.forGrpcDomainSocket("/tmp/test.sock")), - new FlightEndpoint( - new Ticket(new byte[10]), - Location.forGrpcDomainSocket("/tmp/test.sock"), - forGrpcInsecure("localhost", 50051))), - 200, - 500, /*ordered*/ - true, - IpcOption.DEFAULT); - - assertEquals(info1, FlightInfo.deserialize(info1.serialize())); - assertEquals(info2, FlightInfo.deserialize(info2.serialize())); - assertEquals(info3, FlightInfo.deserialize(info3.serialize())); - assertEquals(info4, FlightInfo.deserialize(info4.serialize())); - - assertNotEquals(info3, info4); - - assertFalse(info1.getOrdered()); - assertFalse(info2.getOrdered()); - assertFalse(info3.getOrdered()); - assertTrue(info4.getOrdered()); - } - - @Test - public void roundTripDescriptor() throws Exception { - final FlightDescriptor cmd = - FlightDescriptor.command("test command".getBytes(StandardCharsets.UTF_8)); - assertEquals(cmd, FlightDescriptor.deserialize(cmd.serialize())); - final FlightDescriptor path = FlightDescriptor.path("foo", "bar", "test.arrow"); - assertEquals(path, FlightDescriptor.deserialize(path.serialize())); - } - - @Test 
- public void getDescriptors() throws Exception { - test( - c -> { - int count = 0; - for (FlightInfo unused : c.listFlights(Criteria.ALL)) { - count += 1; - } - assertEquals(1, count); - }); - } - - @Test - public void getDescriptorsWithCriteria() throws Exception { - test( - c -> { - int count = 0; - for (FlightInfo unused : c.listFlights(new Criteria(new byte[] {1}))) { - - count += 1; - } - assertEquals(0, count); - }); - } - - @Test - public void getDescriptor() throws Exception { - test( - c -> { - System.out.println(c.getInfo(FlightDescriptor.path("hello")).getDescriptor()); - }); - } - - @Test - public void getSchema() throws Exception { - test( - c -> { - System.out.println(c.getSchema(FlightDescriptor.path("hello")).getSchema()); - }); - } - - @Test - public void listActions() throws Exception { - test( - c -> { - for (ActionType at : c.listActions()) { - System.out.println(at.getType()); - } - }); - } - - @Test - public void doAction() throws Exception { - test( - c -> { - Iterator stream = c.doAction(new Action("hello")); - - assertTrue(stream.hasNext()); - Result r = stream.next(); - assertArrayEquals("world".getBytes(Charsets.UTF_8), r.getBody()); - }); - test( - c -> { - Iterator stream = c.doAction(new Action("hellooo")); - - assertTrue(stream.hasNext()); - Result r = stream.next(); - assertArrayEquals("world".getBytes(Charsets.UTF_8), r.getBody()); - - assertTrue(stream.hasNext()); - r = stream.next(); - assertArrayEquals("!".getBytes(Charsets.UTF_8), r.getBody()); - assertFalse(stream.hasNext()); - }); - } - - @Test - public void putStream() throws Exception { - test( - (c, a) -> { - final int size = 10; - - IntVector iv = new IntVector("c1", a); - - try (VectorSchemaRoot root = VectorSchemaRoot.of(iv)) { - ClientStreamListener listener = - c.startPut(FlightDescriptor.path("hello"), root, new AsyncPutListener()); - - // batch 1 - root.allocateNew(); - for (int i = 0; i < size; i++) { - iv.set(i, i); - } - iv.setValueCount(size); - 
root.setRowCount(size); - listener.putNext(); - - // batch 2 - - root.allocateNew(); - for (int i = 0; i < size; i++) { - iv.set(i, i + size); - } - iv.setValueCount(size); - root.setRowCount(size); - listener.putNext(); - root.clear(); - listener.completed(); - - // wait for ack to avoid memory leaks. - listener.getResult(); - } - }); - } - - @Test - public void propagateErrors() throws Exception { - test( - client -> { - FlightTestUtil.assertCode( - FlightStatusCode.UNIMPLEMENTED, - () -> { - client.doAction(new Action("invalid-action")).forEachRemaining(action -> fail()); - }); - }); - } - - @Test - public void getStream() throws Exception { - test( - c -> { - try (final FlightStream stream = c.getStream(new Ticket(new byte[0]))) { - VectorSchemaRoot root = stream.getRoot(); - IntVector iv = (IntVector) root.getVector("c1"); - int value = 0; - while (stream.next()) { - for (int i = 0; i < root.getRowCount(); i++) { - assertEquals(value, iv.get(i)); - value++; - } - } - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - } - - /** Ensure the client is configured to accept large messages. */ - @Test - @DisabledOnOs( - value = {OS.WINDOWS}, - disabledReason = "https://github.com/apache/arrow/issues/33237: flaky test") - public void getStreamLargeBatch() throws Exception { - test( - c -> { - try (final FlightStream stream = c.getStream(new Ticket(Producer.TICKET_LARGE_BATCH))) { - assertEquals(128, stream.getRoot().getFieldVectors().size()); - assertTrue(stream.next()); - assertEquals(65536, stream.getRoot().getRowCount()); - assertTrue(stream.next()); - assertEquals(65536, stream.getRoot().getRowCount()); - assertFalse(stream.next()); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - } - - /** Ensure the server is configured to accept large messages. 
*/ - @Test - public void startPutLargeBatch() throws Exception { - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) { - final List vectors = new ArrayList<>(); - for (int col = 0; col < 128; col++) { - final BigIntVector vector = new BigIntVector("f" + col, allocator); - for (int row = 0; row < 65536; row++) { - vector.setSafe(row, row); - } - vectors.add(vector); - } - test( - c -> { - try (final VectorSchemaRoot root = new VectorSchemaRoot(vectors)) { - root.setRowCount(65536); - final ClientStreamListener stream = - c.startPut(FlightDescriptor.path(""), root, new SyncPutListener()); - stream.putNext(); - stream.putNext(); - stream.completed(); - stream.getResult(); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - } - } - - private void test(Consumer consumer) throws Exception { - test( - (c, a) -> { - consumer.accept(c); - }); - } - - private void test(BiConsumer consumer) throws Exception { - try (BufferAllocator a = new RootAllocator(Long.MAX_VALUE); - Producer producer = new Producer(a); - FlightServer s = - FlightServer.builder(a, forGrpcInsecure(LOCALHOST, 0), producer).build().start()) { - - try (FlightClient c = FlightClient.builder(a, s.getLocation()).build()) { - try (BufferAllocator testAllocator = a.newChildAllocator("testcase", 0, Long.MAX_VALUE)) { - consumer.accept(c, testAllocator); - } - } - } - } - - /** Helper method to convert an ArrowMessage into a Protobuf message. 
*/ - private Flight.FlightData arrowMessageToProtobuf( - MethodDescriptor.Marshaller marshaller, ArrowMessage message) - throws IOException { - final ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (final InputStream serialized = marshaller.stream(message)) { - final byte[] buf = new byte[1024]; - while (true) { - int read = serialized.read(buf); - if (read < 0) { - break; - } - baos.write(buf, 0, read); - } - } - final byte[] serializedMessage = baos.toByteArray(); - return Flight.FlightData.parseFrom(serializedMessage); - } - - /** - * ARROW-10962: accept FlightData messages generated by Protobuf (which can omit empty fields). - */ - @Test - public void testProtobufRecordBatchCompatibility() throws Exception { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("foo", new ArrowType.Int(32, true)))); - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final VectorUnloader unloader = new VectorUnloader(root); - root.setRowCount(0); - final MethodDescriptor.Marshaller marshaller = - ArrowMessage.createMarshaller(allocator); - try (final ArrowMessage message = - new ArrowMessage( - unloader.getRecordBatch(), /* appMetadata */ - null, /* tryZeroCopy */ - false, - IpcOption.DEFAULT)) { - assertEquals(ArrowMessage.HeaderType.RECORD_BATCH, message.getMessageType()); - // Should have at least one empty body buffer (there may be multiple for e.g. 
data and - // validity) - Iterator iterator = message.getBufs().iterator(); - assertTrue(iterator.hasNext()); - while (iterator.hasNext()) { - assertEquals(0, iterator.next().capacity()); - } - final Flight.FlightData protobufData = - arrowMessageToProtobuf(marshaller, message).toBuilder().clearDataBody().build(); - assertEquals(0, protobufData.getDataBody().size()); - ArrowMessage parsedMessage = - marshaller.parse(new ByteArrayInputStream(protobufData.toByteArray())); - // Should have an empty body buffer - Iterator parsedIterator = parsedMessage.getBufs().iterator(); - assertTrue(parsedIterator.hasNext()); - assertEquals(0, parsedIterator.next().capacity()); - // Should have only one (the parser synthesizes exactly one); in the case of empty buffers, - // this is equivalent - assertFalse(parsedIterator.hasNext()); - // Should not throw - final ArrowRecordBatch rb = parsedMessage.asRecordBatch(); - assertEquals(rb.computeBodyLength(), 0); - } - } - } - - /** - * ARROW-10962: accept FlightData messages generated by Protobuf (which can omit empty fields). 
- */ - @Test - public void testProtobufSchemaCompatibility() throws Exception { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("foo", new ArrowType.Int(32, true)))); - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) { - final MethodDescriptor.Marshaller marshaller = - ArrowMessage.createMarshaller(allocator); - Flight.FlightDescriptor descriptor = FlightDescriptor.command(new byte[0]).toProtocol(); - try (final ArrowMessage message = new ArrowMessage(descriptor, schema, IpcOption.DEFAULT)) { - assertEquals(ArrowMessage.HeaderType.SCHEMA, message.getMessageType()); - // Should have no body buffers - assertFalse(message.getBufs().iterator().hasNext()); - final Flight.FlightData protobufData = - arrowMessageToProtobuf(marshaller, message) - .toBuilder() - .setDataBody(ByteString.EMPTY) - .build(); - assertEquals(0, protobufData.getDataBody().size()); - final ArrowMessage parsedMessage = - marshaller.parse(new ByteArrayInputStream(protobufData.toByteArray())); - // Should have no body buffers - assertFalse(parsedMessage.getBufs().iterator().hasNext()); - // Should not throw - parsedMessage.asSchema(); - } - } - } - - @Test - public void testGrpcInsecureLocation() throws Exception { - Location location = Location.forGrpcInsecure(LOCALHOST, 9000); - assertEquals( - new URI(LocationSchemes.GRPC_INSECURE, null, LOCALHOST, 9000, null, null, null), - location.getUri()); - assertEquals(new InetSocketAddress(LOCALHOST, 9000), location.toSocketAddress()); - } - - @Test - public void testGrpcTlsLocation() throws Exception { - Location location = Location.forGrpcTls(LOCALHOST, 9000); - assertEquals( - new URI(LocationSchemes.GRPC_TLS, null, LOCALHOST, 9000, null, null, null), - location.getUri()); - assertEquals(new InetSocketAddress(LOCALHOST, 9000), location.toSocketAddress()); - } - - /** An example FlightProducer for test purposes. 
*/ - public static class Producer implements FlightProducer, AutoCloseable { - static final byte[] TICKET_LARGE_BATCH = "large-batch".getBytes(StandardCharsets.UTF_8); - - private final BufferAllocator allocator; - - public Producer(BufferAllocator allocator) { - super(); - this.allocator = allocator; - } - - @Override - public void listFlights( - CallContext context, Criteria criteria, StreamListener listener) { - if (criteria.getExpression().length > 0) { - // Don't send anything if criteria are set - listener.onCompleted(); - } - - Flight.FlightInfo getInfo = - Flight.FlightInfo.newBuilder() - .setFlightDescriptor( - Flight.FlightDescriptor.newBuilder() - .setType(DescriptorType.CMD) - .setCmd(ByteString.copyFrom("cool thing", Charsets.UTF_8))) - .build(); - try { - listener.onNext(new FlightInfo(getInfo)); - } catch (URISyntaxException e) { - listener.onError(e); - return; - } - listener.onCompleted(); - } - - @Override - public Runnable acceptPut( - CallContext context, FlightStream flightStream, StreamListener ackStream) { - return () -> { - while (flightStream.next()) { - // Drain the stream - } - }; - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - if (Arrays.equals(TICKET_LARGE_BATCH, ticket.getBytes())) { - getLargeBatch(listener); - return; - } - final int size = 10; - - IntVector iv = new IntVector("c1", allocator); - VectorSchemaRoot root = VectorSchemaRoot.of(iv); - listener.start(root); - - // batch 1 - root.allocateNew(); - for (int i = 0; i < size; i++) { - iv.set(i, i); - } - iv.setValueCount(size); - root.setRowCount(size); - listener.putNext(); - - // batch 2 - - root.allocateNew(); - for (int i = 0; i < size; i++) { - iv.set(i, i + size); - } - iv.setValueCount(size); - root.setRowCount(size); - listener.putNext(); - root.clear(); - listener.completed(); - } - - private void getLargeBatch(ServerStreamListener listener) { - final List vectors = new ArrayList<>(); - for (int col = 0; 
col < 128; col++) { - final BigIntVector vector = new BigIntVector("f" + col, allocator); - for (int row = 0; row < 65536; row++) { - vector.setSafe(row, row); - } - vectors.add(vector); - } - try (final VectorSchemaRoot root = new VectorSchemaRoot(vectors)) { - root.setRowCount(65536); - listener.start(root); - listener.putNext(); - listener.putNext(); - listener.completed(); - } - } - - @Override - public void close() throws Exception { - allocator.close(); - } - - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - try { - Flight.FlightInfo getInfo = - Flight.FlightInfo.newBuilder() - .setSchema(schemaToByteString(new Schema(Collections.emptyList()))) - .setFlightDescriptor( - Flight.FlightDescriptor.newBuilder() - .setType(DescriptorType.CMD) - .setCmd(ByteString.copyFrom("cool thing", Charsets.UTF_8))) - .addEndpoint( - Flight.FlightEndpoint.newBuilder() - .addLocation(new Location("https://example.com").toProtocol())) - .build(); - return new FlightInfo(getInfo); - } catch (URISyntaxException e) { - throw new RuntimeException(e); - } - } - - private static ByteString schemaToByteString(Schema schema) { - try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { - MessageSerializer.serialize( - new WriteChannel(Channels.newChannel(baos)), schema, IpcOption.DEFAULT); - return ByteString.copyFrom(baos.toByteArray()); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public void doAction(CallContext context, Action action, StreamListener listener) { - switch (action.getType()) { - case "hello": - { - listener.onNext(new Result("world".getBytes(Charsets.UTF_8))); - listener.onCompleted(); - break; - } - case "hellooo": - { - listener.onNext(new Result("world".getBytes(Charsets.UTF_8))); - listener.onNext(new Result("!".getBytes(Charsets.UTF_8))); - listener.onCompleted(); - break; - } - default: - listener.onError( - CallStatus.UNIMPLEMENTED - .withDescription("Action not 
implemented: " + action.getType()) - .toRuntimeException()); - } - } - - @Override - public void listActions(CallContext context, StreamListener listener) { - listener.onNext(new ActionType("get", "")); - listener.onNext(new ActionType("put", "")); - listener.onNext(new ActionType("hello", "")); - listener.onCompleted(); - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestCallOptions.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestCallOptions.java deleted file mode 100644 index a54ce69812c8b..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestCallOptions.java +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import io.grpc.Metadata; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.time.Duration; -import java.time.Instant; -import java.util.Iterator; -import java.util.concurrent.TimeUnit; -import java.util.function.Consumer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -public class TestCallOptions { - - @Test - @Disabled - public void timeoutFires() { - // Ignored due to CI flakiness - test( - (client) -> { - Instant start = Instant.now(); - Iterator results = - client.doAction(new Action("hang"), CallOptions.timeout(1, TimeUnit.SECONDS)); - try { - results.next(); - fail("Call should have failed"); - } catch (RuntimeException e) { - assertTrue(e.getMessage().contains("deadline exceeded"), e.getMessage()); - } - Instant end = Instant.now(); - assertTrue( - Duration.between(start, end).toMillis() < 1500, - "Call took over 1500 ms despite timeout"); - }); - } - - @Test - @Disabled - public void underTimeout() { - // Ignored due to CI flakiness - test( - (client) -> { - Instant start = Instant.now(); - // This shouldn't fail and it should complete within the timeout - Iterator results = - client.doAction(new Action("fast"), CallOptions.timeout(2, TimeUnit.SECONDS)); - assertArrayEquals(new byte[] {42, 42}, results.next().getBody()); - Instant end = Instant.now(); - assertTrue( - Duration.between(start, end).toMillis() < 2500, - "Call took 
over 2500 ms despite timeout"); - }); - } - - @Test - public void singleProperty() { - final FlightCallHeaders headers = new FlightCallHeaders(); - headers.insert("key", "value"); - testHeaders(headers); - } - - @Test - public void multipleProperties() { - final FlightCallHeaders headers = new FlightCallHeaders(); - headers.insert("key", "value"); - headers.insert("key2", "value2"); - testHeaders(headers); - } - - @Test - public void binaryProperties() { - final FlightCallHeaders headers = new FlightCallHeaders(); - headers.insert("key-bin", "value".getBytes(StandardCharsets.UTF_8)); - headers.insert("key3-bin", "ëfßæ".getBytes(StandardCharsets.UTF_8)); - testHeaders(headers); - } - - @Test - public void mixedProperties() { - final FlightCallHeaders headers = new FlightCallHeaders(); - headers.insert("key", "value"); - headers.insert("key3-bin", "ëfßæ".getBytes(StandardCharsets.UTF_8)); - testHeaders(headers); - } - - private void testHeaders(CallHeaders headers) { - try (BufferAllocator a = new RootAllocator(Long.MAX_VALUE); - HeaderProducer producer = new HeaderProducer(); - FlightServer s = - FlightServer.builder(a, forGrpcInsecure(LOCALHOST, 0), producer).build().start(); - FlightClient client = FlightClient.builder(a, s.getLocation()).build()) { - assertFalse(client.doAction(new Action(""), new HeaderCallOption(headers)).hasNext()); - final CallHeaders incomingHeaders = producer.headers(); - for (String key : headers.keys()) { - if (key.endsWith(Metadata.BINARY_HEADER_SUFFIX)) { - assertArrayEquals(headers.getByte(key), incomingHeaders.getByte(key)); - } else { - assertEquals(headers.get(key), incomingHeaders.get(key)); - } - } - } catch (InterruptedException | IOException e) { - throw new RuntimeException(e); - } - } - - void test(Consumer testFn) { - try (BufferAllocator a = new RootAllocator(Long.MAX_VALUE); - Producer producer = new Producer(); - FlightServer s = - FlightServer.builder(a, forGrpcInsecure(LOCALHOST, 0), producer).build().start(); - 
FlightClient client = FlightClient.builder(a, s.getLocation()).build()) { - testFn.accept(client); - } catch (InterruptedException | IOException e) { - throw new RuntimeException(e); - } - } - - static class HeaderProducer extends NoOpFlightProducer implements AutoCloseable { - CallHeaders headers; - - @Override - public void close() {} - - public CallHeaders headers() { - return headers; - } - - @Override - public void doAction(CallContext context, Action action, StreamListener listener) { - this.headers = context.getMiddleware(FlightConstants.HEADER_KEY).headers(); - listener.onCompleted(); - } - } - - static class Producer extends NoOpFlightProducer implements AutoCloseable { - - Producer() {} - - @Override - public void close() {} - - @Override - public void doAction(CallContext context, Action action, StreamListener listener) { - switch (action.getType()) { - case "hang": - { - try { - Thread.sleep(25000); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - listener.onNext(new Result(new byte[] {})); - listener.onCompleted(); - return; - } - case "fast": - { - try { - Thread.sleep(500); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - listener.onNext(new Result(new byte[] {42, 42})); - listener.onCompleted(); - return; - } - default: - { - throw new UnsupportedOperationException(action.getType()); - } - } - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java deleted file mode 100644 index a49406b121084..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestClientMiddleware.java +++ /dev/null @@ -1,372 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.BiConsumer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Test; - -/** A basic test of client middleware using a simplified OpenTracing-like example. */ -public class TestClientMiddleware { - - /** - * Test that a client middleware can fail a call before it starts by throwing a {@link - * FlightRuntimeException}. 
- */ - @Test - public void clientMiddleware_failCallBeforeSending() { - test( - new NoOpFlightProducer(), - null, - Collections.singletonList(new CallRejector.Factory()), - (allocator, client) -> { - FlightTestUtil.assertCode(FlightStatusCode.UNAVAILABLE, client::listActions); - }); - } - - /** - * Test an OpenTracing-like scenario where client and server middleware work together to propagate - * a request ID without explicit intervention from the service implementation. - */ - @Test - public void middleware_propagateHeader() { - final Context context = new Context("span id"); - test( - new NoOpFlightProducer(), - new TestServerMiddleware.ServerMiddlewarePair<>( - FlightServerMiddleware.Key.of("test"), new ServerSpanInjector.Factory()), - Collections.singletonList(new ClientSpanInjector.Factory(context)), - (allocator, client) -> { - FlightTestUtil.assertCode( - FlightStatusCode.UNIMPLEMENTED, () -> client.listActions().forEach(actionType -> {})); - }); - assertEquals(context.outgoingSpanId, context.incomingSpanId); - assertNotNull(context.finalStatus); - assertEquals(FlightStatusCode.UNIMPLEMENTED, context.finalStatus.code()); - } - - /** - * Ensure both server and client can send and receive multi-valued headers (both binary and text - * values). - */ - @Test - public void testMultiValuedHeaders() { - final MultiHeaderClientMiddlewareFactory clientFactory = - new MultiHeaderClientMiddlewareFactory(); - test( - new NoOpFlightProducer(), - new TestServerMiddleware.ServerMiddlewarePair<>( - FlightServerMiddleware.Key.of("test"), new MultiHeaderServerMiddlewareFactory()), - Collections.singletonList(clientFactory), - (allocator, client) -> { - FlightTestUtil.assertCode( - FlightStatusCode.UNIMPLEMENTED, () -> client.listActions().forEach(actionType -> {})); - }); - // The server echoes the headers we send back to us, so ensure all the ones we sent are present - // with the correct - // values in the correct order. 
- for (final Map.Entry> entry : EXPECTED_BINARY_HEADERS.entrySet()) { - // Compare header values entry-by-entry because byte arrays don't compare via equals - final List receivedValues = clientFactory.lastBinaryHeaders.get(entry.getKey()); - assertNotNull(receivedValues, "Missing for header: " + entry.getKey()); - assertEquals( - entry.getValue().size(), - receivedValues.size(), - "Missing or wrong value for header: " + entry.getKey()); - for (int i = 0; i < entry.getValue().size(); i++) { - assertArrayEquals(entry.getValue().get(i), receivedValues.get(i)); - } - } - for (final Map.Entry> entry : EXPECTED_TEXT_HEADERS.entrySet()) { - assertEquals( - entry.getValue(), - clientFactory.lastTextHeaders.get(entry.getKey()), - "Missing or wrong value for header: " + entry.getKey()); - } - } - - private static void test( - FlightProducer producer, - TestServerMiddleware.ServerMiddlewarePair serverMiddleware, - List clientMiddleware, - BiConsumer body) { - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) { - final FlightServer.Builder serverBuilder = - FlightServer.builder(allocator, forGrpcInsecure(LOCALHOST, 0), producer); - if (serverMiddleware != null) { - serverBuilder.middleware(serverMiddleware.key, serverMiddleware.factory); - } - final FlightServer server = serverBuilder.build().start(); - - FlightClient.Builder clientBuilder = FlightClient.builder(allocator, server.getLocation()); - clientMiddleware.forEach(clientBuilder::intercept); - try (final FlightServer ignored = server; - final FlightClient client = clientBuilder.build()) { - body.accept(allocator, client); - } - } catch (InterruptedException | IOException e) { - throw new RuntimeException(e); - } - } - - /** - * A server middleware component that reads a request ID from incoming headers and sends the - * request ID back on outgoing headers. 
- */ - static class ServerSpanInjector implements FlightServerMiddleware { - - private final String spanId; - - public ServerSpanInjector(String spanId) { - this.spanId = spanId; - } - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) { - outgoingHeaders.insert("x-span", spanId); - } - - @Override - public void onCallCompleted(CallStatus status) {} - - @Override - public void onCallErrored(Throwable err) {} - - static class Factory implements FlightServerMiddleware.Factory { - - @Override - public ServerSpanInjector onCallStarted( - CallInfo info, CallHeaders incomingHeaders, RequestContext context) { - return new ServerSpanInjector(incomingHeaders.get("x-span")); - } - } - } - - /** - * A client middleware component that, given a mock OpenTracing-like "request context", sends the - * request ID in the context on outgoing headers and reads it from incoming headers. - */ - static class ClientSpanInjector implements FlightClientMiddleware { - - private final Context context; - - public ClientSpanInjector(Context context) { - this.context = context; - } - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) { - outgoingHeaders.insert("x-span", context.outgoingSpanId); - } - - @Override - public void onHeadersReceived(CallHeaders incomingHeaders) { - context.incomingSpanId = incomingHeaders.get("x-span"); - } - - @Override - public void onCallCompleted(CallStatus status) { - context.finalStatus = status; - } - - static class Factory implements FlightClientMiddleware.Factory { - - private final Context context; - - Factory(Context context) { - this.context = context; - } - - @Override - public FlightClientMiddleware onCallStarted(CallInfo info) { - return new ClientSpanInjector(context); - } - } - } - - /** A mock OpenTracing-like "request context". 
*/ - static class Context { - - final String outgoingSpanId; - String incomingSpanId; - CallStatus finalStatus; - - Context(String spanId) { - this.outgoingSpanId = spanId; - } - } - - /** A client middleware that fails outgoing calls. */ - static class CallRejector implements FlightClientMiddleware { - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {} - - @Override - public void onHeadersReceived(CallHeaders incomingHeaders) {} - - @Override - public void onCallCompleted(CallStatus status) {} - - static class Factory implements FlightClientMiddleware.Factory { - - @Override - public FlightClientMiddleware onCallStarted(CallInfo info) { - throw CallStatus.UNAVAILABLE.withDescription("Rejecting call.").toRuntimeException(); - } - } - } - - // Used to test that middleware can send and receive multi-valued text and binary headers. - static Map> EXPECTED_BINARY_HEADERS = new HashMap>(); - static Map> EXPECTED_TEXT_HEADERS = new HashMap>(); - - static { - EXPECTED_BINARY_HEADERS.put("x-binary-bin", Arrays.asList(new byte[] {0}, new byte[] {1})); - EXPECTED_TEXT_HEADERS.put("x-text", Arrays.asList("foo", "bar")); - } - - static class MultiHeaderServerMiddlewareFactory - implements FlightServerMiddleware.Factory { - @Override - public MultiHeaderServerMiddleware onCallStarted( - CallInfo info, CallHeaders incomingHeaders, RequestContext context) { - // Echo the headers back to the client. Copy values out of CallHeaders since the underlying - // gRPC metadata - // object isn't safe to use after this function returns. 
- Map> binaryHeaders = new HashMap<>(); - Map> textHeaders = new HashMap<>(); - for (final String key : incomingHeaders.keys()) { - if (key.endsWith("-bin")) { - binaryHeaders.compute( - key, - (ignored, values) -> { - if (values == null) { - values = new ArrayList<>(); - } - incomingHeaders.getAllByte(key).forEach(values::add); - return values; - }); - } else { - textHeaders.compute( - key, - (ignored, values) -> { - if (values == null) { - values = new ArrayList<>(); - } - incomingHeaders.getAll(key).forEach(values::add); - return values; - }); - } - } - return new MultiHeaderServerMiddleware(binaryHeaders, textHeaders); - } - } - - static class MultiHeaderServerMiddleware implements FlightServerMiddleware { - private final Map> binaryHeaders; - private final Map> textHeaders; - - MultiHeaderServerMiddleware( - Map> binaryHeaders, Map> textHeaders) { - this.binaryHeaders = binaryHeaders; - this.textHeaders = textHeaders; - } - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) { - binaryHeaders.forEach( - (key, values) -> values.forEach(value -> outgoingHeaders.insert(key, value))); - textHeaders.forEach( - (key, values) -> values.forEach(value -> outgoingHeaders.insert(key, value))); - } - - @Override - public void onCallCompleted(CallStatus status) {} - - @Override - public void onCallErrored(Throwable err) {} - } - - static class MultiHeaderClientMiddlewareFactory implements FlightClientMiddleware.Factory { - Map> lastBinaryHeaders = null; - Map> lastTextHeaders = null; - - @Override - public FlightClientMiddleware onCallStarted(CallInfo info) { - return new MultiHeaderClientMiddleware(this); - } - } - - static class MultiHeaderClientMiddleware implements FlightClientMiddleware { - private final MultiHeaderClientMiddlewareFactory factory; - - public MultiHeaderClientMiddleware(MultiHeaderClientMiddlewareFactory factory) { - this.factory = factory; - } - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) { - 
for (final Map.Entry> entry : EXPECTED_BINARY_HEADERS.entrySet()) { - entry.getValue().forEach((value) -> outgoingHeaders.insert(entry.getKey(), value)); - assertTrue(outgoingHeaders.containsKey(entry.getKey())); - } - for (final Map.Entry> entry : EXPECTED_TEXT_HEADERS.entrySet()) { - entry.getValue().forEach((value) -> outgoingHeaders.insert(entry.getKey(), value)); - assertTrue(outgoingHeaders.containsKey(entry.getKey())); - } - } - - @Override - public void onHeadersReceived(CallHeaders incomingHeaders) { - factory.lastBinaryHeaders = new HashMap<>(); - factory.lastTextHeaders = new HashMap<>(); - incomingHeaders - .keys() - .forEach( - header -> { - if (header.endsWith("-bin")) { - final List values = new ArrayList<>(); - incomingHeaders.getAllByte(header).forEach(values::add); - factory.lastBinaryHeaders.put(header, values); - } else { - final List values = new ArrayList<>(); - incomingHeaders.getAll(header).forEach(values::add); - factory.lastTextHeaders.put(header, values); - } - }); - } - - @Override - public void onCallCompleted(CallStatus status) {} - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDictionaryUtils.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDictionaryUtils.java deleted file mode 100644 index 12d5161ac02ef..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDictionaryUtils.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotSame; -import static org.junit.jupiter.api.Assertions.assertSame; - -import com.google.common.collect.ImmutableList; -import java.util.TreeSet; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link DictionaryUtils}. */ -public class TestDictionaryUtils { - - @Test - public void testReuseSchema() { - FieldType varcharType = new FieldType(true, new ArrowType.Utf8(), null); - FieldType intType = new FieldType(true, new ArrowType.Int(32, true), null); - - ImmutableList build = - ImmutableList.of( - new Field("stringCol", varcharType, null), new Field("intCol", intType, null)); - - Schema schema = new Schema(build); - Schema newSchema = DictionaryUtils.generateSchema(schema, null, new TreeSet<>()); - - // assert that no new schema is created. 
- assertSame(schema, newSchema); - } - - @Test - public void testCreateSchema() { - try (BufferAllocator allocator = new RootAllocator(1024)) { - DictionaryEncoding dictionaryEncoding = - new DictionaryEncoding(0, true, new ArrowType.Int(8, true)); - VarCharVector dictVec = new VarCharVector("dict vector", allocator); - Dictionary dictionary = new Dictionary(dictVec, dictionaryEncoding); - DictionaryProvider dictProvider = new DictionaryProvider.MapDictionaryProvider(dictionary); - TreeSet dictionaryUsed = new TreeSet<>(); - - FieldType encodedVarcharType = - new FieldType(true, new ArrowType.Int(8, true), dictionaryEncoding); - FieldType intType = new FieldType(true, new ArrowType.Int(32, true), null); - - ImmutableList build = - ImmutableList.of( - new Field("stringCol", encodedVarcharType, null), new Field("intCol", intType, null)); - - Schema schema = new Schema(build); - Schema newSchema = DictionaryUtils.generateSchema(schema, dictProvider, dictionaryUsed); - - // assert that a new schema is created. - assertNotSame(schema, newSchema); - - // assert the column is converted as expected - ArrowType newColType = newSchema.getFields().get(0).getType(); - assertEquals(new ArrowType.Utf8(), newColType); - - assertEquals(1, dictionaryUsed.size()); - assertEquals(0, dictionaryUsed.first()); - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDoExchange.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDoExchange.java deleted file mode 100644 index 1cd6e95e3dfb0..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestDoExchange.java +++ /dev/null @@ -1,672 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Collections; -import java.util.stream.IntStream; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - 
-public class TestDoExchange { - static byte[] EXCHANGE_DO_GET = "do-get".getBytes(StandardCharsets.UTF_8); - static byte[] EXCHANGE_DO_PUT = "do-put".getBytes(StandardCharsets.UTF_8); - static byte[] EXCHANGE_ECHO = "echo".getBytes(StandardCharsets.UTF_8); - static byte[] EXCHANGE_METADATA_ONLY = "only-metadata".getBytes(StandardCharsets.UTF_8); - static byte[] EXCHANGE_TRANSFORM = "transform".getBytes(StandardCharsets.UTF_8); - static byte[] EXCHANGE_CANCEL = "cancel".getBytes(StandardCharsets.UTF_8); - static byte[] EXCHANGE_ERROR = "error".getBytes(StandardCharsets.UTF_8); - - private BufferAllocator allocator; - private FlightServer server; - private FlightClient client; - - @BeforeEach - public void setUp() throws Exception { - allocator = new RootAllocator(Integer.MAX_VALUE); - final Location serverLocation = Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, 0); - server = FlightServer.builder(allocator, serverLocation, new Producer(allocator)).build(); - server.start(); - final Location clientLocation = - Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, server.getPort()); - client = FlightClient.builder(allocator, clientLocation).build(); - } - - @AfterEach - public void tearDown() throws Exception { - AutoCloseables.close(client, server, allocator); - } - - /** Test a pure-metadata flow. */ - @Test - public void testDoExchangeOnlyMetadata() throws Exception { - // Send a particular descriptor to the server and check for a particular response pattern. 
- try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(EXCHANGE_METADATA_ONLY))) { - final FlightStream reader = stream.getReader(); - - // Server starts by sending a message without data (hence no VectorSchemaRoot should be - // present) - assertTrue(reader.next()); - assertFalse(reader.hasRoot()); - assertEquals(42, reader.getLatestMetadata().getInt(0)); - - // Write a metadata message to the server (without sending any data) - ArrowBuf buf = allocator.buffer(4); - buf.writeInt(84); - stream.getWriter().putMetadata(buf); - - // Check that the server echoed the metadata back to us - assertTrue(reader.next()); - assertFalse(reader.hasRoot()); - assertEquals(84, reader.getLatestMetadata().getInt(0)); - - // Close our write channel and ensure the server also closes theirs - stream.getWriter().completed(); - assertFalse(reader.next()); - } - } - - /** Emulate a DoGet with a DoExchange. */ - @Test - public void testDoExchangeDoGet() throws Exception { - try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(EXCHANGE_DO_GET))) { - final FlightStream reader = stream.getReader(); - VectorSchemaRoot root = reader.getRoot(); - IntVector iv = (IntVector) root.getVector("a"); - int value = 0; - while (reader.next()) { - for (int i = 0; i < root.getRowCount(); i++) { - assertFalse(iv.isNull(i), String.format("Row %d should not be null", value)); - assertEquals(value, iv.get(i)); - value++; - } - } - assertEquals(100, value); - } - } - - /** Emulate a DoPut with a DoExchange. 
*/ - @Test - public void testDoExchangeDoPut() throws Exception { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true)))); - try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(EXCHANGE_DO_PUT)); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - IntVector iv = (IntVector) root.getVector("a"); - iv.allocateNew(); - - stream.getWriter().start(root); - int counter = 0; - for (int i = 0; i < 10; i++) { - ValueVectorDataPopulator.setVector( - iv, IntStream.range(0, i).boxed().toArray(Integer[]::new)); - root.setRowCount(i); - counter += i; - stream.getWriter().putNext(); - - assertTrue(stream.getReader().next()); - assertFalse(stream.getReader().hasRoot()); - // For each write, the server sends back a metadata message containing the index of the last - // written batch - final ArrowBuf metadata = stream.getReader().getLatestMetadata(); - assertEquals(counter, metadata.getInt(0)); - } - stream.getWriter().completed(); - - while (stream.getReader().next()) { - // Drain the stream. Otherwise closing the stream sends a CANCEL which seriously screws with - // the server. - // CANCEL -> runs onCancel handler -> closes the FlightStream early - } - } - } - - /** Test a DoExchange that echoes the client message. 
*/ - @Test - public void testDoExchangeEcho() throws Exception { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true)))); - try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(EXCHANGE_ECHO)); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final FlightStream reader = stream.getReader(); - - // First try writing metadata without starting the Arrow data stream - ArrowBuf buf = allocator.buffer(4); - buf.writeInt(42); - stream.getWriter().putMetadata(buf); - buf = allocator.buffer(4); - buf.writeInt(84); - stream.getWriter().putMetadata(buf); - - // Ensure that the server echoes the metadata back, also without starting its data stream - assertTrue(reader.next()); - assertFalse(reader.hasRoot()); - assertEquals(42, reader.getLatestMetadata().getInt(0)); - assertTrue(reader.next()); - assertFalse(reader.hasRoot()); - assertEquals(84, reader.getLatestMetadata().getInt(0)); - - // Write data and check that it gets echoed back. - IntVector iv = (IntVector) root.getVector("a"); - iv.allocateNew(); - stream.getWriter().start(root); - for (int i = 0; i < 10; i++) { - iv.setSafe(0, i); - root.setRowCount(1); - stream.getWriter().putNext(); - - assertTrue(reader.next()); - assertNull(reader.getLatestMetadata()); - assertEquals(root.getSchema(), reader.getSchema()); - assertEquals(i, ((IntVector) reader.getRoot().getVector("a")).get(0)); - } - - // Complete the stream so that the server knows not to expect any more messages from us. - stream.getWriter().completed(); - // The server will end its side of the call, so this shouldn't block or indicate that - // there is more data. - assertFalse(reader.next(), "We should not be waiting for any messages"); - } - } - - /** Write some data, have it transformed, then read it back. 
*/ - @Test - public void testTransform() throws Exception { - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("a", new ArrowType.Int(32, true)), - Field.nullable("b", new ArrowType.Int(32, true)))); - try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(EXCHANGE_TRANSFORM))) { - // Write ten batches of data to the stream, where batch N contains N rows of data (N in [0, - // 10)) - final FlightStream reader = stream.getReader(); - final FlightClient.ClientStreamListener writer = stream.getWriter(); - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - writer.start(root); - for (int batchIndex = 0; batchIndex < 10; batchIndex++) { - for (final FieldVector rawVec : root.getFieldVectors()) { - final IntVector vec = (IntVector) rawVec; - ValueVectorDataPopulator.setVector( - vec, IntStream.range(0, batchIndex).boxed().toArray(Integer[]::new)); - } - root.setRowCount(batchIndex); - writer.putNext(); - } - } - // Indicate that we're done writing so that the server does not expect more data. - writer.completed(); - - // Read back data. We expect the server to double each value in each row of each batch. 
- assertEquals(schema, reader.getSchema()); - final VectorSchemaRoot root = reader.getRoot(); - for (int batchIndex = 0; batchIndex < 10; batchIndex++) { - assertTrue(reader.next(), "Didn't receive batch #" + batchIndex); - assertEquals(batchIndex, root.getRowCount()); - for (final FieldVector rawVec : root.getFieldVectors()) { - final IntVector vec = (IntVector) rawVec; - for (int row = 0; row < batchIndex; row++) { - assertEquals(2 * row, vec.get(row)); - } - } - } - - // The server also sends back a metadata-only message containing the message count - assertTrue(reader.next(), "There should be one extra message"); - assertEquals(10, reader.getLatestMetadata().getInt(0)); - assertFalse(reader.next(), "There should be no more data"); - } - } - - /** Write some data, have it transformed, then read it back. Use the zero-copy optimization. */ - @Test - public void testTransformZeroCopy() throws Exception { - final int rowsPerBatch = 4096; - final Schema schema = - new Schema( - Arrays.asList( - Field.nullable("a", new ArrowType.Int(32, true)), - Field.nullable("b", new ArrowType.Int(32, true)))); - try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(EXCHANGE_TRANSFORM))) { - // Write ten batches of data to the stream, where batch N contains 1024 rows of data (N in [0, - // 10)) - final FlightStream reader = stream.getReader(); - final FlightClient.ClientStreamListener writer = stream.getWriter(); - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - writer.start(root); - // Enable the zero-copy optimization - writer.setUseZeroCopy(true); - for (int batchIndex = 0; batchIndex < 100; batchIndex++) { - for (final FieldVector rawVec : root.getFieldVectors()) { - final IntVector vec = (IntVector) rawVec; - for (int row = 0; row < rowsPerBatch; row++) { - // Use a value that'll be different per batch, so we can detect if we accidentally - // reuse a buffer (and overwrite a buffer that hasn't yet 
been sent over the network) - vec.setSafe(row, batchIndex + row); - } - } - root.setRowCount(rowsPerBatch); - writer.putNext(); - // Allocate new buffers every time since we don't know if gRPC has written the buffer - // to the network yet - root.allocateNew(); - } - } - // Indicate that we're done writing so that the server does not expect more data. - writer.completed(); - - // Read back data. We expect the server to double each value in each row of each batch. - assertEquals(schema, reader.getSchema()); - final VectorSchemaRoot root = reader.getRoot(); - for (int batchIndex = 0; batchIndex < 100; batchIndex++) { - assertTrue(reader.next(), "Didn't receive batch #" + batchIndex); - assertEquals(rowsPerBatch, root.getRowCount()); - for (final FieldVector rawVec : root.getFieldVectors()) { - final IntVector vec = (IntVector) rawVec; - for (int row = 0; row < rowsPerBatch; row++) { - assertEquals(2 * (batchIndex + row), vec.get(row)); - } - } - } - - // The server also sends back a metadata-only message containing the message count - assertTrue(reader.next(), "There should be one extra message"); - assertEquals(100, reader.getLatestMetadata().getInt(0)); - assertFalse(reader.next(), "There should be no more data"); - } - } - - /** Have the server immediately cancel; ensure the client doesn't hang. */ - @Test - public void testServerCancel() throws Exception { - try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(EXCHANGE_CANCEL))) { - final FlightStream reader = stream.getReader(); - final FlightClient.ClientStreamListener writer = stream.getWriter(); - - final FlightRuntimeException fre = assertThrows(FlightRuntimeException.class, reader::next); - assertEquals(FlightStatusCode.CANCELLED, fre.status().code()); - assertEquals("expected", fre.status().description()); - - // Before, this would hang forever, because the writer checks if the stream is ready and not - // cancelled. 
- // However, the cancellation flag (was) only updated by reading, and the stream is never ready - // once the call ends. - // The test looks weird since normally, an application shouldn't try to write after the read - // fails. However, - // an application that isn't reading data wouldn't notice, and would instead get stuck on the - // write. - // Here, we read first to avoid a race condition in the test itself. - writer.putMetadata(allocator.getEmpty()); - } - } - - /** Have the server immediately cancel; ensure the server cleans up the FlightStream. */ - @Test - public void testServerCancelLeak() throws Exception { - try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(EXCHANGE_CANCEL))) { - final FlightStream reader = stream.getReader(); - final FlightClient.ClientStreamListener writer = stream.getWriter(); - try (final VectorSchemaRoot root = VectorSchemaRoot.create(Producer.SCHEMA, allocator)) { - writer.start(root); - final IntVector ints = (IntVector) root.getVector("a"); - for (int i = 0; i < 128; i++) { - for (int row = 0; row < 1024; row++) { - ints.setSafe(row, row); - } - root.setRowCount(1024); - writer.putNext(); - } - } - - final FlightRuntimeException fre = assertThrows(FlightRuntimeException.class, reader::next); - assertEquals(FlightStatusCode.CANCELLED, fre.status().code()); - assertEquals("expected", fre.status().description()); - } - } - - /** Have the client cancel without reading; ensure memory is not leaked. */ - @Test - @Disabled - public void testClientCancel() throws Exception { - try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(EXCHANGE_DO_GET))) { - final FlightStream reader = stream.getReader(); - reader.cancel("", null); - // Cancel should be idempotent - reader.cancel("", null); - } - } - - /** Test a DoExchange error handling. 
*/ - @Test - public void testDoExchangeError() throws Exception { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true)))); - try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(EXCHANGE_ERROR)); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final FlightStream reader = stream.getReader(); - - // Write data and check that it gets echoed back. - IntVector iv = (IntVector) root.getVector("a"); - iv.allocateNew(); - stream.getWriter().start(root); - for (int i = 0; i < 10; i++) { - iv.setSafe(0, i); - root.setRowCount(1); - stream.getWriter().putNext(); - - assertTrue(reader.next()); - assertEquals(root.getSchema(), reader.getSchema()); - assertEquals(i, ((IntVector) reader.getRoot().getVector("a")).get(0)); - } - - // Complete the stream so that the server knows not to expect any more messages from us. - stream.getWriter().completed(); - - // Must call reader.next() to get any errors after exchange, will return false if no error - final FlightRuntimeException fre = - assertThrows(FlightRuntimeException.class, stream::getResult); - assertEquals("error completing exchange", fre.status().description()); - } - } - - /** Have the client close the stream without reading; ensure memory is not leaked. */ - @Test - public void testClientClose() throws Exception { - try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(EXCHANGE_DO_GET))) { - assertEquals(Producer.SCHEMA, stream.getReader().getSchema()); - } - // Intentionally leak the allocator in this test. gRPC has a bug where it does not wait for all - // calls to complete - // when shutting down the server, so this test will fail otherwise because it closes the - // allocator while the - // server-side call still has memory allocated. - // TODO(ARROW-9586): fix this once we track outstanding RPCs outside of gRPC. 
- // https://stackoverflow.com/questions/46716024/ - allocator = null; - client = null; - } - - /** Test closing with Metadata can't lead to error. */ - @Test - public void testCloseWithMetadata() throws Exception { - // Send a particular descriptor to the server and check for a particular response pattern. - try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(EXCHANGE_METADATA_ONLY))) { - final FlightStream reader = stream.getReader(); - - // Server starts by sending a message without data (hence no VectorSchemaRoot should be - // present) - assertTrue(reader.next()); - assertFalse(reader.hasRoot()); - assertEquals(42, reader.getLatestMetadata().getInt(0)); - - // Write a metadata message to the server (without sending any data) - ArrowBuf buf = allocator.buffer(4); - buf.writeInt(84); - stream.getWriter().putMetadata(buf); - - // Check that the server echoed the metadata back to us - assertTrue(reader.next()); - assertFalse(reader.hasRoot()); - assertEquals(84, reader.getLatestMetadata().getInt(0)); - - // Close our write channel and ensure the server also closes theirs - stream.getWriter().completed(); - stream.getResult(); - - // Not necessary to close reader here, but check closing twice doesn't lead to negative refcnt - // from metadata - stream.getReader().close(); - } - } - - static class Producer extends NoOpFlightProducer { - static final Schema SCHEMA = - new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true)))); - private final BufferAllocator allocator; - - Producer(BufferAllocator allocator) { - this.allocator = allocator; - } - - @Override - public void doExchange(CallContext context, FlightStream reader, ServerStreamListener writer) { - if (Arrays.equals(reader.getDescriptor().getCommand(), EXCHANGE_METADATA_ONLY)) { - metadataOnly(context, reader, writer); - } else if (Arrays.equals(reader.getDescriptor().getCommand(), EXCHANGE_DO_GET)) { - doGet(context, reader, writer); 
- } else if (Arrays.equals(reader.getDescriptor().getCommand(), EXCHANGE_DO_PUT)) { - doPut(context, reader, writer); - } else if (Arrays.equals(reader.getDescriptor().getCommand(), EXCHANGE_ECHO)) { - echo(context, reader, writer); - } else if (Arrays.equals(reader.getDescriptor().getCommand(), EXCHANGE_TRANSFORM)) { - transform(context, reader, writer); - } else if (Arrays.equals(reader.getDescriptor().getCommand(), EXCHANGE_CANCEL)) { - cancel(context, reader, writer); - } else if (Arrays.equals(reader.getDescriptor().getCommand(), EXCHANGE_ERROR)) { - error(context, reader, writer); - } else { - writer.error( - CallStatus.UNIMPLEMENTED - .withDescription("Command not implemented") - .toRuntimeException()); - } - } - - /** Emulate DoGet. */ - private void doGet( - CallContext unusedContext, FlightStream unusedReader, ServerStreamListener writer) { - try (VectorSchemaRoot root = VectorSchemaRoot.create(SCHEMA, allocator)) { - writer.start(root); - root.allocateNew(); - IntVector iv = (IntVector) root.getVector("a"); - - for (int i = 0; i < 100; i += 2) { - iv.set(0, i); - iv.set(1, i + 1); - root.setRowCount(2); - writer.putNext(); - } - } - writer.completed(); - } - - /** Emulate DoPut. */ - private void doPut( - CallContext unusedContext, FlightStream reader, ServerStreamListener writer) { - int counter = 0; - while (reader.next()) { - if (!reader.hasRoot()) { - writer.error( - CallStatus.INVALID_ARGUMENT - .withDescription("Message has no data") - .toRuntimeException()); - return; - } - counter += reader.getRoot().getRowCount(); - - final ArrowBuf pong = allocator.buffer(4); - pong.writeInt(counter); - writer.putMetadata(pong); - } - writer.completed(); - } - - /** Exchange metadata without ever exchanging data. 
*/ - private void metadataOnly( - CallContext unusedContext, FlightStream reader, ServerStreamListener writer) { - final ArrowBuf buf = allocator.buffer(4); - buf.writeInt(42); - writer.putMetadata(buf); - assertTrue(reader.next()); - assertNotNull(reader.getLatestMetadata()); - reader.getLatestMetadata().getReferenceManager().retain(); - writer.putMetadata(reader.getLatestMetadata()); - writer.completed(); - } - - /** Echo the client's response back to it. */ - private void echo(CallContext unusedContext, FlightStream reader, ServerStreamListener writer) { - VectorSchemaRoot root = null; - VectorLoader loader = null; - while (reader.next()) { - if (reader.hasRoot()) { - if (root == null) { - root = VectorSchemaRoot.create(reader.getSchema(), allocator); - loader = new VectorLoader(root); - writer.start(root); - } - VectorUnloader unloader = new VectorUnloader(reader.getRoot()); - try (final ArrowRecordBatch arb = unloader.getRecordBatch()) { - loader.load(arb); - } - if (reader.getLatestMetadata() != null) { - reader.getLatestMetadata().getReferenceManager().retain(); - writer.putNext(reader.getLatestMetadata()); - } else { - writer.putNext(); - } - } else { - // Pure metadata - reader.getLatestMetadata().getReferenceManager().retain(); - writer.putMetadata(reader.getLatestMetadata()); - } - } - if (root != null) { - root.close(); - } - writer.completed(); - } - - /** Accept a set of messages, then return some result. 
*/ - private void transform( - CallContext unusedContext, FlightStream reader, ServerStreamListener writer) { - final Schema schema = reader.getSchema(); - for (final Field field : schema.getFields()) { - if (!(field.getType() instanceof ArrowType.Int)) { - writer.error( - CallStatus.INVALID_ARGUMENT - .withDescription("Invalid type: " + field) - .toRuntimeException()); - return; - } - final ArrowType.Int intType = (ArrowType.Int) field.getType(); - if (!intType.getIsSigned() || intType.getBitWidth() != 32) { - writer.error( - CallStatus.INVALID_ARGUMENT - .withDescription("Must be i32: " + field) - .toRuntimeException()); - return; - } - } - int batches = 0; - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - writer.start(root); - writer.setUseZeroCopy(true); - final VectorLoader loader = new VectorLoader(root); - final VectorUnloader unloader = new VectorUnloader(reader.getRoot()); - while (reader.next()) { - try (final ArrowRecordBatch batch = unloader.getRecordBatch()) { - loader.load(batch); - } - batches++; - for (final FieldVector rawVec : root.getFieldVectors()) { - final IntVector vec = (IntVector) rawVec; - for (int i = 0; i < root.getRowCount(); i++) { - if (!vec.isNull(i)) { - vec.set(i, vec.get(i) * 2); - } - } - } - writer.putNext(); - } - } - final ArrowBuf count = allocator.buffer(4); - count.writeInt(batches); - writer.putMetadata(count); - writer.completed(); - } - - /** Immediately cancel the call. 
*/ - private void cancel( - CallContext unusedContext, FlightStream unusedReader, ServerStreamListener writer) { - writer.error(CallStatus.CANCELLED.withDescription("expected").toRuntimeException()); - } - - private void error( - CallContext unusedContext, FlightStream reader, ServerStreamListener writer) { - VectorSchemaRoot root = null; - VectorLoader loader = null; - while (reader.next()) { - - if (root == null) { - root = VectorSchemaRoot.create(reader.getSchema(), allocator); - loader = new VectorLoader(root); - writer.start(root); - } - VectorUnloader unloader = new VectorUnloader(reader.getRoot()); - try (final ArrowRecordBatch arb = unloader.getRecordBatch()) { - loader.load(arb); - } - - writer.putNext(); - } - if (root != null) { - root.close(); - } - - // An error occurs before completing the writer - writer.error( - CallStatus.INTERNAL.withDescription("error completing exchange").toRuntimeException()); - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestErrorMetadata.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestErrorMetadata.java deleted file mode 100644 index a9a3e355bc37f..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestErrorMetadata.java +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import com.google.protobuf.Any; -import com.google.protobuf.InvalidProtocolBufferException; -import com.google.rpc.Status; -import io.grpc.Metadata; -import io.grpc.StatusRuntimeException; -import io.grpc.protobuf.ProtoUtils; -import io.grpc.protobuf.StatusProto; -import java.nio.charset.StandardCharsets; -import org.apache.arrow.flight.perf.impl.PerfOuterClass; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Test; - -public class TestErrorMetadata { - private static final Metadata.BinaryMarshaller marshaller = - ProtoUtils.metadataMarshaller(Status.getDefaultInstance()); - - /** Ensure metadata attached to a gRPC error is propagated. 
*/ - @Test - public void testGrpcMetadata() throws Exception { - PerfOuterClass.Perf perf = - PerfOuterClass.Perf.newBuilder() - .setStreamCount(12) - .setRecordsPerBatch(1000) - .setRecordsPerStream(1000000L) - .build(); - StatusRuntimeExceptionProducer producer = new StatusRuntimeExceptionProducer(perf); - try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final FlightServer s = - FlightServer.builder(allocator, forGrpcInsecure(LOCALHOST, 0), producer) - .build() - .start(); - final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) { - final CallStatus flightStatus = - FlightTestUtil.assertCode( - FlightStatusCode.CANCELLED, - () -> { - FlightStream stream = - client.getStream(new Ticket("abs".getBytes(StandardCharsets.UTF_8))); - stream.next(); - }); - PerfOuterClass.Perf newPerf = null; - ErrorFlightMetadata metadata = flightStatus.metadata(); - assertNotNull(metadata); - assertEquals(2, metadata.keys().size()); - assertTrue(metadata.containsKey("grpc-status-details-bin")); - Status status = marshaller.parseBytes(metadata.getByte("grpc-status-details-bin")); - for (Any details : status.getDetailsList()) { - if (details.is(PerfOuterClass.Perf.class)) { - try { - newPerf = details.unpack(PerfOuterClass.Perf.class); - } catch (InvalidProtocolBufferException e) { - fail(); - } - } - } - assertNotNull(newPerf); - assertEquals(perf, newPerf); - } - } - - /** Ensure metadata attached to a Flight error is propagated. 
*/ - @Test - public void testFlightMetadata() throws Exception { - try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final FlightServer s = - FlightServer.builder(allocator, forGrpcInsecure(LOCALHOST, 0), new CallStatusProducer()) - .build() - .start(); - final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) { - CallStatus flightStatus = - FlightTestUtil.assertCode( - FlightStatusCode.INVALID_ARGUMENT, - () -> { - FlightStream stream = client.getStream(new Ticket(new byte[0])); - stream.next(); - }); - ErrorFlightMetadata metadata = flightStatus.metadata(); - assertNotNull(metadata); - assertEquals("foo", metadata.get("x-foo")); - assertArrayEquals(new byte[] {1}, metadata.getByte("x-bar-bin")); - - flightStatus = - FlightTestUtil.assertCode( - FlightStatusCode.INVALID_ARGUMENT, - () -> { - client.getInfo(FlightDescriptor.command(new byte[0])); - }); - metadata = flightStatus.metadata(); - assertNotNull(metadata); - assertEquals("foo", metadata.get("x-foo")); - assertArrayEquals(new byte[] {1}, metadata.getByte("x-bar-bin")); - } - } - - private static class StatusRuntimeExceptionProducer extends NoOpFlightProducer { - private final PerfOuterClass.Perf perf; - - private StatusRuntimeExceptionProducer(PerfOuterClass.Perf perf) { - this.perf = perf; - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - StatusRuntimeException sre = - StatusProto.toStatusRuntimeException( - Status.newBuilder() - .setCode(1) - .setMessage("Testing 1 2 3") - .addDetails(Any.pack(perf, "arrow/meta/types")) - .build()); - listener.error(sre); - } - } - - private static class CallStatusProducer extends NoOpFlightProducer { - ErrorFlightMetadata metadata; - - CallStatusProducer() { - this.metadata = new ErrorFlightMetadata(); - metadata.insert("x-foo", "foo"); - metadata.insert("x-bar-bin", new byte[] {1}); - } - - @Override - public void getStream(CallContext context, Ticket 
ticket, ServerStreamListener listener) { - listener.error( - CallStatus.INVALID_ARGUMENT - .withDescription("Failed") - .withMetadata(metadata) - .toRuntimeException()); - } - - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - throw CallStatus.INVALID_ARGUMENT - .withDescription("Failed") - .withMetadata(metadata) - .toRuntimeException(); - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightClient.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightClient.java deleted file mode 100644 index 86e7f2f19097c..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightClient.java +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.flight.FlightClient.ClientStreamListener; -import org.apache.arrow.flight.TestBasicOperation.Producer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryEncoder; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -public class TestFlightClient { - /** ARROW-5063: make sure two clients to the same location can be closed independently. 
*/ - @Test - public void independentShutdown() throws Exception { - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final FlightServer server = - FlightServer.builder(allocator, forGrpcInsecure(LOCALHOST, 0), new Producer(allocator)) - .build() - .start()) { - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", new ArrowType.Int(32, true)))); - try (final FlightClient client1 = - FlightClient.builder(allocator, server.getLocation()).build(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - // Use startPut as this ensures the RPC won't finish until we want it to - final ClientStreamListener listener = - client1.startPut(FlightDescriptor.path("test"), root, new AsyncPutListener()); - try (final FlightClient client2 = - FlightClient.builder(allocator, server.getLocation()).build()) { - client2.listActions().forEach(actionType -> assertNotNull(actionType.getType())); - } - listener.completed(); - listener.getResult(); - } - } - } - - /** - * ARROW-5978: make sure that we can properly close a client/stream after requesting dictionaries. - */ - @Disabled // Unfortunately this test is flaky in CI. 
- @Test - public void freeDictionaries() throws Exception { - final Schema expectedSchema = - new Schema( - Collections.singletonList( - new Field( - "encoded", - new FieldType( - true, new ArrowType.Int(32, true), new DictionaryEncoding(1L, false, null)), - null))); - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final BufferAllocator serverAllocator = - allocator.newChildAllocator("flight-server", 0, Integer.MAX_VALUE); - final FlightServer server = - FlightServer.builder( - serverAllocator, - forGrpcInsecure(LOCALHOST, 0), - new DictionaryProducer(serverAllocator)) - .build() - .start()) { - try (final FlightClient client = - FlightClient.builder(allocator, server.getLocation()).build()) { - try (final FlightStream stream = client.getStream(new Ticket(new byte[0]))) { - assertTrue(stream.next()); - assertNotNull(stream.getDictionaryProvider().lookup(1)); - final VectorSchemaRoot root = stream.getRoot(); - assertEquals(expectedSchema, root.getSchema()); - assertEquals(6, root.getVector("encoded").getValueCount()); - try (final ValueVector decoded = - DictionaryEncoder.decode( - root.getVector("encoded"), stream.getDictionaryProvider().lookup(1))) { - assertFalse(decoded.isNull(1)); - assertTrue(decoded instanceof VarCharVector); - assertArrayEquals( - "one".getBytes(StandardCharsets.UTF_8), ((VarCharVector) decoded).get(1)); - } - assertFalse(stream.next()); - } - // Closing stream fails if it doesn't free dictionaries; closing dictionaries fails - // (refcount goes negative) - // if reference isn't retained in ArrowMessage - } - } - } - - /** ARROW-5978: make sure that dictionary ownership can't be claimed twice. */ - @Disabled // Unfortunately this test is flaky in CI. 
- @Test - public void ownDictionaries() throws Exception { - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final BufferAllocator serverAllocator = - allocator.newChildAllocator("flight-server", 0, Integer.MAX_VALUE); - final FlightServer server = - FlightServer.builder( - serverAllocator, - forGrpcInsecure(LOCALHOST, 0), - new DictionaryProducer(serverAllocator)) - .build() - .start()) { - try (final FlightClient client = - FlightClient.builder(allocator, server.getLocation()).build()) { - try (final FlightStream stream = client.getStream(new Ticket(new byte[0]))) { - assertTrue(stream.next()); - assertFalse(stream.next()); - final DictionaryProvider provider = stream.takeDictionaryOwnership(); - assertThrows(IllegalStateException.class, stream::takeDictionaryOwnership); - assertThrows(IllegalStateException.class, stream::getDictionaryProvider); - DictionaryUtils.closeDictionaries(stream.getSchema(), provider); - } - } - } - } - - /** ARROW-5978: make sure that dictionaries can be used after closing the stream. */ - @Disabled // Unfortunately this test is flaky in CI. 
- @Test - public void useDictionariesAfterClose() throws Exception { - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final BufferAllocator serverAllocator = - allocator.newChildAllocator("flight-server", 0, Integer.MAX_VALUE); - final FlightServer server = - FlightServer.builder( - serverAllocator, - forGrpcInsecure(LOCALHOST, 0), - new DictionaryProducer(serverAllocator)) - .build() - .start()) { - try (final FlightClient client = - FlightClient.builder(allocator, server.getLocation()).build()) { - final VectorSchemaRoot root; - final DictionaryProvider provider; - try (final FlightStream stream = client.getStream(new Ticket(new byte[0]))) { - final VectorUnloader unloader = new VectorUnloader(stream.getRoot()); - root = VectorSchemaRoot.create(stream.getSchema(), allocator); - final VectorLoader loader = new VectorLoader(root); - while (stream.next()) { - try (final ArrowRecordBatch arb = unloader.getRecordBatch()) { - loader.load(arb); - } - } - provider = stream.takeDictionaryOwnership(); - } - try (final ValueVector decoded = - DictionaryEncoder.decode(root.getVector("encoded"), provider.lookup(1))) { - assertFalse(decoded.isNull(1)); - assertTrue(decoded instanceof VarCharVector); - assertArrayEquals( - "one".getBytes(StandardCharsets.UTF_8), ((VarCharVector) decoded).get(1)); - } - root.close(); - DictionaryUtils.closeDictionaries(root.getSchema(), provider); - } - } - } - - static class DictionaryProducer extends NoOpFlightProducer { - - private final BufferAllocator allocator; - - public DictionaryProducer(BufferAllocator allocator) { - this.allocator = allocator; - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - final byte[] zero = "zero".getBytes(StandardCharsets.UTF_8); - final byte[] one = "one".getBytes(StandardCharsets.UTF_8); - final byte[] two = "two".getBytes(StandardCharsets.UTF_8); - try (final VarCharVector dictionaryVector = 
newVarCharVector("dictionary", allocator)) { - final DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - - dictionaryVector.allocateNew(512, 3); - dictionaryVector.setSafe(0, zero, 0, zero.length); - dictionaryVector.setSafe(1, one, 0, one.length); - dictionaryVector.setSafe(2, two, 0, two.length); - dictionaryVector.setValueCount(3); - - final Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - provider.put(dictionary); - - final FieldVector encodedVector; - try (final VarCharVector unencoded = newVarCharVector("encoded", allocator)) { - unencoded.allocateNewSafe(); - unencoded.set(1, one); - unencoded.set(2, two); - unencoded.set(3, zero); - unencoded.set(4, two); - unencoded.setValueCount(6); - encodedVector = (FieldVector) DictionaryEncoder.encode(unencoded, dictionary); - } - - final List fields = Collections.singletonList(encodedVector.getField()); - final List vectors = Collections.singletonList(encodedVector); - - try (final VectorSchemaRoot root = - new VectorSchemaRoot(fields, vectors, encodedVector.getValueCount())) { - listener.start(root, provider); - listener.putNext(); - listener.completed(); - } - } - } - - private static VarCharVector newVarCharVector(String name, BufferAllocator allocator) { - return (VarCharVector) - FieldType.nullable(new ArrowType.Utf8()).createNewSingleVector(name, allocator, null); - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightGrpcUtils.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightGrpcUtils.java deleted file mode 100644 index b82e9ee26b15d..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightGrpcUtils.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import com.google.protobuf.Empty; -import io.grpc.BindableService; -import io.grpc.ConnectivityState; -import io.grpc.ManagedChannel; -import io.grpc.Server; -import io.grpc.inprocess.InProcessChannelBuilder; -import io.grpc.inprocess.InProcessServerBuilder; -import io.grpc.stub.StreamObserver; -import java.io.IOException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import org.apache.arrow.flight.auth.ServerAuthHandler; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Unit test which adds 2 services to same server end point. 
*/ -public class TestFlightGrpcUtils { - private Server server; - private BufferAllocator allocator; - private String serverName; - - @BeforeEach - public void setup() throws IOException { - // Defines flight service - allocator = new RootAllocator(Integer.MAX_VALUE); - final NoOpFlightProducer producer = new NoOpFlightProducer(); - final ServerAuthHandler authHandler = ServerAuthHandler.NO_OP; - final ExecutorService exec = Executors.newCachedThreadPool(); - final BindableService flightBindingService = - FlightGrpcUtils.createFlightService(allocator, producer, authHandler, exec); - - // initializes server with 2 services - FlightBindingService & TestService - serverName = InProcessServerBuilder.generateName(); - server = - InProcessServerBuilder.forName(serverName) - .directExecutor() - .addService(flightBindingService) - .addService(new TestServiceAdapter()) - .build(); - server.start(); - } - - @AfterEach - public void cleanup() { - server.shutdownNow(); - } - - /** - * This test checks if multiple gRPC services can be added to the same server endpoint and if they - * can be used by different clients via the same channel. - * - * @throws IOException If server fails to start. - */ - @Test - public void testMultipleGrpcServices() throws IOException { - // Initializes channel so that multiple clients can communicate with server - final ManagedChannel managedChannel = - InProcessChannelBuilder.forName(serverName).directExecutor().build(); - - // Defines flight client and calls service method. 
Since we use a NoOpFlightProducer we expect - // the service - // to throw a RunTimeException - final FlightClient flightClient = FlightGrpcUtils.createFlightClient(allocator, managedChannel); - final Iterable actionTypes = flightClient.listActions(); - assertThrows( - FlightRuntimeException.class, - () -> actionTypes.forEach(actionType -> System.out.println(actionType.toString()))); - - // Define Test client as a blocking stub and call test method which correctly returns an empty - // protobuf object - final TestServiceGrpc.TestServiceBlockingStub blockingStub = - TestServiceGrpc.newBlockingStub(managedChannel); - assertEquals(Empty.newBuilder().build(), blockingStub.test(Empty.newBuilder().build())); - } - - @Test - public void testShutdown() throws IOException, InterruptedException { - // Initializes channel so that multiple clients can communicate with server - final ManagedChannel managedChannel = - InProcessChannelBuilder.forName(serverName).directExecutor().build(); - - // Defines flight client and calls service method. Since we use a NoOpFlightProducer we expect - // the service - // to throw a RunTimeException - final FlightClient flightClient = - FlightGrpcUtils.createFlightClientWithSharedChannel(allocator, managedChannel); - - // Should be a no-op. 
- flightClient.close(); - assertFalse(managedChannel.isShutdown()); - assertFalse(managedChannel.isTerminated()); - assertEquals(ConnectivityState.IDLE, managedChannel.getState(false)); - managedChannel.shutdownNow(); - } - - @Test - public void testProxyChannel() throws IOException, InterruptedException { - // Initializes channel so that multiple clients can communicate with server - final ManagedChannel managedChannel = - InProcessChannelBuilder.forName(serverName).directExecutor().build(); - - final FlightGrpcUtils.NonClosingProxyManagedChannel proxyChannel = - new FlightGrpcUtils.NonClosingProxyManagedChannel(managedChannel); - assertFalse(proxyChannel.isShutdown()); - assertFalse(proxyChannel.isTerminated()); - proxyChannel.shutdown(); - assertTrue(proxyChannel.isShutdown()); - assertTrue(proxyChannel.isTerminated()); - assertEquals(ConnectivityState.SHUTDOWN, proxyChannel.getState(false)); - try { - proxyChannel.newCall(null, null); - fail(); - } catch (IllegalStateException e) { - // This is expected, since the proxy channel is shut down. 
- } - - assertFalse(managedChannel.isShutdown()); - assertFalse(managedChannel.isTerminated()); - assertEquals(ConnectivityState.IDLE, managedChannel.getState(false)); - - managedChannel.shutdownNow(); - } - - @Test - public void testProxyChannelWithClosedChannel() throws IOException, InterruptedException { - // Initializes channel so that multiple clients can communicate with server - final ManagedChannel managedChannel = - InProcessChannelBuilder.forName(serverName).directExecutor().build(); - - final FlightGrpcUtils.NonClosingProxyManagedChannel proxyChannel = - new FlightGrpcUtils.NonClosingProxyManagedChannel(managedChannel); - assertFalse(proxyChannel.isShutdown()); - assertFalse(proxyChannel.isTerminated()); - managedChannel.shutdownNow(); - assertTrue(proxyChannel.isShutdown()); - assertTrue(proxyChannel.isTerminated()); - assertEquals(ConnectivityState.SHUTDOWN, proxyChannel.getState(false)); - try { - proxyChannel.newCall(null, null); - fail(); - } catch (IllegalStateException e) { - // This is expected, since the proxy channel is shut down. - } - - assertTrue(managedChannel.isShutdown()); - assertTrue(managedChannel.isTerminated()); - assertEquals(ConnectivityState.SHUTDOWN, managedChannel.getState(false)); - } - - /** Private class used for testing purposes that overrides service behavior. */ - private static class TestServiceAdapter extends TestServiceGrpc.TestServiceImplBase { - - /** - * gRPC service that receives an empty object & returns and empty protobuf object. 
- * - * @param request google.protobuf.Empty - * @param responseObserver google.protobuf.Empty - */ - @Override - public void test(Empty request, StreamObserver responseObserver) { - responseObserver.onNext(Empty.newBuilder().build()); - responseObserver.onCompleted(); - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightService.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightService.java deleted file mode 100644 index fc3f83e4eafd3..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestFlightService.java +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.fail; - -import io.grpc.stub.ServerCallStreamObserver; -import java.nio.charset.StandardCharsets; -import java.util.Collections; -import java.util.Optional; -import java.util.Random; -import org.apache.arrow.flight.impl.Flight; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestFlightService { - - private BufferAllocator allocator; - - @BeforeEach - public void setup() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void cleanup() throws Exception { - AutoCloseables.close(allocator); - } - - @Test - public void testFlightServiceWithNoAuthHandlerOrInterceptors() { - // This test is for ARROW-10491. There was a bug where FlightService would try to access the - // RequestContext, - // but the RequestContext was getting set to null because no interceptors were active to - // initialize it - // when using FlightService directly rather than starting up a FlightServer. - - // Arrange - final FlightProducer producer = - new NoOpFlightProducer() { - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - listener.completed(); - } - }; - - // This response observer notifies that the test failed if onError() is called. 
- final ServerCallStreamObserver observer = - new ServerCallStreamObserver() { - @Override - public boolean isCancelled() { - return false; - } - - @Override - public void setOnCancelHandler(Runnable runnable) {} - - @Override - public void setCompression(String s) {} - - @Override - public boolean isReady() { - return false; - } - - @Override - public void setOnReadyHandler(Runnable runnable) {} - - @Override - public void disableAutoInboundFlowControl() {} - - @Override - public void request(int i) {} - - @Override - public void setMessageCompression(boolean b) {} - - @Override - public void onNext(ArrowMessage arrowMessage) {} - - @Override - public void onError(Throwable throwable) { - fail(throwable); - } - - @Override - public void onCompleted() {} - }; - final FlightService flightService = new FlightService(allocator, producer, null, null); - - // Act - flightService.doGetCustom(Flight.Ticket.newBuilder().build(), observer); - - // fail() would have been called if an error happened during doGetCustom(), so this test passed. 
- } - - @Test - public void supportsNullSchemas() throws Exception { - final FlightProducer producer = - new NoOpFlightProducer() { - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - return new FlightInfo( - null, - descriptor, - Collections.emptyList(), - 0, - 0, - false, - IpcOption.DEFAULT, - "foo".getBytes(StandardCharsets.UTF_8)); - } - }; - - try (final FlightServer s = - FlightServer.builder(allocator, forGrpcInsecure(LOCALHOST, 0), producer) - .build() - .start(); - final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) { - FlightInfo flightInfo = client.getInfo(FlightDescriptor.path("test")); - assertEquals(Optional.empty(), flightInfo.getSchemaOptional()); - assertEquals(new Schema(Collections.emptyList()), flightInfo.getSchema()); - assertArrayEquals(flightInfo.getAppMetadata(), "foo".getBytes(StandardCharsets.UTF_8)); - - Exception e = - assertThrows( - FlightRuntimeException.class, () -> client.getSchema(FlightDescriptor.path("test"))); - assertEquals("No schema is present in FlightInfo", e.getMessage()); - } - } - - /** - * Test for GH-41584 where flight defaults for header size was not in sync b\w client and server. 
- */ - @Test - public void testHeaderSizeExchangeInService() throws Exception { - final FlightProducer producer = - new NoOpFlightProducer() { - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - String longHeader = - context.getMiddleware(FlightConstants.HEADER_KEY).headers().get("long-header"); - return new FlightInfo( - null, - descriptor, - Collections.emptyList(), - 0, - 0, - false, - IpcOption.DEFAULT, - longHeader.getBytes(StandardCharsets.UTF_8)); - } - }; - - String headerVal = generateRandom(1024 * 10); - FlightCallHeaders callHeaders = new FlightCallHeaders(); - callHeaders.insert("long-header", headerVal); - // sever with default header limit same as client - try (final FlightServer s = - FlightServer.builder(allocator, forGrpcInsecure(LOCALHOST, 0), producer) - .build() - .start(); - final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) { - FlightInfo flightInfo = - client.getInfo(FlightDescriptor.path("test"), new HeaderCallOption(callHeaders)); - assertEquals(Optional.empty(), flightInfo.getSchemaOptional()); - assertEquals(new Schema(Collections.emptyList()), flightInfo.getSchema()); - assertArrayEquals(flightInfo.getAppMetadata(), headerVal.getBytes(StandardCharsets.UTF_8)); - } - // server with 15kb header limit - try (final FlightServer s = - FlightServer.builder(allocator, forGrpcInsecure(LOCALHOST, 0), producer) - .setMaxHeaderListSize(1024 * 15) - .build() - .start(); - final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) { - FlightInfo flightInfo = - client.getInfo(FlightDescriptor.path("test"), new HeaderCallOption(callHeaders)); - assertEquals(Optional.empty(), flightInfo.getSchemaOptional()); - assertEquals(new Schema(Collections.emptyList()), flightInfo.getSchema()); - assertArrayEquals(flightInfo.getAppMetadata(), headerVal.getBytes(StandardCharsets.UTF_8)); - - callHeaders.insert("another-header", headerVal + headerVal); - 
FlightRuntimeException e = - assertThrows( - FlightRuntimeException.class, - () -> - client.getInfo(FlightDescriptor.path("test"), new HeaderCallOption(callHeaders))); - assertEquals("http2 exception", e.getMessage()); - } - } - - private static String generateRandom(int size) { - String aToZ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890"; - Random random = new Random(); - StringBuilder res = new StringBuilder(); - for (int i = 0; i < size; i++) { - int randIndex = random.nextInt(aToZ.length()); - res.append(aToZ.charAt(randIndex)); - } - return res.toString(); - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLargeMessage.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLargeMessage.java deleted file mode 100644 index 9362e1f55258b..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLargeMessage.java +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import static com.google.common.collect.ImmutableList.toImmutableList; -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.util.Arrays; -import java.util.List; -import java.util.stream.Stream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; - -public class TestLargeMessage { - /** Make sure a Flight client accepts large message payloads by default. */ - @Test - public void getLargeMessage() throws Exception { - try (final BufferAllocator a = new RootAllocator(Long.MAX_VALUE); - final Producer producer = new Producer(a); - final FlightServer s = - FlightServer.builder(a, forGrpcInsecure(LOCALHOST, 0), producer).build().start()) { - - try (FlightClient client = FlightClient.builder(a, s.getLocation()).build()) { - try (FlightStream stream = client.getStream(new Ticket(new byte[] {})); - VectorSchemaRoot root = stream.getRoot()) { - while (stream.next()) { - for (final Field field : root.getSchema().getFields()) { - int value = 0; - final IntVector iv = (IntVector) root.getVector(field.getName()); - for (int i = 0; i < root.getRowCount(); i++) { - assertEquals(value, iv.get(i)); - value++; - } - } - } - } - } - } - } - - /** Make sure a Flight server accepts large message payloads by default. 
*/ - @Test - public void putLargeMessage() throws Exception { - try (final BufferAllocator a = new RootAllocator(Long.MAX_VALUE); - final Producer producer = new Producer(a); - final FlightServer s = - FlightServer.builder(a, forGrpcInsecure(LOCALHOST, 0), producer).build().start()) { - try (FlightClient client = FlightClient.builder(a, s.getLocation()).build(); - BufferAllocator testAllocator = a.newChildAllocator("testcase", 0, Long.MAX_VALUE); - VectorSchemaRoot root = generateData(testAllocator)) { - final FlightClient.ClientStreamListener listener = - client.startPut(FlightDescriptor.path("hello"), root, new AsyncPutListener()); - listener.putNext(); - listener.completed(); - listener.getResult(); - } - } - } - - private static VectorSchemaRoot generateData(BufferAllocator allocator) { - final int size = 128 * 1024; - final List fieldNames = - Arrays.asList("c1", "c2", "c3", "c4", "c5", "c6", "c7", "c8", "c9", "c10"); - final Stream fields = - fieldNames.stream() - .map( - fieldName -> - new Field(fieldName, FieldType.nullable(new ArrowType.Int(32, true)), null)); - final Schema schema = new Schema(fields.collect(toImmutableList()), null); - - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator); - root.allocateNew(); - for (final String fieldName : fieldNames) { - final IntVector iv = (IntVector) root.getVector(fieldName); - iv.setValueCount(size); - for (int i = 0; i < size; i++) { - iv.set(i, i); - } - } - root.setRowCount(size); - return root; - } - - private static class Producer implements FlightProducer, AutoCloseable { - private final BufferAllocator allocator; - - Producer(BufferAllocator allocator) { - this.allocator = allocator; - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - try (VectorSchemaRoot root = generateData(allocator)) { - listener.start(root); - listener.putNext(); - listener.completed(); - } - } - - @Override - public void listFlights( - CallContext 
context, Criteria criteria, StreamListener listener) {} - - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - return null; - } - - @Override - public Runnable acceptPut( - CallContext context, FlightStream flightStream, StreamListener ackStream) { - return () -> { - try (VectorSchemaRoot root = flightStream.getRoot()) { - while (flightStream.next()) {; - } - } - }; - } - - @Override - public void doAction(CallContext context, Action action, StreamListener listener) { - listener.onCompleted(); - } - - @Override - public void listActions(CallContext context, StreamListener listener) {} - - @Override - public void close() throws Exception { - allocator.close(); - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLeak.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLeak.java deleted file mode 100644 index c9ca72e8454d3..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestLeak.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; - -import java.util.Arrays; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; - -/** Tests for scenarios where Flight could leak memory. */ -public class TestLeak { - - private static final int ROWS = 2048; - - private static Schema getSchema() { - return new Schema( - Arrays.asList( - Field.nullable("0", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Field.nullable("1", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Field.nullable("2", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Field.nullable("3", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Field.nullable("4", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Field.nullable("5", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Field.nullable("6", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Field.nullable("7", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Field.nullable("8", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Field.nullable("9", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Field.nullable("10", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)))); - } - - /** - * Ensure that if the client cancels, the server does not leak memory. - * - *

    In gRPC, canceling the stream from the client sends an event to the server. Once processed, - * gRPC will start silently rejecting messages sent by the server. However, Flight depends on gRPC - * processing these messages in order to free the associated memory. - */ - @Test - public void testCancelingDoGetDoesNotLeak() throws Exception { - final CountDownLatch callFinished = new CountDownLatch(1); - try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final FlightServer s = - FlightServer.builder( - allocator, - forGrpcInsecure(LOCALHOST, 0), - new LeakFlightProducer(allocator, callFinished)) - .build() - .start(); - final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) { - - final FlightStream stream = client.getStream(new Ticket(new byte[0])); - stream.getRoot(); - stream.cancel("Cancel", null); - - // Wait for the call to finish. (Closing the allocator while a call is ongoing is a guaranteed - // leak.) - callFinished.await(60, TimeUnit.SECONDS); - - s.shutdown(); - s.awaitTermination(); - } - } - - @Test - public void testCancelingDoPutDoesNotBlock() throws Exception { - final CountDownLatch callFinished = new CountDownLatch(1); - try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final FlightServer s = - FlightServer.builder( - allocator, - forGrpcInsecure(LOCALHOST, 0), - new LeakFlightProducer(allocator, callFinished)) - .build() - .start(); - final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) { - - try (final VectorSchemaRoot root = VectorSchemaRoot.create(getSchema(), allocator)) { - final FlightDescriptor descriptor = FlightDescriptor.command(new byte[0]); - final SyncPutListener listener = new SyncPutListener(); - final FlightClient.ClientStreamListener stream = - client.startPut(descriptor, root, listener); - // Wait for the server to cancel - callFinished.await(60, TimeUnit.SECONDS); - - for (int col = 0; col < 11; col++) { - final 
Float8Vector vector = (Float8Vector) root.getVector(Integer.toString(col)); - vector.allocateNew(); - for (int row = 0; row < ROWS; row++) { - vector.setSafe(row, 10.); - } - } - root.setRowCount(ROWS); - // Unlike DoGet, this method fairly reliably will write the message to the stream, so even - // without the fix - // for ARROW-7343, this won't leak memory. - // However, it will block if FlightClient doesn't check for cancellation. - stream.putNext(); - stream.completed(); - } - - s.shutdown(); - s.awaitTermination(); - } - } - - /** A FlightProducer that always produces a fixed data stream with metadata on the side. */ - private static class LeakFlightProducer extends NoOpFlightProducer { - - private final BufferAllocator allocator; - private final CountDownLatch callFinished; - - public LeakFlightProducer(BufferAllocator allocator, CountDownLatch callFinished) { - this.allocator = allocator; - this.callFinished = callFinished; - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - BufferAllocator childAllocator = allocator.newChildAllocator("foo", 0, Long.MAX_VALUE); - VectorSchemaRoot root = VectorSchemaRoot.create(TestLeak.getSchema(), childAllocator); - root.allocateNew(); - listener.start(root); - - // We can't poll listener#isCancelled since gRPC has two distinct "is cancelled" flags. - // TODO: should we continue leaking gRPC semantics? Can we even avoid this? - listener.setOnCancelHandler( - () -> { - try { - for (int col = 0; col < 11; col++) { - final Float8Vector vector = (Float8Vector) root.getVector(Integer.toString(col)); - vector.allocateNew(); - for (int row = 0; row < ROWS; row++) { - vector.setSafe(row, 10.); - } - } - root.setRowCount(ROWS); - // Once the call is "really cancelled" (setOnCancelListener has run/is running), this - // call is actually a - // no-op on the gRPC side and will leak the ArrowMessage unless Flight checks for - // this. 
- listener.putNext(); - listener.completed(); - } finally { - try { - root.close(); - childAllocator.close(); - } finally { - // Don't let the test hang if we throw above - callFinished.countDown(); - } - } - }); - } - - @Override - public Runnable acceptPut( - CallContext context, FlightStream flightStream, StreamListener ackStream) { - return () -> { - flightStream.getRoot(); - ackStream.onError(CallStatus.CANCELLED.withDescription("CANCELLED").toRuntimeException()); - callFinished.countDown(); - ackStream.onCompleted(); - }; - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestMetadataVersion.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestMetadataVersion.java deleted file mode 100644 index 6af1d7154fbf7..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestMetadataVersion.java +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Collections; -import java.util.Optional; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.types.MetadataVersion; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -/** Test clients/servers with different metadata versions. 
*/ -public class TestMetadataVersion { - private static BufferAllocator allocator; - private static Schema schema; - private static IpcOption optionV4; - private static IpcOption optionV5; - private static Schema unionSchema; - - @BeforeAll - public static void setUpClass() { - allocator = new RootAllocator(Integer.MAX_VALUE); - schema = - new Schema(Collections.singletonList(Field.nullable("foo", new ArrowType.Int(32, true)))); - unionSchema = - new Schema( - Collections.singletonList( - Field.nullable("union", new ArrowType.Union(UnionMode.Dense, new int[] {0})))); - - // avoid writing legacy ipc format by default - optionV4 = new IpcOption(false, MetadataVersion.V4); - optionV5 = IpcOption.DEFAULT; - } - - @AfterAll - public static void tearDownClass() { - allocator.close(); - } - - @Test - public void testGetFlightInfoV4() throws Exception { - try (final FlightServer server = startServer(optionV4); - final FlightClient client = connect(server)) { - final FlightInfo result = client.getInfo(FlightDescriptor.command(new byte[0])); - assertEquals(Optional.of(schema), result.getSchemaOptional()); - } - } - - @Test - public void testGetSchemaV4() throws Exception { - try (final FlightServer server = startServer(optionV4); - final FlightClient client = connect(server)) { - final SchemaResult result = client.getSchema(FlightDescriptor.command(new byte[0])); - assertEquals(schema, result.getSchema()); - } - } - - @Test - public void testUnionCheck() throws Exception { - assertThrows(IllegalArgumentException.class, () -> new SchemaResult(unionSchema, optionV4)); - assertThrows( - IllegalArgumentException.class, - () -> - new FlightInfo( - unionSchema, - FlightDescriptor.command(new byte[0]), - Collections.emptyList(), - -1, - -1, - optionV4)); - try (final FlightServer server = startServer(optionV4); - final FlightClient client = connect(server); - final FlightStream stream = - client.getStream(new Ticket("union".getBytes(StandardCharsets.UTF_8)))) { - final 
FlightRuntimeException err = assertThrows(FlightRuntimeException.class, stream::next); - assertTrue( - err.getMessage().contains("Cannot write union with V4 metadata"), err.getMessage()); - } - - try (final FlightServer server = startServer(optionV4); - final FlightClient client = connect(server); - final VectorSchemaRoot root = VectorSchemaRoot.create(unionSchema, allocator)) { - final FlightDescriptor descriptor = FlightDescriptor.command(new byte[0]); - final SyncPutListener reader = new SyncPutListener(); - final FlightClient.ClientStreamListener listener = client.startPut(descriptor, reader); - final IllegalArgumentException err = - assertThrows(IllegalArgumentException.class, () -> listener.start(root, null, optionV4)); - assertTrue( - err.getMessage().contains("Cannot write union with V4 metadata"), err.getMessage()); - } - } - - @Test - public void testPutV4() throws Exception { - try (final FlightServer server = startServer(optionV4); - final FlightClient client = connect(server); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - generateData(root); - final FlightDescriptor descriptor = FlightDescriptor.command(new byte[0]); - final SyncPutListener reader = new SyncPutListener(); - final FlightClient.ClientStreamListener listener = client.startPut(descriptor, reader); - listener.start(root, null, optionV4); - listener.putNext(); - listener.completed(); - listener.getResult(); - } - } - - @Test - public void testGetV4() throws Exception { - try (final FlightServer server = startServer(optionV4); - final FlightClient client = connect(server); - final FlightStream stream = client.getStream(new Ticket(new byte[0]))) { - assertTrue(stream.next()); - assertEquals(optionV4.metadataVersion, stream.metadataVersion); - validateRoot(stream.getRoot()); - assertFalse(stream.next()); - } - } - - @Test - public void testExchangeV4ToV5() throws Exception { - try (final FlightServer server = startServer(optionV5); - final FlightClient client = 
connect(server); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator); - final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(new byte[0]))) { - stream.getWriter().start(root, null, optionV4); - generateData(root); - stream.getWriter().putNext(); - stream.getWriter().completed(); - assertTrue(stream.getReader().next()); - assertEquals(optionV5.metadataVersion, stream.getReader().metadataVersion); - validateRoot(stream.getReader().getRoot()); - assertFalse(stream.getReader().next()); - } - } - - @Test - public void testExchangeV5ToV4() throws Exception { - try (final FlightServer server = startServer(optionV4); - final FlightClient client = connect(server); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator); - final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(new byte[0]))) { - stream.getWriter().start(root, null, optionV5); - generateData(root); - stream.getWriter().putNext(); - stream.getWriter().completed(); - assertTrue(stream.getReader().next()); - assertEquals(optionV4.metadataVersion, stream.getReader().metadataVersion); - validateRoot(stream.getReader().getRoot()); - assertFalse(stream.getReader().next()); - } - } - - @Test - public void testExchangeV4ToV4() throws Exception { - try (final FlightServer server = startServer(optionV4); - final FlightClient client = connect(server); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator); - final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(new byte[0]))) { - stream.getWriter().start(root, null, optionV4); - generateData(root); - stream.getWriter().putNext(); - stream.getWriter().completed(); - assertTrue(stream.getReader().next()); - assertEquals(optionV4.metadataVersion, stream.getReader().metadataVersion); - validateRoot(stream.getReader().getRoot()); - assertFalse(stream.getReader().next()); - } - } - - private static 
void generateData(VectorSchemaRoot root) { - assertEquals(schema, root.getSchema()); - final IntVector vector = (IntVector) root.getVector("foo"); - vector.setSafe(0, 0); - vector.setSafe(1, 1); - vector.setSafe(2, 4); - root.setRowCount(3); - } - - private static void validateRoot(VectorSchemaRoot root) { - assertEquals(schema, root.getSchema()); - assertEquals(3, root.getRowCount()); - final IntVector vector = (IntVector) root.getVector("foo"); - assertEquals(0, vector.get(0)); - assertEquals(1, vector.get(1)); - assertEquals(4, vector.get(2)); - } - - FlightServer startServer(IpcOption option) throws Exception { - Location location = Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, 0); - VersionFlightProducer producer = new VersionFlightProducer(allocator, option); - final FlightServer server = FlightServer.builder(allocator, location, producer).build(); - server.start(); - return server; - } - - FlightClient connect(FlightServer server) { - Location location = Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, server.getPort()); - return FlightClient.builder(allocator, location).build(); - } - - static final class VersionFlightProducer extends NoOpFlightProducer { - private final BufferAllocator allocator; - private final IpcOption option; - - VersionFlightProducer(BufferAllocator allocator, IpcOption option) { - this.allocator = allocator; - this.option = option; - } - - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - return new FlightInfo(schema, descriptor, Collections.emptyList(), -1, -1, option); - } - - @Override - public SchemaResult getSchema(CallContext context, FlightDescriptor descriptor) { - return new SchemaResult(schema, option); - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - if (Arrays.equals("union".getBytes(StandardCharsets.UTF_8), ticket.getBytes())) { - try (final VectorSchemaRoot root = VectorSchemaRoot.create(unionSchema, 
allocator)) { - listener.start(root, null, option); - } catch (IllegalArgumentException e) { - listener.error( - CallStatus.INTERNAL - .withCause(e) - .withDescription(e.getMessage()) - .toRuntimeException()); - return; - } - listener.error( - CallStatus.INTERNAL - .withDescription("Expected exception not raised") - .toRuntimeException()); - return; - } - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - listener.start(root, null, option); - generateData(root); - listener.putNext(); - listener.completed(); - } - } - - @Override - public Runnable acceptPut( - CallContext context, FlightStream flightStream, StreamListener ackStream) { - return () -> { - try { - assertTrue(flightStream.next()); - assertEquals(option.metadataVersion, flightStream.metadataVersion); - validateRoot(flightStream.getRoot()); - } catch (AssertionError err) { - // gRPC doesn't propagate stack traces across the wire. - err.printStackTrace(); - ackStream.onError( - CallStatus.INVALID_ARGUMENT - .withCause(err) - .withDescription("Server assertion failed: " + err) - .toRuntimeException()); - return; - } catch (RuntimeException err) { - err.printStackTrace(); - ackStream.onError( - CallStatus.INTERNAL - .withCause(err) - .withDescription("Server assertion failed: " + err) - .toRuntimeException()); - return; - } - ackStream.onCompleted(); - }; - } - - @Override - public void doExchange(CallContext context, FlightStream reader, ServerStreamListener writer) { - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - try { - assertTrue(reader.next()); - validateRoot(reader.getRoot()); - assertFalse(reader.next()); - } catch (AssertionError err) { - // gRPC doesn't propagate stack traces across the wire. 
- err.printStackTrace(); - writer.error( - CallStatus.INVALID_ARGUMENT - .withCause(err) - .withDescription("Server assertion failed: " + err) - .toRuntimeException()); - return; - } catch (RuntimeException err) { - err.printStackTrace(); - writer.error( - CallStatus.INTERNAL - .withCause(err) - .withDescription("Server assertion failed: " + err) - .toRuntimeException()); - return; - } - - writer.start(root, null, option); - generateData(root); - writer.putNext(); - writer.completed(); - } - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerMiddleware.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerMiddleware.java deleted file mode 100644 index c60843d2a279c..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerMiddleware.java +++ /dev/null @@ -1,377 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.fail; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.function.BiConsumer; -import org.apache.arrow.flight.FlightClient.ClientStreamListener; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; - -public class TestServerMiddleware { - - /** Make sure errors in DoPut are intercepted. */ - @Test - public void doPutErrors() { - test( - new ErrorProducer(new RuntimeException("test")), - (allocator, client) -> { - final FlightDescriptor descriptor = FlightDescriptor.path("test"); - try (final VectorSchemaRoot root = - VectorSchemaRoot.create(new Schema(Collections.emptyList()), allocator)) { - final ClientStreamListener listener = - client.startPut(descriptor, root, new SyncPutListener()); - listener.completed(); - FlightTestUtil.assertCode(FlightStatusCode.INTERNAL, listener::getResult); - } - }, - (recorder) -> { - final CallStatus status = recorder.statusFuture.get(); - assertNotNull(status); - assertNotNull(status.cause()); - assertEquals(FlightStatusCode.INTERNAL, status.code()); - }); - // Check the status after server shutdown (to make sure gRPC finishes pending calls on the - // server side) - } - - /** Make sure custom error codes in DoPut are intercepted. 
*/ - @Test - public void doPutCustomCode() { - test( - new ErrorProducer( - CallStatus.UNAVAILABLE.withDescription("description").toRuntimeException()), - (allocator, client) -> { - final FlightDescriptor descriptor = FlightDescriptor.path("test"); - try (final VectorSchemaRoot root = - VectorSchemaRoot.create(new Schema(Collections.emptyList()), allocator)) { - final ClientStreamListener listener = - client.startPut(descriptor, root, new SyncPutListener()); - listener.completed(); - FlightTestUtil.assertCode(FlightStatusCode.UNAVAILABLE, listener::getResult); - } - }, - (recorder) -> { - final CallStatus status = recorder.statusFuture.get(); - assertNotNull(status); - assertNull(status.cause()); - assertEquals(FlightStatusCode.UNAVAILABLE, status.code()); - assertEquals("description", status.description()); - }); - } - - /** Make sure uncaught exceptions in DoPut are intercepted. */ - @Test - public void doPutUncaught() { - test( - new ServerErrorProducer(new RuntimeException("test")), - (allocator, client) -> { - final FlightDescriptor descriptor = FlightDescriptor.path("test"); - try (final VectorSchemaRoot root = - VectorSchemaRoot.create(new Schema(Collections.emptyList()), allocator)) { - final ClientStreamListener listener = - client.startPut(descriptor, root, new SyncPutListener()); - listener.completed(); - listener.getResult(); - } - }, - (recorder) -> { - final CallStatus status = recorder.statusFuture.get(); - final Throwable err = recorder.errFuture.get(); - assertNotNull(status); - assertEquals(FlightStatusCode.OK, status.code()); - assertNull(status.cause()); - assertNotNull(err); - assertEquals("test", err.getMessage()); - }); - } - - @Test - public void listFlightsUncaught() { - test( - new ServerErrorProducer(new RuntimeException("test")), - (allocator, client) -> - client.listFlights(new Criteria(new byte[0])).forEach((action) -> {}), - (recorder) -> { - final CallStatus status = recorder.statusFuture.get(); - final Throwable err = 
recorder.errFuture.get(); - assertNotNull(status); - assertEquals(FlightStatusCode.OK, status.code()); - assertNull(status.cause()); - assertNotNull(err); - assertEquals("test", err.getMessage()); - }); - } - - @Test - public void doActionUncaught() { - test( - new ServerErrorProducer(new RuntimeException("test")), - (allocator, client) -> client.doAction(new Action("test")).forEachRemaining(result -> {}), - (recorder) -> { - final CallStatus status = recorder.statusFuture.get(); - final Throwable err = recorder.errFuture.get(); - assertNotNull(status); - assertEquals(FlightStatusCode.OK, status.code()); - assertNull(status.cause()); - assertNotNull(err); - assertEquals("test", err.getMessage()); - }); - } - - @Test - public void listActionsUncaught() { - test( - new ServerErrorProducer(new RuntimeException("test")), - (allocator, client) -> client.listActions().forEach(result -> {}), - (recorder) -> { - final CallStatus status = recorder.statusFuture.get(); - final Throwable err = recorder.errFuture.get(); - assertNotNull(status); - assertEquals(FlightStatusCode.OK, status.code()); - assertNull(status.cause()); - assertNotNull(err); - assertEquals("test", err.getMessage()); - }); - } - - @Test - public void getFlightInfoUncaught() { - test( - new ServerErrorProducer(new RuntimeException("test")), - (allocator, client) -> { - FlightTestUtil.assertCode( - FlightStatusCode.INTERNAL, () -> client.getInfo(FlightDescriptor.path("test"))); - }, - (recorder) -> { - final CallStatus status = recorder.statusFuture.get(); - assertNotNull(status); - assertEquals(FlightStatusCode.INTERNAL, status.code()); - assertNotNull(status.cause()); - assertEquals(new RuntimeException("test").getMessage(), status.cause().getMessage()); - }); - } - - @Test - public void doGetUncaught() { - test( - new ServerErrorProducer(new RuntimeException("test")), - (allocator, client) -> { - try (final FlightStream stream = client.getStream(new Ticket(new byte[0]))) { - while (stream.next()) {} - } 
catch (Exception e) { - fail(e.toString()); - } - }, - (recorder) -> { - final CallStatus status = recorder.statusFuture.get(); - final Throwable err = recorder.errFuture.get(); - assertNotNull(status); - assertEquals(FlightStatusCode.OK, status.code()); - assertNull(status.cause()); - assertNotNull(err); - assertEquals("test", err.getMessage()); - }); - } - - /** A middleware that records the last error on any call. */ - static class ErrorRecorder implements FlightServerMiddleware { - - CompletableFuture statusFuture = new CompletableFuture<>(); - CompletableFuture errFuture = new CompletableFuture<>(); - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) {} - - @Override - public void onCallCompleted(CallStatus status) { - statusFuture.complete(status); - } - - @Override - public void onCallErrored(Throwable err) { - errFuture.complete(err); - } - - static class Factory implements FlightServerMiddleware.Factory { - - ErrorRecorder instance = new ErrorRecorder(); - - @Override - public ErrorRecorder onCallStarted( - CallInfo info, CallHeaders incomingHeaders, RequestContext context) { - return instance; - } - } - } - - /** A producer that throws the given exception on a call. */ - static class ErrorProducer extends NoOpFlightProducer { - - final RuntimeException error; - - ErrorProducer(RuntimeException t) { - error = t; - } - - @Override - public Runnable acceptPut( - CallContext context, FlightStream flightStream, StreamListener ackStream) { - return () -> { - // Drain queue to avoid FlightStream#close cancelling the call - while (flightStream.next()) {} - throw error; - }; - } - } - - /** - * A producer that throws the given exception on a call, but only after sending a success to the - * client. 
- */ - static class ServerErrorProducer extends NoOpFlightProducer { - - final RuntimeException error; - - ServerErrorProducer(RuntimeException t) { - error = t; - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final VectorSchemaRoot root = - VectorSchemaRoot.create(new Schema(Collections.emptyList()), allocator)) { - listener.start(root); - listener.completed(); - } - throw error; - } - - @Override - public void listFlights( - CallContext context, Criteria criteria, StreamListener listener) { - listener.onCompleted(); - throw error; - } - - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - throw error; - } - - @Override - public Runnable acceptPut( - CallContext context, FlightStream flightStream, StreamListener ackStream) { - return () -> { - while (flightStream.next()) {} - ackStream.onCompleted(); - throw error; - }; - } - - @Override - public void doAction(CallContext context, Action action, StreamListener listener) { - listener.onCompleted(); - throw error; - } - - @Override - public void listActions(CallContext context, StreamListener listener) { - listener.onCompleted(); - throw error; - } - } - - static class ServerMiddlewarePair { - - final FlightServerMiddleware.Key key; - final FlightServerMiddleware.Factory factory; - - ServerMiddlewarePair( - FlightServerMiddleware.Key key, FlightServerMiddleware.Factory factory) { - this.key = key; - this.factory = factory; - } - } - - /** - * Spin up a service with the given middleware and producer. - * - * @param producer The Flight producer to use. - * @param middleware A list of middleware to register. - * @param body A function to run as the body of the test. - * @param The middleware type. 
- */ - static void test( - FlightProducer producer, - List> middleware, - BiConsumer body) { - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) { - final FlightServer.Builder builder = - FlightServer.builder(allocator, forGrpcInsecure(LOCALHOST, 0), producer); - middleware.forEach(pair -> builder.middleware(pair.key, pair.factory)); - final FlightServer server = builder.build().start(); - try (final FlightServer ignored = server; - final FlightClient client = - FlightClient.builder(allocator, server.getLocation()).build()) { - body.accept(allocator, client); - } - } catch (InterruptedException | IOException e) { - throw new RuntimeException(e); - } - } - - static void test( - FlightProducer producer, - BiConsumer body, - ErrorConsumer verify) { - final ErrorRecorder.Factory factory = new ErrorRecorder.Factory(); - final List> middleware = - Collections.singletonList( - new ServerMiddlewarePair<>(FlightServerMiddleware.Key.of("m"), factory)); - test( - producer, - middleware, - (allocator, client) -> { - body.accept(allocator, client); - try { - verify.accept(factory.instance); - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - } - - @FunctionalInterface - interface ErrorConsumer { - void accept(T obj) throws Exception; - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerOptions.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerOptions.java deleted file mode 100644 index c39ac922cfaad..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestServerOptions.java +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import io.grpc.Channel; -import io.grpc.MethodDescriptor; -import io.grpc.ServerServiceDefinition; -import io.grpc.health.v1.HealthCheckRequest; -import io.grpc.health.v1.HealthCheckResponse; -import io.grpc.health.v1.HealthGrpc; -import io.grpc.netty.NettyChannelBuilder; -import io.grpc.netty.NettyServerBuilder; -import io.grpc.protobuf.services.HealthStatusManager; -import java.io.File; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.Consumer; -import org.apache.arrow.flight.TestBasicOperation.Producer; -import org.apache.arrow.flight.auth.ServerAuthHandler; -import org.apache.arrow.flight.impl.FlightServiceGrpc; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; 
-import org.apache.arrow.vector.VectorSchemaRoot; -import org.junit.jupiter.api.Assumptions; -import org.junit.jupiter.api.Test; - -public class TestServerOptions { - - @Test - public void builderConsumer() throws Exception { - final AtomicBoolean consumerCalled = new AtomicBoolean(); - final Consumer consumer = (builder) -> consumerCalled.set(true); - - try (BufferAllocator a = new RootAllocator(Long.MAX_VALUE); - Producer producer = new Producer(a); - FlightServer s = - FlightServer.builder(a, forGrpcInsecure(LOCALHOST, 0), producer) - .transportHint("grpc.builderConsumer", consumer) - .build() - .start()) { - assertTrue(consumerCalled.get()); - } - } - - /** - * Make sure that if Flight supplies a default executor to gRPC, then it is closed along with the - * server. - */ - @Test - public void defaultExecutorClosed() throws Exception { - final ExecutorService executor; - try (BufferAllocator a = new RootAllocator(Long.MAX_VALUE); - FlightServer server = - FlightServer.builder(a, forGrpcInsecure(LOCALHOST, 0), new NoOpFlightProducer()) - .build() - .start()) { - assertNotNull(server.grpcExecutor); - executor = server.grpcExecutor; - } - assertTrue(executor.isShutdown()); - } - - /** Make sure that if the user provides an executor to gRPC, then Flight does not close it. 
*/ - @Test - public void suppliedExecutorNotClosed() throws Exception { - final ExecutorService executor = Executors.newSingleThreadExecutor(); - try { - try (BufferAllocator a = new RootAllocator(Long.MAX_VALUE); - FlightServer server = - FlightServer.builder(a, forGrpcInsecure(LOCALHOST, 0), new NoOpFlightProducer()) - .executor(executor) - .build() - .start()) { - assertNull(server.grpcExecutor); - } - assertFalse(executor.isShutdown()); - } finally { - executor.shutdown(); - } - } - - @Test - public void domainSocket() throws Exception { - Assumptions.assumeTrue( - FlightTestUtil.isNativeTransportAvailable(), "We have a native transport available"); - final File domainSocket = File.createTempFile("flight-unit-test-", ".sock"); - assertTrue(domainSocket.delete()); - // Domain socket paths have a platform-dependent limit. Set a conservative limit and skip the - // test if the temporary - // file name is too long. (We do not assume a particular platform-dependent temporary directory - // path.) - Assumptions.assumeTrue( - domainSocket.getAbsolutePath().length() < 100, "The domain socket path is not too long"); - final Location location = Location.forGrpcDomainSocket(domainSocket.getAbsolutePath()); - try (BufferAllocator a = new RootAllocator(Long.MAX_VALUE); - Producer producer = new Producer(a); - FlightServer s = FlightServer.builder(a, location, producer).build().start(); ) { - try (FlightClient c = FlightClient.builder(a, location).build()) { - try (FlightStream stream = c.getStream(new Ticket(new byte[0]))) { - VectorSchemaRoot root = stream.getRoot(); - IntVector iv = (IntVector) root.getVector("c1"); - int value = 0; - while (stream.next()) { - for (int i = 0; i < root.getRowCount(); i++) { - assertEquals(value, iv.get(i)); - value++; - } - } - } - } - } - } - - @Test - public void checkReflectionMetadata() { - // This metadata is needed for gRPC reflection to work. 
- final ExecutorService executorService = Executors.newSingleThreadExecutor(); - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) { - final FlightBindingService service = - new FlightBindingService( - allocator, new NoOpFlightProducer(), ServerAuthHandler.NO_OP, executorService); - final ServerServiceDefinition definition = service.bindService(); - assertEquals( - FlightServiceGrpc.getServiceDescriptor().getSchemaDescriptor(), - definition.getServiceDescriptor().getSchemaDescriptor()); - - final Map> definedMethods = new HashMap<>(); - final Map> serviceMethods = new HashMap<>(); - - // Make sure that the reflection metadata object is identical across all the places where it's - // accessible - definition - .getMethods() - .forEach( - method -> - definedMethods.put( - method.getMethodDescriptor().getFullMethodName(), - method.getMethodDescriptor())); - definition - .getServiceDescriptor() - .getMethods() - .forEach(method -> serviceMethods.put(method.getFullMethodName(), method)); - - for (final MethodDescriptor descriptor : - FlightServiceGrpc.getServiceDescriptor().getMethods()) { - final String methodName = descriptor.getFullMethodName(); - assertTrue( - definedMethods.containsKey(methodName), - "Method is missing from ServerServiceDefinition: " + methodName); - assertTrue( - definedMethods.containsKey(methodName), - "Method is missing from ServiceDescriptor: " + methodName); - - assertEquals( - descriptor.getSchemaDescriptor(), definedMethods.get(methodName).getSchemaDescriptor()); - assertEquals( - descriptor.getSchemaDescriptor(), serviceMethods.get(methodName).getSchemaDescriptor()); - } - } finally { - executorService.shutdown(); - } - } - - /* - * This is an extension of builderConsumer test. 
- * Test that Flight interceptors don't break other registered services - */ - @Test - public void addHealthCheckService() throws Exception { - final HealthStatusManager statusManager = new HealthStatusManager(); - final Consumer consumer = - (builder) -> { - builder.addService(statusManager.getHealthService()); - }; - final Location location = forGrpcInsecure(LOCALHOST, 5555); - try (BufferAllocator a = new RootAllocator(Long.MAX_VALUE); - Producer producer = new Producer(a); - FlightServer s = - FlightServer.builder(a, location, producer) - .transportHint("grpc.builderConsumer", consumer) - .build() - .start(); ) { - Channel channel = - NettyChannelBuilder.forAddress(location.toSocketAddress()).usePlaintext().build(); - HealthCheckResponse response = - HealthGrpc.newBlockingStub(channel).check(HealthCheckRequest.getDefaultInstance()); - - assertEquals(response.getStatus(), HealthCheckResponse.ServingStatus.SERVING); - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestTls.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestTls.java deleted file mode 100644 index 60dd131a182ed..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestTls.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; - -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.util.Iterator; -import java.util.function.Consumer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Test; - -/** Tests for TLS in Flight. */ -public class TestTls { - - /** Test a basic request over TLS. */ - @Test - public void connectTls() { - test( - (builder) -> { - try (final InputStream roots = - new FileInputStream(FlightTestUtil.exampleTlsRootCert().toFile()); - final FlightClient client = builder.trustedCertificates(roots).build()) { - final Iterator responses = client.doAction(new Action("hello-world")); - final byte[] response = responses.next().getBody(); - assertEquals("Hello, world!", new String(response, StandardCharsets.UTF_8)); - assertFalse(responses.hasNext()); - } catch (InterruptedException | IOException e) { - throw new RuntimeException(e); - } - }); - } - - /** Make sure that connections are rejected when the root certificate isn't trusted. */ - @Test - public void rejectInvalidCert() { - test( - (builder) -> { - try (final FlightClient client = builder.build()) { - final Iterator responses = client.doAction(new Action("hello-world")); - FlightTestUtil.assertCode( - FlightStatusCode.UNAVAILABLE, () -> responses.next().getBody()); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - }); - } - - /** Make sure that connections are rejected when the hostname doesn't match. 
*/ - @Test - public void rejectHostname() { - test( - (builder) -> { - try (final InputStream roots = - new FileInputStream(FlightTestUtil.exampleTlsRootCert().toFile()); - final FlightClient client = - builder.trustedCertificates(roots).overrideHostname("fakehostname").build()) { - final Iterator responses = client.doAction(new Action("hello-world")); - FlightTestUtil.assertCode( - FlightStatusCode.UNAVAILABLE, () -> responses.next().getBody()); - } catch (InterruptedException | IOException e) { - throw new RuntimeException(e); - } - }); - } - - /** Test a basic request over TLS. */ - @Test - public void connectTlsDisableServerVerification() { - test( - (builder) -> { - try (final FlightClient client = builder.verifyServer(false).build()) { - final Iterator responses = client.doAction(new Action("hello-world")); - final byte[] response = responses.next().getBody(); - assertEquals("Hello, world!", new String(response, StandardCharsets.UTF_8)); - assertFalse(responses.hasNext()); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - }); - } - - void test(Consumer testFn) { - final FlightTestUtil.CertKeyPair certKey = FlightTestUtil.exampleTlsCerts().get(0); - try (BufferAllocator a = new RootAllocator(Long.MAX_VALUE); - Producer producer = new Producer(); - FlightServer s = - FlightServer.builder(a, forGrpcInsecure(LOCALHOST, 0), producer) - .useTls(certKey.cert, certKey.key) - .build() - .start()) { - final FlightClient.Builder builder = - FlightClient.builder(a, Location.forGrpcTls(FlightTestUtil.LOCALHOST, s.getPort())); - testFn.accept(builder); - } catch (InterruptedException | IOException e) { - throw new RuntimeException(e); - } - } - - static class Producer extends NoOpFlightProducer implements AutoCloseable { - - @Override - public void doAction(CallContext context, Action action, StreamListener listener) { - if (action.getType().equals("hello-world")) { - listener.onNext(new Result("Hello, world!".getBytes(StandardCharsets.UTF_8))); - 
listener.onCompleted(); - return; - } - listener.onError( - CallStatus.UNIMPLEMENTED - .withDescription("Invalid action " + action.getType()) - .toRuntimeException()); - } - - @Override - public void close() {} - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java deleted file mode 100644 index 0c63785c88fd1..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.auth; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.fail; - -import com.google.common.collect.ImmutableList; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Optional; -import org.apache.arrow.flight.Criteria; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightStatusCode; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.FlightTestUtil; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestBasicAuth { - - private static final String USERNAME = "flight"; - private static final String PASSWORD = "woohoo"; - private static final byte[] VALID_TOKEN = "my_token".getBytes(StandardCharsets.UTF_8); - - private FlightClient client; - private static FlightServer server; - private static BufferAllocator allocator; - - @Test - public void validAuth() { - client.authenticateBasic(USERNAME, PASSWORD); - assertEquals(0, ImmutableList.copyOf(client.listFlights(Criteria.ALL)).size()); - } - - @Test - public void asyncCall() throws Exception { - 
client.authenticateBasic(USERNAME, PASSWORD); - client.listFlights(Criteria.ALL); - try (final FlightStream s = client.getStream(new Ticket(new byte[1]))) { - while (s.next()) { - assertEquals(4095, s.getRoot().getRowCount()); - } - } - } - - @Test - public void invalidAuth() { - FlightTestUtil.assertCode( - FlightStatusCode.UNAUTHENTICATED, - () -> { - client.authenticateBasic(USERNAME, "WRONG"); - }); - - FlightTestUtil.assertCode( - FlightStatusCode.UNAUTHENTICATED, - () -> { - client.listFlights(Criteria.ALL).forEach(action -> fail()); - }); - } - - @Test - public void didntAuth() { - FlightTestUtil.assertCode( - FlightStatusCode.UNAUTHENTICATED, - () -> { - client.listFlights(Criteria.ALL).forEach(action -> fail()); - }); - } - - @BeforeEach - public void testSetup() throws IOException { - client = FlightClient.builder(allocator, server.getLocation()).build(); - } - - @BeforeAll - public static void setup() throws IOException { - allocator = new RootAllocator(Long.MAX_VALUE); - final BasicServerAuthHandler.BasicAuthValidator validator = - new BasicServerAuthHandler.BasicAuthValidator() { - - @Override - public Optional isValid(byte[] token) { - if (Arrays.equals(token, VALID_TOKEN)) { - return Optional.of(USERNAME); - } - return Optional.empty(); - } - - @Override - public byte[] getToken(String username, String password) { - if (USERNAME.equals(username) && PASSWORD.equals(password)) { - return VALID_TOKEN; - } else { - throw new IllegalArgumentException("invalid credentials"); - } - } - }; - - server = - FlightServer.builder( - allocator, - forGrpcInsecure(LOCALHOST, 0), - new NoOpFlightProducer() { - @Override - public void listFlights( - CallContext context, Criteria criteria, StreamListener listener) { - if (!context.peerIdentity().equals(USERNAME)) { - listener.onError(new IllegalArgumentException("Invalid username")); - return; - } - listener.onCompleted(); - } - - @Override - public void getStream( - CallContext context, Ticket ticket, 
ServerStreamListener listener) { - if (!context.peerIdentity().equals(USERNAME)) { - listener.error(new IllegalArgumentException("Invalid username")); - return; - } - final Schema pojoSchema = - new Schema( - ImmutableList.of( - Field.nullable("a", Types.MinorType.BIGINT.getType()))); - try (VectorSchemaRoot root = VectorSchemaRoot.create(pojoSchema, allocator)) { - listener.start(root); - root.allocateNew(); - root.setRowCount(4095); - listener.putNext(); - listener.completed(); - } - } - }) - .authHandler(new BasicServerAuthHandler(validator)) - .build() - .start(); - } - - @AfterEach - public void tearDown() throws Exception { - AutoCloseables.close(client); - } - - @AfterAll - public static void shutdown() throws Exception { - AutoCloseables.close(server); - - allocator.getChildAllocators().forEach(BufferAllocator::close); - AutoCloseables.close(allocator); - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth2/TestBasicAuth2.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth2/TestBasicAuth2.java deleted file mode 100644 index 2d1eb9331ee63..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/auth2/TestBasicAuth2.java +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.auth2; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import com.google.common.base.Strings; -import com.google.common.collect.ImmutableList; -import java.io.IOException; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.Criteria; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightStatusCode; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.FlightTestUtil; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.flight.grpc.CredentialCallOption; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -public class TestBasicAuth2 { - - private static final String USERNAME_1 = "flight1"; - private static final String USERNAME_2 = "flight2"; - private static final String NO_USERNAME = ""; - private static final String PASSWORD_1 = "woohoo1"; - private static final String PASSWORD_2 = "woohoo2"; - private static BufferAllocator allocator; - private static FlightServer server; - private static 
FlightClient client; - private static FlightClient client2; - - @BeforeAll - public static void setup() throws Exception { - allocator = new RootAllocator(Long.MAX_VALUE); - startServerAndClient(); - } - - private static FlightProducer getFlightProducer() { - return new NoOpFlightProducer() { - @Override - public void listFlights( - CallContext context, Criteria criteria, StreamListener listener) { - if (!context.peerIdentity().equals(USERNAME_1) - && !context.peerIdentity().equals(USERNAME_2)) { - listener.onError(new IllegalArgumentException("Invalid username")); - return; - } - listener.onCompleted(); - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - if (!context.peerIdentity().equals(USERNAME_1) - && !context.peerIdentity().equals(USERNAME_2)) { - listener.error(new IllegalArgumentException("Invalid username")); - return; - } - final Schema pojoSchema = - new Schema(ImmutableList.of(Field.nullable("a", Types.MinorType.BIGINT.getType()))); - try (VectorSchemaRoot root = VectorSchemaRoot.create(pojoSchema, allocator)) { - listener.start(root); - root.allocateNew(); - root.setRowCount(4095); - listener.putNext(); - listener.completed(); - } - } - }; - } - - private static void startServerAndClient() throws IOException { - final FlightProducer flightProducer = getFlightProducer(); - server = - FlightServer.builder(allocator, forGrpcInsecure(LOCALHOST, 0), flightProducer) - .headerAuthenticator( - new GeneratedBearerTokenAuthenticator( - new BasicCallHeaderAuthenticator(TestBasicAuth2::validate))) - .build() - .start(); - client = FlightClient.builder(allocator, server.getLocation()).build(); - } - - @AfterAll - public static void shutdown() throws Exception { - AutoCloseables.close(client, client2, server); - client = null; - client2 = null; - server = null; - - allocator.getChildAllocators().forEach(BufferAllocator::close); - AutoCloseables.close(allocator); - allocator = null; - } - - private void 
startClient2() throws IOException { - client2 = FlightClient.builder(allocator, server.getLocation()).build(); - } - - private static CallHeaderAuthenticator.AuthResult validate(String username, String password) { - if (Strings.isNullOrEmpty(username)) { - throw CallStatus.UNAUTHENTICATED - .withDescription("Credentials not supplied.") - .toRuntimeException(); - } - final String identity; - if (USERNAME_1.equals(username) && PASSWORD_1.equals(password)) { - identity = USERNAME_1; - } else if (USERNAME_2.equals(username) && PASSWORD_2.equals(password)) { - identity = USERNAME_2; - } else { - throw CallStatus.UNAUTHENTICATED - .withDescription("Username or password is invalid.") - .toRuntimeException(); - } - return () -> identity; - } - - @Test - public void validAuthWithBearerAuthServer() throws IOException { - testValidAuth(client); - } - - @Test - public void validAuthWithMultipleClientsWithSameCredentialsWithBearerAuthServer() - throws IOException { - startClient2(); - testValidAuthWithMultipleClientsWithSameCredentials(client, client2); - } - - @Test - public void validAuthWithMultipleClientsWithDifferentCredentialsWithBearerAuthServer() - throws IOException { - startClient2(); - testValidAuthWithMultipleClientsWithDifferentCredentials(client, client2); - } - - @Test - public void asyncCall() throws Exception { - final CredentialCallOption bearerToken = - client.authenticateBasicToken(USERNAME_1, PASSWORD_1).get(); - client.listFlights(Criteria.ALL, bearerToken); - try (final FlightStream s = client.getStream(new Ticket(new byte[1]), bearerToken)) { - while (s.next()) { - assertEquals(4095, s.getRoot().getRowCount()); - } - } - } - - @Test - public void invalidAuthWithBearerAuthServer() throws IOException { - testInvalidAuth(client); - } - - @Test - public void didntAuthWithBearerAuthServer() throws IOException { - didntAuth(client); - } - - private void testValidAuth(FlightClient client) { - final CredentialCallOption bearerToken = - 
client.authenticateBasicToken(USERNAME_1, PASSWORD_1).get(); - assertTrue(ImmutableList.copyOf(client.listFlights(Criteria.ALL, bearerToken)).isEmpty()); - } - - private void testValidAuthWithMultipleClientsWithSameCredentials( - FlightClient client1, FlightClient client2) { - final CredentialCallOption bearerToken1 = - client1.authenticateBasicToken(USERNAME_1, PASSWORD_1).get(); - final CredentialCallOption bearerToken2 = - client2.authenticateBasicToken(USERNAME_1, PASSWORD_1).get(); - assertTrue(ImmutableList.copyOf(client1.listFlights(Criteria.ALL, bearerToken1)).isEmpty()); - assertTrue(ImmutableList.copyOf(client2.listFlights(Criteria.ALL, bearerToken2)).isEmpty()); - } - - private void testValidAuthWithMultipleClientsWithDifferentCredentials( - FlightClient client1, FlightClient client2) { - final CredentialCallOption bearerToken1 = - client1.authenticateBasicToken(USERNAME_1, PASSWORD_1).get(); - final CredentialCallOption bearerToken2 = - client2.authenticateBasicToken(USERNAME_2, PASSWORD_2).get(); - assertTrue(ImmutableList.copyOf(client1.listFlights(Criteria.ALL, bearerToken1)).isEmpty()); - assertTrue(ImmutableList.copyOf(client2.listFlights(Criteria.ALL, bearerToken2)).isEmpty()); - } - - private void testInvalidAuth(FlightClient client) { - FlightTestUtil.assertCode( - FlightStatusCode.UNAUTHENTICATED, () -> client.authenticateBasicToken(USERNAME_1, "WRONG")); - - FlightTestUtil.assertCode( - FlightStatusCode.UNAUTHENTICATED, - () -> client.authenticateBasicToken(NO_USERNAME, PASSWORD_1)); - - FlightTestUtil.assertCode( - FlightStatusCode.UNAUTHENTICATED, - () -> client.listFlights(Criteria.ALL).forEach(action -> fail())); - } - - private void didntAuth(FlightClient client) { - FlightTestUtil.assertCode( - FlightStatusCode.UNAUTHENTICATED, - () -> client.listFlights(Criteria.ALL).forEach(action -> fail())); - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/client/CustomHeaderTest.java 
b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/client/CustomHeaderTest.java deleted file mode 100644 index 059107b0106cc..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/client/CustomHeaderTest.java +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.client; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import com.google.common.collect.ImmutableMap; -import java.util.HashMap; -import java.util.Map; -import org.apache.arrow.flight.Action; -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.CallInfo; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.Criteria; -import org.apache.arrow.flight.FlightCallHeaders; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightClient.ClientStreamListener; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightMethod; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightServerMiddleware; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.FlightTestUtil; -import org.apache.arrow.flight.HeaderCallOption; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.RequestContext; -import org.apache.arrow.flight.SyncPutListener; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Tests to ensure custom headers are passed along to the server for each command. 
*/ -public class CustomHeaderTest { - FlightServer server; - FlightClient client; - BufferAllocator allocator; - TestCustomHeaderMiddleware.Factory headersMiddleware; - HeaderCallOption headers; - Map testHeaders = - ImmutableMap.of( - "foo", "bar", - "bar", "foo", - "answer", "42"); - - @BeforeEach - public void setUp() throws Exception { - allocator = new RootAllocator(Integer.MAX_VALUE); - headersMiddleware = new TestCustomHeaderMiddleware.Factory(); - FlightCallHeaders callHeaders = new FlightCallHeaders(); - for (Map.Entry entry : testHeaders.entrySet()) { - callHeaders.insert(entry.getKey(), entry.getValue()); - } - headers = new HeaderCallOption(callHeaders); - server = - FlightServer.builder( - allocator, - Location.forGrpcInsecure(FlightTestUtil.LOCALHOST, /*port*/ 0), - new NoOpFlightProducer()) - .middleware(FlightServerMiddleware.Key.of("customHeader"), headersMiddleware) - .build(); - server.start(); - client = FlightClient.builder(allocator, server.getLocation()).build(); - } - - @AfterEach - public void tearDown() throws Exception { - allocator.getChildAllocators().forEach(BufferAllocator::close); - AutoCloseables.close(allocator, server, client); - } - - @Test - public void testHandshake() { - try { - client.handshake(headers); - } catch (Exception ignored) { - } - - assertHeadersMatch(FlightMethod.HANDSHAKE); - } - - @Test - public void testGetSchema() { - try { - client.getSchema(FlightDescriptor.command(new byte[0]), headers); - } catch (Exception ignored) { - } - - assertHeadersMatch(FlightMethod.GET_SCHEMA); - } - - @Test - public void testGetFlightInfo() { - try { - client.getInfo(FlightDescriptor.command(new byte[0]), headers); - } catch (Exception ignored) { - } - - assertHeadersMatch(FlightMethod.GET_FLIGHT_INFO); - } - - @Test - public void testListActions() { - try { - client.listActions(headers).iterator().next(); - } catch (Exception ignored) { - } - - assertHeadersMatch(FlightMethod.LIST_ACTIONS); - } - - @Test - public void 
testListFlights() { - try { - client.listFlights(new Criteria(new byte[] {1}), headers).iterator().next(); - } catch (Exception ignored) { - } - - assertHeadersMatch(FlightMethod.LIST_FLIGHTS); - } - - @Test - public void testDoAction() { - try { - client.doAction(new Action("test"), headers).next(); - } catch (Exception ignored) { - } - - assertHeadersMatch(FlightMethod.DO_ACTION); - } - - @Test - public void testStartPut() { - try { - final ClientStreamListener listener = - client.startPut(FlightDescriptor.command(new byte[0]), new SyncPutListener(), headers); - listener.getResult(); - } catch (Exception ignored) { - } - - assertHeadersMatch(FlightMethod.DO_PUT); - } - - @Test - public void testGetStream() { - try (final FlightStream stream = client.getStream(new Ticket(new byte[0]), headers)) { - stream.next(); - } catch (Exception ignored) { - } - - assertHeadersMatch(FlightMethod.DO_GET); - } - - @Test - public void testDoExchange() { - try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(new byte[0]), headers)) { - stream.getReader().next(); - } catch (Exception ignored) { - } - - assertHeadersMatch(FlightMethod.DO_EXCHANGE); - } - - private void assertHeadersMatch(FlightMethod method) { - for (Map.Entry entry : testHeaders.entrySet()) { - assertEquals(entry.getValue(), headersMiddleware.getCustomHeader(method, entry.getKey())); - } - } - - /** A middleware used to test if customHeaders are being sent to the server properly. */ - static class TestCustomHeaderMiddleware implements FlightServerMiddleware { - - public TestCustomHeaderMiddleware() {} - - @Override - public void onBeforeSendingHeaders(CallHeaders callHeaders) {} - - @Override - public void onCallCompleted(CallStatus callStatus) {} - - @Override - public void onCallErrored(Throwable throwable) {} - - /** - * A factory for the middleware that keeps track of the received headers and provides a way to - * check those values for a given Flight Method. 
- */ - static class Factory implements FlightServerMiddleware.Factory { - private final Map receivedCallHeaders = new HashMap<>(); - - @Override - public TestCustomHeaderMiddleware onCallStarted( - CallInfo callInfo, CallHeaders callHeaders, RequestContext requestContext) { - - receivedCallHeaders.put(callInfo.method(), callHeaders); - return new TestCustomHeaderMiddleware(); - } - - public String getCustomHeader(FlightMethod method, String key) { - CallHeaders headers = receivedCallHeaders.get(method); - if (headers == null) { - return null; - } - return headers.get(key); - } - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/client/TestCookieHandling.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/client/TestCookieHandling.java deleted file mode 100644 index d89fd64ebadf7..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/client/TestCookieHandling.java +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.client; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.IOException; -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.CallInfo; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.Criteria; -import org.apache.arrow.flight.ErrorFlightMetadata; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightMethod; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightServerMiddleware; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.RequestContext; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -/** - * Tests for correct handling of cookies from the FlightClient using {@link ClientCookieMiddleware}. 
- */ -public class TestCookieHandling { - private static final String SET_COOKIE_HEADER = "Set-Cookie"; - private static final String COOKIE_HEADER = "Cookie"; - private BufferAllocator allocator; - private FlightServer server; - private FlightClient client; - - private ClientCookieMiddlewareTestFactory testFactory = new ClientCookieMiddlewareTestFactory(); - private ClientCookieMiddleware cookieMiddleware = new ClientCookieMiddleware(testFactory); - - @BeforeEach - public void setup() throws Exception { - allocator = new RootAllocator(Long.MAX_VALUE); - startServerAndClient(); - } - - @AfterEach - public void cleanup() throws Exception { - testFactory = new ClientCookieMiddlewareTestFactory(); - cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION)); - AutoCloseables.close(client, server, allocator); - client = null; - server = null; - allocator = null; - } - - @Test - public void basicCookie() { - CallHeaders headersToSend = new ErrorFlightMetadata(); - headersToSend.insert(SET_COOKIE_HEADER, "k=v"); - cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION)); - cookieMiddleware.onHeadersReceived(headersToSend); - assertEquals("k=v", cookieMiddleware.getValidCookiesAsString()); - } - - @Test - public void cookieStaysAfterMultipleRequests() { - CallHeaders headersToSend = new ErrorFlightMetadata(); - headersToSend.insert(SET_COOKIE_HEADER, "k=v"); - cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION)); - cookieMiddleware.onHeadersReceived(headersToSend); - assertEquals("k=v", cookieMiddleware.getValidCookiesAsString()); - - headersToSend = new ErrorFlightMetadata(); - cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION)); - cookieMiddleware.onHeadersReceived(headersToSend); - assertEquals("k=v", cookieMiddleware.getValidCookiesAsString()); - - headersToSend = new ErrorFlightMetadata(); - cookieMiddleware = testFactory.onCallStarted(new 
CallInfo(FlightMethod.DO_ACTION)); - cookieMiddleware.onHeadersReceived(headersToSend); - assertEquals("k=v", cookieMiddleware.getValidCookiesAsString()); - } - - @Disabled - @Test - public void cookieAutoExpires() { - CallHeaders headersToSend = new ErrorFlightMetadata(); - headersToSend.insert(SET_COOKIE_HEADER, "k=v; Max-Age=2"); - cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION)); - cookieMiddleware.onHeadersReceived(headersToSend); - // Note: using max-age changes cookie version from 0->1, which quotes values. - assertEquals("k=\"v\"", cookieMiddleware.getValidCookiesAsString()); - - headersToSend = new ErrorFlightMetadata(); - cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION)); - cookieMiddleware.onHeadersReceived(headersToSend); - assertEquals("k=\"v\"", cookieMiddleware.getValidCookiesAsString()); - - try { - Thread.sleep(5000); - } catch (InterruptedException ignored) { - } - - // Verify that the k cookie was discarded because it expired. - assertTrue(cookieMiddleware.getValidCookiesAsString().isEmpty()); - } - - @Test - public void cookieExplicitlyExpires() { - CallHeaders headersToSend = new ErrorFlightMetadata(); - headersToSend.insert(SET_COOKIE_HEADER, "k=v; Max-Age=2"); - cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION)); - cookieMiddleware.onHeadersReceived(headersToSend); - // Note: using max-age changes cookie version from 0->1, which quotes values. - assertEquals("k=\"v\"", cookieMiddleware.getValidCookiesAsString()); - - // Note: The JDK treats Max-Age < 0 as not expired and treats 0 as expired. - // This violates the RFC, which states that less than zero and zero should both be expired. 
- headersToSend = new ErrorFlightMetadata(); - headersToSend.insert(SET_COOKIE_HEADER, "k=v; Max-Age=0"); - cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION)); - cookieMiddleware.onHeadersReceived(headersToSend); - - // Verify that the k cookie was discarded because the server told the client it is expired. - assertTrue(cookieMiddleware.getValidCookiesAsString().isEmpty()); - } - - @Disabled - @Test - public void cookieExplicitlyExpiresWithMaxAgeMinusOne() { - CallHeaders headersToSend = new ErrorFlightMetadata(); - headersToSend.insert(SET_COOKIE_HEADER, "k=v; Max-Age=2"); - cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION)); - cookieMiddleware.onHeadersReceived(headersToSend); - // Note: using max-age changes cookie version from 0->1, which quotes values. - assertEquals("k=\"v\"", cookieMiddleware.getValidCookiesAsString()); - - headersToSend = new ErrorFlightMetadata(); - - // The Java HttpCookie class has a bug where it uses a -1 maxAge to indicate - // a persistent cookie, when the RFC spec says this should mean the cookie expires immediately. - headersToSend.insert(SET_COOKIE_HEADER, "k=v; Max-Age=-1"); - cookieMiddleware = testFactory.onCallStarted(new CallInfo(FlightMethod.DO_ACTION)); - cookieMiddleware.onHeadersReceived(headersToSend); - - // Verify that the k cookie was discarded because the server told the client it is expired. 
- assertTrue(cookieMiddleware.getValidCookiesAsString().isEmpty()); - } - - @Test - public void changeCookieValue() { - CallHeaders headersToSend = new ErrorFlightMetadata(); - headersToSend.insert(SET_COOKIE_HEADER, "k=v"); - cookieMiddleware.onHeadersReceived(headersToSend); - assertEquals("k=v", cookieMiddleware.getValidCookiesAsString()); - - headersToSend = new ErrorFlightMetadata(); - headersToSend.insert(SET_COOKIE_HEADER, "k=v2"); - cookieMiddleware.onHeadersReceived(headersToSend); - assertEquals("k=v2", cookieMiddleware.getValidCookiesAsString()); - } - - @Test - public void multipleCookiesWithSetCookie() { - CallHeaders headersToSend = new ErrorFlightMetadata(); - headersToSend.insert(SET_COOKIE_HEADER, "firstKey=firstVal"); - headersToSend.insert(SET_COOKIE_HEADER, "secondKey=secondVal"); - cookieMiddleware.onHeadersReceived(headersToSend); - assertEquals( - "firstKey=firstVal; secondKey=secondVal", cookieMiddleware.getValidCookiesAsString()); - } - - @Test - public void cookieStaysAfterMultipleRequestsEndToEnd() { - client.handshake(); - assertEquals("k=v", testFactory.clientCookieMiddleware.getValidCookiesAsString()); - client.handshake(); - assertEquals("k=v", testFactory.clientCookieMiddleware.getValidCookiesAsString()); - client.listFlights(Criteria.ALL); - assertEquals("k=v", testFactory.clientCookieMiddleware.getValidCookiesAsString()); - } - - /** A server middleware component that injects SET_COOKIE_HEADER into the outgoing headers. 
*/ - static class SetCookieHeaderInjector implements FlightServerMiddleware { - private final Factory factory; - - public SetCookieHeaderInjector(Factory factory) { - this.factory = factory; - } - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) { - if (!factory.receivedCookieHeader) { - outgoingHeaders.insert(SET_COOKIE_HEADER, "k=v"); - } - } - - @Override - public void onCallCompleted(CallStatus status) {} - - @Override - public void onCallErrored(Throwable err) {} - - static class Factory implements FlightServerMiddleware.Factory { - private boolean receivedCookieHeader = false; - - @Override - public SetCookieHeaderInjector onCallStarted( - CallInfo info, CallHeaders incomingHeaders, RequestContext context) { - receivedCookieHeader = null != incomingHeaders.get(COOKIE_HEADER); - return new SetCookieHeaderInjector(this); - } - } - } - - public static class ClientCookieMiddlewareTestFactory extends ClientCookieMiddleware.Factory { - - private ClientCookieMiddleware clientCookieMiddleware; - - @Override - public ClientCookieMiddleware onCallStarted(CallInfo info) { - this.clientCookieMiddleware = new ClientCookieMiddleware(this); - return this.clientCookieMiddleware; - } - } - - private void startServerAndClient() throws IOException { - final FlightProducer flightProducer = - new NoOpFlightProducer() { - @Override - public void listFlights( - CallContext context, Criteria criteria, StreamListener listener) { - listener.onCompleted(); - } - }; - - this.server = - FlightServer.builder(allocator, forGrpcInsecure(LOCALHOST, 0), flightProducer) - .middleware( - FlightServerMiddleware.Key.of("test"), new SetCookieHeaderInjector.Factory()) - .build() - .start(); - - this.client = - FlightClient.builder(allocator, server.getLocation()).intercept(testFactory).build(); - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java 
b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java deleted file mode 100644 index f5ab712a75ff0..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.grpc; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import io.grpc.Metadata; -import io.grpc.Status; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightStatusCode; -import org.junit.jupiter.api.Test; - -public class TestStatusUtils { - - @Test - public void testParseTrailers() { - Status status = Status.CANCELLED; - Metadata trailers = new Metadata(); - - // gRPC can have trailers with certain metadata keys beginning with ":", such as ":status". 
- // See https://github.com/grpc/grpc/blob/master/doc/PROTOCOL-HTTP2.md - trailers.put(StatusUtils.keyOfAscii(":status"), "502"); - trailers.put(StatusUtils.keyOfAscii("date"), "Fri, 13 Sep 2015 11:23:58 GMT"); - trailers.put(StatusUtils.keyOfAscii("content-type"), "text/html"); - - CallStatus callStatus = StatusUtils.fromGrpcStatusAndTrailers(status, trailers); - - assertEquals(FlightStatusCode.CANCELLED, callStatus.code()); - assertTrue(callStatus.metadata().containsKey(":status")); - assertEquals("502", callStatus.metadata().get(":status")); - assertTrue(callStatus.metadata().containsKey("date")); - assertEquals("Fri, 13 Sep 2015 11:23:58 GMT", callStatus.metadata().get("date")); - assertTrue(callStatus.metadata().containsKey("content-type")); - assertEquals("text/html", callStatus.metadata().get("content-type")); - } - - @Test - public void testGrpcResourceExhaustedTranslatedToFlightStatus() { - Status status = Status.RESOURCE_EXHAUSTED; - - CallStatus callStatus = StatusUtils.fromGrpcStatus(status); - assertEquals(FlightStatusCode.RESOURCE_EXHAUSTED, callStatus.code()); - - FlightStatusCode flightStatusCode = StatusUtils.fromGrpcStatusCode(status.getCode()); - assertEquals(FlightStatusCode.RESOURCE_EXHAUSTED, flightStatusCode); - } - - @Test - public void testFlightResourceExhaustedTranslatedToGrpcStatua() { - CallStatus callStatus = CallStatus.RESOURCE_EXHAUSTED; - - Status.Code grpcStatusCode = StatusUtils.toGrpcStatusCode(callStatus.code()); - assertEquals(Status.RESOURCE_EXHAUSTED.getCode(), grpcStatusCode); - - Status grpcStatus = StatusUtils.toGrpcStatus(callStatus); - assertEquals(Status.RESOURCE_EXHAUSTED.getCode(), grpcStatus.getCode()); - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/PerformanceTestServer.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/PerformanceTestServer.java deleted file mode 100644 index 5f8bfe5f29b23..0000000000000 --- 
a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/PerformanceTestServer.java +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.perf; - -import com.google.common.collect.ImmutableList; -import com.google.protobuf.InvalidProtocolBufferException; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import org.apache.arrow.flight.BackpressureStrategy; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.flight.perf.impl.PerfOuterClass.Perf; -import org.apache.arrow.flight.perf.impl.PerfOuterClass.Token; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Preconditions; -import 
org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; - -public class PerformanceTestServer implements AutoCloseable { - - private final FlightServer flightServer; - private final BufferAllocator allocator; - private final PerfProducer producer; - private final boolean isNonBlocking; - - public PerformanceTestServer(BufferAllocator incomingAllocator, Location location) { - this( - incomingAllocator, - location, - new BackpressureStrategy() { - private FlightProducer.ServerStreamListener listener; - - @Override - public void register(FlightProducer.ServerStreamListener listener) { - this.listener = listener; - } - - @Override - public WaitResult waitForListener(long timeout) { - while (!listener.isReady() && !listener.isCancelled()) { - // busy wait - } - return WaitResult.READY; - } - }, - false); - } - - public PerformanceTestServer( - BufferAllocator incomingAllocator, - Location location, - BackpressureStrategy bpStrategy, - boolean isNonBlocking) { - this.allocator = incomingAllocator.newChildAllocator("perf-server", 0, Long.MAX_VALUE); - this.producer = new PerfProducer(bpStrategy); - this.flightServer = FlightServer.builder(this.allocator, location, producer).build(); - this.isNonBlocking = isNonBlocking; - } - - public Location getLocation() { - return flightServer.getLocation(); - } - - public PerformanceTestServer start() throws IOException { - flightServer.start(); - return this; - } - - @Override - public void close() throws Exception { - AutoCloseables.close(flightServer, allocator); - } - - private final class PerfProducer extends NoOpFlightProducer { - private final BackpressureStrategy bpStrategy; - - private PerfProducer(BackpressureStrategy bpStrategy) { - this.bpStrategy = bpStrategy; - } - - @Override - public void getStream(CallContext context, Ticket ticket, 
ServerStreamListener listener) { - bpStrategy.register(listener); - final Runnable loadData = - () -> { - Token token = null; - try { - token = Token.parseFrom(ticket.getBytes()); - } catch (InvalidProtocolBufferException e) { - throw new RuntimeException(e); - } - Perf perf = token.getDefinition(); - Schema schema = Schema.deserializeMessage(perf.getSchema().asReadOnlyByteBuffer()); - try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator); - BigIntVector a = (BigIntVector) root.getVector("a")) { - listener.setUseZeroCopy(true); - listener.start(root); - root.allocateNew(); - - int current = 0; - long i = token.getStart(); - while (i < token.getEnd()) { - if (listener.isCancelled()) { - root.clear(); - return; - } - - if (TestPerf.VALIDATE) { - a.setSafe(current, i); - } - - i++; - current++; - if (i % perf.getRecordsPerBatch() == 0) { - root.setRowCount(current); - - bpStrategy.waitForListener(0); - if (listener.isCancelled()) { - root.clear(); - return; - } - listener.putNext(); - current = 0; - root.allocateNew(); - } - } - - // send last partial batch. 
- if (current != 0) { - root.setRowCount(current); - listener.putNext(); - } - listener.completed(); - } - }; - - if (!isNonBlocking) { - loadData.run(); - } else { - final ExecutorService service = Executors.newSingleThreadExecutor(); - Future unused = service.submit(loadData); - service.shutdown(); - } - } - - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - try { - Preconditions.checkArgument(descriptor.isCommand()); - Perf exec = Perf.parseFrom(descriptor.getCommand()); - - final Schema pojoSchema = - new Schema( - ImmutableList.of( - Field.nullable("a", MinorType.BIGINT.getType()), - Field.nullable("b", MinorType.BIGINT.getType()), - Field.nullable("c", MinorType.BIGINT.getType()), - Field.nullable("d", MinorType.BIGINT.getType()))); - - Token token = - Token.newBuilder() - .setDefinition(exec) - .setStart(0) - .setEnd(exec.getRecordsPerStream()) - .build(); - final Ticket ticket = new Ticket(token.toByteArray()); - - List endpoints = new ArrayList<>(); - for (int i = 0; i < exec.getStreamCount(); i++) { - endpoints.add(new FlightEndpoint(ticket, getLocation())); - } - - return new FlightInfo( - pojoSchema, - descriptor, - endpoints, - -1, - exec.getRecordsPerStream() * exec.getStreamCount()); - } catch (InvalidProtocolBufferException e) { - throw new RuntimeException(e); - } - } - } -} diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java deleted file mode 100644 index 338771286d462..0000000000000 --- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/perf/TestPerf.java +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.perf; - -import static org.apache.arrow.flight.FlightTestUtil.LOCALHOST; -import static org.apache.arrow.flight.Location.forGrpcInsecure; - -import com.google.common.base.MoreObjects; -import com.google.common.base.Stopwatch; -import com.google.common.collect.ImmutableList; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListeningExecutorService; -import com.google.common.util.concurrent.MoreExecutors; -import com.google.protobuf.ByteString; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.Callable; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.flight.perf.impl.PerfOuterClass.Perf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import 
org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -@Disabled -public class TestPerf { - - public static final boolean VALIDATE = false; - - public static FlightDescriptor getPerfFlightDescriptor( - long recordCount, int recordsPerBatch, int streamCount) { - final Schema pojoSchema = - new Schema( - ImmutableList.of( - Field.nullable("a", MinorType.BIGINT.getType()), - Field.nullable("b", MinorType.BIGINT.getType()), - Field.nullable("c", MinorType.BIGINT.getType()), - Field.nullable("d", MinorType.BIGINT.getType()))); - - byte[] bytes = pojoSchema.serializeAsMessage(); - ByteString serializedSchema = ByteString.copyFrom(bytes); - - return FlightDescriptor.command( - Perf.newBuilder() - .setRecordsPerStream(recordCount) - .setRecordsPerBatch(recordsPerBatch) - .setSchema(serializedSchema) - .setStreamCount(streamCount) - .build() - .toByteArray()); - } - - public static void main(String[] args) throws Exception { - new TestPerf().throughput(); - } - - @Test - public void throughput() throws Exception { - final int numRuns = 10; - ListeningExecutorService pool = - MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(4)); - double[] throughPuts = new double[numRuns]; - - for (int i = 0; i < numRuns; i++) { - try (final BufferAllocator a = new RootAllocator(Long.MAX_VALUE); - final PerformanceTestServer server = - new PerformanceTestServer(a, forGrpcInsecure(LOCALHOST, 0)).start(); - final FlightClient client = FlightClient.builder(a, server.getLocation()).build(); ) { - final FlightInfo info = client.getInfo(getPerfFlightDescriptor(50_000_000L, 4095, 2)); - List> results = - info.getEndpoints().stream() - .map(t -> new Consumer(client, t.getTicket())) - .map(t -> pool.submit(t)) - .collect(Collectors.toList()); - - final Result r = - Futures.whenAllSucceed(results) - .call( - () -> { - Result res = new Result(); - for (ListenableFuture f : results) { - res.add(f.get()); - } - return res; - 
}, - pool) - .get(); - - double seconds = r.nanos * 1.0d / 1000 / 1000 / 1000; - throughPuts[i] = (r.bytes * 1.0d / 1024 / 1024) / seconds; - System.out.printf( - "Transferred %d records totaling %s bytes at %f MiB/s. %f record/s. %f batch/s.%n", - r.rows, - r.bytes, - throughPuts[i], - (r.rows * 1.0d) / seconds, - (r.batches * 1.0d) / seconds); - } - } - pool.shutdown(); - - System.out.println("Summary: "); - double average = Arrays.stream(throughPuts).sum() / numRuns; - double sqrSum = - Arrays.stream(throughPuts).map(val -> val - average).map(val -> val * val).sum(); - double stddev = Math.sqrt(sqrSum / numRuns); - System.out.printf( - "Average throughput: %f MiB/s, standard deviation: %f MiB/s%n", average, stddev); - } - - private static final class Consumer implements Callable { - - private final FlightClient client; - private final Ticket ticket; - - public Consumer(FlightClient client, Ticket ticket) { - super(); - this.client = client; - this.ticket = ticket; - } - - @Override - public Result call() throws Exception { - final Result r = new Result(); - Stopwatch watch = Stopwatch.createStarted(); - try (final FlightStream stream = client.getStream(ticket)) { - final VectorSchemaRoot root = stream.getRoot(); - try { - BigIntVector a = (BigIntVector) root.getVector("a"); - while (stream.next()) { - int rows = root.getRowCount(); - long aSum = r.aSum; - for (int i = 0; i < rows; i++) { - if (VALIDATE) { - aSum += a.get(i); - } - } - r.bytes += rows * 32L; - r.rows += rows; - r.aSum = aSum; - r.batches++; - } - - r.nanos = watch.elapsed(TimeUnit.NANOSECONDS); - return r; - } finally { - root.clear(); - } - } - } - } - - private static final class Result { - private long rows; - private long aSum; - private long bytes; - private long nanos; - private long batches; - - public void add(Result r) { - rows += r.rows; - aSum += r.aSum; - bytes += r.bytes; - batches += r.batches; - nanos = Math.max(nanos, r.nanos); - } - - @Override - public String toString() { - 
return MoreObjects.toStringHelper(this) - .add("rows", rows) - .add("aSum", aSum) - .add("batches", batches) - .add("bytes", bytes) - .add("nanos", nanos) - .toString(); - } - } -} diff --git a/java/flight/flight-core/src/test/proto/perf.proto b/java/flight/flight-core/src/test/proto/perf.proto deleted file mode 100644 index 99f35a9e65de1..0000000000000 --- a/java/flight/flight-core/src/test/proto/perf.proto +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

    - * http://www.apache.org/licenses/LICENSE-2.0 - *

    - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -syntax = "proto3"; - -option java_package = "org.apache.arrow.flight.perf.impl"; - -message Perf { - bytes schema = 1; - int32 stream_count = 2; - int64 records_per_stream = 3; - int32 records_per_batch = 4; -} - -/* - * Payload of ticket - */ -message Token { - - // definition of entire flight. - Perf definition = 1; - - // inclusive start - int64 start = 2; - - // exclusive end - int64 end = 3; - -} - diff --git a/java/flight/flight-core/src/test/proto/test.proto b/java/flight/flight-core/src/test/proto/test.proto deleted file mode 100644 index 6fa1890b2b71d..0000000000000 --- a/java/flight/flight-core/src/test/proto/test.proto +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -syntax = "proto3"; - -option java_package = "org.apache.arrow.flight"; - -import "google/protobuf/empty.proto"; - -service TestService { - rpc Test(google.protobuf.Empty) returns (google.protobuf.Empty) {} -} diff --git a/java/flight/flight-core/src/test/resources/logback.xml b/java/flight/flight-core/src/test/resources/logback.xml deleted file mode 100644 index 444b2ed6d8392..0000000000000 --- a/java/flight/flight-core/src/test/resources/logback.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - true - 10000 - true - ${LILITH_HOSTNAME:-localhost} - - - - - - - - diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml deleted file mode 100644 index e43bcd0571102..0000000000000 --- a/java/flight/flight-integration-tests/pom.xml +++ /dev/null @@ -1,100 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-flight - 19.0.0-SNAPSHOT - - - flight-integration-tests - jar - Arrow Flight Integration Tests - Integration tests for Flight RPC. - - - - org.apache.arrow - arrow-vector - - - org.apache.arrow - arrow-memory-core - - - org.apache.arrow - flight-core - - - org.apache.arrow - flight-sql - - - com.google.protobuf - protobuf-java - - - com.google.guava - guava - - - commons-cli - commons-cli - 1.9.0 - - - org.slf4j - slf4j-api - - - - - - - maven-shade-plugin - - - make-assembly - - shade - - package - - false - true - jar-with-dependencies - - - - **/module-info.class - - - - - - - - - - - - - diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/AppMetadataFlightInfoEndpointScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/AppMetadataFlightInfoEndpointScenario.java deleted file mode 100644 index 2599005275751..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/AppMetadataFlightInfoEndpointScenario.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.nio.charset.StandardCharsets; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; - -/** Test app_metadata in FlightInfo and FlightEndpoint. 
*/ -final class AppMetadataFlightInfoEndpointScenario implements Scenario { - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) throws Exception { - return new AppMetadataFlightInfoEndpointProducer(); - } - - @Override - public void buildServer(FlightServer.Builder builder) throws Exception {} - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient client) - throws Exception { - byte[] cmd = "foobar".getBytes(StandardCharsets.UTF_8); - FlightInfo info = client.getInfo(FlightDescriptor.command(cmd)); - IntegrationAssertions.assertEquals(info.getAppMetadata(), cmd); - IntegrationAssertions.assertEquals(info.getEndpoints().size(), 1); - IntegrationAssertions.assertEquals(info.getEndpoints().get(0).getAppMetadata(), cmd); - } - - /** producer for app_metadata test. */ - static class AppMetadataFlightInfoEndpointProducer extends NoOpFlightProducer { - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - byte[] cmd = descriptor.getCommand(); - - Schema schema = - new Schema( - Collections.singletonList( - Field.notNullable("number", Types.MinorType.UINT4.getType()))); - - List endpoints = - Collections.singletonList( - FlightEndpoint.builder(new Ticket("".getBytes(StandardCharsets.UTF_8))) - .setAppMetadata(cmd) - .build()); - - return FlightInfo.builder(schema, descriptor, endpoints).setAppMetadata(cmd).build(); - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/AuthBasicProtoScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/AuthBasicProtoScenario.java deleted file mode 100644 index 6fc96706c88c8..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/AuthBasicProtoScenario.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) 
under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Optional; -import org.apache.arrow.flight.Action; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightStatusCode; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.Result; -import org.apache.arrow.flight.auth.BasicClientAuthHandler; -import org.apache.arrow.flight.auth.BasicServerAuthHandler; -import org.apache.arrow.memory.BufferAllocator; - -/** A scenario testing the built-in basic authentication Protobuf. 
*/ -final class AuthBasicProtoScenario implements Scenario { - - static final String USERNAME = "arrow"; - static final String PASSWORD = "flight"; - - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) { - return new NoOpFlightProducer() { - @Override - public void doAction(CallContext context, Action action, StreamListener listener) { - listener.onNext(new Result(context.peerIdentity().getBytes(StandardCharsets.UTF_8))); - listener.onCompleted(); - } - }; - } - - @Override - public void buildServer(FlightServer.Builder builder) { - builder.authHandler( - new BasicServerAuthHandler( - new BasicServerAuthHandler.BasicAuthValidator() { - @Override - public byte[] getToken(String username, String password) throws Exception { - if (!USERNAME.equals(username) || !PASSWORD.equals(password)) { - throw CallStatus.UNAUTHENTICATED - .withDescription("Username or password is invalid.") - .toRuntimeException(); - } - return ("valid:" + username).getBytes(StandardCharsets.UTF_8); - } - - @Override - public Optional isValid(byte[] token) { - if (token != null) { - final String credential = new String(token, StandardCharsets.UTF_8); - if (credential.startsWith("valid:")) { - return Optional.of(credential.substring(6)); - } - } - return Optional.empty(); - } - })); - } - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient client) { - final FlightRuntimeException e = - IntegrationAssertions.assertThrows( - FlightRuntimeException.class, - () -> { - client.listActions().forEach(act -> {}); - }); - if (!FlightStatusCode.UNAUTHENTICATED.equals(e.status().code())) { - throw new AssertionError("Expected UNAUTHENTICATED but found " + e.status().code(), e); - } - - client.authenticate(new BasicClientAuthHandler(USERNAME, PASSWORD)); - final Result result = client.doAction(new Action("")).next(); - if (!USERNAME.equals(new String(result.getBody(), StandardCharsets.UTF_8))) { - throw new AssertionError( - 
"Expected " + USERNAME + " but got " + Arrays.toString(result.getBody())); - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/DoExchangeEchoScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/DoExchangeEchoScenario.java deleted file mode 100644 index 3e7fa19a81927..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/DoExchangeEchoScenario.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.integration.tests; - -import java.nio.charset.StandardCharsets; -import java.util.Collections; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Location; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.Validator; - -/** Test DoExchange by echoing data back to the client. */ -final class DoExchangeEchoScenario implements Scenario { - public static final byte[] COMMAND = "echo".getBytes(StandardCharsets.UTF_8); - - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) throws Exception { - return new DoExchangeProducer(allocator); - } - - @Override - public void buildServer(FlightServer.Builder builder) {} - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient client) - throws Exception { - final Schema schema = - new Schema(Collections.singletonList(Field.notNullable("x", new ArrowType.Int(32, true)))); - try (final FlightClient.ExchangeReaderWriter stream = - client.doExchange(FlightDescriptor.command(COMMAND)); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - final FlightStream reader = stream.getReader(); - - // Write data and check that it gets echoed back. 
- IntVector iv = (IntVector) root.getVector("x"); - iv.allocateNew(); - stream.getWriter().start(root); - int rowCount = 10; - for (int batchIdx = 0; batchIdx < 4; batchIdx++) { - for (int rowIdx = 0; rowIdx < rowCount; rowIdx++) { - iv.setSafe(rowIdx, batchIdx + rowIdx); - } - root.setRowCount(rowCount); - boolean writeMetadata = batchIdx % 2 == 0; - final byte[] rawMetadata = Integer.toString(batchIdx).getBytes(StandardCharsets.UTF_8); - if (writeMetadata) { - final ArrowBuf metadata = allocator.buffer(rawMetadata.length); - metadata.writeBytes(rawMetadata); - stream.getWriter().putNext(metadata); - } else { - stream.getWriter().putNext(); - } - - IntegrationAssertions.assertTrue("Unexpected end of reader", reader.next()); - if (writeMetadata) { - IntegrationAssertions.assertNotNull(reader.getLatestMetadata()); - final byte[] readMetadata = new byte[rawMetadata.length]; - reader.getLatestMetadata().readBytes(readMetadata); - IntegrationAssertions.assertEquals(rawMetadata, readMetadata); - } else { - IntegrationAssertions.assertNull(reader.getLatestMetadata()); - } - IntegrationAssertions.assertEquals(root.getSchema(), reader.getSchema()); - Validator.compareVectorSchemaRoot(reader.getRoot(), root); - } - - stream.getWriter().completed(); - IntegrationAssertions.assertFalse("Expected to reach end of reader", reader.next()); - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/DoExchangeProducer.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/DoExchangeProducer.java deleted file mode 100644 index 2e28ab1233e7c..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/DoExchangeProducer.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.util.Arrays; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; - -/** The server used for testing the Flight do_exchange method. 
*/ -final class DoExchangeProducer extends NoOpFlightProducer { - private final BufferAllocator allocator; - - DoExchangeProducer(BufferAllocator allocator) { - this.allocator = allocator; - } - - @Override - public void doExchange(CallContext context, FlightStream reader, ServerStreamListener writer) { - FlightDescriptor descriptor = reader.getDescriptor(); - if (descriptor.isCommand()) { - if (Arrays.equals(DoExchangeEchoScenario.COMMAND, descriptor.getCommand())) { - doEcho(reader, writer); - } - } - throw CallStatus.UNIMPLEMENTED - .withDescription("Unsupported descriptor: " + descriptor.toString()) - .toRuntimeException(); - } - - private void doEcho(FlightStream reader, ServerStreamListener writer) { - VectorSchemaRoot root = null; - VectorLoader loader = null; - while (reader.next()) { - if (reader.hasRoot()) { - if (root == null) { - root = VectorSchemaRoot.create(reader.getSchema(), allocator); - loader = new VectorLoader(root); - writer.start(root); - } - VectorUnloader unloader = new VectorUnloader(reader.getRoot()); - try (final ArrowRecordBatch arb = unloader.getRecordBatch()) { - loader.load(arb); - } - if (reader.getLatestMetadata() != null) { - reader.getLatestMetadata().getReferenceManager().retain(); - writer.putNext(reader.getLatestMetadata()); - } else { - writer.putNext(); - } - } else { - // Pure metadata - reader.getLatestMetadata().getReferenceManager().retain(); - writer.putMetadata(reader.getLatestMetadata()); - } - } - if (root != null) { - root.close(); - } - writer.completed(); - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeCancelFlightInfoScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeCancelFlightInfoScenario.java deleted file mode 100644 index 8d2f4b0e0ade5..0000000000000 --- 
a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeCancelFlightInfoScenario.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.nio.charset.StandardCharsets; -import org.apache.arrow.flight.CancelFlightInfoRequest; -import org.apache.arrow.flight.CancelFlightInfoResult; -import org.apache.arrow.flight.CancelStatus; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Location; -import org.apache.arrow.memory.BufferAllocator; - -/** Test CancelFlightInfo. 
*/ -final class ExpirationTimeCancelFlightInfoScenario implements Scenario { - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) throws Exception { - return new ExpirationTimeProducer(allocator); - } - - @Override - public void buildServer(FlightServer.Builder builder) {} - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient client) - throws Exception { - FlightInfo info = - client.getInfo(FlightDescriptor.command("expiration".getBytes(StandardCharsets.UTF_8))); - CancelFlightInfoRequest request = new CancelFlightInfoRequest(info); - CancelFlightInfoResult result = client.cancelFlightInfo(request); - IntegrationAssertions.assertEquals(CancelStatus.CANCELLED, result.getStatus()); - - // All requests should fail - for (FlightEndpoint endpoint : info.getEndpoints()) { - IntegrationAssertions.assertThrows( - FlightRuntimeException.class, - () -> { - try (FlightStream stream = client.getStream(endpoint.getTicket())) { - while (stream.next()) {} - } - }); - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeDoGetScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeDoGetScenario.java deleted file mode 100644 index d6f57aa2f046e..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeDoGetScenario.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Location; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; - -/** Test DoGet with expiration times. 
*/ -final class ExpirationTimeDoGetScenario implements Scenario { - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) throws Exception { - return new ExpirationTimeProducer(allocator); - } - - @Override - public void buildServer(FlightServer.Builder builder) {} - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient client) - throws Exception { - FlightInfo info = - client.getInfo( - FlightDescriptor.command("expiration_time".getBytes(StandardCharsets.UTF_8))); - - List batches = new ArrayList<>(); - - try { - for (FlightEndpoint endpoint : info.getEndpoints()) { - if (batches.size() == 0) { - IntegrationAssertions.assertFalse( - "endpoints[0] must not have expiration time", - endpoint.getExpirationTime().isPresent()); - } else { - IntegrationAssertions.assertTrue( - "endpoints[" + batches.size() + "] must have expiration time", - endpoint.getExpirationTime().isPresent()); - } - try (FlightStream stream = client.getStream(endpoint.getTicket())) { - while (stream.next()) { - batches.add(new VectorUnloader(stream.getRoot()).getRecordBatch()); - } - } - } - - // Check data - IntegrationAssertions.assertEquals(3, batches.size()); - try (final VectorSchemaRoot root = - VectorSchemaRoot.create(ExpirationTimeProducer.SCHEMA, allocator)) { - final VectorLoader loader = new VectorLoader(root); - - loader.load(batches.get(0)); - IntegrationAssertions.assertEquals(1, root.getRowCount()); - IntegrationAssertions.assertEquals(0, ((UInt4Vector) root.getVector(0)).getObject(0)); - - loader.load(batches.get(1)); - IntegrationAssertions.assertEquals(1, root.getRowCount()); - IntegrationAssertions.assertEquals(1, ((UInt4Vector) root.getVector(0)).getObject(0)); - - loader.load(batches.get(2)); - IntegrationAssertions.assertEquals(1, root.getRowCount()); - IntegrationAssertions.assertEquals(2, ((UInt4Vector) root.getVector(0)).getObject(0)); - } - } finally { - AutoCloseables.close(batches); - } - } -} diff 
--git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeListActionsScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeListActionsScenario.java deleted file mode 100644 index 2bfb01b766c00..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeListActionsScenario.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.util.Iterator; -import org.apache.arrow.flight.ActionType; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightConstants; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.Location; -import org.apache.arrow.memory.BufferAllocator; - -/** Test ListActions with expiration times. 
*/ -final class ExpirationTimeListActionsScenario implements Scenario { - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) throws Exception { - return new ExpirationTimeProducer(allocator); - } - - @Override - public void buildServer(FlightServer.Builder builder) {} - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient client) - throws Exception { - Iterator actions = client.listActions().iterator(); - IntegrationAssertions.assertTrue("Expected 2 actions", actions.hasNext()); - ActionType action = actions.next(); - IntegrationAssertions.assertEquals( - FlightConstants.CANCEL_FLIGHT_INFO.getType(), action.getType()); - - IntegrationAssertions.assertTrue("Expected 2 actions", actions.hasNext()); - action = actions.next(); - IntegrationAssertions.assertEquals( - FlightConstants.RENEW_FLIGHT_ENDPOINT.getType(), action.getType()); - - IntegrationAssertions.assertFalse("Expected 2 actions", actions.hasNext()); - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeProducer.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeProducer.java deleted file mode 100644 index 3dec8321b443b..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeProducer.java +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.time.Instant; -import java.time.temporal.ChronoUnit; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.flight.Action; -import org.apache.arrow.flight.ActionType; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.CancelFlightInfoRequest; -import org.apache.arrow.flight.CancelFlightInfoResult; -import org.apache.arrow.flight.CancelStatus; -import org.apache.arrow.flight.FlightConstants; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.RenewFlightEndpointRequest; -import org.apache.arrow.flight.Result; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; - -/** - * The server used for testing FlightEndpoint.expiration_time. - * - *

    GetFlightInfo() returns a FlightInfo that has the following three FlightEndpoints: - * - *

      - *
    1. No expiration time - *
    2. 5 seconds expiration time - *
    3. 6 seconds expiration time - *
    - * - * The client can't read data from the first endpoint multiple times but can read data from the - * second and third endpoints. The client can't re-read data from the second endpoint 5 seconds - * later. The client can't re-read data from the third endpoint 6 seconds later. - * - *

    The client can cancel a returned FlightInfo by pre-defined CancelFlightInfo action. The client - * can't read data from endpoints even within 6 seconds after the action. - * - *

    The client can extend the expiration time of a FlightEndpoint in a returned FlightInfo by - * pre-defined RenewFlightEndpoint action. The client can read data from endpoints multiple times - * within more 10 seconds after the action. - */ -final class ExpirationTimeProducer extends NoOpFlightProducer { - public static final Schema SCHEMA = - new Schema( - Collections.singletonList(Field.notNullable("number", Types.MinorType.UINT4.getType()))); - - private final BufferAllocator allocator; - private final List statuses; - - ExpirationTimeProducer(BufferAllocator allocator) { - this.allocator = allocator; - this.statuses = new ArrayList<>(); - } - - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - statuses.clear(); - List endpoints = new ArrayList<>(); - Instant now = Instant.now(); - endpoints.add(addEndpoint("No expiration time", null)); - endpoints.add(addEndpoint("5 seconds", now.plus(5, ChronoUnit.SECONDS))); - endpoints.add(addEndpoint("6 seconds", now.plus(6, ChronoUnit.SECONDS))); - return new FlightInfo(SCHEMA, descriptor, endpoints, -1, -1); - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - // Obviously, not safe (since we don't lock), but we assume calls are not concurrent - int index = parseIndexFromTicket(ticket); - EndpointStatus status = statuses.get(index); - if (status.cancelled) { - listener.error( - CallStatus.NOT_FOUND - .withDescription( - "Invalid flight: cancelled: " - + new String(ticket.getBytes(), StandardCharsets.UTF_8)) - .toRuntimeException()); - return; - } else if (status.expirationTime != null && Instant.now().isAfter(status.expirationTime)) { - listener.error( - CallStatus.NOT_FOUND - .withDescription( - "Invalid flight: expired: " - + new String(ticket.getBytes(), StandardCharsets.UTF_8)) - .toRuntimeException()); - return; - } else if (status.expirationTime == null && status.numGets > 0) { - listener.error( - 
CallStatus.NOT_FOUND - .withDescription( - "Invalid flight: can't read multiple times: " - + new String(ticket.getBytes(), StandardCharsets.UTF_8)) - .toRuntimeException()); - return; - } - status.numGets++; - - try (final VectorSchemaRoot root = VectorSchemaRoot.create(SCHEMA, allocator)) { - listener.start(root); - UInt4Vector vector = (UInt4Vector) root.getVector(0); - vector.setSafe(0, index); - root.setRowCount(1); - listener.putNext(); - } - listener.completed(); - } - - @Override - public void doAction(CallContext context, Action action, StreamListener listener) { - try { - if (action.getType().equals(FlightConstants.CANCEL_FLIGHT_INFO.getType())) { - CancelFlightInfoRequest request = - CancelFlightInfoRequest.deserialize(ByteBuffer.wrap(action.getBody())); - CancelStatus cancelStatus = CancelStatus.UNSPECIFIED; - for (FlightEndpoint endpoint : request.getInfo().getEndpoints()) { - int index = parseIndexFromTicket(endpoint.getTicket()); - EndpointStatus status = statuses.get(index); - if (status.cancelled) { - cancelStatus = CancelStatus.NOT_CANCELLABLE; - } else { - status.cancelled = true; - if (cancelStatus == CancelStatus.UNSPECIFIED) { - cancelStatus = CancelStatus.CANCELLED; - } - } - } - listener.onNext(new Result(new CancelFlightInfoResult(cancelStatus).serialize().array())); - } else if (action.getType().equals(FlightConstants.RENEW_FLIGHT_ENDPOINT.getType())) { - RenewFlightEndpointRequest request = - RenewFlightEndpointRequest.deserialize(ByteBuffer.wrap(action.getBody())); - FlightEndpoint endpoint = request.getFlightEndpoint(); - int index = parseIndexFromTicket(endpoint.getTicket()); - EndpointStatus status = statuses.get(index); - if (status.cancelled) { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription("Invalid flight: cancelled: " + index) - .toRuntimeException()); - return; - } - - String ticketBody = new String(endpoint.getTicket().getBytes(), StandardCharsets.UTF_8); - ticketBody += ": renewed (+ 10 seconds)"; - Ticket 
ticket = new Ticket(ticketBody.getBytes(StandardCharsets.UTF_8)); - Instant expiration = Instant.now().plus(10, ChronoUnit.SECONDS); - status.expirationTime = expiration; - FlightEndpoint newEndpoint = - new FlightEndpoint( - ticket, expiration, endpoint.getLocations().toArray(new Location[0])); - listener.onNext(new Result(newEndpoint.serialize().array())); - } else { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription("Unknown action: " + action.getType()) - .toRuntimeException()); - return; - } - } catch (IOException | URISyntaxException e) { - listener.onError( - CallStatus.INTERNAL.withCause(e).withDescription(e.toString()).toRuntimeException()); - return; - } - listener.onCompleted(); - } - - @Override - public void listActions(CallContext context, StreamListener listener) { - listener.onNext(FlightConstants.CANCEL_FLIGHT_INFO); - listener.onNext(FlightConstants.RENEW_FLIGHT_ENDPOINT); - listener.onCompleted(); - } - - private FlightEndpoint addEndpoint(String ticket, Instant expirationTime) { - Ticket flightTicket = - new Ticket( - String.format("%d: %s", statuses.size(), ticket).getBytes(StandardCharsets.UTF_8)); - statuses.add(new EndpointStatus(expirationTime)); - return new FlightEndpoint(flightTicket, expirationTime); - } - - private int parseIndexFromTicket(Ticket ticket) { - final String contents = new String(ticket.getBytes(), StandardCharsets.UTF_8); - int index = contents.indexOf(':'); - if (index == -1) { - throw CallStatus.INVALID_ARGUMENT - .withDescription( - "Invalid ticket: " + new String(ticket.getBytes(), StandardCharsets.UTF_8)) - .toRuntimeException(); - } - int endpointIndex = Integer.parseInt(contents.substring(0, index)); - if (endpointIndex < 0 || endpointIndex >= statuses.size()) { - throw CallStatus.NOT_FOUND.withDescription("Out of bounds").toRuntimeException(); - } - return endpointIndex; - } - - /** The status of a returned endpoint. 
*/ - static final class EndpointStatus { - Instant expirationTime; - int numGets; - boolean cancelled; - - EndpointStatus(Instant expirationTime) { - this.expirationTime = expirationTime; - this.numGets = 0; - this.cancelled = false; - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeRenewFlightEndpointScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeRenewFlightEndpointScenario.java deleted file mode 100644 index 5b280746f09e7..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/ExpirationTimeRenewFlightEndpointScenario.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.integration.tests; - -import java.nio.charset.StandardCharsets; -import java.time.Instant; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.RenewFlightEndpointRequest; -import org.apache.arrow.memory.BufferAllocator; - -/** Test RenewFlightEndpoint. */ -final class ExpirationTimeRenewFlightEndpointScenario implements Scenario { - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) throws Exception { - return new ExpirationTimeProducer(allocator); - } - - @Override - public void buildServer(FlightServer.Builder builder) {} - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient client) - throws Exception { - FlightInfo info = - client.getInfo(FlightDescriptor.command("expiration".getBytes(StandardCharsets.UTF_8))); - - // Renew all endpoints with expiration time - for (FlightEndpoint endpoint : info.getEndpoints()) { - if (!endpoint.getExpirationTime().isPresent()) { - continue; - } - Instant expiration = endpoint.getExpirationTime().get(); - FlightEndpoint renewed = client.renewFlightEndpoint(new RenewFlightEndpointRequest(endpoint)); - - IntegrationAssertions.assertTrue( - "Renewed FlightEndpoint must have expiration time", - renewed.getExpirationTime().isPresent()); - IntegrationAssertions.assertTrue( - "Renewed FlightEndpoint must have newer expiration time", - renewed.getExpirationTime().get().isAfter(expiration)); - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlExtensionScenario.java 
b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlExtensionScenario.java deleted file mode 100644 index 69b02030ccd3d..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlExtensionScenario.java +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.util.Map; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.SchemaResult; -import org.apache.arrow.flight.sql.CancelResult; -import org.apache.arrow.flight.sql.FlightSqlClient; -import org.apache.arrow.flight.sql.FlightSqlProducer; -import org.apache.arrow.flight.sql.impl.FlightSql; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; - -/** - * Integration test scenario for validating Flight SQL specs across multiple implementations. This - * should ensure that RPC objects are being built and parsed correctly for multiple languages and - * that the Arrow schemas are returned as expected. 
- */ -public class FlightSqlExtensionScenario extends FlightSqlScenario { - @Override - public void client(BufferAllocator allocator, Location location, FlightClient client) - throws Exception { - try (final FlightSqlClient sqlClient = new FlightSqlClient(client)) { - validateMetadataRetrieval(sqlClient); - validateStatementExecution(sqlClient); - validatePreparedStatementExecution(allocator, sqlClient); - validateTransactions(allocator, sqlClient); - } - } - - private void validateMetadataRetrieval(FlightSqlClient sqlClient) throws Exception { - validate( - FlightSqlProducer.Schemas.GET_SQL_INFO_SCHEMA, - sqlClient.getSqlInfo(), - sqlClient, - s -> { - Map infoValues = readSqlInfoStream(s); - IntegrationAssertions.assertEquals( - Boolean.FALSE, infoValues.get(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_SQL_VALUE)); - IntegrationAssertions.assertEquals( - Boolean.TRUE, infoValues.get(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_SUBSTRAIT_VALUE)); - IntegrationAssertions.assertEquals( - "min_version", - infoValues.get(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_SUBSTRAIT_MIN_VERSION_VALUE)); - IntegrationAssertions.assertEquals( - "max_version", - infoValues.get(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_SUBSTRAIT_MAX_VERSION_VALUE)); - IntegrationAssertions.assertEquals( - FlightSql.SqlSupportedTransaction.SQL_SUPPORTED_TRANSACTION_SAVEPOINT_VALUE, - infoValues.get(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_TRANSACTION_VALUE)); - IntegrationAssertions.assertEquals( - Boolean.TRUE, infoValues.get(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_CANCEL_VALUE)); - IntegrationAssertions.assertEquals( - 42, infoValues.get(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_STATEMENT_TIMEOUT_VALUE)); - IntegrationAssertions.assertEquals( - 7, infoValues.get(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_TRANSACTION_TIMEOUT_VALUE)); - }); - } - - private void validateStatementExecution(FlightSqlClient sqlClient) throws Exception { - FlightInfo info = sqlClient.executeSubstrait(SUBSTRAIT_PLAN); - validate(FlightSqlScenarioProducer.getQuerySchema(), 
info, sqlClient); - - SchemaResult result = sqlClient.getExecuteSubstraitSchema(SUBSTRAIT_PLAN); - validateSchema(FlightSqlScenarioProducer.getQuerySchema(), result); - - IntegrationAssertions.assertEquals(CancelResult.CANCELLED, sqlClient.cancelQuery(info)); - - IntegrationAssertions.assertEquals( - sqlClient.executeSubstraitUpdate(SUBSTRAIT_PLAN), UPDATE_STATEMENT_EXPECTED_ROWS); - } - - private void validatePreparedStatementExecution( - BufferAllocator allocator, FlightSqlClient sqlClient) throws Exception { - try (FlightSqlClient.PreparedStatement preparedStatement = sqlClient.prepare(SUBSTRAIT_PLAN); - VectorSchemaRoot parameters = - VectorSchemaRoot.create(FlightSqlScenarioProducer.getQuerySchema(), allocator)) { - parameters.setRowCount(1); - preparedStatement.setParameters(parameters); - validate(FlightSqlScenarioProducer.getQuerySchema(), preparedStatement.execute(), sqlClient); - validateSchema(FlightSqlScenarioProducer.getQuerySchema(), preparedStatement.fetchSchema()); - } - - try (FlightSqlClient.PreparedStatement preparedStatement = sqlClient.prepare(SUBSTRAIT_PLAN)) { - IntegrationAssertions.assertEquals( - preparedStatement.executeUpdate(), UPDATE_PREPARED_STATEMENT_EXPECTED_ROWS); - } - } - - private void validateTransactions(BufferAllocator allocator, FlightSqlClient sqlClient) - throws Exception { - final FlightSqlClient.Transaction transaction = sqlClient.beginTransaction(); - IntegrationAssertions.assertEquals(TRANSACTION_ID, transaction.getTransactionId()); - - final FlightSqlClient.Savepoint savepoint = - sqlClient.beginSavepoint(transaction, SAVEPOINT_NAME); - IntegrationAssertions.assertEquals(SAVEPOINT_ID, savepoint.getSavepointId()); - - FlightInfo info = sqlClient.execute("SELECT STATEMENT", transaction); - validate(FlightSqlScenarioProducer.getQueryWithTransactionSchema(), info, sqlClient); - - info = sqlClient.executeSubstrait(SUBSTRAIT_PLAN, transaction); - validate(FlightSqlScenarioProducer.getQueryWithTransactionSchema(), info, 
sqlClient); - - SchemaResult schema = sqlClient.getExecuteSchema("SELECT STATEMENT", transaction); - validateSchema(FlightSqlScenarioProducer.getQueryWithTransactionSchema(), schema); - - schema = sqlClient.getExecuteSubstraitSchema(SUBSTRAIT_PLAN, transaction); - validateSchema(FlightSqlScenarioProducer.getQueryWithTransactionSchema(), schema); - - IntegrationAssertions.assertEquals( - sqlClient.executeUpdate("UPDATE STATEMENT", transaction), - UPDATE_STATEMENT_WITH_TRANSACTION_EXPECTED_ROWS); - IntegrationAssertions.assertEquals( - sqlClient.executeSubstraitUpdate(SUBSTRAIT_PLAN, transaction), - UPDATE_STATEMENT_WITH_TRANSACTION_EXPECTED_ROWS); - - try (FlightSqlClient.PreparedStatement preparedStatement = - sqlClient.prepare("SELECT PREPARED STATEMENT", transaction); - VectorSchemaRoot parameters = - VectorSchemaRoot.create(FlightSqlScenarioProducer.getQuerySchema(), allocator)) { - parameters.setRowCount(1); - preparedStatement.setParameters(parameters); - validate( - FlightSqlScenarioProducer.getQueryWithTransactionSchema(), - preparedStatement.execute(), - sqlClient); - schema = preparedStatement.fetchSchema(); - validateSchema(FlightSqlScenarioProducer.getQueryWithTransactionSchema(), schema); - } - - try (FlightSqlClient.PreparedStatement preparedStatement = - sqlClient.prepare(SUBSTRAIT_PLAN, transaction); - VectorSchemaRoot parameters = - VectorSchemaRoot.create(FlightSqlScenarioProducer.getQuerySchema(), allocator)) { - parameters.setRowCount(1); - preparedStatement.setParameters(parameters); - validate( - FlightSqlScenarioProducer.getQueryWithTransactionSchema(), - preparedStatement.execute(), - sqlClient); - schema = preparedStatement.fetchSchema(); - validateSchema(FlightSqlScenarioProducer.getQueryWithTransactionSchema(), schema); - } - - try (FlightSqlClient.PreparedStatement preparedStatement = - sqlClient.prepare("UPDATE PREPARED STATEMENT", transaction)) { - IntegrationAssertions.assertEquals( - preparedStatement.executeUpdate(), - 
UPDATE_PREPARED_STATEMENT_WITH_TRANSACTION_EXPECTED_ROWS); - } - - try (FlightSqlClient.PreparedStatement preparedStatement = - sqlClient.prepare(SUBSTRAIT_PLAN, transaction)) { - IntegrationAssertions.assertEquals( - preparedStatement.executeUpdate(), - UPDATE_PREPARED_STATEMENT_WITH_TRANSACTION_EXPECTED_ROWS); - } - - sqlClient.rollback(savepoint); - - final FlightSqlClient.Savepoint savepoint2 = - sqlClient.beginSavepoint(transaction, SAVEPOINT_NAME); - IntegrationAssertions.assertEquals(SAVEPOINT_ID, savepoint2.getSavepointId()); - sqlClient.release(savepoint); - - sqlClient.commit(transaction); - - final FlightSqlClient.Transaction transaction2 = sqlClient.beginTransaction(); - IntegrationAssertions.assertEquals(TRANSACTION_ID, transaction2.getTransactionId()); - sqlClient.rollback(transaction); - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlIngestionScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlIngestionScenario.java deleted file mode 100644 index 981ce89f1b88a..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlIngestionScenario.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import com.google.common.collect.ImmutableMap; -import java.util.HashMap; -import java.util.Map; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.sql.FlightSqlClient; -import org.apache.arrow.flight.sql.FlightSqlClient.ExecuteIngestOptions; -import org.apache.arrow.flight.sql.FlightSqlProducer; -import org.apache.arrow.flight.sql.impl.FlightSql; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementIngest.TableDefinitionOptions; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; - -/** - * Integration test scenario for validating Flight SQL specs across multiple implementations. This - * should ensure that RPC objects are being built and parsed correctly for multiple languages and - * that the Arrow schemas are returned as expected. 
- */ -public class FlightSqlIngestionScenario extends FlightSqlScenario { - - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) throws Exception { - FlightSqlScenarioProducer producer = - (FlightSqlScenarioProducer) super.producer(allocator, location); - producer - .getSqlInfoBuilder() - .withFlightSqlServerBulkIngestionTransaction(true) - .withFlightSqlServerBulkIngestion(true); - return producer; - } - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient client) - throws Exception { - try (final FlightSqlClient sqlClient = new FlightSqlClient(client)) { - validateMetadataRetrieval(sqlClient); - validateIngestion(allocator, sqlClient); - } - } - - private void validateMetadataRetrieval(FlightSqlClient sqlClient) throws Exception { - validate( - FlightSqlProducer.Schemas.GET_SQL_INFO_SCHEMA, - sqlClient.getSqlInfo( - FlightSql.SqlInfo.FLIGHT_SQL_SERVER_INGEST_TRANSACTIONS_SUPPORTED, - FlightSql.SqlInfo.FLIGHT_SQL_SERVER_BULK_INGESTION), - sqlClient, - s -> { - Map infoValues = readSqlInfoStream(s); - IntegrationAssertions.assertEquals( - Boolean.TRUE, - infoValues.get( - FlightSql.SqlInfo.FLIGHT_SQL_SERVER_INGEST_TRANSACTIONS_SUPPORTED_VALUE)); - IntegrationAssertions.assertEquals( - Boolean.TRUE, - infoValues.get(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_BULK_INGESTION_VALUE)); - }); - } - - private VectorSchemaRoot getIngestVectorRoot(BufferAllocator allocator) { - Schema schema = FlightSqlScenarioProducer.getIngestSchema(); - VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator); - root.setRowCount(3); - return root; - } - - private void validateIngestion(BufferAllocator allocator, FlightSqlClient sqlClient) { - try (VectorSchemaRoot data = getIngestVectorRoot(allocator)) { - TableDefinitionOptions tableDefinitionOptions = - TableDefinitionOptions.newBuilder() - .setIfExists(TableDefinitionOptions.TableExistsOption.TABLE_EXISTS_OPTION_REPLACE) - .setIfNotExist( - 
TableDefinitionOptions.TableNotExistOption.TABLE_NOT_EXIST_OPTION_CREATE) - .build(); - Map options = new HashMap<>(ImmutableMap.of("key1", "val1", "key2", "val2")); - ExecuteIngestOptions executeIngestOptions = - new ExecuteIngestOptions( - "test_table", tableDefinitionOptions, true, "test_catalog", "test_schema", options); - FlightSqlClient.Transaction transaction = - new FlightSqlClient.Transaction(BULK_INGEST_TRANSACTION_ID); - long updatedRows = sqlClient.executeIngest(data, executeIngestOptions, transaction); - - IntegrationAssertions.assertEquals(3L, updatedRows); - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenario.java deleted file mode 100644 index e370a30bdc6ff..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenario.java +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.integration.tests; - -import static java.util.Objects.isNull; - -import com.google.protobuf.Any; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.function.Consumer; -import org.apache.arrow.flight.CallOption; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.SchemaResult; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.flight.sql.FlightSqlClient; -import org.apache.arrow.flight.sql.FlightSqlProducer; -import org.apache.arrow.flight.sql.FlightSqlUtils; -import org.apache.arrow.flight.sql.impl.FlightSql; -import org.apache.arrow.flight.sql.util.TableRef; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.types.pojo.Schema; - -/** - * Integration test scenario for validating Flight SQL specs across multiple implementations. This - * should ensure that RPC objects are being built and parsed correctly for multiple languages and - * that the Arrow schemas are returned as expected. 
- */ -public class FlightSqlScenario implements Scenario { - public static final long UPDATE_STATEMENT_EXPECTED_ROWS = 10000L; - public static final long UPDATE_STATEMENT_WITH_TRANSACTION_EXPECTED_ROWS = 15000L; - public static final long UPDATE_PREPARED_STATEMENT_EXPECTED_ROWS = 20000L; - public static final long UPDATE_PREPARED_STATEMENT_WITH_TRANSACTION_EXPECTED_ROWS = 25000L; - public static final byte[] SAVEPOINT_ID = "savepoint_id".getBytes(StandardCharsets.UTF_8); - public static final String SAVEPOINT_NAME = "savepoint_name"; - public static final byte[] SUBSTRAIT_PLAN_TEXT = "plan".getBytes(StandardCharsets.UTF_8); - public static final String SUBSTRAIT_VERSION = "version"; - public static final FlightSqlClient.SubstraitPlan SUBSTRAIT_PLAN = - new FlightSqlClient.SubstraitPlan(SUBSTRAIT_PLAN_TEXT, SUBSTRAIT_VERSION); - public static final byte[] TRANSACTION_ID = "transaction_id".getBytes(StandardCharsets.UTF_8); - public static final byte[] BULK_INGEST_TRANSACTION_ID = "123".getBytes(StandardCharsets.UTF_8); - - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) throws Exception { - return new FlightSqlScenarioProducer(allocator); - } - - @Override - public void buildServer(FlightServer.Builder builder) throws Exception {} - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient client) - throws Exception { - try (final FlightSqlClient sqlClient = new FlightSqlClient(client)) { - validateMetadataRetrieval(sqlClient); - validateStatementExecution(sqlClient); - validatePreparedStatementExecution(allocator, sqlClient); - } - } - - private void validateMetadataRetrieval(FlightSqlClient sqlClient) throws Exception { - final CallOption[] options = new CallOption[0]; - - validate( - FlightSqlProducer.Schemas.GET_CATALOGS_SCHEMA, sqlClient.getCatalogs(options), sqlClient); - validateSchema( - FlightSqlProducer.Schemas.GET_CATALOGS_SCHEMA, sqlClient.getCatalogsSchema(options)); - - validate( 
- FlightSqlProducer.Schemas.GET_SCHEMAS_SCHEMA, - sqlClient.getSchemas("catalog", "db_schema_filter_pattern", options), - sqlClient); - validateSchema(FlightSqlProducer.Schemas.GET_SCHEMAS_SCHEMA, sqlClient.getSchemasSchema()); - - validate( - FlightSqlProducer.Schemas.GET_TABLES_SCHEMA, - sqlClient.getTables( - "catalog", - "db_schema_filter_pattern", - "table_filter_pattern", - Arrays.asList("table", "view"), - true, - options), - sqlClient); - validateSchema( - FlightSqlProducer.Schemas.GET_TABLES_SCHEMA, - sqlClient.getTablesSchema(/*includeSchema*/ true, options)); - validateSchema( - FlightSqlProducer.Schemas.GET_TABLES_SCHEMA_NO_SCHEMA, - sqlClient.getTablesSchema(/*includeSchema*/ false, options)); - - validate( - FlightSqlProducer.Schemas.GET_TABLE_TYPES_SCHEMA, - sqlClient.getTableTypes(options), - sqlClient); - validateSchema( - FlightSqlProducer.Schemas.GET_TABLE_TYPES_SCHEMA, sqlClient.getTableTypesSchema(options)); - - validate( - FlightSqlProducer.Schemas.GET_PRIMARY_KEYS_SCHEMA, - sqlClient.getPrimaryKeys(TableRef.of("catalog", "db_schema", "table"), options), - sqlClient); - validateSchema( - FlightSqlProducer.Schemas.GET_PRIMARY_KEYS_SCHEMA, sqlClient.getPrimaryKeysSchema(options)); - - validate( - FlightSqlProducer.Schemas.GET_EXPORTED_KEYS_SCHEMA, - sqlClient.getExportedKeys(TableRef.of("catalog", "db_schema", "table"), options), - sqlClient); - validateSchema( - FlightSqlProducer.Schemas.GET_EXPORTED_KEYS_SCHEMA, - sqlClient.getExportedKeysSchema(options)); - - validate( - FlightSqlProducer.Schemas.GET_IMPORTED_KEYS_SCHEMA, - sqlClient.getImportedKeys(TableRef.of("catalog", "db_schema", "table"), options), - sqlClient); - validateSchema( - FlightSqlProducer.Schemas.GET_IMPORTED_KEYS_SCHEMA, - sqlClient.getImportedKeysSchema(options)); - - validate( - FlightSqlProducer.Schemas.GET_CROSS_REFERENCE_SCHEMA, - sqlClient.getCrossReference( - TableRef.of("pk_catalog", "pk_db_schema", "pk_table"), - TableRef.of("fk_catalog", "fk_db_schema", 
"fk_table"), - options), - sqlClient); - validateSchema( - FlightSqlProducer.Schemas.GET_CROSS_REFERENCE_SCHEMA, - sqlClient.getCrossReferenceSchema(options)); - - validate( - FlightSqlProducer.Schemas.GET_TYPE_INFO_SCHEMA, - sqlClient.getXdbcTypeInfo(options), - sqlClient); - validateSchema( - FlightSqlProducer.Schemas.GET_TYPE_INFO_SCHEMA, sqlClient.getXdbcTypeInfoSchema(options)); - - FlightInfo sqlInfoFlightInfo = - sqlClient.getSqlInfo( - new FlightSql.SqlInfo[] { - FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME, - FlightSql.SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY - }, - options); - - Ticket ticket = sqlInfoFlightInfo.getEndpoints().get(0).getTicket(); - FlightSql.CommandGetSqlInfo requestSqlInfoCommand = - FlightSqlUtils.unpackOrThrow( - Any.parseFrom(ticket.getBytes()), FlightSql.CommandGetSqlInfo.class); - IntegrationAssertions.assertEquals( - requestSqlInfoCommand.getInfo(0), FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME_VALUE); - IntegrationAssertions.assertEquals( - requestSqlInfoCommand.getInfo(1), FlightSql.SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY_VALUE); - validate(FlightSqlProducer.Schemas.GET_SQL_INFO_SCHEMA, sqlInfoFlightInfo, sqlClient); - validateSchema( - FlightSqlProducer.Schemas.GET_SQL_INFO_SCHEMA, sqlClient.getSqlInfoSchema(options)); - } - - private void validateStatementExecution(FlightSqlClient sqlClient) throws Exception { - FlightInfo info = sqlClient.execute("SELECT STATEMENT"); - validate(FlightSqlScenarioProducer.getQuerySchema(), info, sqlClient); - validateSchema( - FlightSqlScenarioProducer.getQuerySchema(), sqlClient.getExecuteSchema("SELECT STATEMENT")); - - IntegrationAssertions.assertEquals( - sqlClient.executeUpdate("UPDATE STATEMENT"), UPDATE_STATEMENT_EXPECTED_ROWS); - } - - private void validatePreparedStatementExecution( - BufferAllocator allocator, FlightSqlClient sqlClient) throws Exception { - try (FlightSqlClient.PreparedStatement preparedStatement = - sqlClient.prepare("SELECT PREPARED STATEMENT"); - VectorSchemaRoot parameters = - 
VectorSchemaRoot.create(FlightSqlScenarioProducer.getQuerySchema(), allocator)) { - parameters.setRowCount(1); - preparedStatement.setParameters(parameters); - validate(FlightSqlScenarioProducer.getQuerySchema(), preparedStatement.execute(), sqlClient); - validateSchema(FlightSqlScenarioProducer.getQuerySchema(), preparedStatement.fetchSchema()); - } - - try (FlightSqlClient.PreparedStatement preparedStatement = - sqlClient.prepare("UPDATE PREPARED STATEMENT")) { - IntegrationAssertions.assertEquals( - preparedStatement.executeUpdate(), UPDATE_PREPARED_STATEMENT_EXPECTED_ROWS); - } - } - - protected void validate(Schema expectedSchema, FlightInfo flightInfo, FlightSqlClient sqlClient) - throws Exception { - validate(expectedSchema, flightInfo, sqlClient, null); - } - - protected void validate( - Schema expectedSchema, - FlightInfo flightInfo, - FlightSqlClient sqlClient, - Consumer streamConsumer) - throws Exception { - Ticket ticket = flightInfo.getEndpoints().get(0).getTicket(); - try (FlightStream stream = sqlClient.getStream(ticket)) { - Schema actualSchema = stream.getSchema(); - IntegrationAssertions.assertEquals(expectedSchema, actualSchema); - if (!isNull(streamConsumer)) { - streamConsumer.accept(stream); - } - } - } - - protected void validateSchema(Schema expected, SchemaResult actual) { - IntegrationAssertions.assertEquals(expected, actual.getSchema()); - } - - protected Map readSqlInfoStream(FlightStream stream) { - Map infoValues = new HashMap<>(); - while (stream.next()) { - UInt4Vector infoName = (UInt4Vector) stream.getRoot().getVector(0); - DenseUnionVector value = (DenseUnionVector) stream.getRoot().getVector(1); - - for (int i = 0; i < stream.getRoot().getRowCount(); i++) { - final int code = infoName.get(i); - if (infoValues.containsKey(code)) { - throw new AssertionError("Duplicate SqlInfo value: " + code); - } - Object object; - byte typeId = value.getTypeId(i); - switch (typeId) { - case 0: // string - object = - Preconditions.checkNotNull( 
- value.getVarCharVector(typeId).getObject(value.getOffset(i))) - .toString(); - break; - case 1: // bool - object = value.getBitVector(typeId).getObject(value.getOffset(i)); - break; - case 2: // int64 - object = value.getBigIntVector(typeId).getObject(value.getOffset(i)); - break; - case 3: // int32 - object = value.getIntVector(typeId).getObject(value.getOffset(i)); - break; - default: - throw new AssertionError("Decoding SqlInfo of type code " + typeId); - } - infoValues.put(code, object); - } - } - return infoValues; - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenarioProducer.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenarioProducer.java deleted file mode 100644 index be746b575761d..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/FlightSqlScenarioProducer.java +++ /dev/null @@ -1,834 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.integration.tests; - -import com.google.common.collect.ImmutableMap; -import com.google.protobuf.Any; -import com.google.protobuf.ByteString; -import com.google.protobuf.InvalidProtocolBufferException; -import com.google.protobuf.Message; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.Criteria; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.PutResult; -import org.apache.arrow.flight.Result; -import org.apache.arrow.flight.SchemaResult; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.flight.sql.CancelResult; -import org.apache.arrow.flight.sql.FlightSqlColumnMetadata; -import org.apache.arrow.flight.sql.FlightSqlProducer; -import org.apache.arrow.flight.sql.SqlInfoBuilder; -import org.apache.arrow.flight.sql.impl.FlightSql; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementIngest.TableDefinitionOptions.TableExistsOption; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementIngest.TableDefinitionOptions.TableNotExistOption; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; - -/** Hardcoded Flight SQL producer used for cross-language integration tests. 
*/ -public class FlightSqlScenarioProducer implements FlightSqlProducer { - public static final String SERVER_NAME = "Flight SQL Integration Test Server"; - private final BufferAllocator allocator; - - private final SqlInfoBuilder sqlInfoBuilder; - - /** Constructor. */ - public FlightSqlScenarioProducer(BufferAllocator allocator) { - this.allocator = allocator; - sqlInfoBuilder = - new SqlInfoBuilder() - .withFlightSqlServerName(SERVER_NAME) - .withFlightSqlServerReadOnly(false) - .withFlightSqlServerSql(false) - .withFlightSqlServerSubstrait(true) - .withFlightSqlServerSubstraitMinVersion("min_version") - .withFlightSqlServerSubstraitMaxVersion("max_version") - .withFlightSqlServerTransaction( - FlightSql.SqlSupportedTransaction.SQL_SUPPORTED_TRANSACTION_SAVEPOINT) - .withFlightSqlServerCancel(true) - .withFlightSqlServerStatementTimeout(42) - .withFlightSqlServerTransactionTimeout(7); - } - - /** - * Schema to be returned for mocking the statement/prepared statement results. Must be the same - * across all languages. - */ - static Schema getQuerySchema() { - return new Schema( - Collections.singletonList( - new Field( - "id", - new FieldType( - true, - new ArrowType.Int(64, true), - null, - new FlightSqlColumnMetadata.Builder() - .tableName("test") - .isAutoIncrement(true) - .isCaseSensitive(false) - .typeName("type_test") - .schemaName("schema_test") - .isSearchable(true) - .catalogName("catalog_test") - .precision(100) - .build() - .getMetadataMap()), - null))); - } - - /** - * The expected schema for queries with transactions. - * - *

    Must be the same across all languages. - */ - static Schema getQueryWithTransactionSchema() { - return new Schema( - Collections.singletonList( - new Field( - "pkey", - new FieldType( - true, - new ArrowType.Int(32, true), - null, - new FlightSqlColumnMetadata.Builder() - .tableName("test") - .isAutoIncrement(true) - .isCaseSensitive(false) - .typeName("type_test") - .schemaName("schema_test") - .isSearchable(true) - .catalogName("catalog_test") - .precision(100) - .build() - .getMetadataMap()), - null))); - } - - static Schema getIngestSchema() { - return new Schema( - Collections.singletonList(Field.nullable("test_field", new ArrowType.Int(64, true)))); - } - - protected SqlInfoBuilder getSqlInfoBuilder() { - return sqlInfoBuilder; - } - - @Override - public void beginSavepoint( - FlightSql.ActionBeginSavepointRequest request, - CallContext context, - StreamListener listener) { - if (!request.getName().equals(FlightSqlScenario.SAVEPOINT_NAME)) { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription( - String.format( - "Expected name '%s', not '%s'", - FlightSqlScenario.SAVEPOINT_NAME, request.getName())) - .toRuntimeException()); - return; - } - if (!Arrays.equals( - request.getTransactionId().toByteArray(), FlightSqlScenario.TRANSACTION_ID)) { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription( - String.format( - "Expected transaction ID '%s', not '%s'", - Arrays.toString(FlightSqlScenario.TRANSACTION_ID), - Arrays.toString(request.getTransactionId().toByteArray()))) - .toRuntimeException()); - return; - } - listener.onNext( - FlightSql.ActionBeginSavepointResult.newBuilder() - .setSavepointId(ByteString.copyFrom(FlightSqlScenario.SAVEPOINT_ID)) - .build()); - listener.onCompleted(); - } - - @Override - public void beginTransaction( - FlightSql.ActionBeginTransactionRequest request, - CallContext context, - StreamListener listener) { - listener.onNext( - FlightSql.ActionBeginTransactionResult.newBuilder() - 
.setTransactionId(ByteString.copyFrom(FlightSqlScenario.TRANSACTION_ID)) - .build()); - listener.onCompleted(); - } - - @Override - public void cancelQuery( - FlightInfo info, CallContext context, StreamListener listener) { - final String expectedTicket = "PLAN HANDLE"; - if (info.getEndpoints().size() != 1) { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription( - String.format("Expected 1 endpoint, got %d", info.getEndpoints().size())) - .toRuntimeException()); - } - final FlightEndpoint endpoint = info.getEndpoints().get(0); - try { - final Any any = Any.parseFrom(endpoint.getTicket().getBytes()); - if (!any.is(FlightSql.TicketStatementQuery.class)) { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription( - String.format("Expected TicketStatementQuery, found '%s'", any.getTypeUrl())) - .toRuntimeException()); - } - final FlightSql.TicketStatementQuery ticket = - any.unpack(FlightSql.TicketStatementQuery.class); - if (!ticket.getStatementHandle().toStringUtf8().equals(expectedTicket)) { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription(String.format("Expected ticket '%s'", expectedTicket)) - .toRuntimeException()); - } - listener.onNext(CancelResult.CANCELLED); - listener.onCompleted(); - } catch (InvalidProtocolBufferException e) { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription("Invalid Protobuf:" + e) - .withCause(e) - .toRuntimeException()); - } - } - - @Override - public void createPreparedStatement( - FlightSql.ActionCreatePreparedStatementRequest request, - CallContext context, - StreamListener listener) { - IntegrationAssertions.assertTrue( - "Expect to be one of the two queries used on tests", - request.getQuery().equals("SELECT PREPARED STATEMENT") - || request.getQuery().equals("UPDATE PREPARED STATEMENT")); - - String text = request.getQuery(); - if (!request.getTransactionId().isEmpty()) { - text += " WITH TXN"; - } - text += " HANDLE"; - final 
FlightSql.ActionCreatePreparedStatementResult result = - FlightSql.ActionCreatePreparedStatementResult.newBuilder() - .setPreparedStatementHandle(ByteString.copyFromUtf8(text)) - .build(); - listener.onNext(new Result(Any.pack(result).toByteArray())); - listener.onCompleted(); - } - - @Override - public void createPreparedSubstraitPlan( - FlightSql.ActionCreatePreparedSubstraitPlanRequest request, - CallContext context, - StreamListener listener) { - if (!Arrays.equals( - request.getPlan().getPlan().toByteArray(), FlightSqlScenario.SUBSTRAIT_PLAN_TEXT)) { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription( - String.format( - "Expected plan '%s', not '%s'", - Arrays.toString(FlightSqlScenario.SUBSTRAIT_PLAN_TEXT), - Arrays.toString(request.getPlan().getPlan().toByteArray()))) - .toRuntimeException()); - return; - } - if (!FlightSqlScenario.SUBSTRAIT_VERSION.equals(request.getPlan().getVersion())) { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription( - String.format( - "Expected version '%s', not '%s'", - FlightSqlScenario.SUBSTRAIT_VERSION, request.getPlan().getVersion())) - .toRuntimeException()); - return; - } - final String handle = - request.getTransactionId().isEmpty() - ? 
"PREPARED PLAN HANDLE" - : "PREPARED PLAN WITH TXN HANDLE"; - final FlightSql.ActionCreatePreparedStatementResult result = - FlightSql.ActionCreatePreparedStatementResult.newBuilder() - .setPreparedStatementHandle(ByteString.copyFromUtf8(handle)) - .build(); - listener.onNext(result); - listener.onCompleted(); - } - - @Override - public void closePreparedStatement( - FlightSql.ActionClosePreparedStatementRequest request, - CallContext context, - StreamListener listener) { - final String handle = request.getPreparedStatementHandle().toStringUtf8(); - IntegrationAssertions.assertTrue( - "Expect to be one of the queries used on tests", - handle.equals("SELECT PREPARED STATEMENT HANDLE") - || handle.equals("SELECT PREPARED STATEMENT WITH TXN HANDLE") - || handle.equals("UPDATE PREPARED STATEMENT HANDLE") - || handle.equals("UPDATE PREPARED STATEMENT WITH TXN HANDLE") - || handle.equals("PREPARED PLAN HANDLE") - || handle.equals("PREPARED PLAN WITH TXN HANDLE")); - listener.onCompleted(); - } - - @Override - public void endSavepoint( - FlightSql.ActionEndSavepointRequest request, - CallContext context, - StreamListener listener) { - switch (request.getAction()) { - case END_SAVEPOINT_RELEASE: - case END_SAVEPOINT_ROLLBACK: - if (!Arrays.equals( - request.getSavepointId().toByteArray(), FlightSqlScenario.SAVEPOINT_ID)) { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription( - "Unexpected ID: " + Arrays.toString(request.getSavepointId().toByteArray())) - .toRuntimeException()); - } - break; - case UNRECOGNIZED: - default: - { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription("Unknown action: " + request.getAction()) - .toRuntimeException()); - return; - } - } - listener.onCompleted(); - } - - @Override - public void endTransaction( - FlightSql.ActionEndTransactionRequest request, - CallContext context, - StreamListener listener) { - switch (request.getAction()) { - case END_TRANSACTION_COMMIT: - case END_TRANSACTION_ROLLBACK: - if 
(!Arrays.equals( - request.getTransactionId().toByteArray(), FlightSqlScenario.TRANSACTION_ID)) { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription( - "Unexpected ID: " + Arrays.toString(request.getTransactionId().toByteArray())) - .toRuntimeException()); - } - break; - case UNRECOGNIZED: - default: - { - listener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription("Unknown action: " + request.getAction()) - .toRuntimeException()); - return; - } - } - listener.onCompleted(); - } - - @Override - public FlightInfo getFlightInfoStatement( - FlightSql.CommandStatementQuery command, CallContext context, FlightDescriptor descriptor) { - IntegrationAssertions.assertEquals(command.getQuery(), "SELECT STATEMENT"); - if (command.getTransactionId().isEmpty()) { - String handle = "SELECT STATEMENT HANDLE"; - FlightSql.TicketStatementQuery ticket = - FlightSql.TicketStatementQuery.newBuilder() - .setStatementHandle(ByteString.copyFromUtf8(handle)) - .build(); - return getFlightInfoForSchema(ticket, descriptor, getQuerySchema()); - } else { - String handle = "SELECT STATEMENT WITH TXN HANDLE"; - FlightSql.TicketStatementQuery ticket = - FlightSql.TicketStatementQuery.newBuilder() - .setStatementHandle(ByteString.copyFromUtf8(handle)) - .build(); - return getFlightInfoForSchema(ticket, descriptor, getQueryWithTransactionSchema()); - } - } - - @Override - public FlightInfo getFlightInfoSubstraitPlan( - FlightSql.CommandStatementSubstraitPlan command, - CallContext context, - FlightDescriptor descriptor) { - IntegrationAssertions.assertEquals( - command.getPlan().getPlan().toByteArray(), FlightSqlScenario.SUBSTRAIT_PLAN_TEXT); - IntegrationAssertions.assertEquals( - command.getPlan().getVersion(), FlightSqlScenario.SUBSTRAIT_VERSION); - String handle = command.getTransactionId().isEmpty() ? 
"PLAN HANDLE" : "PLAN WITH TXN HANDLE"; - FlightSql.TicketStatementQuery ticket = - FlightSql.TicketStatementQuery.newBuilder() - .setStatementHandle(ByteString.copyFromUtf8(handle)) - .build(); - return getFlightInfoForSchema(ticket, descriptor, getQuerySchema()); - } - - @Override - public FlightInfo getFlightInfoPreparedStatement( - FlightSql.CommandPreparedStatementQuery command, - CallContext context, - FlightDescriptor descriptor) { - String handle = command.getPreparedStatementHandle().toStringUtf8(); - if (handle.equals("SELECT PREPARED STATEMENT HANDLE") - || handle.equals("PREPARED PLAN HANDLE")) { - return getFlightInfoForSchema(command, descriptor, getQuerySchema()); - } else if (handle.equals("SELECT PREPARED STATEMENT WITH TXN HANDLE") - || handle.equals("PREPARED PLAN WITH TXN HANDLE")) { - return getFlightInfoForSchema(command, descriptor, getQueryWithTransactionSchema()); - } - throw CallStatus.INVALID_ARGUMENT - .withDescription("Unknown handle: " + handle) - .toRuntimeException(); - } - - @Override - public SchemaResult getSchemaStatement( - FlightSql.CommandStatementQuery command, CallContext context, FlightDescriptor descriptor) { - IntegrationAssertions.assertEquals(command.getQuery(), "SELECT STATEMENT"); - if (command.getTransactionId().isEmpty()) { - return new SchemaResult(getQuerySchema()); - } - return new SchemaResult(getQueryWithTransactionSchema()); - } - - @Override - public SchemaResult getSchemaPreparedStatement( - FlightSql.CommandPreparedStatementQuery command, - CallContext context, - FlightDescriptor descriptor) { - String handle = command.getPreparedStatementHandle().toStringUtf8(); - if (handle.equals("SELECT PREPARED STATEMENT HANDLE") - || handle.equals("PREPARED PLAN HANDLE")) { - return new SchemaResult(getQuerySchema()); - } else if (handle.equals("SELECT PREPARED STATEMENT WITH TXN HANDLE") - || handle.equals("PREPARED PLAN WITH TXN HANDLE")) { - return new SchemaResult(getQueryWithTransactionSchema()); - } - throw 
CallStatus.INVALID_ARGUMENT - .withDescription("Unknown handle: " + handle) - .toRuntimeException(); - } - - @Override - public SchemaResult getSchemaSubstraitPlan( - FlightSql.CommandStatementSubstraitPlan command, - CallContext context, - FlightDescriptor descriptor) { - if (!Arrays.equals( - command.getPlan().getPlan().toByteArray(), FlightSqlScenario.SUBSTRAIT_PLAN_TEXT)) { - throw CallStatus.INVALID_ARGUMENT - .withDescription( - String.format( - "Expected plan '%s', not '%s'", - Arrays.toString(FlightSqlScenario.SUBSTRAIT_PLAN_TEXT), - Arrays.toString(command.getPlan().getPlan().toByteArray()))) - .toRuntimeException(); - } - if (!FlightSqlScenario.SUBSTRAIT_VERSION.equals(command.getPlan().getVersion())) { - throw CallStatus.INVALID_ARGUMENT - .withDescription( - String.format( - "Expected version '%s', not '%s'", - FlightSqlScenario.SUBSTRAIT_VERSION, command.getPlan().getVersion())) - .toRuntimeException(); - } - if (command.getTransactionId().isEmpty()) { - return new SchemaResult(getQuerySchema()); - } - return new SchemaResult(getQueryWithTransactionSchema()); - } - - @Override - public void getStreamStatement( - FlightSql.TicketStatementQuery ticket, CallContext context, ServerStreamListener listener) { - final String handle = ticket.getStatementHandle().toStringUtf8(); - if (handle.equals("SELECT STATEMENT HANDLE") || handle.equals("PLAN HANDLE")) { - putEmptyBatchToStreamListener(listener, getQuerySchema()); - } else if (handle.equals("SELECT STATEMENT WITH TXN HANDLE") - || handle.equals("PLAN WITH TXN HANDLE")) { - putEmptyBatchToStreamListener(listener, getQueryWithTransactionSchema()); - } else { - listener.error( - CallStatus.INVALID_ARGUMENT - .withDescription("Unknown handle: " + handle) - .toRuntimeException()); - } - } - - @Override - public void getStreamPreparedStatement( - FlightSql.CommandPreparedStatementQuery command, - CallContext context, - ServerStreamListener listener) { - String handle = 
command.getPreparedStatementHandle().toStringUtf8(); - if (handle.equals("SELECT PREPARED STATEMENT HANDLE") - || handle.equals("PREPARED PLAN HANDLE")) { - putEmptyBatchToStreamListener(listener, getQuerySchema()); - } else if (handle.equals("SELECT PREPARED STATEMENT WITH TXN HANDLE") - || handle.equals("PREPARED PLAN WITH TXN HANDLE")) { - putEmptyBatchToStreamListener(listener, getQueryWithTransactionSchema()); - } else { - listener.error( - CallStatus.INVALID_ARGUMENT - .withDescription("Unknown handle: " + handle) - .toRuntimeException()); - } - } - - private Runnable acceptPutReturnConstant(StreamListener ackStream, long value) { - return () -> { - final FlightSql.DoPutUpdateResult build = - FlightSql.DoPutUpdateResult.newBuilder().setRecordCount(value).build(); - - try (final ArrowBuf buffer = allocator.buffer(build.getSerializedSize())) { - buffer.writeBytes(build.toByteArray()); - ackStream.onNext(PutResult.metadata(buffer)); - ackStream.onCompleted(); - } - }; - } - - @Override - public Runnable acceptPutStatement( - FlightSql.CommandStatementUpdate command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - IntegrationAssertions.assertEquals(command.getQuery(), "UPDATE STATEMENT"); - return acceptPutReturnConstant( - ackStream, - command.getTransactionId().isEmpty() - ? 
FlightSqlScenario.UPDATE_STATEMENT_EXPECTED_ROWS - : FlightSqlScenario.UPDATE_STATEMENT_WITH_TRANSACTION_EXPECTED_ROWS); - } - - @Override - public Runnable acceptPutStatementBulkIngest( - FlightSql.CommandStatementIngest command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - - IntegrationAssertions.assertEquals( - TableExistsOption.TABLE_EXISTS_OPTION_REPLACE, - command.getTableDefinitionOptions().getIfExists()); - IntegrationAssertions.assertEquals( - TableNotExistOption.TABLE_NOT_EXIST_OPTION_CREATE, - command.getTableDefinitionOptions().getIfNotExist()); - IntegrationAssertions.assertEquals("test_table", command.getTable()); - IntegrationAssertions.assertEquals("test_catalog", command.getCatalog()); - IntegrationAssertions.assertEquals("test_schema", command.getSchema()); - IntegrationAssertions.assertEquals(true, command.getTemporary()); - IntegrationAssertions.assertEquals( - FlightSqlScenario.BULK_INGEST_TRANSACTION_ID, command.getTransactionId().toByteArray()); - - Map expectedOptions = - new HashMap<>(ImmutableMap.of("key1", "val1", "key2", "val2")); - IntegrationAssertions.assertEquals(expectedOptions.size(), command.getOptionsCount()); - - for (Map.Entry optionEntry : expectedOptions.entrySet()) { - String key = optionEntry.getKey(); - IntegrationAssertions.assertEquals(optionEntry.getValue(), command.getOptionsOrThrow(key)); - } - - IntegrationAssertions.assertEquals(getIngestSchema(), flightStream.getSchema()); - long rowCount = 0; - while (flightStream.next()) { - rowCount += flightStream.getRoot().getRowCount(); - } - - return acceptPutReturnConstant(ackStream, rowCount); - } - - @Override - public Runnable acceptPutSubstraitPlan( - FlightSql.CommandStatementSubstraitPlan command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - IntegrationAssertions.assertEquals( - command.getPlan().getPlan().toByteArray(), FlightSqlScenario.SUBSTRAIT_PLAN_TEXT); - 
IntegrationAssertions.assertEquals( - command.getPlan().getVersion(), FlightSqlScenario.SUBSTRAIT_VERSION); - return acceptPutReturnConstant( - ackStream, - command.getTransactionId().isEmpty() - ? FlightSqlScenario.UPDATE_STATEMENT_EXPECTED_ROWS - : FlightSqlScenario.UPDATE_STATEMENT_WITH_TRANSACTION_EXPECTED_ROWS); - } - - @Override - public Runnable acceptPutPreparedStatementUpdate( - FlightSql.CommandPreparedStatementUpdate command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - final String handle = command.getPreparedStatementHandle().toStringUtf8(); - if (handle.equals("UPDATE PREPARED STATEMENT HANDLE") - || handle.equals("PREPARED PLAN HANDLE")) { - return acceptPutReturnConstant( - ackStream, FlightSqlScenario.UPDATE_PREPARED_STATEMENT_EXPECTED_ROWS); - } else if (handle.equals("UPDATE PREPARED STATEMENT WITH TXN HANDLE") - || handle.equals("PREPARED PLAN WITH TXN HANDLE")) { - return acceptPutReturnConstant( - ackStream, FlightSqlScenario.UPDATE_PREPARED_STATEMENT_WITH_TRANSACTION_EXPECTED_ROWS); - } - return () -> { - ackStream.onError( - CallStatus.INVALID_ARGUMENT - .withDescription("Unknown handle: " + handle) - .toRuntimeException()); - }; - } - - @Override - public Runnable acceptPutPreparedStatementQuery( - FlightSql.CommandPreparedStatementQuery command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - final String handle = command.getPreparedStatementHandle().toStringUtf8(); - if (handle.equals("SELECT PREPARED STATEMENT HANDLE") - || handle.equals("SELECT PREPARED STATEMENT WITH TXN HANDLE") - || handle.equals("PREPARED PLAN HANDLE") - || handle.equals("PREPARED PLAN WITH TXN HANDLE")) { - IntegrationAssertions.assertEquals(getQuerySchema(), flightStream.getSchema()); - return ackStream::onCompleted; - } - return () -> { - ackStream.onError( - CallStatus.INVALID_ARGUMENT - .withDescription("Unknown handle: " + handle) - .toRuntimeException()); - }; - } - - @Override - 
public FlightInfo getFlightInfoSqlInfo( - FlightSql.CommandGetSqlInfo request, CallContext context, FlightDescriptor descriptor) { - return getFlightInfoForSchema(request, descriptor, Schemas.GET_SQL_INFO_SCHEMA); - } - - @Override - public void getStreamSqlInfo( - FlightSql.CommandGetSqlInfo command, CallContext context, ServerStreamListener listener) { - if (command.getInfoCount() == 2 - && command.getInfo(0) == FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME_VALUE - && command.getInfo(1) == FlightSql.SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY_VALUE) { - // Integration test for the protocol messages - putEmptyBatchToStreamListener(listener, Schemas.GET_SQL_INFO_SCHEMA); - return; - } - sqlInfoBuilder.send(command.getInfoList(), listener); - } - - @Override - public void getStreamTypeInfo( - FlightSql.CommandGetXdbcTypeInfo request, - CallContext context, - ServerStreamListener listener) { - putEmptyBatchToStreamListener(listener, Schemas.GET_TYPE_INFO_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoTypeInfo( - FlightSql.CommandGetXdbcTypeInfo request, CallContext context, FlightDescriptor descriptor) { - return getFlightInfoForSchema(request, descriptor, Schemas.GET_TYPE_INFO_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoCatalogs( - FlightSql.CommandGetCatalogs request, CallContext context, FlightDescriptor descriptor) { - return getFlightInfoForSchema(request, descriptor, Schemas.GET_CATALOGS_SCHEMA); - } - - private void putEmptyBatchToStreamListener(ServerStreamListener stream, Schema schema) { - try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - stream.start(root); - stream.putNext(); - stream.completed(); - } - } - - @Override - public void getStreamCatalogs(CallContext context, ServerStreamListener listener) { - putEmptyBatchToStreamListener(listener, Schemas.GET_CATALOGS_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoSchemas( - FlightSql.CommandGetDbSchemas request, CallContext context, FlightDescriptor 
descriptor) { - IntegrationAssertions.assertEquals(request.getCatalog(), "catalog"); - IntegrationAssertions.assertEquals( - request.getDbSchemaFilterPattern(), "db_schema_filter_pattern"); - - return getFlightInfoForSchema(request, descriptor, Schemas.GET_SCHEMAS_SCHEMA); - } - - @Override - public void getStreamSchemas( - FlightSql.CommandGetDbSchemas command, CallContext context, ServerStreamListener listener) { - putEmptyBatchToStreamListener(listener, Schemas.GET_SCHEMAS_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoTables( - FlightSql.CommandGetTables request, CallContext context, FlightDescriptor descriptor) { - IntegrationAssertions.assertEquals(request.getCatalog(), "catalog"); - IntegrationAssertions.assertEquals( - request.getDbSchemaFilterPattern(), "db_schema_filter_pattern"); - IntegrationAssertions.assertEquals(request.getTableNameFilterPattern(), "table_filter_pattern"); - IntegrationAssertions.assertEquals(request.getTableTypesCount(), 2); - IntegrationAssertions.assertEquals(request.getTableTypes(0), "table"); - IntegrationAssertions.assertEquals(request.getTableTypes(1), "view"); - - return getFlightInfoForSchema(request, descriptor, Schemas.GET_TABLES_SCHEMA); - } - - @Override - public void getStreamTables( - FlightSql.CommandGetTables command, CallContext context, ServerStreamListener listener) { - putEmptyBatchToStreamListener(listener, Schemas.GET_TABLES_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoTableTypes( - FlightSql.CommandGetTableTypes request, CallContext context, FlightDescriptor descriptor) { - return getFlightInfoForSchema(request, descriptor, Schemas.GET_TABLE_TYPES_SCHEMA); - } - - @Override - public void getStreamTableTypes(CallContext context, ServerStreamListener listener) { - putEmptyBatchToStreamListener(listener, Schemas.GET_TABLE_TYPES_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoPrimaryKeys( - FlightSql.CommandGetPrimaryKeys request, CallContext context, FlightDescriptor 
descriptor) { - IntegrationAssertions.assertEquals(request.getCatalog(), "catalog"); - IntegrationAssertions.assertEquals(request.getDbSchema(), "db_schema"); - IntegrationAssertions.assertEquals(request.getTable(), "table"); - - return getFlightInfoForSchema(request, descriptor, Schemas.GET_PRIMARY_KEYS_SCHEMA); - } - - @Override - public void getStreamPrimaryKeys( - FlightSql.CommandGetPrimaryKeys command, CallContext context, ServerStreamListener listener) { - putEmptyBatchToStreamListener(listener, Schemas.GET_PRIMARY_KEYS_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoExportedKeys( - FlightSql.CommandGetExportedKeys request, CallContext context, FlightDescriptor descriptor) { - IntegrationAssertions.assertEquals(request.getCatalog(), "catalog"); - IntegrationAssertions.assertEquals(request.getDbSchema(), "db_schema"); - IntegrationAssertions.assertEquals(request.getTable(), "table"); - - return getFlightInfoForSchema(request, descriptor, Schemas.GET_EXPORTED_KEYS_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoImportedKeys( - FlightSql.CommandGetImportedKeys request, CallContext context, FlightDescriptor descriptor) { - IntegrationAssertions.assertEquals(request.getCatalog(), "catalog"); - IntegrationAssertions.assertEquals(request.getDbSchema(), "db_schema"); - IntegrationAssertions.assertEquals(request.getTable(), "table"); - - return getFlightInfoForSchema(request, descriptor, Schemas.GET_IMPORTED_KEYS_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoCrossReference( - FlightSql.CommandGetCrossReference request, - CallContext context, - FlightDescriptor descriptor) { - IntegrationAssertions.assertEquals(request.getPkCatalog(), "pk_catalog"); - IntegrationAssertions.assertEquals(request.getPkDbSchema(), "pk_db_schema"); - IntegrationAssertions.assertEquals(request.getPkTable(), "pk_table"); - IntegrationAssertions.assertEquals(request.getFkCatalog(), "fk_catalog"); - 
IntegrationAssertions.assertEquals(request.getFkDbSchema(), "fk_db_schema"); - IntegrationAssertions.assertEquals(request.getFkTable(), "fk_table"); - - return getFlightInfoForSchema(request, descriptor, Schemas.GET_CROSS_REFERENCE_SCHEMA); - } - - @Override - public void getStreamExportedKeys( - FlightSql.CommandGetExportedKeys command, - CallContext context, - ServerStreamListener listener) { - putEmptyBatchToStreamListener(listener, Schemas.GET_EXPORTED_KEYS_SCHEMA); - } - - @Override - public void getStreamImportedKeys( - FlightSql.CommandGetImportedKeys command, - CallContext context, - ServerStreamListener listener) { - putEmptyBatchToStreamListener(listener, Schemas.GET_IMPORTED_KEYS_SCHEMA); - } - - @Override - public void getStreamCrossReference( - FlightSql.CommandGetCrossReference command, - CallContext context, - ServerStreamListener listener) { - putEmptyBatchToStreamListener(listener, Schemas.GET_CROSS_REFERENCE_SCHEMA); - } - - @Override - public void close() throws Exception {} - - @Override - public void listFlights( - CallContext context, Criteria criteria, StreamListener listener) {} - - private FlightInfo getFlightInfoForSchema( - final T request, final FlightDescriptor descriptor, final Schema schema) { - final Ticket ticket = new Ticket(Any.pack(request).toByteArray()); - final List endpoints = Collections.singletonList(new FlightEndpoint(ticket)); - - return new FlightInfo(schema, descriptor, endpoints, -1, -1); - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/IntegrationAssertions.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/IntegrationAssertions.java deleted file mode 100644 index ada565c635428..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/IntegrationAssertions.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) 
under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.Arrays; -import java.util.Objects; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightRuntimeException; - -/** Utility methods to implement integration tests without using JUnit assertions. */ -final class IntegrationAssertions { - - /** - * Assert that the given code throws the given exception or subclass thereof. - * - * @param clazz The exception type. - * @param body The code to run. - * @param The exception type. - * @return The thrown exception. - */ - @SuppressWarnings("unchecked") - static T assertThrows(Class clazz, AssertThrows body) { - try { - body.run(); - } catch (Throwable t) { - if (clazz.isInstance(t)) { - return (T) t; - } - throw new AssertionError( - "Expected exception of class " + clazz + " but got " + t.getClass(), t); - } - throw new AssertionError("Expected exception of class " + clazz + " but did not throw."); - } - - /** Assert that the two (non-array) objects are equal. 
*/ - static void assertEquals(Object expected, Object actual) { - if (!Objects.equals(expected, actual)) { - throw new AssertionError("Expected:\n" + expected + "\nbut got:\n" + actual); - } - } - - /** Assert that the two arrays are equal. */ - static void assertEquals(byte[] expected, byte[] actual) { - if (!Arrays.equals(expected, actual)) { - throw new AssertionError( - String.format( - "Expected:\n%s\nbut got:\n%s", Arrays.toString(expected), Arrays.toString(actual))); - } - } - - /** Assert that the value is false, using the given message as an error otherwise. */ - static void assertFalse(String message, boolean value) { - if (value) { - throw new AssertionError("Expected false: " + message); - } - } - - /** Assert that the value is true, using the given message as an error otherwise. */ - static void assertTrue(String message, boolean value) { - if (!value) { - throw new AssertionError("Expected true: " + message); - } - } - - static void assertNull(Object actual) { - if (actual != null) { - throw new AssertionError("Expected: null\n\nbut got: " + actual); - } - } - - static void assertNotNull(Object actual) { - if (actual == null) { - throw new AssertionError("Expected: (not null)\n\nbut got: null\n"); - } - } - - /** Convert a throwable into a FlightRuntimeException with error details, for debugging. */ - static FlightRuntimeException toFlightRuntimeException(Throwable t) { - final StringWriter stringWriter = new StringWriter(); - final PrintWriter writer = new PrintWriter(stringWriter); - t.printStackTrace(writer); - return CallStatus.UNKNOWN - .withCause(t) - .withDescription("Unknown error: " + t + "\n. Stack trace:\n" + stringWriter.toString()) - .toRuntimeException(); - } - - /** An interface used with {@link #assertThrows(Class, AssertThrows)}. 
*/ - @FunctionalInterface - interface AssertThrows { - - void run() throws Throwable; - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/IntegrationProducer.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/IntegrationProducer.java deleted file mode 100644 index 8cbfa4547e405..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/IntegrationProducer.java +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.integration.tests; - -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.stream.Collectors; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.PutResult; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.DictionaryUtility; - -/** - * A FlightProducer that hosts an in memory store of Arrow buffers. Used for integration testing. - */ -public class IntegrationProducer extends NoOpFlightProducer implements AutoCloseable { - private final ConcurrentMap datasets = new ConcurrentHashMap<>(); - private final BufferAllocator allocator; - private Location location; - - /** - * Constructs a new instance. - * - * @param allocator The allocator for creating new Arrow buffers. - * @param location The location of the storage. 
- */ - public IntegrationProducer(BufferAllocator allocator, Location location) { - super(); - this.allocator = allocator; - this.location = location; - } - - /** - * Update the location after server start. - * - *

    Useful for binding to port 0 to get a free port. - */ - public void setLocation(Location location) { - this.location = location; - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - try { - FlightDescriptor descriptor = - FlightDescriptor.deserialize(ByteBuffer.wrap(ticket.getBytes())); - Dataset dataset = datasets.get(descriptor); - if (dataset == null) { - listener.error( - CallStatus.NOT_FOUND - .withDescription("Unknown ticket: " + descriptor) - .toRuntimeException()); - return; - } - dataset.streamTo(allocator, listener); - } catch (Exception ex) { - listener.error(IntegrationAssertions.toFlightRuntimeException(ex)); - } - } - - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - Dataset h = datasets.get(descriptor); - if (h == null) { - throw CallStatus.NOT_FOUND - .withDescription("Unknown descriptor: " + descriptor) - .toRuntimeException(); - } - return h.getFlightInfo(location); - } - - @Override - public Runnable acceptPut( - CallContext context, - final FlightStream flightStream, - final StreamListener ackStream) { - return () -> { - List batches = new ArrayList<>(); - try { - try (VectorSchemaRoot root = flightStream.getRoot()) { - VectorUnloader unloader = new VectorUnloader(root); - while (flightStream.next()) { - ackStream.onNext(PutResult.metadata(flightStream.getLatestMetadata())); - batches.add(unloader.getRecordBatch()); - } - // Closing the stream will release the dictionaries, take ownership - final Dataset dataset = - new Dataset( - flightStream.getDescriptor(), - flightStream.getSchema(), - flightStream.takeDictionaryOwnership(), - batches); - batches.clear(); - datasets.put(flightStream.getDescriptor(), dataset); - } finally { - AutoCloseables.close(batches); - } - } catch (Exception ex) { - ackStream.onError(IntegrationAssertions.toFlightRuntimeException(ex)); - } - }; - } - - @Override - public void close() throws Exception { - 
AutoCloseables.close(datasets.values()); - datasets.clear(); - } - - private static final class Dataset implements AutoCloseable { - private final FlightDescriptor descriptor; - private final Schema schema; - private final DictionaryProvider dictionaryProvider; - private final List batches; - - private Dataset( - FlightDescriptor descriptor, - Schema schema, - DictionaryProvider dictionaryProvider, - List batches) { - this.descriptor = descriptor; - this.schema = schema; - this.dictionaryProvider = dictionaryProvider; - this.batches = new ArrayList<>(batches); - } - - public FlightInfo getFlightInfo(Location location) { - ByteBuffer serializedDescriptor = descriptor.serialize(); - byte[] descriptorBytes = new byte[serializedDescriptor.remaining()]; - serializedDescriptor.get(descriptorBytes); - final List endpoints = - Collections.singletonList(new FlightEndpoint(new Ticket(descriptorBytes), location)); - return new FlightInfo( - messageFormatSchema(), - descriptor, - endpoints, - batches.stream().mapToLong(ArrowRecordBatch::computeBodyLength).sum(), - batches.stream().mapToInt(ArrowRecordBatch::getLength).sum()); - } - - private Schema messageFormatSchema() { - final Set dictionaryIdsUsed = new HashSet<>(); - final List messageFormatFields = - schema.getFields().stream() - .map(f -> DictionaryUtility.toMessageFormat(f, dictionaryProvider, dictionaryIdsUsed)) - .collect(Collectors.toList()); - return new Schema(messageFormatFields, schema.getCustomMetadata()); - } - - @Override - public void close() throws Exception { - AutoCloseables.close(batches); - } - - public void streamTo(BufferAllocator allocator, ServerStreamListener listener) { - try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - listener.start(root, dictionaryProvider); - final VectorLoader loader = new VectorLoader(root); - int counter = 0; - for (ArrowRecordBatch batch : batches) { - final byte[] rawMetadata = Integer.toString(counter).getBytes(StandardCharsets.UTF_8); - final 
ArrowBuf metadata = allocator.buffer(rawMetadata.length); - metadata.writeBytes(rawMetadata); - loader.load(batch); - // Transfers ownership of the buffer - do not free buffer ourselves - listener.putNext(metadata); - counter++; - } - listener.completed(); - } - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/IntegrationTestClient.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/IntegrationTestClient.java deleted file mode 100644 index f40102c1fae87..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/IntegrationTestClient.java +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.integration.tests; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; - -import java.io.File; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.List; -import org.apache.arrow.flight.AsyncPutListener; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.PutResult; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.ipc.JsonFileReader; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.Validator; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.DefaultParser; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; - -/** A Flight client for integration testing. 
*/ -class IntegrationTestClient { - private static final org.slf4j.Logger LOGGER = - org.slf4j.LoggerFactory.getLogger(IntegrationTestClient.class); - private final Options options; - - private IntegrationTestClient() { - options = new Options(); - options.addOption("j", "json", true, "json file"); - options.addOption("scenario", true, "The integration test scenario."); - options.addOption("host", true, "The host to connect to."); - options.addOption("port", true, "The port to connect to."); - } - - public static void main(String[] args) { - try { - new IntegrationTestClient().run(args); - } catch (ParseException e) { - fatalError("Invalid parameters", e); - } catch (IOException e) { - fatalError("Error accessing files", e); - } catch (Exception e) { - fatalError("Unknown error", e); - } - } - - private static void fatalError(String message, Throwable e) { - System.err.println(message); - System.err.println(e.getMessage()); - LOGGER.error(message, e); - System.exit(1); - } - - private void run(String[] args) throws Exception { - final CommandLineParser parser = new DefaultParser(); - final CommandLine cmd = parser.parse(options, args, false); - - final String host = cmd.getOptionValue("host", "localhost"); - final int port = Integer.parseInt(cmd.getOptionValue("port", "31337")); - - final Location defaultLocation = Location.forGrpcInsecure(host, port); - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final FlightClient client = FlightClient.builder(allocator, defaultLocation).build()) { - - if (cmd.hasOption("scenario")) { - Scenarios.getScenario(cmd.getOptionValue("scenario")) - .client(allocator, defaultLocation, client); - } else { - final String inputPath = cmd.getOptionValue("j"); - testStream(allocator, client, inputPath); - } - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - - private static void testStream(BufferAllocator allocator, FlightClient client, String inputPath) - throws IOException { - 
// 1. Read data from JSON and upload to server. - FlightDescriptor descriptor = FlightDescriptor.path(inputPath); - try (JsonFileReader reader = new JsonFileReader(new File(inputPath), allocator); - VectorSchemaRoot root = VectorSchemaRoot.create(reader.start(), allocator)) { - FlightClient.ClientStreamListener stream = - client.startPut( - descriptor, - root, - reader, - new AsyncPutListener() { - int counter = 0; - - @Override - public void onNext(PutResult val) { - final byte[] metadataRaw = - new byte[checkedCastToInt(val.getApplicationMetadata().readableBytes())]; - val.getApplicationMetadata().readBytes(metadataRaw); - final String metadata = new String(metadataRaw, StandardCharsets.UTF_8); - if (!Integer.toString(counter).equals(metadata)) { - throw new RuntimeException( - String.format( - "Invalid ACK from server. Expected '%d' but got '%s'.", - counter, metadata)); - } - counter++; - } - }); - int counter = 0; - while (reader.read(root)) { - final byte[] rawMetadata = Integer.toString(counter).getBytes(StandardCharsets.UTF_8); - final ArrowBuf metadata = allocator.buffer(rawMetadata.length); - metadata.writeBytes(rawMetadata); - // Transfers ownership of the buffer, so do not release it ourselves - stream.putNext(metadata); - root.clear(); - counter++; - } - stream.completed(); - // Need to call this, or exceptions from the server get swallowed - stream.getResult(); - } - - // 2. Get the ticket for the data. - FlightInfo info = client.getInfo(descriptor); - List endpoints = info.getEndpoints(); - if (endpoints.isEmpty()) { - throw new RuntimeException("No endpoints returned from Flight server."); - } - - for (FlightEndpoint endpoint : info.getEndpoints()) { - // 3. Download the data from the server. - List locations = endpoint.getLocations(); - if (locations.isEmpty()) { - // No locations provided, validate the server itself. 
- testTicket(allocator, client, endpoint.getTicket(), inputPath); - } else { - // All locations should be equivalent, validate each one. - for (Location location : locations) { - try (FlightClient readClient = FlightClient.builder(allocator, location).build()) { - testTicket(allocator, readClient, endpoint.getTicket(), inputPath); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - } - } - } - - private static void testTicket( - BufferAllocator allocator, FlightClient readClient, Ticket ticket, String inputPath) { - try (FlightStream stream = readClient.getStream(ticket); - VectorSchemaRoot root = stream.getRoot(); - VectorSchemaRoot downloadedRoot = VectorSchemaRoot.create(root.getSchema(), allocator); - JsonFileReader reader = new JsonFileReader(new File(inputPath), allocator)) { - VectorLoader loader = new VectorLoader(downloadedRoot); - VectorUnloader unloader = new VectorUnloader(root); - - Schema jsonSchema = reader.start(); - Validator.compareSchemas(root.getSchema(), jsonSchema); - try (VectorSchemaRoot jsonRoot = VectorSchemaRoot.create(jsonSchema, allocator)) { - - while (stream.next()) { - try (final ArrowRecordBatch arb = unloader.getRecordBatch()) { - loader.load(arb); - if (reader.read(jsonRoot)) { - - // 4. Validate the data. 
- Validator.compareVectorSchemaRoot(jsonRoot, downloadedRoot); - jsonRoot.clear(); - } else { - throw new RuntimeException("Flight stream has more batches than JSON"); - } - } - } - - // Verify no more batches with data in JSON - // NOTE: Currently the C++ Flight server skips empty batches at end of the stream - if (reader.read(jsonRoot) && jsonRoot.getRowCount() > 0) { - throw new RuntimeException("JSON has more batches with than Flight stream"); - } - } - } catch (Exception e) { - throw new RuntimeException(e); - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/IntegrationTestServer.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/IntegrationTestServer.java deleted file mode 100644 index d6e53cd427c73..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/IntegrationTestServer.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.integration.tests; - -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.Location; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.DefaultParser; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; - -/** Flight server for integration testing. */ -class IntegrationTestServer { - private static final org.slf4j.Logger LOGGER = - org.slf4j.LoggerFactory.getLogger(IntegrationTestServer.class); - private final Options options; - - private IntegrationTestServer() { - options = new Options(); - options.addOption("port", true, "The port to serve on."); - options.addOption("scenario", true, "The integration test scenario."); - } - - private void run(String[] args) throws Exception { - CommandLineParser parser = new DefaultParser(); - CommandLine cmd = parser.parse(options, args, false); - final int port = Integer.parseInt(cmd.getOptionValue("port", "31337")); - final Location location = Location.forGrpcInsecure("localhost", port); - - final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final FlightServer.Builder builder = - FlightServer.builder().allocator(allocator).location(location); - - final FlightServer server; - if (cmd.hasOption("scenario")) { - final Scenario scenario = Scenarios.getScenario(cmd.getOptionValue("scenario")); - scenario.buildServer(builder); - server = builder.producer(scenario.producer(allocator, location)).build(); - server.start(); - } else { - final IntegrationProducer producer = new IntegrationProducer(allocator, location); - server = FlightServer.builder(allocator, location, producer).build().start(); - producer.setLocation(Location.forGrpcInsecure("localhost", server.getPort())); - } - // Print out message for 
integration test script - System.out.println("Server listening on localhost:" + server.getPort()); - - Runtime.getRuntime() - .addShutdownHook( - new Thread( - () -> { - try { - System.out.println("\nExiting..."); - AutoCloseables.close(server, allocator); - } catch (Exception e) { - e.printStackTrace(); - } - })); - - server.awaitTermination(); - } - - public static void main(String[] args) { - try { - new IntegrationTestServer().run(args); - } catch (ParseException e) { - fatalError("Error parsing arguments", e); - } catch (Exception e) { - fatalError("Runtime error", e); - } - } - - private static void fatalError(String message, Throwable e) { - System.err.println(message); - System.err.println(e.getMessage()); - LOGGER.error(message, e); - System.exit(1); - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/LocationReuseConnectionScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/LocationReuseConnectionScenario.java deleted file mode 100644 index 0f6154ccd7071..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/LocationReuseConnectionScenario.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.nio.charset.StandardCharsets; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.pojo.Schema; - -/** Test the 'arrow-flight-reuse-connection' scheme. */ -public class LocationReuseConnectionScenario implements Scenario { - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) throws Exception { - return new ReuseConnectionProducer(); - } - - @Override - public void buildServer(FlightServer.Builder builder) throws Exception {} - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient client) - throws Exception { - final FlightInfo info = - client.getInfo(FlightDescriptor.command("reuse".getBytes(StandardCharsets.UTF_8))); - IntegrationAssertions.assertEquals(1, info.getEndpoints().size()); - IntegrationAssertions.assertEquals(1, info.getEndpoints().get(0).getLocations().size()); - Location actual = info.getEndpoints().get(0).getLocations().get(0); - IntegrationAssertions.assertEquals(Location.reuseConnection().getUri(), actual.getUri()); - } - - private static class ReuseConnectionProducer extends NoOpFlightProducer { - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - List endpoints = - Collections.singletonList( - new FlightEndpoint(new Ticket(new byte[0]), 
Location.reuseConnection())); - return new FlightInfo( - new Schema(Collections.emptyList()), descriptor, endpoints, /*bytes*/ -1, /*records*/ -1); - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/MiddlewareScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/MiddlewareScenario.java deleted file mode 100644 index ee1ac8ada2701..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/MiddlewareScenario.java +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.integration.tests; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Collections; -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.CallInfo; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightClientMiddleware; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightServerMiddleware; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.RequestContext; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.pojo.Schema; - -/** - * Test an edge case in middleware: gRPC-Java consolidates headers and trailers if a call fails - * immediately. On the gRPC implementation side, we need to watch for this, or else we'll have a - * call with "no headers" if we only look for headers. 
- */ -final class MiddlewareScenario implements Scenario { - - private static final String HEADER = "x-middleware"; - private static final String EXPECTED_HEADER_VALUE = "expected value"; - private static final byte[] COMMAND_SUCCESS = "success".getBytes(StandardCharsets.UTF_8); - - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) { - return new NoOpFlightProducer() { - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - if (descriptor.isCommand()) { - if (Arrays.equals(COMMAND_SUCCESS, descriptor.getCommand())) { - return new FlightInfo( - new Schema(Collections.emptyList()), descriptor, Collections.emptyList(), -1, -1); - } - } - throw CallStatus.UNIMPLEMENTED.toRuntimeException(); - } - }; - } - - @Override - public void buildServer(FlightServer.Builder builder) { - builder.middleware( - FlightServerMiddleware.Key.of("test"), new InjectingServerMiddleware.Factory()); - } - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient ignored) - throws Exception { - final ExtractingClientMiddleware.Factory factory = new ExtractingClientMiddleware.Factory(); - try (final FlightClient client = - FlightClient.builder(allocator, location).intercept(factory).build()) { - // Should fail immediately - IntegrationAssertions.assertThrows( - FlightRuntimeException.class, - () -> client.getInfo(FlightDescriptor.command(new byte[0]))); - if (!EXPECTED_HEADER_VALUE.equals(factory.extractedHeader)) { - throw new AssertionError( - "Expected to extract the header value '" - + EXPECTED_HEADER_VALUE - + "', but found: " - + factory.extractedHeader); - } - - // Should not fail - factory.extractedHeader = ""; - client.getInfo(FlightDescriptor.command(COMMAND_SUCCESS)); - if (!EXPECTED_HEADER_VALUE.equals(factory.extractedHeader)) { - throw new AssertionError( - "Expected to extract the header value '" - + EXPECTED_HEADER_VALUE - + "', but found: " - + 
factory.extractedHeader); - } - } - } - - /** Middleware that inserts a constant value in outgoing requests. */ - static class InjectingServerMiddleware implements FlightServerMiddleware { - - private final String headerValue; - - InjectingServerMiddleware(String incoming) { - this.headerValue = incoming; - } - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) { - outgoingHeaders.insert("x-middleware", headerValue); - } - - @Override - public void onCallCompleted(CallStatus status) {} - - @Override - public void onCallErrored(Throwable err) {} - - /** The factory for the server middleware. */ - static class Factory implements FlightServerMiddleware.Factory { - - @Override - public InjectingServerMiddleware onCallStarted( - CallInfo info, CallHeaders incomingHeaders, RequestContext context) { - String incoming = incomingHeaders.get(HEADER); - return new InjectingServerMiddleware(incoming == null ? "" : incoming); - } - } - } - - /** Middleware that pulls a value out of incoming responses. */ - static class ExtractingClientMiddleware implements FlightClientMiddleware { - - private final ExtractingClientMiddleware.Factory factory; - - public ExtractingClientMiddleware(ExtractingClientMiddleware.Factory factory) { - this.factory = factory; - } - - @Override - public void onBeforeSendingHeaders(CallHeaders outgoingHeaders) { - outgoingHeaders.insert(HEADER, EXPECTED_HEADER_VALUE); - } - - @Override - public void onHeadersReceived(CallHeaders incomingHeaders) { - this.factory.extractedHeader = incomingHeaders.get(HEADER); - } - - @Override - public void onCallCompleted(CallStatus status) {} - - /** The factory for the client middleware. 
*/ - static class Factory implements FlightClientMiddleware.Factory { - - String extractedHeader = null; - - @Override - public FlightClientMiddleware onCallStarted(CallInfo info) { - return new ExtractingClientMiddleware(this); - } - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/OrderedScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/OrderedScenario.java deleted file mode 100644 index 0e08a8afa3e09..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/OrderedScenario.java +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.integration.tests; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Objects; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; - -/** Test the 'ordered' flag in FlightInfo. 
*/ -public class OrderedScenario implements Scenario { - private static final Schema SCHEMA = - new Schema( - Collections.singletonList(Field.notNullable("number", Types.MinorType.INT.getType()))); - private static final byte[] ORDERED_COMMAND = "ordered".getBytes(StandardCharsets.UTF_8); - - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) throws Exception { - return new OrderedProducer(allocator); - } - - @Override - public void buildServer(FlightServer.Builder builder) throws Exception {} - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient client) - throws Exception { - final FlightInfo info = client.getInfo(FlightDescriptor.command(ORDERED_COMMAND)); - IntegrationAssertions.assertTrue("ordered must be true", info.getOrdered()); - IntegrationAssertions.assertEquals(3, info.getEndpoints().size()); - - int offset = 0; - for (int multiplier : Arrays.asList(1, 10, 100)) { - FlightEndpoint endpoint = info.getEndpoints().get(offset); - - IntegrationAssertions.assertTrue( - "locations must be empty", endpoint.getLocations().isEmpty()); - - try (final FlightStream stream = client.getStream(endpoint.getTicket())) { - IntegrationAssertions.assertEquals(SCHEMA, stream.getSchema()); - IntegrationAssertions.assertTrue("stream must have a batch", stream.next()); - - IntVector number = (IntVector) stream.getRoot().getVector(0); - IntegrationAssertions.assertEquals(3, stream.getRoot().getRowCount()); - - IntegrationAssertions.assertFalse("value must be non-null", number.isNull(0)); - IntegrationAssertions.assertFalse("value must be non-null", number.isNull(1)); - IntegrationAssertions.assertFalse("value must be non-null", number.isNull(2)); - IntegrationAssertions.assertEquals(multiplier, number.get(0)); - IntegrationAssertions.assertEquals(2 * multiplier, number.get(1)); - IntegrationAssertions.assertEquals(3 * multiplier, number.get(2)); - - IntegrationAssertions.assertFalse("stream must have 
one batch", stream.next()); - } - - offset++; - } - } - - private static class OrderedProducer extends NoOpFlightProducer { - private static final byte[] TICKET_1 = "1".getBytes(StandardCharsets.UTF_8); - private static final byte[] TICKET_2 = "2".getBytes(StandardCharsets.UTF_8); - private static final byte[] TICKET_3 = "3".getBytes(StandardCharsets.UTF_8); - - private final BufferAllocator allocator; - - OrderedProducer(BufferAllocator allocator) { - this.allocator = Objects.requireNonNull(allocator); - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - try (final VectorSchemaRoot root = VectorSchemaRoot.create(SCHEMA, allocator)) { - IntVector number = (IntVector) root.getVector(0); - - if (Arrays.equals(ticket.getBytes(), TICKET_1)) { - number.setSafe(0, 1); - number.setSafe(1, 2); - number.setSafe(2, 3); - } else if (Arrays.equals(ticket.getBytes(), TICKET_2)) { - number.setSafe(0, 10); - number.setSafe(1, 20); - number.setSafe(2, 30); - } else if (Arrays.equals(ticket.getBytes(), TICKET_3)) { - number.setSafe(0, 100); - number.setSafe(1, 200); - number.setSafe(2, 300); - } else { - listener.error( - CallStatus.INVALID_ARGUMENT - .withDescription( - "Could not find flight: " - + new String(ticket.getBytes(), StandardCharsets.UTF_8)) - .toRuntimeException()); - return; - } - - root.setRowCount(3); - - listener.start(root); - listener.putNext(); - listener.completed(); - } - } - - @Override - public FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - final boolean ordered = - descriptor.isCommand() && Arrays.equals(descriptor.getCommand(), ORDERED_COMMAND); - List endpoints; - if (ordered) { - endpoints = - Arrays.asList( - new FlightEndpoint(new Ticket(TICKET_1)), - new FlightEndpoint(new Ticket(TICKET_2)), - new FlightEndpoint(new Ticket(TICKET_3))); - } else { - endpoints = - Arrays.asList( - new FlightEndpoint(new Ticket(TICKET_1)), - new FlightEndpoint(new 
Ticket(TICKET_3)), - new FlightEndpoint(new Ticket(TICKET_2))); - } - return new FlightInfo( - SCHEMA, descriptor, endpoints, /*bytes*/ -1, /*records*/ -1, ordered, IpcOption.DEFAULT); - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/PollFlightInfoProducer.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/PollFlightInfoProducer.java deleted file mode 100644 index c01f46cde06fc..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/PollFlightInfoProducer.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.integration.tests; - -import java.nio.charset.StandardCharsets; -import java.time.Instant; -import java.time.temporal.ChronoUnit; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.PollInfo; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; - -/** Test PollFlightInfo. */ -class PollFlightInfoProducer extends NoOpFlightProducer { - static final byte[] POLL_DESCRIPTOR = "poll".getBytes(StandardCharsets.UTF_8); - - @Override - public PollInfo pollFlightInfo(CallContext context, FlightDescriptor descriptor) { - Schema schema = - new Schema( - Collections.singletonList( - Field.notNullable("number", Types.MinorType.UINT4.getType()))); - List endpoints = - Collections.singletonList( - new FlightEndpoint(new Ticket("long-running query".getBytes(StandardCharsets.UTF_8)))); - FlightInfo info = new FlightInfo(schema, descriptor, endpoints, -1, -1); - if (descriptor.isCommand() && Arrays.equals(descriptor.getCommand(), POLL_DESCRIPTOR)) { - return new PollInfo(info, null, 1.0, null); - } else { - return new PollInfo( - info, - FlightDescriptor.command(POLL_DESCRIPTOR), - 0.1, - Instant.now().plus(10, ChronoUnit.SECONDS)); - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/PollFlightInfoScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/PollFlightInfoScenario.java deleted file mode 100644 index 0282ba76ccd69..0000000000000 --- 
a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/PollFlightInfoScenario.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.nio.charset.StandardCharsets; -import java.util.Optional; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.PollInfo; -import org.apache.arrow.memory.BufferAllocator; - -/** Test PollFlightInfo. 
*/ -final class PollFlightInfoScenario implements Scenario { - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) throws Exception { - return new PollFlightInfoProducer(); - } - - @Override - public void buildServer(FlightServer.Builder builder) throws Exception {} - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient client) - throws Exception { - PollInfo info = - client.pollInfo(FlightDescriptor.command("heavy query".getBytes(StandardCharsets.UTF_8))); - IntegrationAssertions.assertNotNull(info.getFlightInfo()); - Optional progress = info.getProgress(); - IntegrationAssertions.assertTrue("progress is missing", progress.isPresent()); - IntegrationAssertions.assertTrue( - "progress is invalid", progress.get() >= 0.0 && progress.get() <= 1.0); - IntegrationAssertions.assertTrue("expiration is missing", info.getExpirationTime().isPresent()); - IntegrationAssertions.assertTrue( - "descriptor is missing", info.getFlightDescriptor().isPresent()); - - info = client.pollInfo(info.getFlightDescriptor().get()); - IntegrationAssertions.assertNotNull(info.getFlightInfo()); - progress = info.getProgress(); - IntegrationAssertions.assertTrue("progress is missing in finished query", progress.isPresent()); - IntegrationAssertions.assertTrue( - "progress isn't 1.0 in finished query", Math.abs(progress.get() - 1.0) < Math.ulp(1.0)); - IntegrationAssertions.assertFalse( - "expiration is set in finished query", info.getExpirationTime().isPresent()); - IntegrationAssertions.assertFalse( - "descriptor is set in finished query", info.getFlightDescriptor().isPresent()); - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/Scenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/Scenario.java deleted file mode 100644 index 23f3375c2077c..0000000000000 --- 
a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/Scenario.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.Location; -import org.apache.arrow.memory.BufferAllocator; - -/** A particular scenario in integration testing. */ -interface Scenario { - - /** Construct the FlightProducer for a server in this scenario. */ - FlightProducer producer(BufferAllocator allocator, Location location) throws Exception; - - /** Set any other server options. */ - void buildServer(FlightServer.Builder builder) throws Exception; - - /** Run as the client in the scenario. 
*/ - void client(BufferAllocator allocator, Location location, FlightClient client) throws Exception; -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/Scenarios.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/Scenarios.java deleted file mode 100644 index 7903ae994c7d1..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/Scenarios.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.util.Map; -import java.util.TreeMap; -import java.util.concurrent.TimeUnit; -import java.util.function.Supplier; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.Location; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; - -/** Scenarios for integration testing. 
*/ -final class Scenarios { - - private static Scenarios INSTANCE; - - private final Map> scenarios; - - private Scenarios() { - scenarios = new TreeMap<>(); - scenarios.put("auth:basic_proto", AuthBasicProtoScenario::new); - scenarios.put( - "expiration_time:cancel_flight_info", ExpirationTimeCancelFlightInfoScenario::new); - scenarios.put( - "expiration_time:renew_flight_endpoint", ExpirationTimeRenewFlightEndpointScenario::new); - scenarios.put("expiration_time:do_get", ExpirationTimeDoGetScenario::new); - scenarios.put("expiration_time:list_actions", ExpirationTimeListActionsScenario::new); - scenarios.put("location:reuse_connection", LocationReuseConnectionScenario::new); - scenarios.put("middleware", MiddlewareScenario::new); - scenarios.put("ordered", OrderedScenario::new); - scenarios.put("poll_flight_info", PollFlightInfoScenario::new); - scenarios.put("flight_sql", FlightSqlScenario::new); - scenarios.put("flight_sql:extension", FlightSqlExtensionScenario::new); - scenarios.put("flight_sql:ingestion", FlightSqlIngestionScenario::new); - scenarios.put("app_metadata_flight_info_endpoint", AppMetadataFlightInfoEndpointScenario::new); - scenarios.put("session_options", SessionOptionsScenario::new); - scenarios.put("do_exchange:echo", DoExchangeEchoScenario::new); - } - - private static Scenarios getInstance() { - if (INSTANCE == null) { - INSTANCE = new Scenarios(); - } - return INSTANCE; - } - - static Scenario getScenario(String scenario) { - final Supplier ctor = getInstance().scenarios.get(scenario); - if (ctor == null) { - throw new IllegalArgumentException("Unknown integration test scenario: " + scenario); - } - return ctor.get(); - } - - // Utility methods for implementing tests. 
- - public static void main(String[] args) { - // Run scenarios one after the other - final Location location = Location.forGrpcInsecure("localhost", 31337); - for (final Map.Entry> entry : getInstance().scenarios.entrySet()) { - System.out.println("Running test scenario: " + entry.getKey()); - final Scenario scenario = entry.getValue().get(); - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) { - final FlightServer.Builder builder = - FlightServer.builder(allocator, location, scenario.producer(allocator, location)); - scenario.buildServer(builder); - try (final FlightServer server = builder.build()) { - server.start(); - - try (final FlightClient client = FlightClient.builder(allocator, location).build()) { - scenario.client(allocator, location, client); - } - - server.shutdown(); - server.awaitTermination(1, TimeUnit.SECONDS); - System.out.println("Ran scenario " + entry.getKey()); - } - } catch (Exception e) { - System.out.println("Exception while running scenario " + entry.getKey()); - e.printStackTrace(); - } - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/SessionOptionsProducer.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/SessionOptionsProducer.java deleted file mode 100644 index 788419deb0ff2..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/SessionOptionsProducer.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.util.HashMap; -import java.util.Map; -import org.apache.arrow.flight.CloseSessionRequest; -import org.apache.arrow.flight.CloseSessionResult; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.flight.FlightServerMiddleware; -import org.apache.arrow.flight.GetSessionOptionsRequest; -import org.apache.arrow.flight.GetSessionOptionsResult; -import org.apache.arrow.flight.ServerSessionMiddleware; -import org.apache.arrow.flight.SessionOptionValue; -import org.apache.arrow.flight.SessionOptionValueFactory; -import org.apache.arrow.flight.SetSessionOptionsRequest; -import org.apache.arrow.flight.SetSessionOptionsResult; -import org.apache.arrow.flight.sql.NoOpFlightSqlProducer; - -/** - * The server used for testing Sessions. - * - *

    SetSessionOptions(), GetSessionOptions(), and CloseSession() operate on a simple - * SessionOptionValue store. - */ -final class SessionOptionsProducer extends NoOpFlightSqlProducer { - private static final SessionOptionValue invalidOptionValue = - SessionOptionValueFactory.makeSessionOptionValue("lol_invalid"); - private final FlightServerMiddleware.Key sessionMiddlewareKey; - - SessionOptionsProducer(FlightServerMiddleware.Key sessionMiddlewareKey) { - this.sessionMiddlewareKey = sessionMiddlewareKey; - } - - @Override - public void setSessionOptions( - SetSessionOptionsRequest request, - CallContext context, - StreamListener listener) { - Map errors = new HashMap(); - - ServerSessionMiddleware middleware = context.getMiddleware(sessionMiddlewareKey); - ServerSessionMiddleware.Session session = middleware.getSession(); - for (Map.Entry entry : request.getSessionOptions().entrySet()) { - // Blacklisted option name - if (entry.getKey().equals("lol_invalid")) { - errors.put( - entry.getKey(), - new SetSessionOptionsResult.Error(SetSessionOptionsResult.ErrorValue.INVALID_NAME)); - continue; - } - // Blacklisted option value - // Recommend using a visitor to check polymorphic equality, but this check is easy - if (entry.getValue().equals(invalidOptionValue)) { - errors.put( - entry.getKey(), - new SetSessionOptionsResult.Error(SetSessionOptionsResult.ErrorValue.INVALID_VALUE)); - continue; - } - // Business as usual: - if (entry.getValue().isEmpty()) { - session.eraseSessionOption(entry.getKey()); - continue; - } - session.setSessionOption(entry.getKey(), entry.getValue()); - } - listener.onNext(new SetSessionOptionsResult(errors)); - listener.onCompleted(); - } - - @Override - public void getSessionOptions( - GetSessionOptionsRequest request, - CallContext context, - StreamListener listener) { - ServerSessionMiddleware middleware = context.getMiddleware(sessionMiddlewareKey); - final Map sessionOptions = - middleware.getSession().getSessionOptions(); - 
listener.onNext(new GetSessionOptionsResult(sessionOptions)); - listener.onCompleted(); - } - - @Override - public void closeSession( - CloseSessionRequest request, - CallContext context, - StreamListener listener) { - ServerSessionMiddleware middleware = context.getMiddleware(sessionMiddlewareKey); - try { - middleware.closeSession(); - } catch (FlightRuntimeException fre) { - listener.onError(fre); - return; - } - listener.onNext(new CloseSessionResult(CloseSessionResult.Status.CLOSED)); - listener.onCompleted(); - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/SessionOptionsScenario.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/SessionOptionsScenario.java deleted file mode 100644 index 76d5ce1509b00..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/SessionOptionsScenario.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.integration.tests; - -import com.google.common.collect.ImmutableMap; -import java.util.concurrent.atomic.AtomicInteger; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightServerMiddleware; -import org.apache.arrow.flight.GetSessionOptionsRequest; -import org.apache.arrow.flight.GetSessionOptionsResult; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.ServerSessionMiddleware; -import org.apache.arrow.flight.SessionOptionValue; -import org.apache.arrow.flight.SessionOptionValueFactory; -import org.apache.arrow.flight.SetSessionOptionsRequest; -import org.apache.arrow.flight.SetSessionOptionsResult; -import org.apache.arrow.flight.client.ClientCookieMiddleware; -import org.apache.arrow.flight.sql.FlightSqlClient; -import org.apache.arrow.memory.BufferAllocator; - -/** Scenario to exercise Session Options functionality. 
*/ -final class SessionOptionsScenario implements Scenario { - private final FlightServerMiddleware.Key key = - FlightServerMiddleware.Key.of("sessionmiddleware"); - - @Override - public FlightProducer producer(BufferAllocator allocator, Location location) throws Exception { - return new SessionOptionsProducer(key); - } - - @Override - public void buildServer(FlightServer.Builder builder) { - AtomicInteger counter = new AtomicInteger(1000); - builder.middleware( - key, new ServerSessionMiddleware.Factory(() -> String.valueOf(counter.getAndIncrement()))); - } - - @Override - public void client(BufferAllocator allocator, Location location, FlightClient ignored) - throws Exception { - final ClientCookieMiddleware.Factory factory = new ClientCookieMiddleware.Factory(); - try (final FlightClient flightClient = - FlightClient.builder(allocator, location).intercept(factory).build()) { - final FlightSqlClient client = new FlightSqlClient(flightClient); - - // Set - SetSessionOptionsRequest req1 = - new SetSessionOptionsRequest( - ImmutableMap.builder() - .put("foolong", SessionOptionValueFactory.makeSessionOptionValue(123L)) - .put("bardouble", SessionOptionValueFactory.makeSessionOptionValue(456.0)) - .put( - "lol_invalid", - SessionOptionValueFactory.makeSessionOptionValue("this won't get set")) - .put( - "key_with_invalid_value", - SessionOptionValueFactory.makeSessionOptionValue("lol_invalid")) - .put( - "big_ol_string_list", - SessionOptionValueFactory.makeSessionOptionValue( - new String[] {"a", "b", "sea", "dee", " ", " ", "geee", "(づ。◕‿‿◕。)づ"})) - .build()); - SetSessionOptionsResult res1 = client.setSessionOptions(req1); - // Some errors - IntegrationAssertions.assertEquals( - ImmutableMap.builder() - .put( - "lol_invalid", - new SetSessionOptionsResult.Error( - SetSessionOptionsResult.ErrorValue.INVALID_NAME)) - .put( - "key_with_invalid_value", - new SetSessionOptionsResult.Error( - SetSessionOptionsResult.ErrorValue.INVALID_VALUE)) - .build(), - 
res1.getErrors()); - // Some set, some omitted due to above errors - GetSessionOptionsResult res2 = client.getSessionOptions(new GetSessionOptionsRequest()); - IntegrationAssertions.assertEquals( - ImmutableMap.builder() - .put("foolong", SessionOptionValueFactory.makeSessionOptionValue(123L)) - .put("bardouble", SessionOptionValueFactory.makeSessionOptionValue(456.0)) - .put( - "big_ol_string_list", - SessionOptionValueFactory.makeSessionOptionValue( - new String[] {"a", "b", "sea", "dee", " ", " ", "geee", "(づ。◕‿‿◕。)づ"})) - .build(), - res2.getSessionOptions()); - // Update - client.setSessionOptions( - new SetSessionOptionsRequest( - ImmutableMap.builder() - // Delete - .put("foolong", SessionOptionValueFactory.makeEmptySessionOptionValue()) - // Update - .put( - "big_ol_string_list", - SessionOptionValueFactory.makeSessionOptionValue( - "a,b,sea,dee, , ,geee,(づ。◕‿‿◕。)づ")) - .build())); - GetSessionOptionsResult res4 = client.getSessionOptions(new GetSessionOptionsRequest()); - IntegrationAssertions.assertEquals( - ImmutableMap.builder() - .put("bardouble", SessionOptionValueFactory.makeSessionOptionValue(456.0)) - .put( - "big_ol_string_list", - SessionOptionValueFactory.makeSessionOptionValue( - "a,b,sea,dee, , ,geee,(づ。◕‿‿◕。)づ")) - .build(), - res4.getSessionOptions()); - } - } -} diff --git a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/TestBufferAllocationListener.java b/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/TestBufferAllocationListener.java deleted file mode 100644 index 10594d4cf0962..0000000000000 --- a/java/flight/flight-integration-tests/src/main/java/org/apache/arrow/flight/integration/tests/TestBufferAllocationListener.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.integration.tests; - -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.memory.AllocationListener; - -class TestBufferAllocationListener implements AllocationListener { - static class Entry { - StackTraceElement[] stackTrace; - long size; - boolean forAllocation; - - public Entry(StackTraceElement[] stackTrace, long size, boolean forAllocation) { - this.stackTrace = stackTrace; - this.size = size; - this.forAllocation = forAllocation; - } - } - - List trail = new ArrayList<>(); - - public void onAllocation(long size) { - trail.add(new Entry(Thread.currentThread().getStackTrace(), size, true)); - } - - public void onRelease(long size) { - trail.add(new Entry(Thread.currentThread().getStackTrace(), size, false)); - } - - public void reThrowWithAddedAllocatorInfo(Exception e) { - StringBuilder sb = new StringBuilder(); - sb.append(e.getMessage()); - sb.append("\n"); - sb.append("[[Buffer allocation and release trail during the test execution: \n"); - for (Entry trailEntry : trail) { - sb.append( - String.format( - "%s: %d: %n%s", - trailEntry.forAllocation ? 
"allocate" : "release", - trailEntry.size, - getStackTraceAsString(trailEntry.stackTrace))); - } - sb.append("]]"); - throw new IllegalStateException(sb.toString(), e); - } - - private String getStackTraceAsString(StackTraceElement[] elements) { - StringBuilder sb = new StringBuilder(); - for (int i = 1; i < elements.length; i++) { - StackTraceElement s = elements[i]; - sb.append("\t"); - sb.append(s); - sb.append("\n"); - } - return sb.toString(); - } -} diff --git a/java/flight/flight-integration-tests/src/test/java/org/apache/arrow/flight/integration/tests/IntegrationTest.java b/java/flight/flight-integration-tests/src/test/java/org/apache/arrow/flight/integration/tests/IntegrationTest.java deleted file mode 100644 index 16265b8b37014..0000000000000 --- a/java/flight/flight-integration-tests/src/test/java/org/apache/arrow/flight/integration/tests/IntegrationTest.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.integration.tests; - -import com.google.common.util.concurrent.ThreadFactoryBuilder; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.Location; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Test; - -/** Run the integration test scenarios in-process. */ -class IntegrationTest { - @Test - void authBasicProto() throws Exception { - testScenario("auth:basic_proto"); - } - - @Test - void expirationTimeCancelFlightInfo() throws Exception { - testScenario("expiration_time:cancel_flight_info"); - } - - @Test - void expirationTimeDoGet() throws Exception { - testScenario("expiration_time:do_get"); - } - - @Test - void expirationTimeListActions() throws Exception { - testScenario("expiration_time:list_actions"); - } - - @Test - void expirationTimeRenewFlightEndpoint() throws Exception { - testScenario("expiration_time:renew_flight_endpoint"); - } - - @Test - void locationReuseConnection() throws Exception { - testScenario("location:reuse_connection"); - } - - @Test - void middleware() throws Exception { - testScenario("middleware"); - } - - @Test - void ordered() throws Exception { - testScenario("ordered"); - } - - @Test - void pollFlightInfo() throws Exception { - testScenario("poll_flight_info"); - } - - @Test - void flightSql() throws Exception { - testScenario("flight_sql"); - } - - @Test - void flightSqlExtension() throws Exception { - testScenario("flight_sql:extension"); - } - - @Test - void flightSqlIngestion() throws Exception { - testScenario("flight_sql:ingestion"); - } - - @Test - void appMetadataFlightInfoEndpoint() throws Exception { - testScenario("app_metadata_flight_info_endpoint"); - } - - @Test - void sessionOptions() throws Exception { - 
testScenario("session_options"); - } - - @Test - void doExchangeEcho() throws Exception { - testScenario("do_exchange:echo"); - } - - void testScenario(String scenarioName) throws Exception { - TestBufferAllocationListener listener = new TestBufferAllocationListener(); - try (final BufferAllocator allocator = new RootAllocator(listener, Long.MAX_VALUE)) { - final ExecutorService exec = - Executors.newCachedThreadPool( - new ThreadFactoryBuilder() - .setNameFormat("integration-test-flight-server-executor-%d") - .build()); - final FlightServer.Builder builder = - FlightServer.builder() - .executor(exec) - .allocator(allocator) - .location(Location.forGrpcInsecure("0.0.0.0", 0)); - final Scenario scenario = Scenarios.getScenario(scenarioName); - scenario.buildServer(builder); - builder.producer(scenario.producer(allocator, Location.forGrpcInsecure("0.0.0.0", 0))); - - try (final FlightServer server = builder.build()) { - server.start(); - - final Location location = Location.forGrpcInsecure("localhost", server.getPort()); - try (final FlightClient client = FlightClient.builder(allocator, location).build()) { - scenario.client(allocator, location, client); - } - } - - // Shutdown the executor while allowing existing tasks to finish. - // Without this wait, allocator.close() may get invoked earlier than an executor thread may - // have finished freeing up resources - // In that case, allocator.close() can throw an IllegalStateException for memory leak, leading - // to flaky tests - exec.shutdown(); - final boolean unused = exec.awaitTermination(3, TimeUnit.SECONDS); - } catch (IllegalStateException e) { - // this could be due to Allocator detecting memory leak. 
Add allocation trail to help debug - listener.reThrowWithAddedAllocatorInfo(e); - } - } -} diff --git a/java/flight/flight-integration-tests/src/test/resources/logback.xml b/java/flight/flight-integration-tests/src/test/resources/logback.xml deleted file mode 100644 index 95fb0b37dc5d3..0000000000000 --- a/java/flight/flight-integration-tests/src/test/resources/logback.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - - diff --git a/java/flight/flight-sql-jdbc-core/jdbc-spotbugs-exclude.xml b/java/flight/flight-sql-jdbc-core/jdbc-spotbugs-exclude.xml deleted file mode 100644 index af75d70425cb4..0000000000000 --- a/java/flight/flight-sql-jdbc-core/jdbc-spotbugs-exclude.xml +++ /dev/null @@ -1,40 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml deleted file mode 100644 index fc033a5ea7ab1..0000000000000 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ /dev/null @@ -1,153 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-flight - 19.0.0-SNAPSHOT - - - flight-sql-jdbc-core - jar - Arrow Flight SQL JDBC Driver Core - Core implementation of JDBC driver based on Arrow Flight SQL. 
- https://arrow.apache.org - - - - org.apache.arrow - flight-core - - - io.netty - netty-transport-native-kqueue - - - io.netty - netty-transport-native-epoll - - - - - - org.apache.arrow - arrow-memory-core - - - - org.apache.arrow - arrow-memory-netty - runtime - - - - org.apache.arrow - arrow-vector - ${arrow.vector.classifier} - - - - com.google.guava - guava - - - - org.slf4j - slf4j-api - runtime - - - - com.google.protobuf - protobuf-java - - - - org.hamcrest - hamcrest - 3.0 - test - - - - commons-io - commons-io - 2.17.0 - test - - - - org.mockito - mockito-core - ${mockito.core.version} - test - - - - io.netty - netty-common - - - - org.apache.arrow - flight-sql - ${project.version} - - - - org.apache.calcite.avatica - avatica - 1.25.0 - - - - org.bouncycastle - bcpkix-jdk18on - 1.79 - - - - org.checkerframework - checker-qual - - - - - - - true - src/main/resources - - - - - - maven-surefire-plugin - - false - - ${project.basedir}/../../../testing/data - - - - - - diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java deleted file mode 100644 index 3f072d071b047..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java +++ /dev/null @@ -1,1279 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static java.sql.Types.BIGINT; -import static java.sql.Types.BINARY; -import static java.sql.Types.BIT; -import static java.sql.Types.CHAR; -import static java.sql.Types.DATE; -import static java.sql.Types.DECIMAL; -import static java.sql.Types.FLOAT; -import static java.sql.Types.INTEGER; -import static java.sql.Types.LONGNVARCHAR; -import static java.sql.Types.LONGVARBINARY; -import static java.sql.Types.NUMERIC; -import static java.sql.Types.REAL; -import static java.sql.Types.SMALLINT; -import static java.sql.Types.TIMESTAMP; -import static java.sql.Types.TINYINT; -import static java.sql.Types.VARCHAR; -import static org.apache.arrow.flight.sql.util.SqlInfoOptionsUtils.doesBitmaskTranslateToEnum; - -import com.google.protobuf.ProtocolMessageEnum; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.sql.Connection; -import java.sql.DatabaseMetaData; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.EnumMap; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.regex.Pattern; -import java.util.stream.Collectors; -import org.apache.arrow.driver.jdbc.utils.SqlTypes; -import org.apache.arrow.driver.jdbc.utils.VectorSchemaRootTransformer; -import org.apache.arrow.flight.FlightInfo; -import 
org.apache.arrow.flight.sql.FlightSqlColumnMetadata; -import org.apache.arrow.flight.sql.FlightSqlProducer.Schemas; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlInfo; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlOuterJoinsSupportLevel; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedElementActions; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedGroupBy; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedPositionedCommands; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedResultSetType; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedSubqueries; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedUnions; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportsConvert; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlTransactionIsolationLevel; -import org.apache.arrow.flight.sql.impl.FlightSql.SupportedAnsi92SqlGrammarLevel; -import org.apache.arrow.flight.sql.impl.FlightSql.SupportedSqlGrammar; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ReadChannel; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.Text; -import org.apache.calcite.avatica.AvaticaConnection; -import org.apache.calcite.avatica.AvaticaDatabaseMetaData; - -/** Arrow Flight JDBC's implementation of {@link DatabaseMetaData}. 
*/ -public class ArrowDatabaseMetadata extends AvaticaDatabaseMetaData { - private static final String JAVA_REGEX_SPECIALS = "[]()|^-+*?{}$\\."; - private static final Charset CHARSET = StandardCharsets.UTF_8; - private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; - static final int NO_DECIMAL_DIGITS = 0; - private static final int BASE10_RADIX = 10; - static final int COLUMN_SIZE_BYTE = (int) Math.ceil((Byte.SIZE - 1) * Math.log(2) / Math.log(10)); - static final int COLUMN_SIZE_SHORT = - (int) Math.ceil((Short.SIZE - 1) * Math.log(2) / Math.log(10)); - static final int COLUMN_SIZE_INT = - (int) Math.ceil((Integer.SIZE - 1) * Math.log(2) / Math.log(10)); - static final int COLUMN_SIZE_LONG = (int) Math.ceil((Long.SIZE - 1) * Math.log(2) / Math.log(10)); - static final int COLUMN_SIZE_VARCHAR_AND_BINARY = 65536; - static final int COLUMN_SIZE_DATE = "YYYY-MM-DD".length(); - static final int COLUMN_SIZE_TIME = "HH:MM:ss".length(); - static final int COLUMN_SIZE_TIME_MILLISECONDS = "HH:MM:ss.SSS".length(); - static final int COLUMN_SIZE_TIME_MICROSECONDS = "HH:MM:ss.SSSSSS".length(); - static final int COLUMN_SIZE_TIME_NANOSECONDS = "HH:MM:ss.SSSSSSSSS".length(); - static final int COLUMN_SIZE_TIMESTAMP_SECONDS = COLUMN_SIZE_DATE + 1 + COLUMN_SIZE_TIME; - static final int COLUMN_SIZE_TIMESTAMP_MILLISECONDS = - COLUMN_SIZE_DATE + 1 + COLUMN_SIZE_TIME_MILLISECONDS; - static final int COLUMN_SIZE_TIMESTAMP_MICROSECONDS = - COLUMN_SIZE_DATE + 1 + COLUMN_SIZE_TIME_MICROSECONDS; - static final int COLUMN_SIZE_TIMESTAMP_NANOSECONDS = - COLUMN_SIZE_DATE + 1 + COLUMN_SIZE_TIME_NANOSECONDS; - static final int DECIMAL_DIGITS_TIME_MILLISECONDS = 3; - static final int DECIMAL_DIGITS_TIME_MICROSECONDS = 6; - static final int DECIMAL_DIGITS_TIME_NANOSECONDS = 9; - private static final Schema GET_COLUMNS_SCHEMA = - new Schema( - Arrays.asList( - Field.nullable("TABLE_CAT", Types.MinorType.VARCHAR.getType()), - Field.nullable("TABLE_SCHEM", Types.MinorType.VARCHAR.getType()), 
- Field.notNullable("TABLE_NAME", Types.MinorType.VARCHAR.getType()), - Field.notNullable("COLUMN_NAME", Types.MinorType.VARCHAR.getType()), - Field.nullable("DATA_TYPE", Types.MinorType.INT.getType()), - Field.nullable("TYPE_NAME", Types.MinorType.VARCHAR.getType()), - Field.nullable("COLUMN_SIZE", Types.MinorType.INT.getType()), - Field.nullable("BUFFER_LENGTH", Types.MinorType.INT.getType()), - Field.nullable("DECIMAL_DIGITS", Types.MinorType.INT.getType()), - Field.nullable("NUM_PREC_RADIX", Types.MinorType.INT.getType()), - Field.notNullable("NULLABLE", Types.MinorType.INT.getType()), - Field.nullable("REMARKS", Types.MinorType.VARCHAR.getType()), - Field.nullable("COLUMN_DEF", Types.MinorType.VARCHAR.getType()), - Field.nullable("SQL_DATA_TYPE", Types.MinorType.INT.getType()), - Field.nullable("SQL_DATETIME_SUB", Types.MinorType.INT.getType()), - Field.notNullable("CHAR_OCTET_LENGTH", Types.MinorType.INT.getType()), - Field.notNullable("ORDINAL_POSITION", Types.MinorType.INT.getType()), - Field.notNullable("IS_NULLABLE", Types.MinorType.VARCHAR.getType()), - Field.nullable("SCOPE_CATALOG", Types.MinorType.VARCHAR.getType()), - Field.nullable("SCOPE_SCHEMA", Types.MinorType.VARCHAR.getType()), - Field.nullable("SCOPE_TABLE", Types.MinorType.VARCHAR.getType()), - Field.nullable("SOURCE_DATA_TYPE", Types.MinorType.SMALLINT.getType()), - Field.notNullable("IS_AUTOINCREMENT", Types.MinorType.VARCHAR.getType()), - Field.notNullable("IS_GENERATEDCOLUMN", Types.MinorType.VARCHAR.getType()))); - private final AtomicBoolean isCachePopulated = new AtomicBoolean(false); - private final Map cachedSqlInfo = new EnumMap<>(SqlInfo.class); - private static final Map sqlTypesToFlightEnumConvertTypes = new HashMap<>(); - - static { - sqlTypesToFlightEnumConvertTypes.put(BIT, SqlSupportsConvert.SQL_CONVERT_BIT_VALUE); - sqlTypesToFlightEnumConvertTypes.put(INTEGER, SqlSupportsConvert.SQL_CONVERT_INTEGER_VALUE); - sqlTypesToFlightEnumConvertTypes.put(NUMERIC, 
SqlSupportsConvert.SQL_CONVERT_NUMERIC_VALUE); - sqlTypesToFlightEnumConvertTypes.put(SMALLINT, SqlSupportsConvert.SQL_CONVERT_SMALLINT_VALUE); - sqlTypesToFlightEnumConvertTypes.put(TINYINT, SqlSupportsConvert.SQL_CONVERT_TINYINT_VALUE); - sqlTypesToFlightEnumConvertTypes.put(FLOAT, SqlSupportsConvert.SQL_CONVERT_FLOAT_VALUE); - sqlTypesToFlightEnumConvertTypes.put(BIGINT, SqlSupportsConvert.SQL_CONVERT_BIGINT_VALUE); - sqlTypesToFlightEnumConvertTypes.put(REAL, SqlSupportsConvert.SQL_CONVERT_REAL_VALUE); - sqlTypesToFlightEnumConvertTypes.put(DECIMAL, SqlSupportsConvert.SQL_CONVERT_DECIMAL_VALUE); - sqlTypesToFlightEnumConvertTypes.put(BINARY, SqlSupportsConvert.SQL_CONVERT_BINARY_VALUE); - sqlTypesToFlightEnumConvertTypes.put( - LONGVARBINARY, SqlSupportsConvert.SQL_CONVERT_LONGVARBINARY_VALUE); - sqlTypesToFlightEnumConvertTypes.put(CHAR, SqlSupportsConvert.SQL_CONVERT_CHAR_VALUE); - sqlTypesToFlightEnumConvertTypes.put(VARCHAR, SqlSupportsConvert.SQL_CONVERT_VARCHAR_VALUE); - sqlTypesToFlightEnumConvertTypes.put( - LONGNVARCHAR, SqlSupportsConvert.SQL_CONVERT_LONGVARCHAR_VALUE); - sqlTypesToFlightEnumConvertTypes.put(DATE, SqlSupportsConvert.SQL_CONVERT_DATE_VALUE); - sqlTypesToFlightEnumConvertTypes.put(TIMESTAMP, SqlSupportsConvert.SQL_CONVERT_TIMESTAMP_VALUE); - } - - ArrowDatabaseMetadata(final AvaticaConnection connection) { - super(connection); - } - - @Override - public String getDatabaseProductName() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.FLIGHT_SQL_SERVER_NAME, String.class); - } - - @Override - public String getDatabaseProductVersion() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.FLIGHT_SQL_SERVER_VERSION, String.class); - } - - @Override - public String getIdentifierQuoteString() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_IDENTIFIER_QUOTE_CHAR, String.class); - } - - @Override - public boolean isReadOnly() throws SQLException { - return 
getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY, Boolean.class); - } - - @Override - public String getSQLKeywords() throws SQLException { - return convertListSqlInfoToString( - getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_KEYWORDS, List.class)) - .orElse(""); - } - - @Override - public String getNumericFunctions() throws SQLException { - return convertListSqlInfoToString( - getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_NUMERIC_FUNCTIONS, List.class)) - .orElse(""); - } - - @Override - public String getStringFunctions() throws SQLException { - return convertListSqlInfoToString( - getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_STRING_FUNCTIONS, List.class)) - .orElse(""); - } - - @Override - public String getSystemFunctions() throws SQLException { - return convertListSqlInfoToString( - getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SYSTEM_FUNCTIONS, List.class)) - .orElse(""); - } - - @Override - public String getTimeDateFunctions() throws SQLException { - return convertListSqlInfoToString( - getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_DATETIME_FUNCTIONS, List.class)) - .orElse(""); - } - - @Override - public String getSearchStringEscape() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SEARCH_STRING_ESCAPE, String.class); - } - - @Override - public String getExtraNameCharacters() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_EXTRA_NAME_CHARACTERS, String.class); - } - - @Override - public boolean supportsColumnAliasing() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SUPPORTS_COLUMN_ALIASING, Boolean.class); - } - - @Override - public boolean nullPlusNonNullIsNull() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_NULL_PLUS_NULL_IS_NULL, Boolean.class); - } - - @Override - public boolean supportsConvert() throws SQLException { - return !getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SUPPORTS_CONVERT, Map.class).isEmpty(); - } - - @Override 
- public boolean supportsConvert(final int fromType, final int toType) throws SQLException { - final Map> sqlSupportsConvert = - getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SUPPORTS_CONVERT, Map.class); - - if (!sqlTypesToFlightEnumConvertTypes.containsKey(fromType)) { - return false; - } - - final List list = - sqlSupportsConvert.get(sqlTypesToFlightEnumConvertTypes.get(fromType)); - - return list != null && list.contains(sqlTypesToFlightEnumConvertTypes.get(toType)); - } - - @Override - public boolean supportsTableCorrelationNames() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTS_TABLE_CORRELATION_NAMES, Boolean.class); - } - - @Override - public boolean supportsDifferentTableCorrelationNames() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES, Boolean.class); - } - - @Override - public boolean supportsExpressionsInOrderBy() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTS_EXPRESSIONS_IN_ORDER_BY, Boolean.class); - } - - @Override - public boolean supportsOrderByUnrelated() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SUPPORTS_ORDER_BY_UNRELATED, Boolean.class); - } - - @Override - public boolean supportsGroupBy() throws SQLException { - final int bitmask = - getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SUPPORTED_GROUP_BY, Integer.class); - return bitmask != 0; - } - - @Override - public boolean supportsGroupByUnrelated() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_GROUP_BY, SqlSupportedGroupBy.SQL_GROUP_BY_UNRELATED); - } - - @Override - public boolean supportsLikeEscapeClause() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SUPPORTS_LIKE_ESCAPE_CLAUSE, Boolean.class); - } - - @Override - public boolean supportsNonNullableColumns() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty( - 
SqlInfo.SQL_SUPPORTS_NON_NULLABLE_COLUMNS, Boolean.class); - } - - @Override - public boolean supportsMinimumSQLGrammar() throws SQLException { - return checkEnumLevel( - Arrays.asList( - getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_GRAMMAR, SupportedSqlGrammar.SQL_EXTENDED_GRAMMAR), - getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_GRAMMAR, SupportedSqlGrammar.SQL_CORE_GRAMMAR), - getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_GRAMMAR, SupportedSqlGrammar.SQL_MINIMUM_GRAMMAR))); - } - - @Override - public boolean supportsCoreSQLGrammar() throws SQLException { - return checkEnumLevel( - Arrays.asList( - getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_GRAMMAR, SupportedSqlGrammar.SQL_EXTENDED_GRAMMAR), - getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_GRAMMAR, SupportedSqlGrammar.SQL_CORE_GRAMMAR))); - } - - @Override - public boolean supportsExtendedSQLGrammar() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_GRAMMAR, SupportedSqlGrammar.SQL_EXTENDED_GRAMMAR); - } - - @Override - public boolean supportsANSI92EntryLevelSQL() throws SQLException { - return checkEnumLevel( - Arrays.asList( - getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_ANSI92_SUPPORTED_LEVEL, - SupportedAnsi92SqlGrammarLevel.ANSI92_ENTRY_SQL), - getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_ANSI92_SUPPORTED_LEVEL, - SupportedAnsi92SqlGrammarLevel.ANSI92_INTERMEDIATE_SQL), - getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_ANSI92_SUPPORTED_LEVEL, - SupportedAnsi92SqlGrammarLevel.ANSI92_FULL_SQL))); - } - - @Override - public boolean supportsANSI92IntermediateSQL() throws SQLException { - return checkEnumLevel( - Arrays.asList( - getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_ANSI92_SUPPORTED_LEVEL, - SupportedAnsi92SqlGrammarLevel.ANSI92_ENTRY_SQL), - getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - 
SqlInfo.SQL_ANSI92_SUPPORTED_LEVEL, - SupportedAnsi92SqlGrammarLevel.ANSI92_INTERMEDIATE_SQL))); - } - - @Override - public boolean supportsANSI92FullSQL() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_ANSI92_SUPPORTED_LEVEL, SupportedAnsi92SqlGrammarLevel.ANSI92_FULL_SQL); - } - - @Override - public boolean supportsIntegrityEnhancementFacility() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY, Boolean.class); - } - - @Override - public boolean supportsOuterJoins() throws SQLException { - final int bitmask = - getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_OUTER_JOINS_SUPPORT_LEVEL, Integer.class); - return bitmask != 0; - } - - @Override - public boolean supportsFullOuterJoins() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_OUTER_JOINS_SUPPORT_LEVEL, SqlOuterJoinsSupportLevel.SQL_FULL_OUTER_JOINS); - } - - @Override - public boolean supportsLimitedOuterJoins() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_OUTER_JOINS_SUPPORT_LEVEL, SqlOuterJoinsSupportLevel.SQL_LIMITED_OUTER_JOINS); - } - - @Override - public String getSchemaTerm() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SCHEMA_TERM, String.class); - } - - @Override - public String getProcedureTerm() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_PROCEDURE_TERM, String.class); - } - - @Override - public String getCatalogTerm() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_CATALOG_TERM, String.class); - } - - @Override - public boolean isCatalogAtStart() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_CATALOG_AT_START, Boolean.class); - } - - @Override - public boolean supportsSchemasInProcedureCalls() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - 
SqlInfo.SQL_SCHEMAS_SUPPORTED_ACTIONS, - SqlSupportedElementActions.SQL_ELEMENT_IN_PROCEDURE_CALLS); - } - - @Override - public boolean supportsSchemasInIndexDefinitions() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SCHEMAS_SUPPORTED_ACTIONS, - SqlSupportedElementActions.SQL_ELEMENT_IN_INDEX_DEFINITIONS); - } - - @Override - public boolean supportsSchemasInPrivilegeDefinitions() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SCHEMAS_SUPPORTED_ACTIONS, - SqlSupportedElementActions.SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS); - } - - @Override - public boolean supportsCatalogsInIndexDefinitions() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_CATALOGS_SUPPORTED_ACTIONS, - SqlSupportedElementActions.SQL_ELEMENT_IN_INDEX_DEFINITIONS); - } - - @Override - public boolean supportsCatalogsInPrivilegeDefinitions() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_CATALOGS_SUPPORTED_ACTIONS, - SqlSupportedElementActions.SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS); - } - - @Override - public boolean supportsPositionedDelete() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_POSITIONED_COMMANDS, - SqlSupportedPositionedCommands.SQL_POSITIONED_DELETE); - } - - @Override - public boolean supportsPositionedUpdate() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_POSITIONED_COMMANDS, - SqlSupportedPositionedCommands.SQL_POSITIONED_UPDATE); - } - - @Override - public boolean supportsResultSetType(final int type) throws SQLException { - final int bitmask = - getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SUPPORTED_RESULT_SET_TYPES, Integer.class); - - switch (type) { - case ResultSet.TYPE_FORWARD_ONLY: - return doesBitmaskTranslateToEnum( - SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_FORWARD_ONLY, bitmask); - case 
ResultSet.TYPE_SCROLL_INSENSITIVE: - return doesBitmaskTranslateToEnum( - SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE, bitmask); - case ResultSet.TYPE_SCROLL_SENSITIVE: - return doesBitmaskTranslateToEnum( - SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE, bitmask); - default: - throw new SQLException( - "Invalid result set type argument. The informed type is not defined in java.sql.ResultSet."); - } - } - - @Override - public boolean supportsSelectForUpdate() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SELECT_FOR_UPDATE_SUPPORTED, Boolean.class); - } - - @Override - public boolean supportsStoredProcedures() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_STORED_PROCEDURES_SUPPORTED, Boolean.class); - } - - @Override - public boolean supportsSubqueriesInComparisons() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_SUBQUERIES, SqlSupportedSubqueries.SQL_SUBQUERIES_IN_COMPARISONS); - } - - @Override - public boolean supportsSubqueriesInExists() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_SUBQUERIES, SqlSupportedSubqueries.SQL_SUBQUERIES_IN_EXISTS); - } - - @Override - public boolean supportsSubqueriesInIns() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_SUBQUERIES, SqlSupportedSubqueries.SQL_SUBQUERIES_IN_INS); - } - - @Override - public boolean supportsSubqueriesInQuantifieds() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_SUBQUERIES, SqlSupportedSubqueries.SQL_SUBQUERIES_IN_QUANTIFIEDS); - } - - @Override - public boolean supportsCorrelatedSubqueries() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty( - SqlInfo.SQL_CORRELATED_SUBQUERIES_SUPPORTED, Boolean.class); - } - - @Override - public boolean supportsUnion() throws SQLException { - final int 
bitmask = - getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SUPPORTED_UNIONS, Integer.class); - return bitmask != 0; - } - - @Override - public boolean supportsUnionAll() throws SQLException { - return getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_UNIONS, SqlSupportedUnions.SQL_UNION_ALL); - } - - @Override - public int getMaxBinaryLiteralLength() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_BINARY_LITERAL_LENGTH, Long.class) - .intValue(); - } - - @Override - public int getMaxCharLiteralLength() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_CHAR_LITERAL_LENGTH, Long.class) - .intValue(); - } - - @Override - public int getMaxColumnNameLength() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_COLUMN_NAME_LENGTH, Long.class) - .intValue(); - } - - @Override - public int getMaxColumnsInGroupBy() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_COLUMNS_IN_GROUP_BY, Long.class) - .intValue(); - } - - @Override - public int getMaxColumnsInIndex() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_COLUMNS_IN_INDEX, Long.class) - .intValue(); - } - - @Override - public int getMaxColumnsInOrderBy() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_COLUMNS_IN_ORDER_BY, Long.class) - .intValue(); - } - - @Override - public int getMaxColumnsInSelect() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_COLUMNS_IN_SELECT, Long.class) - .intValue(); - } - - @Override - public int getMaxColumnsInTable() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_COLUMNS_IN_TABLE, Long.class) - .intValue(); - } - - @Override - public int getMaxConnections() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_CONNECTIONS, Long.class).intValue(); - } - - @Override - public int getMaxCursorNameLength() 
throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_CURSOR_NAME_LENGTH, Long.class) - .intValue(); - } - - @Override - public int getMaxIndexLength() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_INDEX_LENGTH, Long.class).intValue(); - } - - @Override - public int getMaxSchemaNameLength() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_DB_SCHEMA_NAME_LENGTH, Long.class) - .intValue(); - } - - @Override - public int getMaxProcedureNameLength() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_PROCEDURE_NAME_LENGTH, Long.class) - .intValue(); - } - - @Override - public int getMaxCatalogNameLength() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_CATALOG_NAME_LENGTH, Long.class) - .intValue(); - } - - @Override - public int getMaxRowSize() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_ROW_SIZE, Long.class).intValue(); - } - - @Override - public boolean doesMaxRowSizeIncludeBlobs() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_ROW_SIZE_INCLUDES_BLOBS, Boolean.class); - } - - @Override - public int getMaxStatementLength() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_STATEMENT_LENGTH, Long.class) - .intValue(); - } - - @Override - public int getMaxStatements() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_STATEMENTS, Long.class).intValue(); - } - - @Override - public int getMaxTableNameLength() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_TABLE_NAME_LENGTH, Long.class) - .intValue(); - } - - @Override - public int getMaxTablesInSelect() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_TABLES_IN_SELECT, Long.class) - .intValue(); - } - - @Override - public int getMaxUserNameLength() throws SQLException { - return 
getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_MAX_USERNAME_LENGTH, Long.class).intValue(); - } - - @Override - public int getDefaultTransactionIsolation() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_DEFAULT_TRANSACTION_ISOLATION, Long.class) - .intValue(); - } - - @Override - public boolean supportsTransactions() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_TRANSACTIONS_SUPPORTED, Boolean.class); - } - - @Override - public boolean supportsTransactionIsolationLevel(final int level) throws SQLException { - final int bitmask = - getSqlInfoAndCacheIfCacheIsEmpty( - SqlInfo.SQL_SUPPORTED_TRANSACTIONS_ISOLATION_LEVELS, Integer.class); - - switch (level) { - case Connection.TRANSACTION_NONE: - return doesBitmaskTranslateToEnum( - SqlTransactionIsolationLevel.SQL_TRANSACTION_NONE, bitmask); - case Connection.TRANSACTION_READ_COMMITTED: - return doesBitmaskTranslateToEnum( - SqlTransactionIsolationLevel.SQL_TRANSACTION_READ_COMMITTED, bitmask); - case Connection.TRANSACTION_READ_UNCOMMITTED: - return doesBitmaskTranslateToEnum( - SqlTransactionIsolationLevel.SQL_TRANSACTION_READ_UNCOMMITTED, bitmask); - case Connection.TRANSACTION_REPEATABLE_READ: - return doesBitmaskTranslateToEnum( - SqlTransactionIsolationLevel.SQL_TRANSACTION_REPEATABLE_READ, bitmask); - case Connection.TRANSACTION_SERIALIZABLE: - return doesBitmaskTranslateToEnum( - SqlTransactionIsolationLevel.SQL_TRANSACTION_SERIALIZABLE, bitmask); - default: - throw new SQLException( - "Invalid transaction isolation level argument. 
The informed level is not defined in java.sql.Connection."); - } - } - - @Override - public boolean dataDefinitionCausesTransactionCommit() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty( - SqlInfo.SQL_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT, Boolean.class); - } - - @Override - public boolean dataDefinitionIgnoredInTransactions() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty( - SqlInfo.SQL_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED, Boolean.class); - } - - @Override - public boolean supportsBatchUpdates() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_BATCH_UPDATES_SUPPORTED, Boolean.class); - } - - @Override - public boolean supportsSavepoints() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_SAVEPOINTS_SUPPORTED, Boolean.class); - } - - @Override - public boolean supportsNamedParameters() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_NAMED_PARAMETERS_SUPPORTED, Boolean.class); - } - - @Override - public boolean locatorsUpdateCopy() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty(SqlInfo.SQL_LOCATORS_UPDATE_COPY, Boolean.class); - } - - @Override - public boolean supportsStoredFunctionsUsingCallSyntax() throws SQLException { - return getSqlInfoAndCacheIfCacheIsEmpty( - SqlInfo.SQL_STORED_FUNCTIONS_USING_CALL_SYNTAX_SUPPORTED, Boolean.class); - } - - @Override - public ArrowFlightConnection getConnection() throws SQLException { - return (ArrowFlightConnection) super.getConnection(); - } - - private T getSqlInfoAndCacheIfCacheIsEmpty( - final SqlInfo sqlInfoCommand, final Class desiredType) throws SQLException { - final ArrowFlightConnection connection = getConnection(); - if (!isCachePopulated.get()) { - // Lock-and-populate the cache. Only issue the call to getSqlInfo() once, - // populate the cache, then mark it as populated. 
- // Note that multiple callers from separate threads can see that the cache is not populated, - // but only - // one thread will try to populate the cache. Other threads will see the cache is non-empty - // when acquiring - // the lock on the cache and skip population. - synchronized (cachedSqlInfo) { - if (cachedSqlInfo.isEmpty()) { - final FlightInfo sqlInfo = connection.getClientHandler().getSqlInfo(); - try (final ResultSet resultSet = - ArrowFlightJdbcFlightStreamResultSet.fromFlightInfo(connection, sqlInfo, null)) { - while (resultSet.next()) { - cachedSqlInfo.put( - SqlInfo.forNumber((Integer) resultSet.getObject("info_name")), - resultSet.getObject("value")); - } - } - isCachePopulated.set(true); - } - } - } - return desiredType.cast(cachedSqlInfo.get(sqlInfoCommand)); - } - - private Optional convertListSqlInfoToString(final List sqlInfoList) { - if (sqlInfoList == null) { - return Optional.empty(); - } else { - return Optional.of( - sqlInfoList.stream().map(Object::toString).collect(Collectors.joining(", "))); - } - } - - private boolean getSqlInfoEnumOptionAndCacheIfCacheIsEmpty( - final SqlInfo sqlInfoCommand, final ProtocolMessageEnum enumInstance) throws SQLException { - final int bitmask = getSqlInfoAndCacheIfCacheIsEmpty(sqlInfoCommand, Integer.class); - return doesBitmaskTranslateToEnum(enumInstance, bitmask); - } - - private boolean checkEnumLevel(final List toCheck) { - return toCheck.stream().anyMatch(e -> e); - } - - @Override - public ResultSet getCatalogs() throws SQLException { - final ArrowFlightConnection connection = getConnection(); - final FlightInfo flightInfoCatalogs = connection.getClientHandler().getCatalogs(); - - final BufferAllocator allocator = connection.getBufferAllocator(); - final VectorSchemaRootTransformer transformer = - new VectorSchemaRootTransformer.Builder(Schemas.GET_CATALOGS_SCHEMA, allocator) - .renameFieldVector("catalog_name", "TABLE_CAT") - .build(); - return ArrowFlightJdbcFlightStreamResultSet.fromFlightInfo( 
- connection, flightInfoCatalogs, transformer); - } - - @Override - public ResultSet getImportedKeys(final String catalog, final String schema, final String table) - throws SQLException { - final ArrowFlightConnection connection = getConnection(); - final FlightInfo flightInfoImportedKeys = - connection.getClientHandler().getImportedKeys(catalog, schema, table); - - final BufferAllocator allocator = connection.getBufferAllocator(); - final VectorSchemaRootTransformer transformer = getForeignKeysTransformer(allocator); - return ArrowFlightJdbcFlightStreamResultSet.fromFlightInfo( - connection, flightInfoImportedKeys, transformer); - } - - @Override - public ResultSet getExportedKeys(final String catalog, final String schema, final String table) - throws SQLException { - final ArrowFlightConnection connection = getConnection(); - final FlightInfo flightInfoExportedKeys = - connection.getClientHandler().getExportedKeys(catalog, schema, table); - - final BufferAllocator allocator = connection.getBufferAllocator(); - final VectorSchemaRootTransformer transformer = getForeignKeysTransformer(allocator); - return ArrowFlightJdbcFlightStreamResultSet.fromFlightInfo( - connection, flightInfoExportedKeys, transformer); - } - - @Override - public ResultSet getCrossReference( - final String parentCatalog, - final String parentSchema, - final String parentTable, - final String foreignCatalog, - final String foreignSchema, - final String foreignTable) - throws SQLException { - final ArrowFlightConnection connection = getConnection(); - final FlightInfo flightInfoCrossReference = - connection - .getClientHandler() - .getCrossReference( - parentCatalog, - parentSchema, - parentTable, - foreignCatalog, - foreignSchema, - foreignTable); - - final BufferAllocator allocator = connection.getBufferAllocator(); - final VectorSchemaRootTransformer transformer = getForeignKeysTransformer(allocator); - return ArrowFlightJdbcFlightStreamResultSet.fromFlightInfo( - connection, 
flightInfoCrossReference, transformer); - } - - /** - * Transformer used on getImportedKeys, getExportedKeys and getCrossReference methods, since all - * three share the same schema. - */ - private VectorSchemaRootTransformer getForeignKeysTransformer(final BufferAllocator allocator) { - return new VectorSchemaRootTransformer.Builder(Schemas.GET_IMPORTED_KEYS_SCHEMA, allocator) - .renameFieldVector("pk_catalog_name", "PKTABLE_CAT") - .renameFieldVector("pk_db_schema_name", "PKTABLE_SCHEM") - .renameFieldVector("pk_table_name", "PKTABLE_NAME") - .renameFieldVector("pk_column_name", "PKCOLUMN_NAME") - .renameFieldVector("fk_catalog_name", "FKTABLE_CAT") - .renameFieldVector("fk_db_schema_name", "FKTABLE_SCHEM") - .renameFieldVector("fk_table_name", "FKTABLE_NAME") - .renameFieldVector("fk_column_name", "FKCOLUMN_NAME") - .renameFieldVector("key_sequence", "KEY_SEQ") - .renameFieldVector("fk_key_name", "FK_NAME") - .renameFieldVector("pk_key_name", "PK_NAME") - .renameFieldVector("update_rule", "UPDATE_RULE") - .renameFieldVector("delete_rule", "DELETE_RULE") - .addEmptyField("DEFERRABILITY", new ArrowType.Int(Byte.SIZE, false)) - .build(); - } - - @Override - public ResultSet getSchemas(final String catalog, final String schemaPattern) - throws SQLException { - final ArrowFlightConnection connection = getConnection(); - final FlightInfo flightInfoSchemas = - connection.getClientHandler().getSchemas(catalog, schemaPattern); - - final BufferAllocator allocator = connection.getBufferAllocator(); - final VectorSchemaRootTransformer transformer = - new VectorSchemaRootTransformer.Builder(Schemas.GET_SCHEMAS_SCHEMA, allocator) - .renameFieldVector("db_schema_name", "TABLE_SCHEM") - .renameFieldVector("catalog_name", "TABLE_CATALOG") - .build(); - return ArrowFlightJdbcFlightStreamResultSet.fromFlightInfo( - connection, flightInfoSchemas, transformer); - } - - @Override - public ResultSet getTableTypes() throws SQLException { - final ArrowFlightConnection connection = 
getConnection(); - final FlightInfo flightInfoTableTypes = connection.getClientHandler().getTableTypes(); - - final BufferAllocator allocator = connection.getBufferAllocator(); - final VectorSchemaRootTransformer transformer = - new VectorSchemaRootTransformer.Builder(Schemas.GET_TABLE_TYPES_SCHEMA, allocator) - .renameFieldVector("table_type", "TABLE_TYPE") - .build(); - return ArrowFlightJdbcFlightStreamResultSet.fromFlightInfo( - connection, flightInfoTableTypes, transformer); - } - - @Override - public ResultSet getTables( - final String catalog, - final String schemaPattern, - final String tableNamePattern, - final String[] types) - throws SQLException { - final ArrowFlightConnection connection = getConnection(); - final List typesList = types == null ? null : Arrays.asList(types); - final FlightInfo flightInfoTables = - connection - .getClientHandler() - .getTables(catalog, schemaPattern, tableNamePattern, typesList, false); - - final BufferAllocator allocator = connection.getBufferAllocator(); - final VectorSchemaRootTransformer transformer = - new VectorSchemaRootTransformer.Builder(Schemas.GET_TABLES_SCHEMA_NO_SCHEMA, allocator) - .renameFieldVector("catalog_name", "TABLE_CAT") - .renameFieldVector("db_schema_name", "TABLE_SCHEM") - .renameFieldVector("table_name", "TABLE_NAME") - .renameFieldVector("table_type", "TABLE_TYPE") - .addEmptyField("REMARKS", Types.MinorType.VARBINARY) - .addEmptyField("TYPE_CAT", Types.MinorType.VARBINARY) - .addEmptyField("TYPE_SCHEM", Types.MinorType.VARBINARY) - .addEmptyField("TYPE_NAME", Types.MinorType.VARBINARY) - .addEmptyField("SELF_REFERENCING_COL_NAME", Types.MinorType.VARBINARY) - .addEmptyField("REF_GENERATION", Types.MinorType.VARBINARY) - .build(); - return ArrowFlightJdbcFlightStreamResultSet.fromFlightInfo( - connection, flightInfoTables, transformer); - } - - @Override - public ResultSet getPrimaryKeys(final String catalog, final String schema, final String table) - throws SQLException { - final 
ArrowFlightConnection connection = getConnection(); - final FlightInfo flightInfoPrimaryKeys = - connection.getClientHandler().getPrimaryKeys(catalog, schema, table); - - final BufferAllocator allocator = connection.getBufferAllocator(); - final VectorSchemaRootTransformer transformer = - new VectorSchemaRootTransformer.Builder(Schemas.GET_PRIMARY_KEYS_SCHEMA, allocator) - .renameFieldVector("catalog_name", "TABLE_CAT") - .renameFieldVector("db_schema_name", "TABLE_SCHEM") - .renameFieldVector("table_name", "TABLE_NAME") - .renameFieldVector("column_name", "COLUMN_NAME") - .renameFieldVector("key_sequence", "KEY_SEQ") - .renameFieldVector("key_name", "PK_NAME") - .build(); - return ArrowFlightJdbcFlightStreamResultSet.fromFlightInfo( - connection, flightInfoPrimaryKeys, transformer); - } - - @Override - public ResultSet getColumns( - final String catalog, - final String schemaPattern, - final String tableNamePattern, - final String columnNamePattern) - throws SQLException { - final ArrowFlightConnection connection = getConnection(); - final FlightInfo flightInfoTables = - connection - .getClientHandler() - .getTables(catalog, schemaPattern, tableNamePattern, null, true); - - final BufferAllocator allocator = connection.getBufferAllocator(); - - final Pattern columnNamePat = - columnNamePattern != null ? 
Pattern.compile(sqlToRegexLike(columnNamePattern)) : null; - - return ArrowFlightJdbcFlightStreamResultSet.fromFlightInfo( - connection, - flightInfoTables, - (originalRoot, transformedRoot) -> { - int columnCounter = 0; - if (transformedRoot == null) { - transformedRoot = VectorSchemaRoot.create(GET_COLUMNS_SCHEMA, allocator); - } - - final int originalRootRowCount = originalRoot.getRowCount(); - - final VarCharVector catalogNameVector = - (VarCharVector) originalRoot.getVector("catalog_name"); - final VarCharVector tableNameVector = - (VarCharVector) originalRoot.getVector("table_name"); - final VarCharVector schemaNameVector = - (VarCharVector) originalRoot.getVector("db_schema_name"); - - final VarBinaryVector schemaVector = - (VarBinaryVector) originalRoot.getVector("table_schema"); - - for (int i = 0; i < originalRootRowCount; i++) { - final Text catalogName = catalogNameVector.getObject(i); - final Text tableName = tableNameVector.getObject(i); - final Text schemaName = schemaNameVector.getObject(i); - - final Schema currentSchema; - try { - currentSchema = - MessageSerializer.deserializeSchema( - new ReadChannel( - Channels.newChannel(new ByteArrayInputStream(schemaVector.get(i))))); - } catch (final IOException e) { - throw new IOException( - String.format("Failed to deserialize schema for table %s", tableName), e); - } - final List tableColumns = currentSchema.getFields(); - - columnCounter = - setGetColumnsVectorSchemaRootFromFields( - transformedRoot, - columnCounter, - tableColumns, - catalogName, - tableName, - schemaName, - columnNamePat); - } - - transformedRoot.setRowCount(columnCounter); - - originalRoot.clear(); - return transformedRoot; - }); - } - - private int setGetColumnsVectorSchemaRootFromFields( - final VectorSchemaRoot currentRoot, - int insertIndex, - final List tableColumns, - final Text catalogName, - final Text tableName, - final Text schemaName, - final Pattern columnNamePattern) { - int ordinalIndex = 1; - final int 
tableColumnsSize = tableColumns.size(); - - final VarCharVector tableCatVector = (VarCharVector) currentRoot.getVector("TABLE_CAT"); - final VarCharVector tableSchemVector = (VarCharVector) currentRoot.getVector("TABLE_SCHEM"); - final VarCharVector tableNameVector = (VarCharVector) currentRoot.getVector("TABLE_NAME"); - final VarCharVector columnNameVector = (VarCharVector) currentRoot.getVector("COLUMN_NAME"); - final IntVector dataTypeVector = (IntVector) currentRoot.getVector("DATA_TYPE"); - final VarCharVector typeNameVector = (VarCharVector) currentRoot.getVector("TYPE_NAME"); - final IntVector columnSizeVector = (IntVector) currentRoot.getVector("COLUMN_SIZE"); - final IntVector decimalDigitsVector = (IntVector) currentRoot.getVector("DECIMAL_DIGITS"); - final IntVector numPrecRadixVector = (IntVector) currentRoot.getVector("NUM_PREC_RADIX"); - final IntVector nullableVector = (IntVector) currentRoot.getVector("NULLABLE"); - final IntVector ordinalPositionVector = (IntVector) currentRoot.getVector("ORDINAL_POSITION"); - final VarCharVector isNullableVector = (VarCharVector) currentRoot.getVector("IS_NULLABLE"); - final VarCharVector isAutoincrementVector = - (VarCharVector) currentRoot.getVector("IS_AUTOINCREMENT"); - final VarCharVector isGeneratedColumnVector = - (VarCharVector) currentRoot.getVector("IS_GENERATEDCOLUMN"); - - for (int i = 0; i < tableColumnsSize; i++, ordinalIndex++) { - final Field field = tableColumns.get(i); - final FlightSqlColumnMetadata columnMetadata = - new FlightSqlColumnMetadata(field.getMetadata()); - final String columnName = field.getName(); - - if (columnNamePattern != null && !columnNamePattern.matcher(columnName).matches()) { - continue; - } - final ArrowType fieldType = field.getType(); - - if (catalogName != null) { - tableCatVector.setSafe(insertIndex, catalogName); - } - - if (schemaName != null) { - tableSchemVector.setSafe(insertIndex, schemaName); - } - - if (tableName != null) { - 
tableNameVector.setSafe(insertIndex, tableName); - } - - if (columnName != null) { - columnNameVector.setSafe(insertIndex, columnName.getBytes(CHARSET)); - } - - dataTypeVector.setSafe(insertIndex, SqlTypes.getSqlTypeIdFromArrowType(fieldType)); - byte[] typeName = - columnMetadata.getTypeName() != null - ? columnMetadata.getTypeName().getBytes(CHARSET) - : SqlTypes.getSqlTypeNameFromArrowType(fieldType).getBytes(CHARSET); - typeNameVector.setSafe(insertIndex, typeName); - - // We aren't setting COLUMN_SIZE for ROWID SQL Types, as there's no such Arrow type. - // We aren't setting COLUMN_SIZE nor DECIMAL_DIGITS for Float/Double as their precision and - // scale are variable. - if (fieldType instanceof ArrowType.Decimal) { - numPrecRadixVector.setSafe(insertIndex, BASE10_RADIX); - } else if (fieldType instanceof ArrowType.Int) { - numPrecRadixVector.setSafe(insertIndex, BASE10_RADIX); - } else if (fieldType instanceof ArrowType.FloatingPoint) { - numPrecRadixVector.setSafe(insertIndex, BASE10_RADIX); - } - - Integer decimalDigits = columnMetadata.getScale(); - if (decimalDigits == null) { - decimalDigits = getDecimalDigits(fieldType); - } - if (decimalDigits != null) { - decimalDigitsVector.setSafe(insertIndex, decimalDigits); - } - - Integer columnSize = columnMetadata.getPrecision(); - if (columnSize == null) { - columnSize = getColumnSize(fieldType); - } - if (columnSize != null) { - columnSizeVector.setSafe(insertIndex, columnSize); - } - - nullableVector.setSafe(insertIndex, field.isNullable() ? 
1 : 0); - - isNullableVector.setSafe(insertIndex, booleanToYesOrNo(field.isNullable())); - - Boolean autoIncrement = columnMetadata.isAutoIncrement(); - if (autoIncrement != null) { - isAutoincrementVector.setSafe(insertIndex, booleanToYesOrNo(autoIncrement)); - } else { - isAutoincrementVector.setSafe(insertIndex, EMPTY_BYTE_ARRAY); - } - - // Fields also don't hold information about IS_AUTOINCREMENT and IS_GENERATEDCOLUMN, - // so we're setting an empty string (as bytes), which means it couldn't be determined. - isGeneratedColumnVector.setSafe(insertIndex, EMPTY_BYTE_ARRAY); - - ordinalPositionVector.setSafe(insertIndex, ordinalIndex); - - insertIndex++; - } - return insertIndex; - } - - private static byte[] booleanToYesOrNo(boolean autoIncrement) { - return autoIncrement ? "YES".getBytes(CHARSET) : "NO".getBytes(CHARSET); - } - - static Integer getDecimalDigits(final ArrowType fieldType) { - // We aren't setting DECIMAL_DIGITS for Float/Double as their precision and scale are variable. 
- if (fieldType instanceof ArrowType.Decimal) { - final ArrowType.Decimal thisDecimal = (ArrowType.Decimal) fieldType; - return thisDecimal.getScale(); - } else if (fieldType instanceof ArrowType.Int) { - return NO_DECIMAL_DIGITS; - } else if (fieldType instanceof ArrowType.Timestamp) { - switch (((ArrowType.Timestamp) fieldType).getUnit()) { - case SECOND: - return NO_DECIMAL_DIGITS; - case MILLISECOND: - return DECIMAL_DIGITS_TIME_MILLISECONDS; - case MICROSECOND: - return DECIMAL_DIGITS_TIME_MICROSECONDS; - case NANOSECOND: - return DECIMAL_DIGITS_TIME_NANOSECONDS; - default: - break; - } - } else if (fieldType instanceof ArrowType.Time) { - switch (((ArrowType.Time) fieldType).getUnit()) { - case SECOND: - return NO_DECIMAL_DIGITS; - case MILLISECOND: - return DECIMAL_DIGITS_TIME_MILLISECONDS; - case MICROSECOND: - return DECIMAL_DIGITS_TIME_MICROSECONDS; - case NANOSECOND: - return DECIMAL_DIGITS_TIME_NANOSECONDS; - default: - break; - } - } else if (fieldType instanceof ArrowType.Date) { - return NO_DECIMAL_DIGITS; - } - - return null; - } - - static Integer getColumnSize(final ArrowType fieldType) { - // We aren't setting COLUMN_SIZE for ROWID SQL Types, as there's no such Arrow type. - // We aren't setting COLUMN_SIZE nor DECIMAL_DIGITS for Float/Double as their precision and - // scale are variable. 
- if (fieldType instanceof ArrowType.Decimal) { - final ArrowType.Decimal thisDecimal = (ArrowType.Decimal) fieldType; - return thisDecimal.getPrecision(); - } else if (fieldType instanceof ArrowType.Int) { - final ArrowType.Int thisInt = (ArrowType.Int) fieldType; - switch (thisInt.getBitWidth()) { - case Byte.SIZE: - return COLUMN_SIZE_BYTE; - case Short.SIZE: - return COLUMN_SIZE_SHORT; - case Integer.SIZE: - return COLUMN_SIZE_INT; - case Long.SIZE: - return COLUMN_SIZE_LONG; - default: - break; - } - } else if (fieldType instanceof ArrowType.Utf8 || fieldType instanceof ArrowType.Binary) { - return COLUMN_SIZE_VARCHAR_AND_BINARY; - } else if (fieldType instanceof ArrowType.Timestamp) { - switch (((ArrowType.Timestamp) fieldType).getUnit()) { - case SECOND: - return COLUMN_SIZE_TIMESTAMP_SECONDS; - case MILLISECOND: - return COLUMN_SIZE_TIMESTAMP_MILLISECONDS; - case MICROSECOND: - return COLUMN_SIZE_TIMESTAMP_MICROSECONDS; - case NANOSECOND: - return COLUMN_SIZE_TIMESTAMP_NANOSECONDS; - default: - break; - } - } else if (fieldType instanceof ArrowType.Time) { - switch (((ArrowType.Time) fieldType).getUnit()) { - case SECOND: - return COLUMN_SIZE_TIME; - case MILLISECOND: - return COLUMN_SIZE_TIME_MILLISECONDS; - case MICROSECOND: - return COLUMN_SIZE_TIME_MICROSECONDS; - case NANOSECOND: - return COLUMN_SIZE_TIME_NANOSECONDS; - default: - break; - } - } else if (fieldType instanceof ArrowType.Date) { - return COLUMN_SIZE_DATE; - } - - return null; - } - - static String sqlToRegexLike(final String sqlPattern) { - final int len = sqlPattern.length(); - final StringBuilder javaPattern = new StringBuilder(len + len); - - for (int i = 0; i < len; i++) { - final char currentChar = sqlPattern.charAt(i); - - if (JAVA_REGEX_SPECIALS.indexOf(currentChar) >= 0) { - javaPattern.append('\\'); - } - - switch (currentChar) { - case '_': - javaPattern.append('.'); - break; - case '%': - javaPattern.append("."); - javaPattern.append('*'); - break; - default: - 
javaPattern.append(currentChar); - break; - } - } - return javaPattern.toString(); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightConnection.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightConnection.java deleted file mode 100644 index c1b1c8f8e6add..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightConnection.java +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import static org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty.replaceSemiColons; - -import io.netty.util.concurrent.DefaultThreadFactory; -import java.sql.SQLException; -import java.util.Properties; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import org.apache.arrow.driver.jdbc.client.ArrowFlightSqlClientHandler; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Preconditions; -import org.apache.calcite.avatica.AvaticaConnection; -import org.apache.calcite.avatica.AvaticaFactory; - -/** Connection to the Arrow Flight server. */ -public final class ArrowFlightConnection extends AvaticaConnection { - - private final BufferAllocator allocator; - private final ArrowFlightSqlClientHandler clientHandler; - private final ArrowFlightConnectionConfigImpl config; - private ExecutorService executorService; - - /** - * Creates a new {@link ArrowFlightConnection}. - * - * @param driver the {@link ArrowFlightJdbcDriver} to use. - * @param factory the {@link AvaticaFactory} to use. - * @param url the URL to use. - * @param properties the {@link Properties} to use. - * @param config the {@link ArrowFlightConnectionConfigImpl} to use. - * @param allocator the {@link BufferAllocator} to use. - * @param clientHandler the {@link ArrowFlightSqlClientHandler} to use. 
- */ - private ArrowFlightConnection( - final ArrowFlightJdbcDriver driver, - final AvaticaFactory factory, - final String url, - final Properties properties, - final ArrowFlightConnectionConfigImpl config, - final BufferAllocator allocator, - final ArrowFlightSqlClientHandler clientHandler) { - super(driver, factory, url, properties); - this.config = Preconditions.checkNotNull(config, "Config cannot be null."); - this.allocator = Preconditions.checkNotNull(allocator, "Allocator cannot be null."); - this.clientHandler = Preconditions.checkNotNull(clientHandler, "Handler cannot be null."); - } - - /** - * Creates a new {@link ArrowFlightConnection} to a {@link FlightClient}. - * - * @param driver the {@link ArrowFlightJdbcDriver} to use. - * @param factory the {@link AvaticaFactory} to use. - * @param url the URL to establish the connection to. - * @param properties the {@link Properties} to use for this session. - * @param allocator the {@link BufferAllocator} to use. - * @return a new {@link ArrowFlightConnection}. - * @throws SQLException on error. 
- */ - static ArrowFlightConnection createNewConnection( - final ArrowFlightJdbcDriver driver, - final AvaticaFactory factory, - String url, - final Properties properties, - final BufferAllocator allocator) - throws SQLException { - url = replaceSemiColons(url); - final ArrowFlightConnectionConfigImpl config = new ArrowFlightConnectionConfigImpl(properties); - final ArrowFlightSqlClientHandler clientHandler = createNewClientHandler(config, allocator); - return new ArrowFlightConnection( - driver, factory, url, properties, config, allocator, clientHandler); - } - - private static ArrowFlightSqlClientHandler createNewClientHandler( - final ArrowFlightConnectionConfigImpl config, final BufferAllocator allocator) - throws SQLException { - try { - return new ArrowFlightSqlClientHandler.Builder() - .withHost(config.getHost()) - .withPort(config.getPort()) - .withUsername(config.getUser()) - .withPassword(config.getPassword()) - .withTrustStorePath(config.getTrustStorePath()) - .withTrustStorePassword(config.getTrustStorePassword()) - .withSystemTrustStore(config.useSystemTrustStore()) - .withTlsRootCertificates(config.getTlsRootCertificatesPath()) - .withClientCertificate(config.getClientCertificatePath()) - .withClientKey(config.getClientKeyPath()) - .withBufferAllocator(allocator) - .withEncryption(config.useEncryption()) - .withDisableCertificateVerification(config.getDisableCertificateVerification()) - .withToken(config.getToken()) - .withCallOptions(config.toCallOption()) - .withRetainCookies(config.retainCookies()) - .withRetainAuth(config.retainAuth()) - .withCatalog(config.getCatalog()) - .build(); - } catch (final SQLException e) { - try { - allocator.close(); - } catch (final Exception allocatorCloseEx) { - e.addSuppressed(allocatorCloseEx); - } - throw e; - } - } - - void reset() throws SQLException { - // Clean up any open Statements - try { - AutoCloseables.close(statementMap.values()); - } catch (final Exception e) { - throw 
AvaticaConnection.HELPER.createException(e.getMessage(), e); - } - - statementMap.clear(); - - // Reset Holdability - this.setHoldability(this.metaData.getResultSetHoldability()); - - // Reset Meta - ((ArrowFlightMetaImpl) this.meta).setDefaultConnectionProperties(); - } - - /** - * Gets the client {@link #clientHandler} backing this connection. - * - * @return the handler. - */ - ArrowFlightSqlClientHandler getClientHandler() { - return clientHandler; - } - - /** - * Gets the {@link ExecutorService} of this connection. - * - * @return the {@link #executorService}. - */ - synchronized ExecutorService getExecutorService() { - return executorService = - executorService == null - ? Executors.newFixedThreadPool( - config.threadPoolSize(), new DefaultThreadFactory(getClass().getSimpleName())) - : executorService; - } - - @Override - public Properties getClientInfo() { - final Properties copy = new Properties(); - copy.putAll(info); - return copy; - } - - @Override - public void close() throws SQLException { - clientHandler.close(); - if (executorService != null) { - executorService.shutdown(); - } - - try { - AutoCloseables.close(clientHandler); - allocator.getChildAllocators().forEach(AutoCloseables::closeNoChecked); - AutoCloseables.close(allocator); - - super.close(); - } catch (final Exception e) { - throw AvaticaConnection.HELPER.createException(e.getMessage(), e); - } - } - - BufferAllocator getBufferAllocator() { - return allocator; - } - - public ArrowFlightMetaImpl getMeta() { - return (ArrowFlightMetaImpl) this.meta; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightInfoStatement.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightInfoStatement.java deleted file mode 100644 index 37ee93722a524..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightInfoStatement.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import java.sql.SQLException; -import java.sql.Statement; -import org.apache.arrow.flight.FlightInfo; - -/** A {@link Statement} that deals with {@link FlightInfo}. */ -public interface ArrowFlightInfoStatement extends Statement { - - @Override - ArrowFlightConnection getConnection() throws SQLException; - - /** - * Executes the query this {@link Statement} is holding. - * - * @return the {@link FlightInfo} for the results. - * @throws SQLException on error. - */ - FlightInfo executeFlightInfoQuery() throws SQLException; -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArray.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArray.java deleted file mode 100644 index 9b9eba51e5ae6..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArray.java +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import java.sql.Array; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.SQLFeatureNotSupportedException; -import java.util.Arrays; -import java.util.Map; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.AbstractArrowFlightJdbcListVectorAccessor; -import org.apache.arrow.driver.jdbc.utils.SqlTypes; -import org.apache.arrow.memory.util.LargeMemoryUtil; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.apache.arrow.vector.util.TransferPair; - -/** - * Implementation of {@link Array} using an underlying {@link FieldVector}. - * - * @see AbstractArrowFlightJdbcListVectorAccessor - */ -public class ArrowFlightJdbcArray implements Array { - - private final FieldVector dataVector; - private final long startOffset; - private final long valuesCount; - - /** - * Instantiate an {@link Array} backed up by given {@link FieldVector}, limited by a start offset - * and values count. - * - * @param dataVector underlying FieldVector, containing the Array items. 
- * @param startOffset offset from FieldVector pointing to this Array's first value. - * @param valuesCount how many items this Array contains. - */ - public ArrowFlightJdbcArray(FieldVector dataVector, long startOffset, long valuesCount) { - this.dataVector = dataVector; - this.startOffset = startOffset; - this.valuesCount = valuesCount; - } - - @Override - public String getBaseTypeName() { - final ArrowType arrowType = this.dataVector.getField().getType(); - return SqlTypes.getSqlTypeNameFromArrowType(arrowType); - } - - @Override - public int getBaseType() { - final ArrowType arrowType = this.dataVector.getField().getType(); - return SqlTypes.getSqlTypeIdFromArrowType(arrowType); - } - - @Override - public Object getArray() throws SQLException { - return getArray(null); - } - - @Override - public Object getArray(Map> map) throws SQLException { - if (map != null) { - throw new SQLFeatureNotSupportedException(); - } - - return getArrayNoBoundCheck(this.dataVector, this.startOffset, this.valuesCount); - } - - @Override - public Object getArray(long index, int count) throws SQLException { - return getArray(index, count, null); - } - - private void checkBoundaries(long index, int count) { - if (index < 0 || index + count > this.startOffset + this.valuesCount) { - throw new ArrayIndexOutOfBoundsException(); - } - } - - private static Object getArrayNoBoundCheck(ValueVector dataVector, long start, long count) { - Object[] result = new Object[LargeMemoryUtil.checkedCastToInt(count)]; - for (int i = 0; i < count; i++) { - result[i] = dataVector.getObject(LargeMemoryUtil.checkedCastToInt(start + i)); - } - - return result; - } - - @Override - public Object getArray(long index, int count, Map> map) throws SQLException { - if (map != null) { - throw new SQLFeatureNotSupportedException(); - } - - checkBoundaries(index, count); - return getArrayNoBoundCheck( - this.dataVector, LargeMemoryUtil.checkedCastToInt(this.startOffset + index), count); - } - - @Override - public 
ResultSet getResultSet() throws SQLException { - return this.getResultSet(null); - } - - @Override - public ResultSet getResultSet(Map> map) throws SQLException { - if (map != null) { - throw new SQLFeatureNotSupportedException(); - } - - return getResultSetNoBoundariesCheck(this.dataVector, this.startOffset, this.valuesCount); - } - - @Override - public ResultSet getResultSet(long index, int count) throws SQLException { - return getResultSet(index, count, null); - } - - private static ResultSet getResultSetNoBoundariesCheck( - ValueVector dataVector, long start, long count) throws SQLException { - TransferPair transferPair = dataVector.getTransferPair(dataVector.getAllocator()); - transferPair.splitAndTransfer( - LargeMemoryUtil.checkedCastToInt(start), LargeMemoryUtil.checkedCastToInt(count)); - FieldVector vectorSlice = (FieldVector) transferPair.getTo(); - - VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.of(vectorSlice); - return ArrowFlightJdbcVectorSchemaRootResultSet.fromVectorSchemaRoot(vectorSchemaRoot); - } - - @Override - public ResultSet getResultSet(long index, int count, Map> map) - throws SQLException { - if (map != null) { - throw new SQLFeatureNotSupportedException(); - } - - checkBoundaries(index, count); - return getResultSetNoBoundariesCheck( - this.dataVector, LargeMemoryUtil.checkedCastToInt(this.startOffset + index), count); - } - - @Override - public void free() {} - - @Override - public String toString() { - JsonStringArrayList array = new JsonStringArrayList<>((int) this.valuesCount); - - try { - array.addAll(Arrays.asList((Object[]) getArray())); - } catch (SQLException e) { - throw new RuntimeException(e); - } - - return array.toString(); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSource.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSource.java deleted file mode 100644 index 
8303ea32864a9..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSource.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import java.sql.SQLException; -import java.util.Map; -import java.util.Properties; -import java.util.Queue; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentLinkedQueue; -import javax.sql.ConnectionEvent; -import javax.sql.ConnectionEventListener; -import javax.sql.ConnectionPoolDataSource; -import javax.sql.PooledConnection; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl; - -/** {@link ConnectionPoolDataSource} implementation for Arrow Flight JDBC Driver. */ -public class ArrowFlightJdbcConnectionPoolDataSource extends ArrowFlightJdbcDataSource - implements ConnectionPoolDataSource, ConnectionEventListener, AutoCloseable { - private final Map> pool = - new ConcurrentHashMap<>(); - - /** - * Instantiates a new DataSource. - * - * @param properties the properties - * @param config the config. 
- */ - protected ArrowFlightJdbcConnectionPoolDataSource( - final Properties properties, final ArrowFlightConnectionConfigImpl config) { - super(properties, config); - } - - /** - * Creates a new {@link ArrowFlightJdbcConnectionPoolDataSource}. - * - * @param properties the properties. - * @return a new data source. - */ - public static ArrowFlightJdbcConnectionPoolDataSource createNewDataSource( - final Properties properties) { - return new ArrowFlightJdbcConnectionPoolDataSource( - properties, new ArrowFlightConnectionConfigImpl(properties)); - } - - @Override - public PooledConnection getPooledConnection() throws SQLException { - final ArrowFlightConnectionConfigImpl config = getConfig(); - return this.getPooledConnection(config.getUser(), config.getPassword()); - } - - @Override - public PooledConnection getPooledConnection(final String username, final String password) - throws SQLException { - final Properties properties = getProperties(username, password); - Queue objectPool = - pool.computeIfAbsent(properties, s -> new ConcurrentLinkedQueue<>()); - ArrowFlightJdbcPooledConnection pooledConnection = objectPool.poll(); - if (pooledConnection == null) { - pooledConnection = createPooledConnection(new ArrowFlightConnectionConfigImpl(properties)); - } else { - pooledConnection.reset(); - } - return pooledConnection; - } - - private ArrowFlightJdbcPooledConnection createPooledConnection( - final ArrowFlightConnectionConfigImpl config) throws SQLException { - ArrowFlightJdbcPooledConnection pooledConnection = - new ArrowFlightJdbcPooledConnection(getConnection(config.getUser(), config.getPassword())); - pooledConnection.addConnectionEventListener(this); - return pooledConnection; - } - - @Override - public void connectionClosed(ConnectionEvent connectionEvent) { - final ArrowFlightJdbcPooledConnection pooledConnection = - (ArrowFlightJdbcPooledConnection) connectionEvent.getSource(); - Queue connectionQueue = - pool.get(pooledConnection.getProperties()); - 
connectionQueue.add(pooledConnection); - } - - @Override - public void connectionErrorOccurred(ConnectionEvent connectionEvent) {} - - @Override - public void close() throws Exception { - SQLException lastException = null; - for (Queue connections : this.pool.values()) { - while (!connections.isEmpty()) { - PooledConnection pooledConnection = connections.poll(); - try { - pooledConnection.close(); - } catch (SQLException e) { - lastException = e; - } - } - } - - if (lastException != null) { - throw lastException; - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursor.java deleted file mode 100644 index fbf0b308c2d6d..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursor.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import java.util.ArrayList; -import java.util.Calendar; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.calcite.avatica.ColumnMetaData; -import org.apache.calcite.avatica.util.AbstractCursor; -import org.apache.calcite.avatica.util.ArrayImpl; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Arrow Flight Jdbc's Cursor class. */ -public class ArrowFlightJdbcCursor extends AbstractCursor { - - private static final Logger LOGGER; - private final VectorSchemaRoot root; - private final int rowCount; - private int currentRow = -1; - - static { - LOGGER = LoggerFactory.getLogger(ArrowFlightJdbcCursor.class); - } - - public ArrowFlightJdbcCursor(VectorSchemaRoot root) { - this.root = root; - rowCount = root.getRowCount(); - } - - @Override - public List createAccessors( - List columns, Calendar localCalendar, ArrayImpl.Factory factory) { - final List fieldVectors = root.getFieldVectors(); - - return IntStream.range(0, fieldVectors.size()) - .mapToObj(root::getVector) - .map(this::createAccessor) - .collect(Collectors.toCollection(() -> new ArrayList<>(fieldVectors.size()))); - } - - private Accessor createAccessor(FieldVector vector) { - return ArrowFlightJdbcAccessorFactory.createAccessor( - vector, - this::getCurrentRow, - (boolean wasNull) -> { - // AbstractCursor creates a boolean array of length 1 to hold the wasNull value - this.wasNull[0] = wasNull; - }); - } - - /** - * ArrowFlightJdbcAccessors do not use {@link AbstractCursor.Getter}, as it would box primitive - * types and cause performance issues. Each Accessor implementation works directly on Arrow - * Vectors. 
- */ - @Override - protected Getter createGetter(int column) { - throw new UnsupportedOperationException("Not allowed."); - } - - @Override - public boolean next() { - currentRow++; - return currentRow < rowCount; - } - - @Override - public void close() { - try { - AutoCloseables.close(root); - } catch (Exception e) { - LOGGER.error(e.getMessage(), e); - } - } - - private int getCurrentRow() { - return currentRow; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDataSource.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDataSource.java deleted file mode 100644 index 0b5d7fd368bcf..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDataSource.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import static org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty; - -import java.io.PrintWriter; -import java.sql.SQLException; -import java.sql.SQLFeatureNotSupportedException; -import java.util.Properties; -import java.util.logging.Logger; -import javax.sql.DataSource; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl; -import org.apache.arrow.util.Preconditions; - -/** {@link DataSource} implementation for Arrow Flight JDBC Driver. */ -public class ArrowFlightJdbcDataSource implements DataSource { - private final Properties properties; - private final ArrowFlightConnectionConfigImpl config; - private PrintWriter logWriter; - - /** Instantiates a new DataSource. */ - protected ArrowFlightJdbcDataSource( - final Properties properties, final ArrowFlightConnectionConfigImpl config) { - this.properties = Preconditions.checkNotNull(properties); - this.config = Preconditions.checkNotNull(config); - } - - /** - * Gets the {@link #config} for this {@link ArrowFlightJdbcDataSource}. - * - * @return the {@link ArrowFlightConnectionConfigImpl}. - */ - protected final ArrowFlightConnectionConfigImpl getConfig() { - return config; - } - - /** - * Gets a copy of the {@link #properties} for this {@link ArrowFlightJdbcDataSource} with the - * provided {@code username} and {@code password}. - * - * @return the {@link Properties} for this data source. 
- */ - protected final Properties getProperties(final String username, final String password) { - final Properties newProperties = new Properties(); - newProperties.putAll(this.properties); - if (username != null) { - newProperties.replace(ArrowFlightConnectionProperty.USER.camelName(), username); - } - if (password != null) { - newProperties.replace(ArrowFlightConnectionProperty.PASSWORD.camelName(), password); - } - return ArrowFlightJdbcDriver.lowerCasePropertyKeys(newProperties); - } - - /** - * Creates a new {@link ArrowFlightJdbcDataSource}. - * - * @param properties the properties. - * @return a new data source. - */ - public static ArrowFlightJdbcDataSource createNewDataSource(final Properties properties) { - return new ArrowFlightJdbcDataSource( - properties, new ArrowFlightConnectionConfigImpl(properties)); - } - - @Override - public ArrowFlightConnection getConnection() throws SQLException { - return getConnection(config.getUser(), config.getPassword()); - } - - @Override - public ArrowFlightConnection getConnection(final String username, final String password) - throws SQLException { - final Properties properties = getProperties(username, password); - return new ArrowFlightJdbcDriver().connect(config.url(), properties); - } - - @Override - public T unwrap(Class aClass) throws SQLException { - throw new SQLException("ArrowFlightJdbcDataSource is not a wrapper."); - } - - @Override - public boolean isWrapperFor(Class aClass) { - return false; - } - - @Override - public PrintWriter getLogWriter() { - return this.logWriter; - } - - @Override - public void setLogWriter(PrintWriter logWriter) { - this.logWriter = logWriter; - } - - @Override - public void setLoginTimeout(int timeout) throws SQLException { - throw new SQLFeatureNotSupportedException("Setting Login timeout is not supported."); - } - - @Override - public int getLoginTimeout() { - return 0; - } - - @Override - public Logger getParentLogger() { - return Logger.getLogger("ArrowFlightJdbc"); - } -} 
diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriver.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriver.java deleted file mode 100644 index 53e6120f6271b..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriver.java +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import static org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty.replaceSemiColons; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.net.URI; -import java.nio.charset.StandardCharsets; -import java.sql.SQLException; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Properties; -import java.util.logging.Logger; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty; -import org.apache.arrow.driver.jdbc.utils.UrlParser; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.util.VisibleForTesting; -import org.apache.calcite.avatica.AvaticaConnection; -import org.apache.calcite.avatica.DriverVersion; -import org.apache.calcite.avatica.Meta; -import org.apache.calcite.avatica.UnregisteredDriver; - -/** JDBC driver for querying data from an Apache Arrow Flight server. */ -public class ArrowFlightJdbcDriver extends UnregisteredDriver { - private static final String CONNECT_STRING_PREFIX = "jdbc:arrow-flight-sql://"; - private static final String CONNECT_STRING_PREFIX_DEPRECATED = "jdbc:arrow-flight://"; - private static final String CONNECTION_STRING_EXPECTED = - "jdbc:arrow-flight-sql://[host][:port][?param1=value&...]"; - private static DriverVersion version; - - static { - // Special code for supporting Java9 and higher. 
- // Netty requires some extra properties to unlock some native memory management api - // Setting this property if not already set externally - // This has to be done before any netty class is being loaded - final String key = "io.netty.tryReflectionSetAccessible"; - final String tryReflectionSetAccessible = System.getProperty(key); - if (tryReflectionSetAccessible == null) { - System.setProperty(key, Boolean.TRUE.toString()); - } - - new ArrowFlightJdbcDriver().register(); - } - - @Override - public Logger getParentLogger() { - // Return the logger associated with the driver package ('org.apache.arrow.driver.jdbc') - // When packaged in flight-sql-jdbc-driver, it will also apply to all shaded dependencies - return Logger.getLogger(getClass().getPackage().getName()); - } - - @Override - public ArrowFlightConnection connect(final String url, final Properties info) - throws SQLException { - final Properties properties = new Properties(info); - properties.putAll(info); - - if (url != null) { - final Optional> maybeProperties = getUrlsArgs(url); - if (!maybeProperties.isPresent()) { - return null; - } - final Map propertiesFromUrl = maybeProperties.get(); - properties.putAll(propertiesFromUrl); - } - - try { - return ArrowFlightConnection.createNewConnection( - this, factory, url, lowerCasePropertyKeys(properties), new RootAllocator(Long.MAX_VALUE)); - } catch (final FlightRuntimeException e) { - throw new SQLException("Failed to connect.", e); - } - } - - @Override - protected String getFactoryClassName(final JdbcVersion jdbcVersion) { - return ArrowFlightJdbcFactory.class.getName(); - } - - @Override - @SuppressWarnings("StringSplitter") - protected DriverVersion createDriverVersion() { - if (version == null) { - final InputStream flightProperties = - this.getClass().getResourceAsStream("/properties/flight.properties"); - if (flightProperties == null) { - throw new RuntimeException( - "Flight Properties not found. 
Ensure the JAR was built properly."); - } - try (final Reader reader = - new BufferedReader(new InputStreamReader(flightProperties, StandardCharsets.UTF_8))) { - final Properties properties = new Properties(); - properties.load(reader); - - final String parentName = properties.getProperty("org.apache.arrow.flight.name"); - final String parentVersion = properties.getProperty("org.apache.arrow.flight.version"); - final String[] pVersion = parentVersion.split("\\."); - - final int parentMajorVersion = Integer.parseInt(pVersion[0]); - final int parentMinorVersion = Integer.parseInt(pVersion[1]); - - final String childName = properties.getProperty("org.apache.arrow.flight.jdbc-driver.name"); - final String childVersion = - properties.getProperty("org.apache.arrow.flight.jdbc-driver.version"); - final String[] cVersion = childVersion.split("\\."); - - final int childMajorVersion = Integer.parseInt(cVersion[0]); - final int childMinorVersion = Integer.parseInt(cVersion[1]); - - version = - new DriverVersion( - childName, - childVersion, - parentName, - parentVersion, - true, - childMajorVersion, - childMinorVersion, - parentMajorVersion, - parentMinorVersion); - } catch (final IOException e) { - throw new RuntimeException("Failed to load driver version.", e); - } - } - - return version; - } - - @Override - public Meta createMeta(final AvaticaConnection connection) { - return new ArrowFlightMetaImpl(connection); - } - - @Override - protected String getConnectStringPrefix() { - return CONNECT_STRING_PREFIX; - } - - @Override - public boolean acceptsURL(final String url) { - Preconditions.checkNotNull(url); - return url.startsWith(CONNECT_STRING_PREFIX) - || url.startsWith(CONNECT_STRING_PREFIX_DEPRECATED); - } - - /** - * Parses the provided url based on the format this driver accepts, retrieving arguments after the - * {@link #CONNECT_STRING_PREFIX}. - * - *

    This method gets the args if the provided URL follows this pattern: {@code - * jdbc:arrow-flight-sql://:[/?key1=val1&key2=val2&(...)]} - * - *

  • - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
    GroupDefinitionValue
    ? — inaccessible{@link #getConnectStringPrefix} - * the URL prefix accepted by this driver, i.e., - * {@code "jdbc:arrow-flight-sql://"} - *
    1IPv4 host name - * first word after previous group and before "{@code :}" - *
    2IPv4 port number - * first number after previous group and before "{@code /?}" - *
    3custom call parameters - * all parameters provided after "{@code /?}" — must follow the - * pattern: "{@code key=value}" with "{@code &}" separating a - * parameter from another - *
    - * - * @param url The url to parse. - * @return the parsed arguments, or an empty optional if the driver does not handle this URL. - * @throws SQLException If an error occurs while trying to parse the URL. - */ - @VisibleForTesting // ArrowFlightJdbcDriverTest - Optional> getUrlsArgs(String url) throws SQLException { - - /* - * - * Perhaps this logic should be inside a utility class, separated from this - * one, so as to better delegate responsibilities and concerns throughout - * the code and increase maintainability. - * - * ===== - * - * Keep in mind that the URL must ALWAYS follow the pattern: - * "jdbc:arrow-flight-sql://:[/?param1=value1¶m2=value2&(...)]." - * - */ - - final Properties resultMap = new Properties(); - url = replaceSemiColons(url); - - if (!url.startsWith("jdbc:")) { - throw new SQLException( - "Connection string must start with 'jdbc:'. Expected format: " - + CONNECTION_STRING_EXPECTED); - } - - // It's necessary to use a string without "jdbc:" at the beginning to be parsed as a valid URL. - url = url.substring(5); - - final URI uri; - - try { - uri = URI.create(url); - } catch (final IllegalArgumentException e) { - throw new SQLException("Malformed/invalid URL!", e); - } - - if (!Objects.equals(uri.getScheme(), "arrow-flight") - && !Objects.equals(uri.getScheme(), "arrow-flight-sql")) { - return Optional.empty(); - } - - if (uri.getHost() == null) { - throw new SQLException( - "URL must have a host. Expected format: " + CONNECTION_STRING_EXPECTED); - } else if (uri.getPort() < 0) { - throw new SQLException( - "URL must have a port. 
Expected format: " + CONNECTION_STRING_EXPECTED); - } - resultMap.put(ArrowFlightConnectionProperty.HOST.camelName(), uri.getHost()); // host - resultMap.put(ArrowFlightConnectionProperty.PORT.camelName(), uri.getPort()); // port - - final String extraParams = uri.getRawQuery(); // optional params - if (extraParams != null) { - final Map keyValuePairs = UrlParser.parse(extraParams, "&"); - resultMap.putAll(keyValuePairs); - } - - return Optional.of(resultMap); - } - - static Properties lowerCasePropertyKeys(final Properties properties) { - final Properties resultProperty = new Properties(); - properties.forEach((k, v) -> resultProperty.put(k.toString().toLowerCase(), v)); - return resultProperty; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactory.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactory.java deleted file mode 100644 index e1ccfc820f0d6..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactory.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.Properties; -import java.util.TimeZone; -import org.apache.arrow.driver.jdbc.client.ArrowFlightSqlClientHandler; -import org.apache.arrow.memory.RootAllocator; -import org.apache.calcite.avatica.AvaticaConnection; -import org.apache.calcite.avatica.AvaticaFactory; -import org.apache.calcite.avatica.AvaticaResultSetMetaData; -import org.apache.calcite.avatica.AvaticaSpecificDatabaseMetaData; -import org.apache.calcite.avatica.AvaticaStatement; -import org.apache.calcite.avatica.Meta; -import org.apache.calcite.avatica.QueryState; -import org.apache.calcite.avatica.UnregisteredDriver; - -/** Factory for the Arrow Flight JDBC Driver. */ -public class ArrowFlightJdbcFactory implements AvaticaFactory { - private final int major; - private final int minor; - - // This need to be public so Avatica can call this constructor - public ArrowFlightJdbcFactory() { - this(4, 1); - } - - private ArrowFlightJdbcFactory(final int major, final int minor) { - this.major = major; - this.minor = minor; - } - - @Override - public AvaticaConnection newConnection( - final UnregisteredDriver driver, - final AvaticaFactory factory, - final String url, - final Properties info) - throws SQLException { - return ArrowFlightConnection.createNewConnection( - (ArrowFlightJdbcDriver) driver, factory, url, info, new RootAllocator(Long.MAX_VALUE)); - } - - @Override - public AvaticaStatement newStatement( - final AvaticaConnection connection, - final Meta.StatementHandle handle, - final int resultType, - final int resultSetConcurrency, - final int resultSetHoldability) { - return new ArrowFlightStatement( - (ArrowFlightConnection) connection, - handle, - resultType, - resultSetConcurrency, - resultSetHoldability); - } - - @Override - public ArrowFlightPreparedStatement newPreparedStatement( - final AvaticaConnection connection, - final Meta.StatementHandle 
statementHandle, - final Meta.Signature signature, - final int resultType, - final int resultSetConcurrency, - final int resultSetHoldability) - throws SQLException { - final ArrowFlightConnection flightConnection = (ArrowFlightConnection) connection; - ArrowFlightSqlClientHandler.PreparedStatement preparedStatement = - flightConnection.getMeta().getPreparedStatement(statementHandle); - - return ArrowFlightPreparedStatement.newPreparedStatement( - flightConnection, - preparedStatement, - statementHandle, - signature, - resultType, - resultSetConcurrency, - resultSetHoldability); - } - - @Override - public ArrowFlightJdbcVectorSchemaRootResultSet newResultSet( - final AvaticaStatement statement, - final QueryState state, - final Meta.Signature signature, - final TimeZone timeZone, - final Meta.Frame frame) - throws SQLException { - final ResultSetMetaData metaData = newResultSetMetaData(statement, signature); - - return new ArrowFlightJdbcFlightStreamResultSet( - statement, state, signature, metaData, timeZone, frame); - } - - @Override - public AvaticaSpecificDatabaseMetaData newDatabaseMetaData(final AvaticaConnection connection) { - return new ArrowDatabaseMetadata(connection); - } - - @Override - public ResultSetMetaData newResultSetMetaData( - final AvaticaStatement avaticaStatement, final Meta.Signature signature) { - return new AvaticaResultSetMetaData(avaticaStatement, null, signature); - } - - @Override - public int getJdbcMajorVersion() { - return major; - } - - @Override - public int getJdbcMinorVersion() { - return minor; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFlightStreamResultSet.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFlightStreamResultSet.java deleted file mode 100644 index aabaf01e638a4..0000000000000 --- 
a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFlightStreamResultSet.java +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static org.apache.arrow.driver.jdbc.utils.FlightEndpointDataQueue.createNewQueue; - -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.Optional; -import java.util.TimeZone; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.driver.jdbc.client.CloseableEndpointStreamPair; -import org.apache.arrow.driver.jdbc.utils.FlightEndpointDataQueue; -import org.apache.arrow.driver.jdbc.utils.VectorSchemaRootTransformer; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.calcite.avatica.AvaticaResultSet; -import org.apache.calcite.avatica.AvaticaResultSetMetaData; -import org.apache.calcite.avatica.AvaticaStatement; -import org.apache.calcite.avatica.Meta; -import org.apache.calcite.avatica.QueryState; - -/** 
- * {@link ResultSet} implementation for Arrow Flight used to access the results of multiple {@link - * FlightStream} objects. - */ -public final class ArrowFlightJdbcFlightStreamResultSet - extends ArrowFlightJdbcVectorSchemaRootResultSet { - - private final ArrowFlightConnection connection; - private final FlightInfo flightInfo; - private CloseableEndpointStreamPair currentEndpointData; - private FlightEndpointDataQueue flightEndpointDataQueue; - - private VectorSchemaRootTransformer transformer; - private VectorSchemaRoot currentVectorSchemaRoot; - - private Schema schema; - - /** Public constructor used by ArrowFlightJdbcFactory. */ - ArrowFlightJdbcFlightStreamResultSet( - final AvaticaStatement statement, - final QueryState state, - final Meta.Signature signature, - final ResultSetMetaData resultSetMetaData, - final TimeZone timeZone, - final Meta.Frame firstFrame) - throws SQLException { - super(statement, state, signature, resultSetMetaData, timeZone, firstFrame); - this.connection = (ArrowFlightConnection) statement.connection; - this.flightInfo = ((ArrowFlightInfoStatement) statement).executeFlightInfoQuery(); - } - - /** Private constructor for fromFlightInfo. */ - private ArrowFlightJdbcFlightStreamResultSet( - final ArrowFlightConnection connection, - final QueryState state, - final Meta.Signature signature, - final ResultSetMetaData resultSetMetaData, - final TimeZone timeZone, - final Meta.Frame firstFrame, - final FlightInfo flightInfo) - throws SQLException { - super(null, state, signature, resultSetMetaData, timeZone, firstFrame); - this.connection = connection; - this.flightInfo = flightInfo; - } - - /** - * Create a {@link ResultSet} which pulls data from given {@link FlightInfo}. This is used to - * fetch result sets from DatabaseMetadata calls and skips the Avatica factory. - * - * @param connection The connection linked to the returned ResultSet. - * @param flightInfo The FlightInfo from which data will be iterated by the returned ResultSet. 
- * @param transformer Optional transformer for processing VectorSchemaRoot before access from - * ResultSet - * @return A ResultSet which pulls data from given FlightInfo. - */ - static ArrowFlightJdbcFlightStreamResultSet fromFlightInfo( - final ArrowFlightConnection connection, - final FlightInfo flightInfo, - final VectorSchemaRootTransformer transformer) - throws SQLException { - // Similar to how org.apache.calcite.avatica.util.ArrayFactoryImpl does - - final TimeZone timeZone = TimeZone.getDefault(); - final QueryState state = new QueryState(); - - final Meta.Signature signature = ArrowFlightMetaImpl.newSignature(null, null, null); - - final AvaticaResultSetMetaData resultSetMetaData = - new AvaticaResultSetMetaData(null, null, signature); - final ArrowFlightJdbcFlightStreamResultSet resultSet = - new ArrowFlightJdbcFlightStreamResultSet( - connection, state, signature, resultSetMetaData, timeZone, null, flightInfo); - - resultSet.transformer = transformer; - - resultSet.populateData(); - return resultSet; - } - - private void loadNewQueue() { - Optional.ofNullable(flightEndpointDataQueue).ifPresent(AutoCloseables::closeNoChecked); - flightEndpointDataQueue = createNewQueue(connection.getExecutorService()); - } - - private void loadNewFlightStream() throws SQLException { - if (currentEndpointData != null) { - AutoCloseables.closeNoChecked(currentEndpointData); - } - this.currentEndpointData = getNextEndpointStream(true); - } - - @Override - protected AvaticaResultSet execute() throws SQLException { - if (flightInfo != null) { - schema = flightInfo.getSchemaOptional().orElse(null); - populateData(); - } - return this; - } - - private void populateData() throws SQLException { - loadNewQueue(); - flightEndpointDataQueue.enqueue(connection.getClientHandler().getStreams(flightInfo)); - loadNewFlightStream(); - - // Ownership of the root will be passed onto the cursor. 
- if (currentEndpointData != null) { - populateDataForCurrentFlightStream(); - } - } - - private void populateDataForCurrentFlightStream() throws SQLException { - final VectorSchemaRoot originalRoot = currentEndpointData.getStream().getRoot(); - - if (transformer != null) { - try { - currentVectorSchemaRoot = transformer.transform(originalRoot, currentVectorSchemaRoot); - } catch (final Exception e) { - throw new SQLException("Failed to transform VectorSchemaRoot.", e); - } - } else { - currentVectorSchemaRoot = originalRoot; - } - - populateData(currentVectorSchemaRoot, schema); - } - - /** Expose appMetadata associated with the underlying FlightInfo for this ResultSet. */ - public byte[] getAppMetadata() { - return flightInfo.getAppMetadata(); - } - - @Override - public boolean next() throws SQLException { - if (currentVectorSchemaRoot == null) { - return false; - } - while (true) { - final boolean hasNext = super.next(); - final int maxRows = statement != null ? statement.getMaxRows() : 0; - if (maxRows != 0 && this.getRow() > maxRows) { - if (statement.isCloseOnCompletion()) { - statement.close(); - } - return false; - } - - if (hasNext) { - return true; - } - - if (currentEndpointData != null) { - currentEndpointData.getStream().getRoot().clear(); - if (currentEndpointData.getStream().next()) { - populateDataForCurrentFlightStream(); - continue; - } - - flightEndpointDataQueue.enqueue(currentEndpointData); - } - - currentEndpointData = getNextEndpointStream(false); - - if (currentEndpointData != null) { - populateDataForCurrentFlightStream(); - continue; - } - - if (statement != null && statement.isCloseOnCompletion()) { - statement.close(); - } - - return false; - } - } - - @Override - protected void cancel() { - super.cancel(); - final CloseableEndpointStreamPair currentEndpoint = this.currentEndpointData; - if (currentEndpoint != null) { - currentEndpoint.getStream().cancel("Cancel", null); - } - - if (flightEndpointDataQueue != null) { - try { - 
flightEndpointDataQueue.close(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - } - - @Override - public synchronized void close() { - try { - if (flightEndpointDataQueue != null) { - // flightStreamQueue should close currentFlightStream internally - flightEndpointDataQueue.close(); - } else if (currentEndpointData != null) { - // close is only called for currentFlightStream if there's no queue - currentEndpointData.close(); - } - - } catch (final Exception e) { - throw new RuntimeException(e); - } finally { - super.close(); - } - } - - private CloseableEndpointStreamPair getNextEndpointStream(final boolean canTimeout) - throws SQLException { - if (canTimeout) { - final int statementTimeout = statement != null ? statement.getQueryTimeout() : 0; - return statementTimeout != 0 - ? flightEndpointDataQueue.next(statementTimeout, TimeUnit.SECONDS) - : flightEndpointDataQueue.next(); - } else { - return flightEndpointDataQueue.next(); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcPooledConnection.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcPooledConnection.java deleted file mode 100644 index 96dd356141acd..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcPooledConnection.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import java.sql.Connection; -import java.sql.SQLException; -import java.util.Collections; -import java.util.HashSet; -import java.util.Properties; -import java.util.Set; -import javax.sql.ConnectionEvent; -import javax.sql.ConnectionEventListener; -import javax.sql.PooledConnection; -import javax.sql.StatementEventListener; -import org.apache.arrow.driver.jdbc.utils.ConnectionWrapper; - -/** {@link PooledConnection} implementation for Arrow Flight JDBC Driver. */ -public class ArrowFlightJdbcPooledConnection implements PooledConnection { - - private final ArrowFlightConnection connection; - private final Set eventListeners; - private final Set statementEventListeners; - - private final class ConnectionHandle extends ConnectionWrapper { - private boolean closed = false; - - public ConnectionHandle() { - super(connection); - } - - @Override - public void close() throws SQLException { - if (!closed) { - closed = true; - onConnectionClosed(); - } - } - - @Override - public boolean isClosed() throws SQLException { - return this.closed || super.isClosed(); - } - } - - ArrowFlightJdbcPooledConnection(ArrowFlightConnection connection) { - this.connection = connection; - this.eventListeners = Collections.synchronizedSet(new HashSet<>()); - this.statementEventListeners = Collections.synchronizedSet(new HashSet<>()); - } - - public Properties getProperties() { - return connection.getClientInfo(); - } - - @Override - public Connection getConnection() throws SQLException { - return new ConnectionHandle(); - } - - 
@Override - public void close() throws SQLException { - this.connection.close(); - } - - void reset() throws SQLException { - this.connection.reset(); - } - - @Override - public void addConnectionEventListener(ConnectionEventListener listener) { - eventListeners.add(listener); - } - - @Override - public void removeConnectionEventListener(ConnectionEventListener listener) { - this.eventListeners.remove(listener); - } - - @Override - public void addStatementEventListener(StatementEventListener listener) { - statementEventListeners.add(listener); - } - - @Override - public void removeStatementEventListener(StatementEventListener listener) { - this.statementEventListeners.remove(listener); - } - - private void onConnectionClosed() { - ConnectionEvent connectionEvent = new ConnectionEvent(this); - eventListeners.forEach(listener -> listener.connectionClosed(connectionEvent)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTime.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTime.java deleted file mode 100644 index f87d7f9004c30..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTime.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static org.apache.calcite.avatica.util.DateTimeUtils.MILLIS_PER_DAY; - -import com.google.common.collect.ImmutableList; -import java.sql.Time; -import java.time.LocalTime; -import java.time.temporal.ChronoField; -import java.util.List; -import java.util.Objects; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.util.VisibleForTesting; - -/** - * Wrapper class for Time objects to include the milliseconds part in ISO 8601 format in - * this#toString. - */ -public class ArrowFlightJdbcTime extends Time { - private static final List LEADING_ZEROES = ImmutableList.of("", "0", "00"); - - // Desired length of the millisecond portion should be 3 - private static final int DESIRED_MILLIS_LENGTH = 3; - - // Millis of the date time object. - private final int millisReprValue; - - /** - * Constructs this object based on epoch millis. - * - * @param milliseconds milliseconds representing Time. 
- */ - public ArrowFlightJdbcTime(final long milliseconds) { - super(milliseconds); - millisReprValue = getMillisReprValue(milliseconds); - } - - @VisibleForTesting - ArrowFlightJdbcTime(final LocalTime time) { - // Although the constructor is deprecated, this is the exact same code as - // Time#valueOf(LocalTime) - super(time.getHour(), time.getMinute(), time.getSecond()); - millisReprValue = time.get(ChronoField.MILLI_OF_SECOND); - } - - private int getMillisReprValue(long milliseconds) { - // Extract the millisecond part from epoch nano day - if (milliseconds >= MILLIS_PER_DAY) { - // Convert to Epoch Day - milliseconds %= MILLIS_PER_DAY; - } else if (milliseconds < 0) { - // LocalTime#ofNanoDay only accepts positive values - milliseconds -= ((milliseconds / MILLIS_PER_DAY) - 1) * MILLIS_PER_DAY; - } - return LocalTime.ofNanoOfDay(TimeUnit.MILLISECONDS.toNanos(milliseconds)) - .get(ChronoField.MILLI_OF_SECOND); - } - - @Override - public String toString() { - final StringBuilder time = new StringBuilder().append(super.toString()); - - if (millisReprValue > 0) { - final String millisString = Integer.toString(millisReprValue); - - // dot to separate the fractional seconds - time.append("."); - - final int millisLength = millisString.length(); - if (millisLength < DESIRED_MILLIS_LENGTH) { - // add necessary leading zeroes - time.append(LEADING_ZEROES.get(DESIRED_MILLIS_LENGTH - millisLength)); - } - time.append(millisString); - } - - return time.toString(); - } - - // Spotbugs requires these methods to be overridden - @Override - public boolean equals(Object obj) { - return super.equals(obj); - } - - @Override - public int hashCode() { - return Objects.hash(super.hashCode(), this.millisReprValue); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcVectorSchemaRootResultSet.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcVectorSchemaRootResultSet.java 
deleted file mode 100644 index 0dc2b07c974f6..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcVectorSchemaRootResultSet.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.HashSet; -import java.util.List; -import java.util.Objects; -import java.util.Set; -import java.util.TimeZone; -import org.apache.arrow.driver.jdbc.utils.ConvertUtils; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.calcite.avatica.AvaticaResultSet; -import org.apache.calcite.avatica.AvaticaResultSetMetaData; -import org.apache.calcite.avatica.AvaticaStatement; -import org.apache.calcite.avatica.ColumnMetaData; -import org.apache.calcite.avatica.Meta; -import org.apache.calcite.avatica.Meta.Frame; -import org.apache.calcite.avatica.Meta.Signature; -import org.apache.calcite.avatica.QueryState; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** {@link ResultSet} implementation used to access a {@link VectorSchemaRoot}. */ -public class ArrowFlightJdbcVectorSchemaRootResultSet extends AvaticaResultSet { - - private static final Logger LOGGER = - LoggerFactory.getLogger(ArrowFlightJdbcVectorSchemaRootResultSet.class); - VectorSchemaRoot vectorSchemaRoot; - - ArrowFlightJdbcVectorSchemaRootResultSet( - final AvaticaStatement statement, - final QueryState state, - final Signature signature, - final ResultSetMetaData resultSetMetaData, - final TimeZone timeZone, - final Frame firstFrame) - throws SQLException { - super(statement, state, signature, resultSetMetaData, timeZone, firstFrame); - } - - /** - * Instantiate a ResultSet backed up by given VectorSchemaRoot. - * - * @param vectorSchemaRoot root from which the ResultSet will access. 
- * @return a ResultSet which accesses the given VectorSchemaRoot - */ - public static ArrowFlightJdbcVectorSchemaRootResultSet fromVectorSchemaRoot( - final VectorSchemaRoot vectorSchemaRoot) throws SQLException { - // Similar to how org.apache.calcite.avatica.util.ArrayFactoryImpl does - - final TimeZone timeZone = TimeZone.getDefault(); - final QueryState state = new QueryState(); - - final Meta.Signature signature = ArrowFlightMetaImpl.newSignature(null, null, null); - - final AvaticaResultSetMetaData resultSetMetaData = - new AvaticaResultSetMetaData(null, null, signature); - final ArrowFlightJdbcVectorSchemaRootResultSet resultSet = - new ArrowFlightJdbcVectorSchemaRootResultSet( - null, state, signature, resultSetMetaData, timeZone, null); - - resultSet.populateData(vectorSchemaRoot); - return resultSet; - } - - @Override - protected AvaticaResultSet execute() throws SQLException { - throw new RuntimeException("Can only execute with execute(VectorSchemaRoot)"); - } - - void populateData(final VectorSchemaRoot vectorSchemaRoot) { - populateData(vectorSchemaRoot, null); - } - - void populateData(final VectorSchemaRoot vectorSchemaRoot, final Schema schema) { - Schema currentSchema = schema == null ? 
vectorSchemaRoot.getSchema() : schema; - final List columns = - ConvertUtils.convertArrowFieldsToColumnMetaDataList(currentSchema.getFields()); - signature.columns.clear(); - signature.columns.addAll(columns); - - this.vectorSchemaRoot = vectorSchemaRoot; - execute2(new ArrowFlightJdbcCursor(vectorSchemaRoot), this.signature.columns); - } - - @Override - protected void cancel() { - signature.columns.clear(); - super.cancel(); - try { - AutoCloseables.close(vectorSchemaRoot); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public void close() { - final Set exceptions = new HashSet<>(); - try { - if (isClosed()) { - return; - } - } catch (final SQLException e) { - exceptions.add(e); - } - try { - AutoCloseables.close(vectorSchemaRoot); - } catch (final Exception e) { - exceptions.add(e); - } - if (!Objects.isNull(statement)) { - try { - super.close(); - } catch (final Exception e) { - exceptions.add(e); - } - } - exceptions.parallelStream().forEach(e -> LOGGER.error(e.getMessage(), e)); - exceptions.stream() - .findAny() - .ifPresent( - e -> { - throw new RuntimeException(e); - }); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightMetaImpl.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightMetaImpl.java deleted file mode 100644 index 9c7112f1c30d8..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightMetaImpl.java +++ /dev/null @@ -1,303 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import java.sql.Connection; -import java.sql.SQLException; -import java.sql.SQLTimeoutException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import org.apache.arrow.driver.jdbc.client.ArrowFlightSqlClientHandler.PreparedStatement; -import org.apache.arrow.driver.jdbc.utils.AvaticaParameterBinder; -import org.apache.arrow.driver.jdbc.utils.ConvertUtils; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.calcite.avatica.AvaticaConnection; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.ColumnMetaData; -import org.apache.calcite.avatica.MetaImpl; -import org.apache.calcite.avatica.NoSuchStatementException; -import org.apache.calcite.avatica.QueryState; -import org.apache.calcite.avatica.remote.TypedValue; - -/** Metadata handler for Arrow Flight. */ -public class ArrowFlightMetaImpl extends MetaImpl { - private final Map statementHandlePreparedStatementMap; - - /** - * Constructs a {@link MetaImpl} object specific for Arrow Flight. - * - * @param connection A {@link AvaticaConnection}. - */ - public ArrowFlightMetaImpl(final AvaticaConnection connection) { - super(connection); - this.statementHandlePreparedStatementMap = new ConcurrentHashMap<>(); - setDefaultConnectionProperties(); - } - - /** Construct a signature. 
*/ - static Signature newSignature(final String sql, Schema resultSetSchema, Schema parameterSchema) { - List columnMetaData = - resultSetSchema == null - ? new ArrayList<>() - : ConvertUtils.convertArrowFieldsToColumnMetaDataList(resultSetSchema.getFields()); - List parameters = - parameterSchema == null - ? new ArrayList<>() - : ConvertUtils.convertArrowFieldsToAvaticaParameters(parameterSchema.getFields()); - - return new Signature( - columnMetaData, - sql, - parameters, - Collections.emptyMap(), - null, // unnecessary, as SQL requests use ArrowFlightJdbcCursor - StatementType.SELECT); - } - - @Override - public void closeStatement(final StatementHandle statementHandle) { - PreparedStatement preparedStatement = - statementHandlePreparedStatementMap.remove(new StatementHandleKey(statementHandle)); - // Testing if the prepared statement was created because the statement can be not created until - // this moment - if (preparedStatement != null) { - preparedStatement.close(); - } - } - - @Override - public void commit(final ConnectionHandle connectionHandle) { - // TODO Fill this stub. 
- } - - @Override - public ExecuteResult execute( - final StatementHandle statementHandle, - final List typedValues, - final long maxRowCount) { - Preconditions.checkArgument( - connection.id.equals(statementHandle.connectionId), "Connection IDs are not consistent"); - PreparedStatement preparedStatement = getPreparedStatement(statementHandle); - - if (preparedStatement == null) { - throw new IllegalStateException("Prepared statement not found: " + statementHandle); - } - - new AvaticaParameterBinder( - preparedStatement, ((ArrowFlightConnection) connection).getBufferAllocator()) - .bind(typedValues); - - if (statementHandle.signature == null) { - // Update query - long updatedCount = preparedStatement.executeUpdate(); - return new ExecuteResult( - Collections.singletonList( - MetaResultSet.count(statementHandle.connectionId, statementHandle.id, updatedCount))); - } else { - // TODO Why is maxRowCount ignored? - return new ExecuteResult( - Collections.singletonList( - MetaResultSet.create( - statementHandle.connectionId, - statementHandle.id, - true, - statementHandle.signature, - null))); - } - } - - @Override - public ExecuteResult execute( - final StatementHandle statementHandle, - final List typedValues, - final int maxRowsInFirstFrame) { - return execute(statementHandle, typedValues, (long) maxRowsInFirstFrame); - } - - @Override - public ExecuteBatchResult executeBatch( - final StatementHandle statementHandle, final List> parameterValuesList) - throws IllegalStateException { - Preconditions.checkArgument( - connection.id.equals(statementHandle.connectionId), "Connection IDs are not consistent"); - PreparedStatement preparedStatement = getPreparedStatement(statementHandle); - - if (preparedStatement == null) { - throw new IllegalStateException("Prepared statement not found: " + statementHandle); - } - - final AvaticaParameterBinder binder = - new AvaticaParameterBinder( - preparedStatement, ((ArrowFlightConnection) connection).getBufferAllocator()); - for (int 
i = 0; i < parameterValuesList.size(); i++) { - binder.bind(parameterValuesList.get(i), i); - } - - // Update query - long[] updatedCounts = {preparedStatement.executeUpdate()}; - return new ExecuteBatchResult(updatedCounts); - } - - @Override - public Frame fetch( - final StatementHandle statementHandle, final long offset, final int fetchMaxRowCount) { - /* - * ArrowFlightMetaImpl does not use frames. - * Instead, we have accessors that contain a VectorSchemaRoot with - * the results. - */ - throw AvaticaConnection.HELPER.wrap( - String.format("%s does not use frames.", this), AvaticaConnection.HELPER.unsupported()); - } - - private PreparedStatement prepareForHandle(final String query, StatementHandle handle) { - final PreparedStatement preparedStatement = - ((ArrowFlightConnection) connection).getClientHandler().prepare(query); - handle.signature = - newSignature( - query, preparedStatement.getDataSetSchema(), preparedStatement.getParameterSchema()); - statementHandlePreparedStatementMap.put(new StatementHandleKey(handle), preparedStatement); - return preparedStatement; - } - - @Override - public StatementHandle prepare( - final ConnectionHandle connectionHandle, final String query, final long maxRowCount) { - final StatementHandle handle = super.createStatement(connectionHandle); - prepareForHandle(query, handle); - return handle; - } - - @Override - public ExecuteResult prepareAndExecute( - final StatementHandle statementHandle, - final String query, - final long maxRowCount, - final PrepareCallback prepareCallback) - throws NoSuchStatementException { - return prepareAndExecute( - statementHandle, query, maxRowCount, -1 /* Not used */, prepareCallback); - } - - @Override - public ExecuteResult prepareAndExecute( - final StatementHandle handle, - final String query, - final long maxRowCount, - final int maxRowsInFirstFrame, - final PrepareCallback callback) - throws NoSuchStatementException { - try { - PreparedStatement preparedStatement = 
prepareForHandle(query, handle); - final StatementType statementType = preparedStatement.getType(); - - final long updateCount = - statementType.equals(StatementType.UPDATE) ? preparedStatement.executeUpdate() : -1; - synchronized (callback.getMonitor()) { - callback.clear(); - callback.assign(handle.signature, null, updateCount); - } - callback.execute(); - final MetaResultSet metaResultSet = - MetaResultSet.create(handle.connectionId, handle.id, false, handle.signature, null); - return new ExecuteResult(Collections.singletonList(metaResultSet)); - } catch (SQLTimeoutException e) { - // So far AvaticaStatement(executeInternal) only handles NoSuchStatement and Runtime - // Exceptions. - throw new RuntimeException(e); - } catch (SQLException e) { - throw new NoSuchStatementException(handle); - } - } - - @Override - public ExecuteBatchResult prepareAndExecuteBatch( - final StatementHandle statementHandle, final List queries) - throws NoSuchStatementException { - // TODO Fill this stub. - return null; - } - - @Override - public void rollback(final ConnectionHandle connectionHandle) { - // TODO Fill this stub. - } - - @Override - public boolean syncResults( - final StatementHandle statementHandle, final QueryState queryState, final long offset) - throws NoSuchStatementException { - // TODO Fill this stub. - return false; - } - - void setDefaultConnectionProperties() { - // TODO Double-check this. - connProps - .setDirty(false) - .setAutoCommit(true) - .setReadOnly(true) - .setCatalog(null) - .setSchema(null) - .setTransactionIsolation(Connection.TRANSACTION_NONE); - } - - PreparedStatement getPreparedStatement(StatementHandle statementHandle) { - return statementHandlePreparedStatementMap.get(new StatementHandleKey(statementHandle)); - } - - // Helper used to look up prepared statement instances later. Avatica doesn't give us the - // signature in - // an UPDATE code path so we can't directly use StatementHandle as a map key. 
- private static final class StatementHandleKey { - public final String connectionId; - public final int id; - - StatementHandleKey(StatementHandle statementHandle) { - this.connectionId = statementHandle.connectionId; - this.id = statementHandle.id; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - StatementHandleKey that = (StatementHandleKey) o; - - if (id != that.id) { - return false; - } - return connectionId.equals(that.connectionId); - } - - @Override - public int hashCode() { - int result = connectionId.hashCode(); - result = 31 * result + id; - return result; - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatement.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatement.java deleted file mode 100644 index d7af6902f43b8..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatement.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import java.sql.PreparedStatement; -import java.sql.SQLException; -import org.apache.arrow.driver.jdbc.client.ArrowFlightSqlClientHandler; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.util.Preconditions; -import org.apache.calcite.avatica.AvaticaPreparedStatement; -import org.apache.calcite.avatica.Meta.Signature; -import org.apache.calcite.avatica.Meta.StatementHandle; - -/** Arrow Flight JBCS's implementation {@link PreparedStatement}. */ -public class ArrowFlightPreparedStatement extends AvaticaPreparedStatement - implements ArrowFlightInfoStatement { - - private final ArrowFlightSqlClientHandler.PreparedStatement preparedStatement; - - private ArrowFlightPreparedStatement( - final ArrowFlightConnection connection, - final ArrowFlightSqlClientHandler.PreparedStatement preparedStatement, - final StatementHandle handle, - final Signature signature, - final int resultSetType, - final int resultSetConcurrency, - final int resultSetHoldability) - throws SQLException { - super(connection, handle, signature, resultSetType, resultSetConcurrency, resultSetHoldability); - this.preparedStatement = Preconditions.checkNotNull(preparedStatement); - } - - static ArrowFlightPreparedStatement newPreparedStatement( - final ArrowFlightConnection connection, - final ArrowFlightSqlClientHandler.PreparedStatement preparedStmt, - final StatementHandle statementHandle, - final Signature signature, - final int resultSetType, - final int resultSetConcurrency, - final int resultSetHoldability) - throws SQLException { - return new ArrowFlightPreparedStatement( - connection, - preparedStmt, - statementHandle, - signature, - resultSetType, - resultSetConcurrency, - resultSetHoldability); - } - - @Override - public ArrowFlightConnection getConnection() throws SQLException { - return (ArrowFlightConnection) super.getConnection(); - } - - @Override - public synchronized void close() throws SQLException { - 
this.preparedStatement.close(); - super.close(); - } - - @Override - public FlightInfo executeFlightInfoQuery() throws SQLException { - return preparedStatement.executeQuery(); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightStatement.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightStatement.java deleted file mode 100644 index 577aee3b4ac6b..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowFlightStatement.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import java.sql.SQLException; -import org.apache.arrow.driver.jdbc.client.ArrowFlightSqlClientHandler.PreparedStatement; -import org.apache.arrow.driver.jdbc.utils.ConvertUtils; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.calcite.avatica.AvaticaStatement; -import org.apache.calcite.avatica.Meta; -import org.apache.calcite.avatica.Meta.StatementHandle; - -/** A SQL statement for querying data from an Arrow Flight server. 
*/ -public class ArrowFlightStatement extends AvaticaStatement implements ArrowFlightInfoStatement { - - ArrowFlightStatement( - final ArrowFlightConnection connection, - final StatementHandle handle, - final int resultSetType, - final int resultSetConcurrency, - final int resultSetHoldability) { - super(connection, handle, resultSetType, resultSetConcurrency, resultSetHoldability); - } - - @Override - public ArrowFlightConnection getConnection() throws SQLException { - return (ArrowFlightConnection) super.getConnection(); - } - - @Override - public FlightInfo executeFlightInfoQuery() throws SQLException { - final PreparedStatement preparedStatement = - getConnection().getMeta().getPreparedStatement(handle); - final Meta.Signature signature = getSignature(); - if (signature == null) { - return null; - } - - final Schema resultSetSchema = preparedStatement.getDataSetSchema(); - signature.columns.addAll( - ConvertUtils.convertArrowFieldsToColumnMetaDataList(resultSetSchema.getFields())); - setSignature(signature); - - return preparedStatement.executeQuery(); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessor.java deleted file mode 100644 index f0fa55fa826bb..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessor.java +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor; - -import static org.apache.calcite.avatica.util.Cursor.Accessor; - -import java.io.InputStream; -import java.io.Reader; -import java.math.BigDecimal; -import java.net.URL; -import java.sql.Array; -import java.sql.Blob; -import java.sql.Clob; -import java.sql.Date; -import java.sql.NClob; -import java.sql.Ref; -import java.sql.SQLException; -import java.sql.SQLXML; -import java.sql.Struct; -import java.sql.Time; -import java.sql.Timestamp; -import java.util.Calendar; -import java.util.Map; -import java.util.function.IntSupplier; - -/** Base Jdbc Accessor. */ -public abstract class ArrowFlightJdbcAccessor implements Accessor { - private final IntSupplier currentRowSupplier; - - // All the derived accessor classes should alter this as they encounter null Values - protected boolean wasNull; - protected ArrowFlightJdbcAccessorFactory.WasNullConsumer wasNullConsumer; - - protected ArrowFlightJdbcAccessor( - final IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer wasNullConsumer) { - this.currentRowSupplier = currentRowSupplier; - this.wasNullConsumer = wasNullConsumer; - } - - protected int getCurrentRow() { - return currentRowSupplier.getAsInt(); - } - - // It needs to be public so this method can be accessed when creating the complex types. 
- public abstract Class getObjectClass(); - - @Override - public boolean wasNull() { - return wasNull; - } - - @Override - public String getString() throws SQLException { - final Object object = getObject(); - if (object == null) { - return null; - } - - return object.toString(); - } - - @Override - public boolean getBoolean() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public byte getByte() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public short getShort() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public int getInt() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public long getLong() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public float getFloat() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public double getDouble() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public BigDecimal getBigDecimal() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public BigDecimal getBigDecimal(final int i) throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public byte[] getBytes() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public InputStream getAsciiStream() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public InputStream getUnicodeStream() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public InputStream getBinaryStream() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public Object getObject() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - 
@Override - public Reader getCharacterStream() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public Object getObject(final Map> map) throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public Ref getRef() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public Blob getBlob() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public Clob getClob() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public Array getArray() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public Struct getStruct() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public Date getDate(final Calendar calendar) throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public Time getTime(final Calendar calendar) throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public Timestamp getTimestamp(final Calendar calendar) throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public URL getURL() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public NClob getNClob() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public SQLXML getSQLXML() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public String getNString() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public Reader getNCharacterStream() throws SQLException { - throw getOperationNotSupported(this.getClass()); - } - - @Override - public T getObject(final Class type) throws SQLException { - final Object value; - if (type == Byte.class) { - value = 
getByte(); - } else if (type == Short.class) { - value = getShort(); - } else if (type == Integer.class) { - value = getInt(); - } else if (type == Long.class) { - value = getLong(); - } else if (type == Float.class) { - value = getFloat(); - } else if (type == Double.class) { - value = getDouble(); - } else if (type == Boolean.class) { - value = getBoolean(); - } else if (type == BigDecimal.class) { - value = getBigDecimal(); - } else if (type == String.class) { - value = getString(); - } else if (type == byte[].class) { - value = getBytes(); - } else { - value = getObject(); - } - return !type.isPrimitive() && wasNull ? null : type.cast(value); - } - - private static SQLException getOperationNotSupported(final Class type) { - return new SQLException(String.format("Operation not supported for type: %s.", type.getName())); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactory.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactory.java deleted file mode 100644 index dad1fa5f73acd..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactory.java +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor; - -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.impl.ArrowFlightJdbcNullVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.binary.ArrowFlightJdbcBinaryVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcDateVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcDurationVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcIntervalVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeStampVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcDenseUnionVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcFixedSizeListVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcLargeListVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcListVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcMapVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcStructVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcUnionVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.numeric.ArrowFlightJdbcBaseIntVectorAccessor; -import 
org.apache.arrow.driver.jdbc.accessor.impl.numeric.ArrowFlightJdbcBitVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.numeric.ArrowFlightJdbcDecimalVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.numeric.ArrowFlightJdbcFloat4VectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.numeric.ArrowFlightJdbcFloat8VectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.text.ArrowFlightJdbcVarCharVectorAccessor; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import 
org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; - -/** Factory to instantiate the accessors. */ -public class ArrowFlightJdbcAccessorFactory { - - /** - * Create an accessor according to its type. - * - * @param vector an instance of an arrow vector. - * @param getCurrentRow a supplier to check which row is being accessed. - * @return an instance of one of the accessors. - */ - public static ArrowFlightJdbcAccessor createAccessor( - ValueVector vector, IntSupplier getCurrentRow, WasNullConsumer setCursorWasNull) { - if (vector instanceof UInt1Vector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (UInt1Vector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof UInt2Vector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (UInt2Vector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof UInt4Vector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (UInt4Vector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof UInt8Vector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (UInt8Vector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof TinyIntVector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (TinyIntVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof SmallIntVector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (SmallIntVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof IntVector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (IntVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof BigIntVector) { - return new 
ArrowFlightJdbcBaseIntVectorAccessor( - (BigIntVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof Float4Vector) { - return new ArrowFlightJdbcFloat4VectorAccessor( - (Float4Vector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof Float8Vector) { - return new ArrowFlightJdbcFloat8VectorAccessor( - (Float8Vector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof BitVector) { - return new ArrowFlightJdbcBitVectorAccessor( - (BitVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof DecimalVector) { - return new ArrowFlightJdbcDecimalVectorAccessor( - (DecimalVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof Decimal256Vector) { - return new ArrowFlightJdbcDecimalVectorAccessor( - (Decimal256Vector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof VarBinaryVector) { - return new ArrowFlightJdbcBinaryVectorAccessor( - (VarBinaryVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof LargeVarBinaryVector) { - return new ArrowFlightJdbcBinaryVectorAccessor( - (LargeVarBinaryVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof FixedSizeBinaryVector) { - return new ArrowFlightJdbcBinaryVectorAccessor( - (FixedSizeBinaryVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof TimeStampVector) { - return new ArrowFlightJdbcTimeStampVectorAccessor( - (TimeStampVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof TimeNanoVector) { - return new ArrowFlightJdbcTimeVectorAccessor( - (TimeNanoVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof TimeMicroVector) { - return new ArrowFlightJdbcTimeVectorAccessor( - (TimeMicroVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof TimeMilliVector) { - return new ArrowFlightJdbcTimeVectorAccessor( - (TimeMilliVector) vector, 
getCurrentRow, setCursorWasNull); - } else if (vector instanceof TimeSecVector) { - return new ArrowFlightJdbcTimeVectorAccessor( - (TimeSecVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof DateDayVector) { - return new ArrowFlightJdbcDateVectorAccessor( - ((DateDayVector) vector), getCurrentRow, setCursorWasNull); - } else if (vector instanceof DateMilliVector) { - return new ArrowFlightJdbcDateVectorAccessor( - ((DateMilliVector) vector), getCurrentRow, setCursorWasNull); - } else if (vector instanceof VarCharVector) { - return new ArrowFlightJdbcVarCharVectorAccessor( - (VarCharVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof LargeVarCharVector) { - return new ArrowFlightJdbcVarCharVectorAccessor( - (LargeVarCharVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof DurationVector) { - return new ArrowFlightJdbcDurationVectorAccessor( - (DurationVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof IntervalDayVector) { - return new ArrowFlightJdbcIntervalVectorAccessor( - ((IntervalDayVector) vector), getCurrentRow, setCursorWasNull); - } else if (vector instanceof IntervalYearVector) { - return new ArrowFlightJdbcIntervalVectorAccessor( - ((IntervalYearVector) vector), getCurrentRow, setCursorWasNull); - } else if (vector instanceof IntervalMonthDayNanoVector) { - return new ArrowFlightJdbcIntervalVectorAccessor( - ((IntervalMonthDayNanoVector) vector), getCurrentRow, setCursorWasNull); - } else if (vector instanceof StructVector) { - return new ArrowFlightJdbcStructVectorAccessor( - (StructVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof MapVector) { - return new ArrowFlightJdbcMapVectorAccessor( - (MapVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof ListVector) { - return new ArrowFlightJdbcListVectorAccessor( - (ListVector) vector, getCurrentRow, setCursorWasNull); - } else if 
(vector instanceof LargeListVector) { - return new ArrowFlightJdbcLargeListVectorAccessor( - (LargeListVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof FixedSizeListVector) { - return new ArrowFlightJdbcFixedSizeListVectorAccessor( - (FixedSizeListVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof UnionVector) { - return new ArrowFlightJdbcUnionVectorAccessor( - (UnionVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof DenseUnionVector) { - return new ArrowFlightJdbcDenseUnionVectorAccessor( - (DenseUnionVector) vector, getCurrentRow, setCursorWasNull); - } else if (vector instanceof NullVector || vector == null) { - return new ArrowFlightJdbcNullVectorAccessor(setCursorWasNull); - } - - throw new UnsupportedOperationException( - "Unsupported vector type: " + vector.getClass().getName()); - } - - /** Functional interface used to propagate that the value accessed was null or not. */ - @FunctionalInterface - public interface WasNullConsumer { - void setWasNull(boolean wasNull); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessor.java deleted file mode 100644 index 56092ec3e7f32..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessor.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl; - -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.NullVector; - -/** Accessor for the Arrow type {@link NullVector}. */ -public class ArrowFlightJdbcNullVectorAccessor extends ArrowFlightJdbcAccessor { - public ArrowFlightJdbcNullVectorAccessor( - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(null, setCursorWasNull); - } - - @Override - public Class getObjectClass() { - return Object.class; - } - - @Override - public boolean wasNull() { - return true; - } - - @Override - public Object getObject() { - this.wasNullConsumer.setWasNull(true); - return null; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessor.java deleted file mode 100644 index 30dfffce6406a..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessor.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.binary; - -import java.io.ByteArrayInputStream; -import java.io.CharArrayReader; -import java.io.InputStream; -import java.io.Reader; -import java.nio.charset.StandardCharsets; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.VarBinaryVector; - -/** - * Accessor for the Arrow types: {@link FixedSizeBinaryVector}, {@link VarBinaryVector} and {@link - * LargeVarBinaryVector}. 
- */ -public class ArrowFlightJdbcBinaryVectorAccessor extends ArrowFlightJdbcAccessor { - - private interface ByteArrayGetter { - byte[] get(int index); - } - - private final ByteArrayGetter getter; - - public ArrowFlightJdbcBinaryVectorAccessor( - FixedSizeBinaryVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - this(vector::get, currentRowSupplier, setCursorWasNull); - } - - public ArrowFlightJdbcBinaryVectorAccessor( - VarBinaryVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - this(vector::get, currentRowSupplier, setCursorWasNull); - } - - public ArrowFlightJdbcBinaryVectorAccessor( - LargeVarBinaryVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - this(vector::get, currentRowSupplier, setCursorWasNull); - } - - private ArrowFlightJdbcBinaryVectorAccessor( - ByteArrayGetter getter, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.getter = getter; - } - - @Override - public byte[] getBytes() { - byte[] bytes = getter.get(getCurrentRow()); - this.wasNull = bytes == null; - this.wasNullConsumer.setWasNull(this.wasNull); - - return bytes; - } - - @Override - public Object getObject() { - return this.getBytes(); - } - - @Override - public Class getObjectClass() { - return byte[].class; - } - - @Override - public String getString() { - byte[] bytes = this.getBytes(); - if (bytes == null) { - return null; - } - - return new String(bytes, StandardCharsets.UTF_8); - } - - @Override - public InputStream getAsciiStream() { - byte[] bytes = getBytes(); - if (bytes == null) { - return null; - } - - return new ByteArrayInputStream(bytes); - } - - @Override - public InputStream getUnicodeStream() { - byte[] bytes = getBytes(); - if (bytes == null) { - return null; 
- } - - return new ByteArrayInputStream(bytes); - } - - @Override - public InputStream getBinaryStream() { - byte[] bytes = getBytes(); - if (bytes == null) { - return null; - } - - return new ByteArrayInputStream(bytes); - } - - @Override - public Reader getCharacterStream() { - String string = getString(); - if (string == null) { - return null; - } - - return new CharArrayReader(string.toCharArray()); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessor.java deleted file mode 100644 index ebe4016209527..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessor.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.calendar; - -import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcDateVectorGetter.Getter; -import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcDateVectorGetter.Holder; -import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcDateVectorGetter.createGetter; -import static org.apache.arrow.driver.jdbc.utils.DateTimeUtils.getTimestampValue; -import static org.apache.calcite.avatica.util.DateTimeUtils.MILLIS_PER_DAY; -import static org.apache.calcite.avatica.util.DateTimeUtils.unixDateToString; - -import java.sql.Date; -import java.sql.Timestamp; -import java.util.Calendar; -import java.util.concurrent.TimeUnit; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.driver.jdbc.utils.DateTimeUtils; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.ValueVector; - -/** Accessor for the Arrow types: {@link DateDayVector} and {@link DateMilliVector}. */ -public class ArrowFlightJdbcDateVectorAccessor extends ArrowFlightJdbcAccessor { - - private final Getter getter; - private final TimeUnit timeUnit; - private final Holder holder; - - /** - * Instantiate an accessor for a {@link DateDayVector}. - * - * @param vector an instance of a DateDayVector. - * @param currentRowSupplier the supplier to track the lines. - * @param setCursorWasNull the consumer to set if value was null. 
- */ - public ArrowFlightJdbcDateVectorAccessor( - DateDayVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.holder = new Holder(); - this.getter = createGetter(vector); - this.timeUnit = getTimeUnitForVector(vector); - } - - /** - * Instantiate an accessor for a {@link DateMilliVector}. - * - * @param vector an instance of a DateMilliVector. - * @param currentRowSupplier the supplier to track the lines. - */ - public ArrowFlightJdbcDateVectorAccessor( - DateMilliVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.holder = new Holder(); - this.getter = createGetter(vector); - this.timeUnit = getTimeUnitForVector(vector); - } - - @Override - public Class getObjectClass() { - return Date.class; - } - - @Override - public Object getObject() { - return this.getDate(null); - } - - @Override - public Date getDate(Calendar calendar) { - fillHolder(); - if (this.wasNull) { - return null; - } - - long value = holder.value; - long milliseconds = this.timeUnit.toMillis(value); - - long millisWithCalendar = DateTimeUtils.applyCalendarOffset(milliseconds, calendar); - - return new Date(getTimestampValue(millisWithCalendar).getTime()); - } - - private void fillHolder() { - getter.get(getCurrentRow(), holder); - this.wasNull = holder.isSet == 0; - this.wasNullConsumer.setWasNull(this.wasNull); - } - - @Override - public Timestamp getTimestamp(Calendar calendar) { - Date date = getDate(calendar); - if (date == null) { - return null; - } - return new Timestamp(date.getTime()); - } - - @Override - public String getString() { - fillHolder(); - if (wasNull) { - return null; - } - long milliseconds = timeUnit.toMillis(holder.value); - return unixDateToString((int) (milliseconds / MILLIS_PER_DAY)); - } - - protected static TimeUnit 
getTimeUnitForVector(ValueVector vector) { - if (vector instanceof DateDayVector) { - return TimeUnit.DAYS; - } else if (vector instanceof DateMilliVector) { - return TimeUnit.MILLISECONDS; - } - - throw new IllegalArgumentException("Invalid Arrow vector"); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorGetter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorGetter.java deleted file mode 100644 index 5b99e9ec369f3..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorGetter.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.calendar; - -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.holders.NullableDateDayHolder; -import org.apache.arrow.vector.holders.NullableDateMilliHolder; - -/** Auxiliary class used to unify data access on TimeStampVectors. 
*/ -final class ArrowFlightJdbcDateVectorGetter { - - private ArrowFlightJdbcDateVectorGetter() { - // Prevent instantiation. - } - - /** - * Auxiliary class meant to unify Date*Vector#get implementations with different classes of - * ValueHolders. - */ - static class Holder { - int isSet; // Tells if value is set; 0 = not set, 1 = set - long value; // Holds actual value in its respective timeunit - } - - /** Functional interface used to unify Date*Vector#get implementations. */ - @FunctionalInterface - interface Getter { - void get(int index, Holder holder); - } - - static Getter createGetter(DateDayVector vector) { - NullableDateDayHolder auxHolder = new NullableDateDayHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } - - static Getter createGetter(DateMilliVector vector) { - NullableDateMilliHolder auxHolder = new NullableDateMilliHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessor.java deleted file mode 100644 index 2a5eedd0c0b65..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessor.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.calendar; - -import java.time.Duration; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.DurationVector; - -/** Accessor for the Arrow type {@link DurationVector}. */ -public class ArrowFlightJdbcDurationVectorAccessor extends ArrowFlightJdbcAccessor { - - private final DurationVector vector; - - public ArrowFlightJdbcDurationVectorAccessor( - DurationVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.vector = vector; - } - - @Override - public Class getObjectClass() { - return Duration.class; - } - - @Override - public Object getObject() { - Duration duration = vector.getObject(getCurrentRow()); - this.wasNull = duration == null; - this.wasNullConsumer.setWasNull(this.wasNull); - - return duration; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessor.java deleted file mode 100644 index 
4a3a70b0dfe7a..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessor.java +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.calendar; - -import static org.apache.arrow.driver.jdbc.utils.IntervalStringUtils.formatIntervalDay; -import static org.apache.arrow.driver.jdbc.utils.IntervalStringUtils.formatIntervalYear; -import static org.apache.arrow.vector.util.DateUtility.yearsToMonths; - -import java.sql.SQLException; -import java.time.Duration; -import java.time.Period; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.PeriodDuration; -import org.apache.arrow.vector.holders.NullableIntervalDayHolder; -import org.apache.arrow.vector.holders.NullableIntervalMonthDayNanoHolder; -import org.apache.arrow.vector.holders.NullableIntervalYearHolder; - -/** Accessor for the Arrow type {@link IntervalDayVector}. */ -public class ArrowFlightJdbcIntervalVectorAccessor extends ArrowFlightJdbcAccessor { - - private final BaseFixedWidthVector vector; - private final StringGetter stringGetter; - private final Class objectClass; - - /** - * Instantiate an accessor for a {@link IntervalDayVector}. - * - * @param vector an instance of a IntervalDayVector. - * @param currentRowSupplier the supplier to track the rows. - * @param setCursorWasNull the consumer to set if value was null. 
- */ - public ArrowFlightJdbcIntervalVectorAccessor( - IntervalDayVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.vector = vector; - stringGetter = - (index) -> { - final NullableIntervalDayHolder holder = new NullableIntervalDayHolder(); - vector.get(index, holder); - if (holder.isSet == 0) { - return null; - } else { - final int days = holder.days; - final int millis = holder.milliseconds; - return formatIntervalDay(Duration.ofDays(days).plusMillis(millis)); - } - }; - objectClass = java.time.Duration.class; - } - - /** - * Instantiate an accessor for a {@link IntervalYearVector}. - * - * @param vector an instance of a IntervalYearVector. - * @param currentRowSupplier the supplier to track the rows. - * @param setCursorWasNull the consumer to set if value was null. - */ - public ArrowFlightJdbcIntervalVectorAccessor( - IntervalYearVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.vector = vector; - stringGetter = - (index) -> { - final NullableIntervalYearHolder holder = new NullableIntervalYearHolder(); - vector.get(index, holder); - if (holder.isSet == 0) { - return null; - } else { - final int interval = holder.value; - final int years = (interval / yearsToMonths); - final int months = (interval % yearsToMonths); - return formatIntervalYear(Period.ofYears(years).plusMonths(months)); - } - }; - objectClass = java.time.Period.class; - } - - /** - * Instantiate an accessor for a {@link IntervalMonthDayNanoVector}. - * - * @param vector an instance of a IntervalMonthDayNanoVector. - * @param currentRowSupplier the supplier to track the rows. - * @param setCursorWasNull the consumer to set if value was null. 
- */ - public ArrowFlightJdbcIntervalVectorAccessor( - IntervalMonthDayNanoVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.vector = vector; - stringGetter = - (index) -> { - final NullableIntervalMonthDayNanoHolder holder = - new NullableIntervalMonthDayNanoHolder(); - vector.get(index, holder); - if (holder.isSet == 0) { - return null; - } else { - final int months = holder.months; - final int days = holder.days; - final long nanos = holder.nanoseconds; - final Period period = Period.ofMonths(months).plusDays(days); - final Duration duration = Duration.ofNanos(nanos); - return new PeriodDuration(period, duration).toISO8601IntervalString(); - } - }; - objectClass = PeriodDuration.class; - } - - @Override - public Class getObjectClass() { - return objectClass; - } - - @Override - public String getString() throws SQLException { - String result = stringGetter.get(getCurrentRow()); - wasNull = result == null; - wasNullConsumer.setWasNull(wasNull); - return result; - } - - @Override - public Object getObject() { - Object object = vector.getObject(getCurrentRow()); - wasNull = object == null; - wasNullConsumer.setWasNull(wasNull); - return object; - } - - /** Functional interface used to unify Interval*Vector#getAsStringBuilder implementations. 
*/ - @FunctionalInterface - interface StringGetter { - String get(int index); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessor.java deleted file mode 100644 index debdd0fcb4b65..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessor.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.calendar; - -import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeStampVectorGetter.Getter; -import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeStampVectorGetter.Holder; -import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeStampVectorGetter.createGetter; - -import java.sql.Date; -import java.sql.Time; -import java.sql.Timestamp; -import java.time.LocalDateTime; -import java.time.temporal.ChronoUnit; -import java.util.Calendar; -import java.util.TimeZone; -import java.util.concurrent.TimeUnit; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.util.DateUtility; - -/** Accessor for the Arrow types extending from {@link TimeStampVector}. */ -public class ArrowFlightJdbcTimeStampVectorAccessor extends ArrowFlightJdbcAccessor { - - private final TimeZone timeZone; - private final Getter getter; - private final TimeUnit timeUnit; - private final LongToLocalDateTime longToLocalDateTime; - private final Holder holder; - - /** Functional interface used to convert a number (in any time resolution) to LocalDateTime. */ - interface LongToLocalDateTime { - LocalDateTime fromLong(long value); - } - - /** Instantiate a ArrowFlightJdbcTimeStampVectorAccessor for given vector. 
*/ - public ArrowFlightJdbcTimeStampVectorAccessor( - TimeStampVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.holder = new Holder(); - this.getter = createGetter(vector); - - this.timeZone = getTimeZoneForVector(vector); - this.timeUnit = getTimeUnitForVector(vector); - this.longToLocalDateTime = getLongToLocalDateTimeForVector(vector, this.timeZone); - } - - @Override - public Class getObjectClass() { - return Timestamp.class; - } - - @Override - public Object getObject() { - return this.getTimestamp(null); - } - - private LocalDateTime getLocalDateTime(Calendar calendar) { - getter.get(getCurrentRow(), holder); - this.wasNull = holder.isSet == 0; - this.wasNullConsumer.setWasNull(this.wasNull); - if (this.wasNull) { - return null; - } - - long value = holder.value; - - LocalDateTime localDateTime = this.longToLocalDateTime.fromLong(value); - - if (calendar != null) { - TimeZone timeZone = calendar.getTimeZone(); - long millis = this.timeUnit.toMillis(value); - localDateTime = - localDateTime.minus( - timeZone.getOffset(millis) - this.timeZone.getOffset(millis), ChronoUnit.MILLIS); - } - return localDateTime; - } - - @Override - public Date getDate(Calendar calendar) { - LocalDateTime localDateTime = getLocalDateTime(calendar); - if (localDateTime == null) { - return null; - } - - return new Date(Timestamp.valueOf(localDateTime).getTime()); - } - - @Override - public Time getTime(Calendar calendar) { - LocalDateTime localDateTime = getLocalDateTime(calendar); - if (localDateTime == null) { - return null; - } - - return new Time(Timestamp.valueOf(localDateTime).getTime()); - } - - @Override - public Timestamp getTimestamp(Calendar calendar) { - LocalDateTime localDateTime = getLocalDateTime(calendar); - if (localDateTime == null) { - return null; - } - - return Timestamp.valueOf(localDateTime); - } - - protected static TimeUnit 
getTimeUnitForVector(TimeStampVector vector) { - ArrowType.Timestamp arrowType = - (ArrowType.Timestamp) vector.getField().getFieldType().getType(); - - switch (arrowType.getUnit()) { - case NANOSECOND: - return TimeUnit.NANOSECONDS; - case MICROSECOND: - return TimeUnit.MICROSECONDS; - case MILLISECOND: - return TimeUnit.MILLISECONDS; - case SECOND: - return TimeUnit.SECONDS; - default: - throw new UnsupportedOperationException("Invalid Arrow time unit"); - } - } - - protected static LongToLocalDateTime getLongToLocalDateTimeForVector( - TimeStampVector vector, TimeZone timeZone) { - String timeZoneID = timeZone.getID(); - - ArrowType.Timestamp arrowType = - (ArrowType.Timestamp) vector.getField().getFieldType().getType(); - - switch (arrowType.getUnit()) { - case NANOSECOND: - return nanoseconds -> DateUtility.getLocalDateTimeFromEpochNano(nanoseconds, timeZoneID); - case MICROSECOND: - return microseconds -> DateUtility.getLocalDateTimeFromEpochMicro(microseconds, timeZoneID); - case MILLISECOND: - return milliseconds -> DateUtility.getLocalDateTimeFromEpochMilli(milliseconds, timeZoneID); - case SECOND: - return seconds -> - DateUtility.getLocalDateTimeFromEpochMilli( - TimeUnit.SECONDS.toMillis(seconds), timeZoneID); - default: - throw new UnsupportedOperationException("Invalid Arrow time unit"); - } - } - - protected static TimeZone getTimeZoneForVector(TimeStampVector vector) { - ArrowType.Timestamp arrowType = - (ArrowType.Timestamp) vector.getField().getFieldType().getType(); - - String timezoneName = arrowType.getTimezone(); - if (timezoneName == null) { - return TimeZone.getTimeZone("UTC"); - } - - return TimeZone.getTimeZone(timezoneName); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorGetter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorGetter.java deleted file 
mode 100644 index 7fb74f4a7fba4..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorGetter.java +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.calendar; - -import org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.holders.NullableTimeStampMicroHolder; -import org.apache.arrow.vector.holders.NullableTimeStampMicroTZHolder; -import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder; -import org.apache.arrow.vector.holders.NullableTimeStampMilliTZHolder; -import org.apache.arrow.vector.holders.NullableTimeStampNanoHolder; -import org.apache.arrow.vector.holders.NullableTimeStampNanoTZHolder; -import org.apache.arrow.vector.holders.NullableTimeStampSecHolder; -import org.apache.arrow.vector.holders.NullableTimeStampSecTZHolder; - -/** Auxiliary class used to unify data access on TimeStampVectors. */ -final class ArrowFlightJdbcTimeStampVectorGetter { - - private ArrowFlightJdbcTimeStampVectorGetter() { - // Prevent instantiation. - } - - /** - * Auxiliary class meant to unify TimeStamp*Vector#get implementations with different classes of - * ValueHolders. - */ - static class Holder { - int isSet; // Tells if value is set; 0 = not set, 1 = set - long value; // Holds actual value in its respective timeunit - } - - /** Functional interface used to unify TimeStamp*Vector#get implementations. 
*/ - @FunctionalInterface - interface Getter { - void get(int index, Holder holder); - } - - static Getter createGetter(TimeStampVector vector) { - if (vector instanceof TimeStampNanoVector) { - return createGetter((TimeStampNanoVector) vector); - } else if (vector instanceof TimeStampNanoTZVector) { - return createGetter((TimeStampNanoTZVector) vector); - } else if (vector instanceof TimeStampMicroVector) { - return createGetter((TimeStampMicroVector) vector); - } else if (vector instanceof TimeStampMicroTZVector) { - return createGetter((TimeStampMicroTZVector) vector); - } else if (vector instanceof TimeStampMilliVector) { - return createGetter((TimeStampMilliVector) vector); - } else if (vector instanceof TimeStampMilliTZVector) { - return createGetter((TimeStampMilliTZVector) vector); - } else if (vector instanceof TimeStampSecVector) { - return createGetter((TimeStampSecVector) vector); - } else if (vector instanceof TimeStampSecTZVector) { - return createGetter((TimeStampSecTZVector) vector); - } - - throw new UnsupportedOperationException("Unsupported Timestamp vector type"); - } - - private static Getter createGetter(TimeStampNanoVector vector) { - NullableTimeStampNanoHolder auxHolder = new NullableTimeStampNanoHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } - - private static Getter createGetter(TimeStampNanoTZVector vector) { - NullableTimeStampNanoTZHolder auxHolder = new NullableTimeStampNanoTZHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } - - private static Getter createGetter(TimeStampMicroVector vector) { - NullableTimeStampMicroHolder auxHolder = new NullableTimeStampMicroHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } - - private static Getter 
createGetter(TimeStampMicroTZVector vector) { - NullableTimeStampMicroTZHolder auxHolder = new NullableTimeStampMicroTZHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } - - private static Getter createGetter(TimeStampMilliVector vector) { - NullableTimeStampMilliHolder auxHolder = new NullableTimeStampMilliHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } - - private static Getter createGetter(TimeStampMilliTZVector vector) { - NullableTimeStampMilliTZHolder auxHolder = new NullableTimeStampMilliTZHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } - - private static Getter createGetter(TimeStampSecVector vector) { - NullableTimeStampSecHolder auxHolder = new NullableTimeStampSecHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } - - private static Getter createGetter(TimeStampSecTZVector vector) { - NullableTimeStampSecTZHolder auxHolder = new NullableTimeStampSecTZHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessor.java deleted file mode 100644 index 2c03ee631ec96..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessor.java +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Licensed to the Apache Software 
Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.calendar; - -import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeVectorGetter.Getter; -import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeVectorGetter.Holder; -import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeVectorGetter.createGetter; - -import java.sql.Time; -import java.sql.Timestamp; -import java.util.Calendar; -import java.util.concurrent.TimeUnit; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.ArrowFlightJdbcTime; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.driver.jdbc.utils.DateTimeUtils; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.ValueVector; - -/** - * Accessor for the Arrow types: {@link TimeNanoVector}, {@link TimeMicroVector}, {@link - * TimeMilliVector} and {@link TimeSecVector}. 
- */ -public class ArrowFlightJdbcTimeVectorAccessor extends ArrowFlightJdbcAccessor { - - private final Getter getter; - private final TimeUnit timeUnit; - private final Holder holder; - - /** - * Instantiate an accessor for a {@link TimeNanoVector}. - * - * @param vector an instance of a TimeNanoVector. - * @param currentRowSupplier the supplier to track the lines. - * @param setCursorWasNull the consumer to set if value was null. - */ - public ArrowFlightJdbcTimeVectorAccessor( - TimeNanoVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.holder = new Holder(); - this.getter = createGetter(vector); - this.timeUnit = getTimeUnitForVector(vector); - } - - /** - * Instantiate an accessor for a {@link TimeMicroVector}. - * - * @param vector an instance of a TimeMicroVector. - * @param currentRowSupplier the supplier to track the lines. - * @param setCursorWasNull the consumer to set if value was null. - */ - public ArrowFlightJdbcTimeVectorAccessor( - TimeMicroVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.holder = new Holder(); - this.getter = createGetter(vector); - this.timeUnit = getTimeUnitForVector(vector); - } - - /** - * Instantiate an accessor for a {@link TimeMilliVector}. - * - * @param vector an instance of a TimeMilliVector. - * @param currentRowSupplier the supplier to track the lines. - */ - public ArrowFlightJdbcTimeVectorAccessor( - TimeMilliVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.holder = new Holder(); - this.getter = createGetter(vector); - this.timeUnit = getTimeUnitForVector(vector); - } - - /** - * Instantiate an accessor for a {@link TimeSecVector}. 
- * - * @param vector an instance of a TimeSecVector. - * @param currentRowSupplier the supplier to track the lines. - */ - public ArrowFlightJdbcTimeVectorAccessor( - TimeSecVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.holder = new Holder(); - this.getter = createGetter(vector); - this.timeUnit = getTimeUnitForVector(vector); - } - - @Override - public Class getObjectClass() { - return Time.class; - } - - @Override - public Object getObject() { - return this.getTime(null); - } - - @Override - public Time getTime(Calendar calendar) { - fillHolder(); - if (this.wasNull) { - return null; - } - - long value = holder.value; - long milliseconds = this.timeUnit.toMillis(value); - - return new ArrowFlightJdbcTime(DateTimeUtils.applyCalendarOffset(milliseconds, calendar)); - } - - private void fillHolder() { - getter.get(getCurrentRow(), holder); - this.wasNull = holder.isSet == 0; - this.wasNullConsumer.setWasNull(this.wasNull); - } - - @Override - public Timestamp getTimestamp(Calendar calendar) { - Time time = getTime(calendar); - if (time == null) { - return null; - } - return new Timestamp(time.getTime()); - } - - protected static TimeUnit getTimeUnitForVector(ValueVector vector) { - if (vector instanceof TimeNanoVector) { - return TimeUnit.NANOSECONDS; - } else if (vector instanceof TimeMicroVector) { - return TimeUnit.MICROSECONDS; - } else if (vector instanceof TimeMilliVector) { - return TimeUnit.MILLISECONDS; - } else if (vector instanceof TimeSecVector) { - return TimeUnit.SECONDS; - } - - throw new IllegalArgumentException("Invalid Arrow vector"); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorGetter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorGetter.java 
deleted file mode 100644 index ebe159b318efd..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorGetter.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.calendar; - -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.holders.NullableTimeMicroHolder; -import org.apache.arrow.vector.holders.NullableTimeMilliHolder; -import org.apache.arrow.vector.holders.NullableTimeNanoHolder; -import org.apache.arrow.vector.holders.NullableTimeSecHolder; - -/** Auxiliary class used to unify data access on Time*Vectors. */ -final class ArrowFlightJdbcTimeVectorGetter { - - private ArrowFlightJdbcTimeVectorGetter() { - // Prevent instantiation. - } - - /** - * Auxiliary class meant to unify TimeStamp*Vector#get implementations with different classes of - * ValueHolders. 
- */ - static class Holder { - int isSet; // Tells if value is set; 0 = not set, 1 = set - long value; // Holds actual value in its respective timeunit - } - - /** Functional interface used to unify TimeStamp*Vector#get implementations. */ - @FunctionalInterface - interface Getter { - void get(int index, Holder holder); - } - - static Getter createGetter(TimeNanoVector vector) { - NullableTimeNanoHolder auxHolder = new NullableTimeNanoHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } - - static Getter createGetter(TimeMicroVector vector) { - NullableTimeMicroHolder auxHolder = new NullableTimeMicroHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } - - static Getter createGetter(TimeMilliVector vector) { - NullableTimeMilliHolder auxHolder = new NullableTimeMilliHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } - - static Getter createGetter(TimeSecVector vector) { - NullableTimeSecHolder auxHolder = new NullableTimeSecHolder(); - return (index, holder) -> { - vector.get(index, auxHolder); - holder.isSet = auxHolder.isSet; - holder.value = auxHolder.value; - }; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListVectorAccessor.java deleted file mode 100644 index cbd07b7d4ca5e..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListVectorAccessor.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or 
more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import java.sql.Array; -import java.util.List; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.ArrowFlightJdbcArray; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; - -/** - * Base Accessor for the Arrow types {@link ListVector}, {@link LargeListVector} and {@link - * FixedSizeListVector}. 
- */ -public abstract class AbstractArrowFlightJdbcListVectorAccessor extends ArrowFlightJdbcAccessor { - - protected AbstractArrowFlightJdbcListVectorAccessor( - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - } - - @Override - public Class getObjectClass() { - return List.class; - } - - protected abstract long getStartOffset(int index); - - protected abstract long getEndOffset(int index); - - protected abstract FieldVector getDataVector(); - - protected abstract boolean isNull(int index); - - @Override - public final Array getArray() { - int index = getCurrentRow(); - FieldVector dataVector = getDataVector(); - - this.wasNull = isNull(index); - this.wasNullConsumer.setWasNull(this.wasNull); - if (this.wasNull) { - return null; - } - - long startOffset = getStartOffset(index); - long endOffset = getEndOffset(index); - - long valuesCount = endOffset - startOffset; - return new ArrowFlightJdbcArray(dataVector, startOffset, valuesCount); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessor.java deleted file mode 100644 index 99364be2abdd6..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessor.java +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import java.io.InputStream; -import java.io.Reader; -import java.math.BigDecimal; -import java.net.URL; -import java.sql.Array; -import java.sql.Blob; -import java.sql.Clob; -import java.sql.Date; -import java.sql.NClob; -import java.sql.Ref; -import java.sql.SQLException; -import java.sql.SQLXML; -import java.sql.Struct; -import java.sql.Time; -import java.sql.Timestamp; -import java.util.Calendar; -import java.util.Map; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.driver.jdbc.accessor.impl.ArrowFlightJdbcNullVectorAccessor; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.UnionVector; - -/** Base accessor for {@link UnionVector} and {@link DenseUnionVector}. */ -public abstract class AbstractArrowFlightJdbcUnionVectorAccessor extends ArrowFlightJdbcAccessor { - - /** - * Array of accessors for each type contained in UnionVector. Index corresponds to UnionVector and - * DenseUnionVector typeIds which are both limited to 128. 
- */ - private final ArrowFlightJdbcAccessor[] accessors = new ArrowFlightJdbcAccessor[128]; - - private final ArrowFlightJdbcNullVectorAccessor nullAccessor = - new ArrowFlightJdbcNullVectorAccessor((boolean wasNull) -> {}); - - protected AbstractArrowFlightJdbcUnionVectorAccessor( - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - } - - protected abstract ArrowFlightJdbcAccessor createAccessorForVector(ValueVector vector); - - protected abstract byte getCurrentTypeId(); - - protected abstract ValueVector getVectorByTypeId(byte typeId); - - /** - * Returns an accessor for UnionVector child vector on current row. - * - * @return ArrowFlightJdbcAccessor for child vector on current row. - */ - protected ArrowFlightJdbcAccessor getAccessor() { - // Get the typeId and child vector for the current row being accessed. - byte typeId = this.getCurrentTypeId(); - ValueVector vector = this.getVectorByTypeId(typeId); - - if (typeId < 0) { - // typeId may be negative if the current row has no type defined. 
- return this.nullAccessor; - } - - // Ensure there is an accessor for given typeId - if (this.accessors[typeId] == null) { - this.accessors[typeId] = this.createAccessorForVector(vector); - } - - return this.accessors[typeId]; - } - - @Override - public Class getObjectClass() { - return getAccessor().getObjectClass(); - } - - @Override - public boolean wasNull() { - return getAccessor().wasNull(); - } - - @Override - public String getString() throws SQLException { - return getAccessor().getString(); - } - - @Override - public boolean getBoolean() throws SQLException { - return getAccessor().getBoolean(); - } - - @Override - public byte getByte() throws SQLException { - return getAccessor().getByte(); - } - - @Override - public short getShort() throws SQLException { - return getAccessor().getShort(); - } - - @Override - public int getInt() throws SQLException { - return getAccessor().getInt(); - } - - @Override - public long getLong() throws SQLException { - return getAccessor().getLong(); - } - - @Override - public float getFloat() throws SQLException { - return getAccessor().getFloat(); - } - - @Override - public double getDouble() throws SQLException { - return getAccessor().getDouble(); - } - - @Override - public BigDecimal getBigDecimal() throws SQLException { - return getAccessor().getBigDecimal(); - } - - @Override - public BigDecimal getBigDecimal(int i) throws SQLException { - return getAccessor().getBigDecimal(i); - } - - @Override - public byte[] getBytes() throws SQLException { - return getAccessor().getBytes(); - } - - @Override - public InputStream getAsciiStream() throws SQLException { - return getAccessor().getAsciiStream(); - } - - @Override - public InputStream getUnicodeStream() throws SQLException { - return getAccessor().getUnicodeStream(); - } - - @Override - public InputStream getBinaryStream() throws SQLException { - return getAccessor().getBinaryStream(); - } - - @Override - public Object getObject() throws SQLException { - return 
getAccessor().getObject(); - } - - @Override - public Reader getCharacterStream() throws SQLException { - return getAccessor().getCharacterStream(); - } - - @Override - public Object getObject(Map> map) throws SQLException { - return getAccessor().getObject(map); - } - - @Override - public Ref getRef() throws SQLException { - return getAccessor().getRef(); - } - - @Override - public Blob getBlob() throws SQLException { - return getAccessor().getBlob(); - } - - @Override - public Clob getClob() throws SQLException { - return getAccessor().getClob(); - } - - @Override - public Array getArray() throws SQLException { - return getAccessor().getArray(); - } - - @Override - public Struct getStruct() throws SQLException { - return getAccessor().getStruct(); - } - - @Override - public Date getDate(Calendar calendar) throws SQLException { - return getAccessor().getDate(calendar); - } - - @Override - public Time getTime(Calendar calendar) throws SQLException { - return getAccessor().getTime(calendar); - } - - @Override - public Timestamp getTimestamp(Calendar calendar) throws SQLException { - return getAccessor().getTimestamp(calendar); - } - - @Override - public URL getURL() throws SQLException { - return getAccessor().getURL(); - } - - @Override - public NClob getNClob() throws SQLException { - return getAccessor().getNClob(); - } - - @Override - public SQLXML getSQLXML() throws SQLException { - return getAccessor().getSQLXML(); - } - - @Override - public String getNString() throws SQLException { - return getAccessor().getNString(); - } - - @Override - public Reader getNCharacterStream() throws SQLException { - return getAccessor().getNCharacterStream(); - } - - @Override - public T getObject(Class type) throws SQLException { - return getAccessor().getObject(type); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessor.java 
b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessor.java deleted file mode 100644 index b847cc74f0e19..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessor.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.DenseUnionVector; - -/** Accessor for the Arrow type {@link DenseUnionVector}. */ -public class ArrowFlightJdbcDenseUnionVectorAccessor - extends AbstractArrowFlightJdbcUnionVectorAccessor { - - private final DenseUnionVector vector; - - /** - * Instantiate an accessor for a {@link DenseUnionVector}. - * - * @param vector an instance of a DenseUnionVector. - * @param currentRowSupplier the supplier to track the rows. 
- * @param setCursorWasNull the consumer to set if value was null. - */ - public ArrowFlightJdbcDenseUnionVectorAccessor( - DenseUnionVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.vector = vector; - } - - @Override - protected ArrowFlightJdbcAccessor createAccessorForVector(ValueVector vector) { - return ArrowFlightJdbcAccessorFactory.createAccessor( - vector, () -> this.vector.getOffset(this.getCurrentRow()), (boolean wasNull) -> {}); - } - - @Override - protected byte getCurrentTypeId() { - int index = getCurrentRow(); - return this.vector.getTypeId(index); - } - - @Override - protected ValueVector getVectorByTypeId(byte typeId) { - return this.vector.getVectorByType(typeId); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcFixedSizeListVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcFixedSizeListVectorAccessor.java deleted file mode 100644 index 970418475ea32..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcFixedSizeListVectorAccessor.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import java.util.List; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; - -/** Accessor for the Arrow type {@link FixedSizeListVector}. */ -public class ArrowFlightJdbcFixedSizeListVectorAccessor - extends AbstractArrowFlightJdbcListVectorAccessor { - - private final FixedSizeListVector vector; - - public ArrowFlightJdbcFixedSizeListVectorAccessor( - FixedSizeListVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.vector = vector; - } - - @Override - protected long getStartOffset(int index) { - return (long) vector.getListSize() * index; - } - - @Override - protected long getEndOffset(int index) { - return (long) vector.getListSize() * (index + 1); - } - - @Override - protected FieldVector getDataVector() { - return vector.getDataVector(); - } - - @Override - protected boolean isNull(int index) { - return vector.isNull(index); - } - - @Override - public Object getObject() { - List object = vector.getObject(getCurrentRow()); - this.wasNull = object == null; - this.wasNullConsumer.setWasNull(this.wasNull); - - return object; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcLargeListVectorAccessor.java 
b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcLargeListVectorAccessor.java deleted file mode 100644 index 33e1a85897be6..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcLargeListVectorAccessor.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import java.util.List; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.complex.LargeListVector; - -/** Accessor for the Arrow type {@link LargeListVector}. 
*/ -public class ArrowFlightJdbcLargeListVectorAccessor - extends AbstractArrowFlightJdbcListVectorAccessor { - - private final LargeListVector vector; - - public ArrowFlightJdbcLargeListVectorAccessor( - LargeListVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.vector = vector; - } - - @Override - protected long getStartOffset(int index) { - return vector.getOffsetBuffer().getLong((long) index * LargeListVector.OFFSET_WIDTH); - } - - @Override - protected long getEndOffset(int index) { - return vector.getOffsetBuffer().getLong((long) (index + 1) * LargeListVector.OFFSET_WIDTH); - } - - @Override - protected FieldVector getDataVector() { - return vector.getDataVector(); - } - - @Override - protected boolean isNull(int index) { - return vector.isNull(index); - } - - @Override - public Object getObject() { - List object = vector.getObject(getCurrentRow()); - this.wasNull = object == null; - this.wasNullConsumer.setWasNull(this.wasNull); - - return object; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcListVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcListVectorAccessor.java deleted file mode 100644 index 8827fdf6a08c1..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcListVectorAccessor.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import java.util.List; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.complex.BaseRepeatedValueVector; -import org.apache.arrow.vector.complex.ListVector; - -/** Accessor for the Arrow type {@link ListVector}. 
*/ -public class ArrowFlightJdbcListVectorAccessor extends AbstractArrowFlightJdbcListVectorAccessor { - - private final ListVector vector; - - public ArrowFlightJdbcListVectorAccessor( - ListVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.vector = vector; - } - - @Override - protected long getStartOffset(int index) { - return vector.getOffsetBuffer().getInt((long) index * BaseRepeatedValueVector.OFFSET_WIDTH); - } - - @Override - protected long getEndOffset(int index) { - return vector - .getOffsetBuffer() - .getInt((long) (index + 1) * BaseRepeatedValueVector.OFFSET_WIDTH); - } - - @Override - protected FieldVector getDataVector() { - return vector.getDataVector(); - } - - @Override - protected boolean isNull(int index) { - return vector.isNull(index); - } - - @Override - public Object getObject() { - List object = vector.getObject(getCurrentRow()); - this.wasNull = object == null; - this.wasNullConsumer.setWasNull(this.wasNull); - - return object; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessor.java deleted file mode 100644 index 32336f31ecf1b..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessor.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import java.util.Map; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.complex.BaseRepeatedValueVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.impl.UnionMapReader; -import org.apache.arrow.vector.util.JsonStringHashMap; - -/** Accessor for the Arrow type {@link MapVector}. 
*/ -public class ArrowFlightJdbcMapVectorAccessor extends AbstractArrowFlightJdbcListVectorAccessor { - - private final MapVector vector; - - public ArrowFlightJdbcMapVectorAccessor( - MapVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.vector = vector; - } - - @Override - public Class getObjectClass() { - return Map.class; - } - - @Override - public Object getObject() { - int index = getCurrentRow(); - - this.wasNull = vector.isNull(index); - this.wasNullConsumer.setWasNull(this.wasNull); - if (this.wasNull) { - return null; - } - - Map result = new JsonStringHashMap<>(); - UnionMapReader reader = vector.getReader(); - - reader.setPosition(index); - while (reader.next()) { - Object key = reader.key().readObject(); - Object value = reader.value().readObject(); - - result.put(key, value); - } - - return result; - } - - @Override - protected long getStartOffset(int index) { - return vector.getOffsetBuffer().getInt((long) index * BaseRepeatedValueVector.OFFSET_WIDTH); - } - - @Override - protected long getEndOffset(int index) { - return vector - .getOffsetBuffer() - .getInt((long) (index + 1) * BaseRepeatedValueVector.OFFSET_WIDTH); - } - - @Override - protected boolean isNull(int index) { - return vector.isNull(index); - } - - @Override - protected FieldVector getDataVector() { - return vector.getDataVector(); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessor.java deleted file mode 100644 index b5f054edcb883..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessor.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import java.sql.Struct; -import java.util.List; -import java.util.Map; -import java.util.function.IntSupplier; -import java.util.stream.Collectors; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.calcite.avatica.util.StructImpl; - -/** Accessor for the Arrow type {@link StructVector}. 
*/ -public class ArrowFlightJdbcStructVectorAccessor extends ArrowFlightJdbcAccessor { - - private final StructVector vector; - - public ArrowFlightJdbcStructVectorAccessor( - StructVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.vector = vector; - } - - @Override - public Class getObjectClass() { - return Map.class; - } - - @Override - public Object getObject() { - Map object = vector.getObject(getCurrentRow()); - this.wasNull = object == null; - this.wasNullConsumer.setWasNull(this.wasNull); - - return object; - } - - @Override - public Struct getStruct() { - int currentRow = getCurrentRow(); - - this.wasNull = vector.isNull(currentRow); - this.wasNullConsumer.setWasNull(this.wasNull); - if (this.wasNull) { - return null; - } - - List attributes = - vector.getChildrenFromFields().stream() - .map(vector -> vector.getObject(currentRow)) - .collect(Collectors.toList()); - - return new StructImpl(attributes); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessor.java deleted file mode 100644 index 5b4d7dce38b38..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessor.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.UnionVector; - -/** Accessor for the Arrow type {@link UnionVector}. */ -public class ArrowFlightJdbcUnionVectorAccessor extends AbstractArrowFlightJdbcUnionVectorAccessor { - - private final UnionVector vector; - - /** - * Instantiate an accessor for a {@link UnionVector}. - * - * @param vector an instance of a UnionVector. - * @param currentRowSupplier the supplier to track the rows. - * @param setCursorWasNull the consumer to set if value was null. 
- */ - public ArrowFlightJdbcUnionVectorAccessor( - UnionVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.vector = vector; - } - - @Override - protected ArrowFlightJdbcAccessor createAccessorForVector(ValueVector vector) { - return ArrowFlightJdbcAccessorFactory.createAccessor( - vector, this::getCurrentRow, (boolean wasNull) -> {}); - } - - @Override - protected byte getCurrentTypeId() { - int index = getCurrentRow(); - return (byte) this.vector.getTypeValue(index); - } - - @Override - protected ValueVector getVectorByTypeId(byte typeId) { - return this.vector.getVectorByType(typeId); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessor.java deleted file mode 100644 index cdddbb327e0f5..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessor.java +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.numeric; - -import static org.apache.arrow.driver.jdbc.accessor.impl.numeric.ArrowFlightJdbcNumericGetter.Getter; -import static org.apache.arrow.driver.jdbc.accessor.impl.numeric.ArrowFlightJdbcNumericGetter.createGetter; - -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.driver.jdbc.accessor.impl.numeric.ArrowFlightJdbcNumericGetter.NumericHolder; -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.types.Types.MinorType; - -/** - * Accessor for the arrow types: TinyIntVector, SmallIntVector, IntVector, BigIntVector, - * UInt1Vector, UInt2Vector, UInt4Vector and UInt8Vector. 
- */ -public class ArrowFlightJdbcBaseIntVectorAccessor extends ArrowFlightJdbcAccessor { - - private final MinorType type; - private final boolean isUnsigned; - private final Getter getter; - private final NumericHolder holder; - - public ArrowFlightJdbcBaseIntVectorAccessor( - UInt1Vector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - this(vector, currentRowSupplier, true, setCursorWasNull); - } - - public ArrowFlightJdbcBaseIntVectorAccessor( - UInt2Vector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - this(vector, currentRowSupplier, true, setCursorWasNull); - } - - public ArrowFlightJdbcBaseIntVectorAccessor( - UInt4Vector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - this(vector, currentRowSupplier, true, setCursorWasNull); - } - - public ArrowFlightJdbcBaseIntVectorAccessor( - UInt8Vector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - this(vector, currentRowSupplier, true, setCursorWasNull); - } - - public ArrowFlightJdbcBaseIntVectorAccessor( - TinyIntVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - this(vector, currentRowSupplier, false, setCursorWasNull); - } - - public ArrowFlightJdbcBaseIntVectorAccessor( - SmallIntVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - this(vector, currentRowSupplier, false, setCursorWasNull); - } - - public ArrowFlightJdbcBaseIntVectorAccessor( - IntVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - this(vector, currentRowSupplier, false, setCursorWasNull); - } - - public ArrowFlightJdbcBaseIntVectorAccessor( - BigIntVector vector, - IntSupplier 
currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - this(vector, currentRowSupplier, false, setCursorWasNull); - } - - private ArrowFlightJdbcBaseIntVectorAccessor( - BaseIntVector vector, - IntSupplier currentRowSupplier, - boolean isUnsigned, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.type = vector.getMinorType(); - this.holder = new NumericHolder(); - this.getter = createGetter(vector); - this.isUnsigned = isUnsigned; - } - - @Override - public long getLong() { - getter.get(getCurrentRow(), holder); - - this.wasNull = holder.isSet == 0; - this.wasNullConsumer.setWasNull(this.wasNull); - if (this.wasNull) { - return 0; - } - - return holder.value; - } - - @Override - public Class getObjectClass() { - return Long.class; - } - - @Override - public String getString() { - final long number = getLong(); - - if (this.wasNull) { - return null; - } else { - return isUnsigned ? Long.toUnsignedString(number) : Long.toString(number); - } - } - - @Override - public byte getByte() { - return (byte) getLong(); - } - - @Override - public short getShort() { - return (short) getLong(); - } - - @Override - public int getInt() { - return (int) getLong(); - } - - @Override - public float getFloat() { - return (float) getLong(); - } - - @Override - public double getDouble() { - return (double) getLong(); - } - - @Override - public BigDecimal getBigDecimal() { - final BigDecimal value = BigDecimal.valueOf(getLong()); - return this.wasNull ? null : value; - } - - @Override - public BigDecimal getBigDecimal(int scale) { - final BigDecimal value = - BigDecimal.valueOf(this.getDouble()).setScale(scale, RoundingMode.HALF_UP); - return this.wasNull ? 
null : value; - } - - @Override - public Number getObject() { - final Number number; - switch (type) { - case TINYINT: - case UINT1: - number = getByte(); - break; - case SMALLINT: - case UINT2: - number = getShort(); - break; - case INT: - case UINT4: - number = getInt(); - break; - case BIGINT: - case UINT8: - number = getLong(); - break; - default: - throw new IllegalStateException("No valid MinorType was provided."); - } - return wasNull ? null : number; - } - - @Override - public boolean getBoolean() { - final long value = getLong(); - - return value != 0; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessor.java deleted file mode 100644 index 65252d5e068d6..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessor.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.numeric; - -import java.math.BigDecimal; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.holders.NullableBitHolder; - -/** Accessor for the arrow {@link BitVector}. */ -public class ArrowFlightJdbcBitVectorAccessor extends ArrowFlightJdbcAccessor { - - private final BitVector vector; - private final NullableBitHolder holder; - - /** - * Constructor for the BitVectorAccessor. - * - * @param vector an instance of a {@link BitVector}. - * @param currentRowSupplier a supplier to check which row is being accessed. - * @param setCursorWasNull the consumer to set if value was null. - */ - public ArrowFlightJdbcBitVectorAccessor( - BitVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.vector = vector; - this.holder = new NullableBitHolder(); - } - - @Override - public Class getObjectClass() { - return Boolean.class; - } - - @Override - public String getString() { - final boolean value = getBoolean(); - return wasNull ? 
null : Boolean.toString(value); - } - - @Override - public boolean getBoolean() { - return this.getLong() != 0; - } - - @Override - public byte getByte() { - return (byte) this.getLong(); - } - - @Override - public short getShort() { - return (short) this.getLong(); - } - - @Override - public int getInt() { - return (int) this.getLong(); - } - - @Override - public long getLong() { - vector.get(getCurrentRow(), holder); - - this.wasNull = holder.isSet == 0; - this.wasNullConsumer.setWasNull(this.wasNull); - if (this.wasNull) { - return 0; - } - - return holder.value; - } - - @Override - public float getFloat() { - return this.getLong(); - } - - @Override - public double getDouble() { - return this.getLong(); - } - - @Override - public BigDecimal getBigDecimal() { - final long value = this.getLong(); - - return this.wasNull ? null : BigDecimal.valueOf(value); - } - - @Override - public Object getObject() { - final boolean value = this.getBoolean(); - return this.wasNull ? null : value; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessor.java deleted file mode 100644 index 325e9bc700ad9..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessor.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.numeric; - -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; - -/** Accessor for {@link DecimalVector} and {@link Decimal256Vector}. */ -public class ArrowFlightJdbcDecimalVectorAccessor extends ArrowFlightJdbcAccessor { - - private final Getter getter; - - /** Functional interface used to unify Decimal*Vector#getObject implementations. 
*/ - @FunctionalInterface - interface Getter { - BigDecimal getObject(int index); - } - - public ArrowFlightJdbcDecimalVectorAccessor( - DecimalVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.getter = vector::getObject; - } - - public ArrowFlightJdbcDecimalVectorAccessor( - Decimal256Vector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.getter = vector::getObject; - } - - @Override - public Class getObjectClass() { - return BigDecimal.class; - } - - @Override - public BigDecimal getBigDecimal() { - final BigDecimal value = getter.getObject(getCurrentRow()); - this.wasNull = value == null; - this.wasNullConsumer.setWasNull(this.wasNull); - return value; - } - - @Override - public String getString() { - final BigDecimal value = this.getBigDecimal(); - return this.wasNull ? null : value.toString(); - } - - @Override - public boolean getBoolean() { - final BigDecimal value = this.getBigDecimal(); - - return !this.wasNull && !value.equals(BigDecimal.ZERO); - } - - @Override - public byte getByte() { - final BigDecimal value = this.getBigDecimal(); - - return this.wasNull ? 0 : value.byteValue(); - } - - @Override - public short getShort() { - final BigDecimal value = this.getBigDecimal(); - - return this.wasNull ? 0 : value.shortValue(); - } - - @Override - public int getInt() { - final BigDecimal value = this.getBigDecimal(); - - return this.wasNull ? 0 : value.intValue(); - } - - @Override - public long getLong() { - final BigDecimal value = this.getBigDecimal(); - - return this.wasNull ? 0 : value.longValue(); - } - - @Override - public float getFloat() { - final BigDecimal value = this.getBigDecimal(); - - return this.wasNull ? 
0 : value.floatValue(); - } - - @Override - public double getDouble() { - final BigDecimal value = this.getBigDecimal(); - - return this.wasNull ? 0 : value.doubleValue(); - } - - @Override - public BigDecimal getBigDecimal(int scale) { - final BigDecimal value = this.getBigDecimal(); - - return this.wasNull ? null : value.setScale(scale, RoundingMode.HALF_UP); - } - - @Override - public Object getObject() { - return this.getBigDecimal(); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessor.java deleted file mode 100644 index 4d9d0d7261876..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessor.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.numeric; - -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.sql.SQLException; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.holders.NullableFloat4Holder; - -/** Accessor for the Float4Vector. */ -public class ArrowFlightJdbcFloat4VectorAccessor extends ArrowFlightJdbcAccessor { - - private final Float4Vector vector; - private final NullableFloat4Holder holder; - - /** - * Instantiate a accessor for the {@link Float4Vector}. - * - * @param vector an instance of a Float4Vector. - * @param currentRowSupplier the supplier to track the lines. - * @param setCursorWasNull the consumer to set if value was null. - */ - public ArrowFlightJdbcFloat4VectorAccessor( - Float4Vector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.holder = new NullableFloat4Holder(); - this.vector = vector; - } - - @Override - public Class getObjectClass() { - return Float.class; - } - - @Override - public String getString() { - final float value = this.getFloat(); - - return this.wasNull ? 
null : Float.toString(value); - } - - @Override - public boolean getBoolean() { - return this.getFloat() != 0.0; - } - - @Override - public byte getByte() { - return (byte) this.getFloat(); - } - - @Override - public short getShort() { - return (short) this.getFloat(); - } - - @Override - public int getInt() { - return (int) this.getFloat(); - } - - @Override - public long getLong() { - return (long) this.getFloat(); - } - - @Override - public float getFloat() { - vector.get(getCurrentRow(), holder); - - this.wasNull = holder.isSet == 0; - this.wasNullConsumer.setWasNull(this.wasNull); - if (this.wasNull) { - return 0; - } - - return holder.value; - } - - @Override - public double getDouble() { - return this.getFloat(); - } - - @Override - public BigDecimal getBigDecimal() throws SQLException { - final float value = this.getFloat(); - - if (Float.isInfinite(value) || Float.isNaN(value)) { - throw new SQLException("BigDecimal doesn't support Infinite/NaN."); - } - - return this.wasNull ? null : BigDecimal.valueOf(value); - } - - @Override - public BigDecimal getBigDecimal(int scale) throws SQLException { - final float value = this.getFloat(); - if (Float.isInfinite(value) || Float.isNaN(value)) { - throw new SQLException("BigDecimal doesn't support Infinite/NaN."); - } - return this.wasNull ? null : BigDecimal.valueOf(value).setScale(scale, RoundingMode.HALF_UP); - } - - @Override - public Object getObject() { - final float value = this.getFloat(); - return this.wasNull ? 
null : value; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessor.java deleted file mode 100644 index 10dbb1351949f..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessor.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.numeric; - -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.sql.SQLException; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.holders.NullableFloat8Holder; - -/** Accessor for the Float8Vector. 
*/ -public class ArrowFlightJdbcFloat8VectorAccessor extends ArrowFlightJdbcAccessor { - - private final Float8Vector vector; - private final NullableFloat8Holder holder; - - /** - * Instantiate a accessor for the {@link Float8Vector}. - * - * @param vector an instance of a Float8Vector. - * @param currentRowSupplier the supplier to track the lines. - * @param setCursorWasNull the consumer to set if value was null. - */ - public ArrowFlightJdbcFloat8VectorAccessor( - Float8Vector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.holder = new NullableFloat8Holder(); - this.vector = vector; - } - - @Override - public Class getObjectClass() { - return Double.class; - } - - @Override - public double getDouble() { - vector.get(getCurrentRow(), holder); - - this.wasNull = holder.isSet == 0; - this.wasNullConsumer.setWasNull(this.wasNull); - if (this.wasNull) { - return 0; - } - - return holder.value; - } - - @Override - public Object getObject() { - final double value = this.getDouble(); - - return this.wasNull ? null : value; - } - - @Override - public String getString() { - final double value = this.getDouble(); - return this.wasNull ? 
null : Double.toString(value); - } - - @Override - public boolean getBoolean() { - return this.getDouble() != 0.0; - } - - @Override - public byte getByte() { - return (byte) this.getDouble(); - } - - @Override - public short getShort() { - return (short) this.getDouble(); - } - - @Override - public int getInt() { - return (int) this.getDouble(); - } - - @Override - public long getLong() { - return (long) this.getDouble(); - } - - @Override - public float getFloat() { - return (float) this.getDouble(); - } - - @Override - public BigDecimal getBigDecimal() throws SQLException { - final double value = this.getDouble(); - if (Double.isInfinite(value) || Double.isNaN(value)) { - throw new SQLException("BigDecimal doesn't support Infinite/NaN."); - } - return this.wasNull ? null : BigDecimal.valueOf(value); - } - - @Override - public BigDecimal getBigDecimal(int scale) throws SQLException { - final double value = this.getDouble(); - if (Double.isInfinite(value) || Double.isNaN(value)) { - throw new SQLException("BigDecimal doesn't support Infinite/NaN."); - } - return this.wasNull ? null : BigDecimal.valueOf(value).setScale(scale, RoundingMode.HALF_UP); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcNumericGetter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcNumericGetter.java deleted file mode 100644 index 5e96cc9e0986b..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcNumericGetter.java +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.numeric; - -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.holders.NullableBigIntHolder; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.holders.NullableSmallIntHolder; -import org.apache.arrow.vector.holders.NullableTinyIntHolder; -import org.apache.arrow.vector.holders.NullableUInt1Holder; -import org.apache.arrow.vector.holders.NullableUInt2Holder; -import org.apache.arrow.vector.holders.NullableUInt4Holder; -import org.apache.arrow.vector.holders.NullableUInt8Holder; - -/** A custom getter for values from the {@link BaseIntVector}. */ -class ArrowFlightJdbcNumericGetter { - /** A holder for values from the {@link BaseIntVector}. */ - static class NumericHolder { - int isSet; // Tells if value is set; 0 = not set, 1 = set - long value; // Holds actual value - } - - /** Functional interface for a getter to baseInt values. 
*/ - @FunctionalInterface - interface Getter { - void get(int index, NumericHolder holder); - } - - /** - * Main class that will check the type of the vector to create a specific getter. - * - * @param vector an instance of the {@link BaseIntVector} - * @return a getter. - */ - static Getter createGetter(BaseIntVector vector) { - if (vector instanceof UInt1Vector) { - return createGetter((UInt1Vector) vector); - } else if (vector instanceof UInt2Vector) { - return createGetter((UInt2Vector) vector); - } else if (vector instanceof UInt4Vector) { - return createGetter((UInt4Vector) vector); - } else if (vector instanceof UInt8Vector) { - return createGetter((UInt8Vector) vector); - } else if (vector instanceof TinyIntVector) { - return createGetter((TinyIntVector) vector); - } else if (vector instanceof SmallIntVector) { - return createGetter((SmallIntVector) vector); - } else if (vector instanceof IntVector) { - return createGetter((IntVector) vector); - } else if (vector instanceof BigIntVector) { - return createGetter((BigIntVector) vector); - } - - throw new UnsupportedOperationException("No valid IntVector was provided."); - } - - /** - * Create a specific getter for {@link UInt1Vector}. - * - * @param vector an instance of the {@link UInt1Vector} - * @return a getter. - */ - private static Getter createGetter(UInt1Vector vector) { - NullableUInt1Holder nullableUInt1Holder = new NullableUInt1Holder(); - - return (index, holder) -> { - vector.get(index, nullableUInt1Holder); - - holder.isSet = nullableUInt1Holder.isSet; - holder.value = nullableUInt1Holder.value; - }; - } - - /** - * Create a specific getter for {@link UInt2Vector}. - * - * @param vector an instance of the {@link UInt2Vector} - * @return a getter. 
- */ - private static Getter createGetter(UInt2Vector vector) { - NullableUInt2Holder nullableUInt2Holder = new NullableUInt2Holder(); - return (index, holder) -> { - vector.get(index, nullableUInt2Holder); - - holder.isSet = nullableUInt2Holder.isSet; - holder.value = nullableUInt2Holder.value; - }; - } - - /** - * Create a specific getter for {@link UInt4Vector}. - * - * @param vector an instance of the {@link UInt4Vector} - * @return a getter. - */ - private static Getter createGetter(UInt4Vector vector) { - NullableUInt4Holder nullableUInt4Holder = new NullableUInt4Holder(); - return (index, holder) -> { - vector.get(index, nullableUInt4Holder); - - holder.isSet = nullableUInt4Holder.isSet; - holder.value = nullableUInt4Holder.value; - }; - } - - /** - * Create a specific getter for {@link UInt8Vector}. - * - * @param vector an instance of the {@link UInt8Vector} - * @return a getter. - */ - private static Getter createGetter(UInt8Vector vector) { - NullableUInt8Holder nullableUInt8Holder = new NullableUInt8Holder(); - return (index, holder) -> { - vector.get(index, nullableUInt8Holder); - - holder.isSet = nullableUInt8Holder.isSet; - holder.value = nullableUInt8Holder.value; - }; - } - - /** - * Create a specific getter for {@link TinyIntVector}. - * - * @param vector an instance of the {@link TinyIntVector} - * @return a getter. - */ - private static Getter createGetter(TinyIntVector vector) { - NullableTinyIntHolder nullableTinyIntHolder = new NullableTinyIntHolder(); - return (index, holder) -> { - vector.get(index, nullableTinyIntHolder); - - holder.isSet = nullableTinyIntHolder.isSet; - holder.value = nullableTinyIntHolder.value; - }; - } - - /** - * Create a specific getter for {@link SmallIntVector}. - * - * @param vector an instance of the {@link SmallIntVector} - * @return a getter. 
- */ - private static Getter createGetter(SmallIntVector vector) { - NullableSmallIntHolder nullableSmallIntHolder = new NullableSmallIntHolder(); - return (index, holder) -> { - vector.get(index, nullableSmallIntHolder); - - holder.isSet = nullableSmallIntHolder.isSet; - holder.value = nullableSmallIntHolder.value; - }; - } - - /** - * Create a specific getter for {@link IntVector}. - * - * @param vector an instance of the {@link IntVector} - * @return a getter. - */ - private static Getter createGetter(IntVector vector) { - NullableIntHolder nullableIntHolder = new NullableIntHolder(); - return (index, holder) -> { - vector.get(index, nullableIntHolder); - - holder.isSet = nullableIntHolder.isSet; - holder.value = nullableIntHolder.value; - }; - } - - /** - * Create a specific getter for {@link BigIntVector}. - * - * @param vector an instance of the {@link BigIntVector} - * @return a getter. - */ - private static Getter createGetter(BigIntVector vector) { - NullableBigIntHolder nullableBigIntHolder = new NullableBigIntHolder(); - return (index, holder) -> { - vector.get(index, nullableBigIntHolder); - - holder.isSet = nullableBigIntHolder.isSet; - holder.value = nullableBigIntHolder.value; - }; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java deleted file mode 100644 index ebebf6ca747ce..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessor.java +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.text; - -import static java.nio.charset.StandardCharsets.US_ASCII; - -import java.io.ByteArrayInputStream; -import java.io.CharArrayReader; -import java.io.InputStream; -import java.io.Reader; -import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; -import java.sql.Date; -import java.sql.SQLException; -import java.sql.Time; -import java.sql.Timestamp; -import java.util.Calendar; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.driver.jdbc.utils.DateTimeUtils; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.util.Text; - -/** Accessor for the Arrow types: {@link VarCharVector} and {@link LargeVarCharVector}. */ -public class ArrowFlightJdbcVarCharVectorAccessor extends ArrowFlightJdbcAccessor { - - /** Functional interface to help integrating VarCharVector and LargeVarCharVector. 
*/ - @FunctionalInterface - interface Getter { - byte[] get(int index); - } - - private final Getter getter; - - public ArrowFlightJdbcVarCharVectorAccessor( - VarCharVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - this(vector::get, currentRowSupplier, setCursorWasNull); - } - - public ArrowFlightJdbcVarCharVectorAccessor( - LargeVarCharVector vector, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - this(vector::get, currentRowSupplier, setCursorWasNull); - } - - ArrowFlightJdbcVarCharVectorAccessor( - Getter getter, - IntSupplier currentRowSupplier, - ArrowFlightJdbcAccessorFactory.WasNullConsumer setCursorWasNull) { - super(currentRowSupplier, setCursorWasNull); - this.getter = getter; - } - - @Override - public Class getObjectClass() { - return String.class; - } - - @Override - public String getObject() { - final byte[] bytes = getBytes(); - return bytes == null ? 
null : new String(bytes, StandardCharsets.UTF_8); - } - - @Override - public String getString() { - return getObject(); - } - - @Override - public byte[] getBytes() { - final byte[] bytes = this.getter.get(getCurrentRow()); - this.wasNull = bytes == null; - this.wasNullConsumer.setWasNull(this.wasNull); - return bytes; - } - - @Override - public boolean getBoolean() throws SQLException { - String value = getString(); - if (value == null || value.equalsIgnoreCase("false") || value.equals("0")) { - return false; - } else if (value.equalsIgnoreCase("true") || value.equals("1")) { - return true; - } else { - throw new SQLException("It is not possible to convert this value to boolean: " + value); - } - } - - @Override - public byte getByte() throws SQLException { - try { - return Byte.parseByte(this.getString()); - } catch (Exception e) { - throw new SQLException(e); - } - } - - @Override - public short getShort() throws SQLException { - try { - return Short.parseShort(this.getString()); - } catch (Exception e) { - throw new SQLException(e); - } - } - - @Override - public int getInt() throws SQLException { - try { - return Integer.parseInt(this.getString()); - } catch (Exception e) { - throw new SQLException(e); - } - } - - @Override - public long getLong() throws SQLException { - try { - return Long.parseLong(this.getString()); - } catch (Exception e) { - throw new SQLException(e); - } - } - - @Override - public float getFloat() throws SQLException { - try { - return Float.parseFloat(this.getString()); - } catch (Exception e) { - throw new SQLException(e); - } - } - - @Override - public double getDouble() throws SQLException { - try { - return Double.parseDouble(this.getString()); - } catch (Exception e) { - throw new SQLException(e); - } - } - - @Override - public BigDecimal getBigDecimal() throws SQLException { - try { - return new BigDecimal(this.getString()); - } catch (Exception e) { - throw new SQLException(e); - } - } - - @Override - public BigDecimal 
getBigDecimal(int i) throws SQLException { - try { - return BigDecimal.valueOf(this.getLong(), i); - } catch (Exception e) { - throw new SQLException(e); - } - } - - @Override - public InputStream getAsciiStream() { - final String textValue = getString(); - if (textValue == null) { - return null; - } - // Already in UTF-8 - return new ByteArrayInputStream(textValue.getBytes(US_ASCII)); - } - - @Override - public InputStream getUnicodeStream() { - final byte[] value = getBytes(); - if (value == null) { - return null; - } - - // Already in UTF-8 - final Text textValue = new Text(value); - return new ByteArrayInputStream(textValue.getBytes(), 0, (int) textValue.getLength()); - } - - @Override - public Reader getCharacterStream() { - return new CharArrayReader(getString().toCharArray()); - } - - @Override - public Date getDate(Calendar calendar) throws SQLException { - try { - Date date = Date.valueOf(getString()); - if (calendar == null) { - return date; - } - - // Use Calendar to apply time zone's offset - long milliseconds = date.getTime(); - return new Date(DateTimeUtils.applyCalendarOffset(milliseconds, calendar)); - } catch (Exception e) { - throw new SQLException(e); - } - } - - @Override - public Time getTime(Calendar calendar) throws SQLException { - try { - Time time = Time.valueOf(getString()); - if (calendar == null) { - return time; - } - - // Use Calendar to apply time zone's offset - long milliseconds = time.getTime(); - return new Time(DateTimeUtils.applyCalendarOffset(milliseconds, calendar)); - } catch (Exception e) { - throw new SQLException(e); - } - } - - @Override - public Timestamp getTimestamp(Calendar calendar) throws SQLException { - try { - Timestamp timestamp = Timestamp.valueOf(getString()); - if (calendar == null) { - return timestamp; - } - - // Use Calendar to apply time zone's offset - long milliseconds = timestamp.getTime(); - return new Timestamp(DateTimeUtils.applyCalendarOffset(milliseconds, calendar)); - } catch (Exception e) { - 
throw new SQLException(e); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java deleted file mode 100644 index 0e9c79a0907a5..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java +++ /dev/null @@ -1,926 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.client; - -import com.google.common.collect.ImmutableMap; -import java.io.IOException; -import java.net.URI; -import java.security.GeneralSecurityException; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import org.apache.arrow.driver.jdbc.client.utils.ClientAuthenticationUtils; -import org.apache.arrow.flight.CallOption; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.CloseSessionRequest; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightClientMiddleware; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.flight.FlightStatusCode; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.LocationSchemes; -import org.apache.arrow.flight.SessionOptionValue; -import org.apache.arrow.flight.SessionOptionValueFactory; -import org.apache.arrow.flight.SetSessionOptionsRequest; -import org.apache.arrow.flight.SetSessionOptionsResult; -import org.apache.arrow.flight.auth2.BearerCredentialWriter; -import org.apache.arrow.flight.auth2.ClientBearerHeaderHandler; -import org.apache.arrow.flight.auth2.ClientIncomingAuthHeaderMiddleware; -import org.apache.arrow.flight.client.ClientCookieMiddleware; -import org.apache.arrow.flight.grpc.CredentialCallOption; -import org.apache.arrow.flight.sql.FlightSqlClient; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlInfo; -import org.apache.arrow.flight.sql.util.TableRef; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.util.VisibleForTesting; -import 
org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.calcite.avatica.Meta.StatementType; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** A {@link FlightSqlClient} handler. */ -public final class ArrowFlightSqlClientHandler implements AutoCloseable { - private static final Logger LOGGER = LoggerFactory.getLogger(ArrowFlightSqlClientHandler.class); - // JDBC connection string query parameter - private static final String CATALOG = "catalog"; - - private final FlightSqlClient sqlClient; - private final Set options = new HashSet<>(); - private final Builder builder; - private final Optional catalog; - - ArrowFlightSqlClientHandler( - final FlightSqlClient sqlClient, - final Builder builder, - final Collection credentialOptions, - final Optional catalog) { - this.options.addAll(builder.options); - this.options.addAll(credentialOptions); - this.sqlClient = Preconditions.checkNotNull(sqlClient); - this.builder = builder; - this.catalog = catalog; - } - - /** - * Creates a new {@link ArrowFlightSqlClientHandler} from the provided {@code client} and {@code - * options}. - * - * @param client the {@link FlightClient} to manage under a {@link FlightSqlClient} wrapper. - * @param options the {@link CallOption}s to persist in between subsequent client calls. - * @return a new {@link ArrowFlightSqlClientHandler}. - */ - static ArrowFlightSqlClientHandler createNewHandler( - final FlightClient client, - final Builder builder, - final Collection options, - final Optional catalog) { - final ArrowFlightSqlClientHandler handler = - new ArrowFlightSqlClientHandler(new FlightSqlClient(client), builder, options, catalog); - handler.setSetCatalogInSessionIfPresent(); - return handler; - } - - /** - * Gets the {@link #options} for the subsequent calls from this handler. - * - * @return the {@link CallOption}s. 
- */ - private CallOption[] getOptions() { - return options.toArray(new CallOption[0]); - } - - /** - * Makes an RPC "getStream" request based on the provided {@link FlightInfo} object. Retrieves the - * result of the query previously prepared with "getInfo." - * - * @param flightInfo The {@link FlightInfo} instance from which to fetch results. - * @return a {@code FlightStream} of results. - */ - public List getStreams(final FlightInfo flightInfo) - throws SQLException { - final ArrayList endpoints = - new ArrayList<>(flightInfo.getEndpoints().size()); - - try { - for (FlightEndpoint endpoint : flightInfo.getEndpoints()) { - if (endpoint.getLocations().isEmpty()) { - // Create a stream using the current client only and do not close the client at the end. - endpoints.add( - new CloseableEndpointStreamPair( - sqlClient.getStream(endpoint.getTicket(), getOptions()), null)); - } else { - // Clone the builder and then set the new endpoint on it. - - // GH-38574: Currently a new FlightClient will be made for each partition that returns a - // non-empty Location - // then disposed of. It may be better to cache clients because a server may report the - // same Locations. - // It would also be good to identify when the reported location is the same as the - // original connection's - // Location and skip creating a FlightClient in that scenario. 
- List exceptions = new ArrayList<>(); - CloseableEndpointStreamPair stream = null; - for (Location location : endpoint.getLocations()) { - final URI endpointUri = location.getUri(); - if (endpointUri.getScheme().equals(LocationSchemes.REUSE_CONNECTION)) { - stream = - new CloseableEndpointStreamPair( - sqlClient.getStream(endpoint.getTicket(), getOptions()), null); - break; - } - final Builder builderForEndpoint = - new Builder(ArrowFlightSqlClientHandler.this.builder) - .withHost(endpointUri.getHost()) - .withPort(endpointUri.getPort()) - .withEncryption(endpointUri.getScheme().equals(LocationSchemes.GRPC_TLS)); - - ArrowFlightSqlClientHandler endpointHandler = null; - try { - endpointHandler = builderForEndpoint.build(); - stream = - new CloseableEndpointStreamPair( - endpointHandler.sqlClient.getStream( - endpoint.getTicket(), endpointHandler.getOptions()), - endpointHandler.sqlClient); - // Make sure we actually get data from the server - stream.getStream().getSchema(); - } catch (Exception ex) { - if (endpointHandler != null) { - AutoCloseables.close(endpointHandler); - } - exceptions.add(ex); - continue; - } - break; - } - if (stream != null) { - endpoints.add(stream); - } else if (exceptions.isEmpty()) { - // This should never happen... - throw new IllegalStateException("Could not connect to endpoint and no errors occurred"); - } else { - Exception ex = exceptions.remove(0); - while (!exceptions.isEmpty()) { - ex.addSuppressed(exceptions.remove(exceptions.size() - 1)); - } - throw ex; - } - } - } - } catch (Exception outerException) { - try { - AutoCloseables.close(endpoints); - } catch (Exception innerEx) { - outerException.addSuppressed(innerEx); - } - - if (outerException instanceof SQLException) { - throw (SQLException) outerException; - } - throw new SQLException(outerException); - } - return endpoints; - } - - /** - * Makes an RPC "getInfo" request based on the provided {@code query} object. - * - * @param query The query. 
- * @return a {@code FlightStream} of results. - */ - public FlightInfo getInfo(final String query) { - return sqlClient.execute(query, getOptions()); - } - - @Override - public void close() throws SQLException { - if (catalog.isPresent()) { - sqlClient.closeSession(new CloseSessionRequest(), getOptions()); - } - try { - AutoCloseables.close(sqlClient); - } catch (final Exception e) { - throw new SQLException("Failed to clean up client resources.", e); - } - } - - /** A prepared statement handler. */ - public interface PreparedStatement extends AutoCloseable { - /** - * Executes this {@link PreparedStatement}. - * - * @return the {@link FlightInfo} representing the outcome of this query execution. - * @throws SQLException on error. - */ - FlightInfo executeQuery() throws SQLException; - - /** - * Executes a {@link StatementType#UPDATE} query. - * - * @return the number of rows affected. - */ - long executeUpdate(); - - /** - * Gets the {@link StatementType} of this {@link PreparedStatement}. - * - * @return the Statement Type. - */ - StatementType getType(); - - /** - * Gets the {@link Schema} of this {@link PreparedStatement}. - * - * @return {@link Schema}. - */ - Schema getDataSetSchema(); - - /** - * Gets the {@link Schema} of the parameters for this {@link PreparedStatement}. - * - * @return {@link Schema}. - */ - Schema getParameterSchema(); - - void setParameters(VectorSchemaRoot parameters); - - @Override - void close(); - } - - /** A connection is created with catalog set as a session option. 
*/ - private void setSetCatalogInSessionIfPresent() { - if (catalog.isPresent()) { - final SetSessionOptionsRequest setSessionOptionRequest = - new SetSessionOptionsRequest( - ImmutableMap.builder() - .put(CATALOG, SessionOptionValueFactory.makeSessionOptionValue(catalog.get())) - .build()); - final SetSessionOptionsResult result = - sqlClient.setSessionOptions(setSessionOptionRequest, getOptions()); - - if (result.hasErrors()) { - Map errors = result.getErrors(); - for (Map.Entry error : errors.entrySet()) { - LOGGER.warn(error.toString()); - } - throw CallStatus.INVALID_ARGUMENT - .withDescription( - String.format( - "Cannot set session option for catalog = %s. Check log for details.", catalog)) - .toRuntimeException(); - } - } - } - - /** - * Creates a new {@link PreparedStatement} for the given {@code query}. - * - * @param query the SQL query. - * @return a new prepared statement. - */ - public PreparedStatement prepare(final String query) { - final FlightSqlClient.PreparedStatement preparedStatement = - sqlClient.prepare(query, getOptions()); - return new PreparedStatement() { - @Override - public FlightInfo executeQuery() throws SQLException { - return preparedStatement.execute(getOptions()); - } - - @Override - public long executeUpdate() { - return preparedStatement.executeUpdate(getOptions()); - } - - @Override - public StatementType getType() { - final Schema schema = preparedStatement.getResultSetSchema(); - return schema.getFields().isEmpty() ? 
StatementType.UPDATE : StatementType.SELECT; - } - - @Override - public Schema getDataSetSchema() { - return preparedStatement.getResultSetSchema(); - } - - @Override - public Schema getParameterSchema() { - return preparedStatement.getParameterSchema(); - } - - @Override - public void setParameters(VectorSchemaRoot parameters) { - preparedStatement.setParameters(parameters); - } - - @Override - public void close() { - try { - preparedStatement.close(getOptions()); - } catch (FlightRuntimeException fre) { - // ARROW-17785: suppress exceptions caused by flaky gRPC layer - if (fre.status().code().equals(FlightStatusCode.UNAVAILABLE) - || (fre.status().code().equals(FlightStatusCode.INTERNAL) - && fre.getMessage().contains("Connection closed after GOAWAY"))) { - LOGGER.warn("Supressed error closing PreparedStatement", fre); - return; - } - throw fre; - } - } - }; - } - - /** - * Makes an RPC "getCatalogs" request. - * - * @return a {@code FlightStream} of results. - */ - public FlightInfo getCatalogs() { - return sqlClient.getCatalogs(getOptions()); - } - - /** - * Makes an RPC "getImportedKeys" request based on the provided info. - * - * @param catalog The catalog name. Must match the catalog name as it is stored in the database. - * Retrieves those without a catalog. Null means that the catalog name should not be used to - * narrow the search. - * @param schema The schema name. Must match the schema name as it is stored in the database. "" - * retrieves those without a schema. Null means that the schema name should not be used to - * narrow the search. - * @param table The table name. Must match the table name as it is stored in the database. - * @return a {@code FlightStream} of results. - */ - public FlightInfo getImportedKeys(final String catalog, final String schema, final String table) { - return sqlClient.getImportedKeys(TableRef.of(catalog, schema, table), getOptions()); - } - - /** - * Makes an RPC "getExportedKeys" request based on the provided info. 
- * - * @param catalog The catalog name. Must match the catalog name as it is stored in the database. - * Retrieves those without a catalog. Null means that the catalog name should not be used to - * narrow the search. - * @param schema The schema name. Must match the schema name as it is stored in the database. "" - * retrieves those without a schema. Null means that the schema name should not be used to - * narrow the search. - * @param table The table name. Must match the table name as it is stored in the database. - * @return a {@code FlightStream} of results. - */ - public FlightInfo getExportedKeys(final String catalog, final String schema, final String table) { - return sqlClient.getExportedKeys(TableRef.of(catalog, schema, table), getOptions()); - } - - /** - * Makes an RPC "getSchemas" request based on the provided info. - * - * @param catalog The catalog name. Must match the catalog name as it is stored in the database. - * Retrieves those without a catalog. Null means that the catalog name should not be used to - * narrow the search. - * @param schemaPattern The schema name pattern. Must match the schema name as it is stored in the - * database. Null means that schema name should not be used to narrow down the search. - * @return a {@code FlightStream} of results. - */ - public FlightInfo getSchemas(final String catalog, final String schemaPattern) { - return sqlClient.getSchemas(catalog, schemaPattern, getOptions()); - } - - /** - * Makes an RPC "getTableTypes" request. - * - * @return a {@code FlightStream} of results. - */ - public FlightInfo getTableTypes() { - return sqlClient.getTableTypes(getOptions()); - } - - /** - * Makes an RPC "getTables" request based on the provided info. - * - * @param catalog The catalog name. Must match the catalog name as it is stored in the database. - * Retrieves those without a catalog. Null means that the catalog name should not be used to - * narrow the search. - * @param schemaPattern The schema name pattern. 
Must match the schema name as it is stored in the - * database. "" retrieves those without a schema. Null means that the schema name should not - * be used to narrow the search. - * @param tableNamePattern The table name pattern. Must match the table name as it is stored in - * the database. - * @param types The list of table types, which must be from the list of table types to include. - * Null returns all types. - * @param includeSchema Whether to include schema. - * @return a {@code FlightStream} of results. - */ - public FlightInfo getTables( - final String catalog, - final String schemaPattern, - final String tableNamePattern, - final List types, - final boolean includeSchema) { - - return sqlClient.getTables( - catalog, schemaPattern, tableNamePattern, types, includeSchema, getOptions()); - } - - /** - * Gets SQL info. - * - * @return the SQL info. - */ - public FlightInfo getSqlInfo(SqlInfo... info) { - return sqlClient.getSqlInfo(info, getOptions()); - } - - /** - * Makes an RPC "getPrimaryKeys" request based on the provided info. - * - * @param catalog The catalog name; must match the catalog name as it is stored in the database. - * "" retrieves those without a catalog. Null means that the catalog name should not be used - * to narrow the search. - * @param schema The schema name; must match the schema name as it is stored in the database. "" - * retrieves those without a schema. Null means that the schema name should not be used to - * narrow the search. - * @param table The table name. Must match the table name as it is stored in the database. - * @return a {@code FlightStream} of results. - */ - public FlightInfo getPrimaryKeys(final String catalog, final String schema, final String table) { - return sqlClient.getPrimaryKeys(TableRef.of(catalog, schema, table), getOptions()); - } - - /** - * Makes an RPC "getCrossReference" request based on the provided info. - * - * @param pkCatalog The catalog name. 
Must match the catalog name as it is stored in the database. - * Retrieves those without a catalog. Null means that the catalog name should not be used to - * narrow the search. - * @param pkSchema The schema name. Must match the schema name as it is stored in the database. "" - * retrieves those without a schema. Null means that the schema name should not be used to - * narrow the search. - * @param pkTable The table name. Must match the table name as it is stored in the database. - * @param fkCatalog The catalog name. Must match the catalog name as it is stored in the database. - * Retrieves those without a catalog. Null means that the catalog name should not be used to - * narrow the search. - * @param fkSchema The schema name. Must match the schema name as it is stored in the database. "" - * retrieves those without a schema. Null means that the schema name should not be used to - * narrow the search. - * @param fkTable The table name. Must match the table name as it is stored in the database. - * @return a {@code FlightStream} of results. - */ - public FlightInfo getCrossReference( - String pkCatalog, - String pkSchema, - String pkTable, - String fkCatalog, - String fkSchema, - String fkTable) { - return sqlClient.getCrossReference( - TableRef.of(pkCatalog, pkSchema, pkTable), - TableRef.of(fkCatalog, fkSchema, fkTable), - getOptions()); - } - - /** Builder for {@link ArrowFlightSqlClientHandler}. 
*/ - public static final class Builder { - private final Set middlewareFactories = new HashSet<>(); - private final Set options = new HashSet<>(); - private String host; - private int port; - - @VisibleForTesting String username; - - @VisibleForTesting String password; - - @VisibleForTesting String trustStorePath; - - @VisibleForTesting String trustStorePassword; - - @VisibleForTesting String token; - - @VisibleForTesting boolean useEncryption = true; - - @VisibleForTesting boolean disableCertificateVerification; - - @VisibleForTesting boolean useSystemTrustStore = true; - - @VisibleForTesting String tlsRootCertificatesPath; - - @VisibleForTesting String clientCertificatePath; - - @VisibleForTesting String clientKeyPath; - - @VisibleForTesting private BufferAllocator allocator; - - @VisibleForTesting boolean retainCookies = true; - - @VisibleForTesting boolean retainAuth = true; - - @VisibleForTesting Optional catalog = Optional.empty(); - - // These two middleware are for internal use within build() and should not be exposed by builder - // APIs. - // Note that these middleware may not necessarily be registered. - @VisibleForTesting - ClientIncomingAuthHeaderMiddleware.Factory authFactory = - new ClientIncomingAuthHeaderMiddleware.Factory(new ClientBearerHeaderHandler()); - - @VisibleForTesting - ClientCookieMiddleware.Factory cookieFactory = new ClientCookieMiddleware.Factory(); - - public Builder() {} - - /** - * Copies the builder. - * - * @param original The builder to base this copy off of. 
- */ - @VisibleForTesting - Builder(Builder original) { - this.middlewareFactories.addAll(original.middlewareFactories); - this.options.addAll(original.options); - this.host = original.host; - this.port = original.port; - this.username = original.username; - this.password = original.password; - this.trustStorePath = original.trustStorePath; - this.trustStorePassword = original.trustStorePassword; - this.token = original.token; - this.useEncryption = original.useEncryption; - this.disableCertificateVerification = original.disableCertificateVerification; - this.useSystemTrustStore = original.useSystemTrustStore; - this.tlsRootCertificatesPath = original.tlsRootCertificatesPath; - this.clientCertificatePath = original.clientCertificatePath; - this.clientKeyPath = original.clientKeyPath; - this.allocator = original.allocator; - this.catalog = original.catalog; - - if (original.retainCookies) { - this.cookieFactory = original.cookieFactory; - } - - if (original.retainAuth) { - this.authFactory = original.authFactory; - } - } - - /** - * Sets the host for this handler. - * - * @param host the host. - * @return this instance. - */ - public Builder withHost(final String host) { - this.host = host; - return this; - } - - /** - * Sets the port for this handler. - * - * @param port the port. - * @return this instance. - */ - public Builder withPort(final int port) { - this.port = port; - return this; - } - - /** - * Sets the username for this handler. - * - * @param username the username. - * @return this instance. - */ - public Builder withUsername(final String username) { - this.username = username; - return this; - } - - /** - * Sets the password for this handler. - * - * @param password the password. - * @return this instance. - */ - public Builder withPassword(final String password) { - this.password = password; - return this; - } - - /** - * Sets the KeyStore path for this handler. - * - * @param trustStorePath the KeyStore path. - * @return this instance. 
- */ - public Builder withTrustStorePath(final String trustStorePath) { - this.trustStorePath = trustStorePath; - return this; - } - - /** - * Sets the KeyStore password for this handler. - * - * @param trustStorePassword the KeyStore password. - * @return this instance. - */ - public Builder withTrustStorePassword(final String trustStorePassword) { - this.trustStorePassword = trustStorePassword; - return this; - } - - /** - * Sets whether to use TLS encryption in this handler. - * - * @param useEncryption whether to use TLS encryption. - * @return this instance. - */ - public Builder withEncryption(final boolean useEncryption) { - this.useEncryption = useEncryption; - return this; - } - - /** - * Sets whether to disable the certificate verification in this handler. - * - * @param disableCertificateVerification whether to disable certificate verification. - * @return this instance. - */ - public Builder withDisableCertificateVerification( - final boolean disableCertificateVerification) { - this.disableCertificateVerification = disableCertificateVerification; - return this; - } - - /** - * Sets whether to use the certificates from the operating system. - * - * @param useSystemTrustStore whether to use the system operating certificates. - * @return this instance. - */ - public Builder withSystemTrustStore(final boolean useSystemTrustStore) { - this.useSystemTrustStore = useSystemTrustStore; - return this; - } - - /** - * Sets the TLS root certificate path as an alternative to using the System or other Trust - * Store. The path must contain a valid PEM file. - * - * @param tlsRootCertificatesPath the TLS root certificate path (if TLS is required). - * @return this instance. - */ - public Builder withTlsRootCertificates(final String tlsRootCertificatesPath) { - this.tlsRootCertificatesPath = tlsRootCertificatesPath; - return this; - } - - /** - * Sets the mTLS client certificate path (if mTLS is required). 
- * - * @param clientCertificatePath the mTLS client certificate path (if mTLS is required). - * @return this instance. - */ - public Builder withClientCertificate(final String clientCertificatePath) { - this.clientCertificatePath = clientCertificatePath; - return this; - } - - /** - * Sets the mTLS client certificate private key path (if mTLS is required). - * - * @param clientKeyPath the mTLS client certificate private key path (if mTLS is required). - * @return this instance. - */ - public Builder withClientKey(final String clientKeyPath) { - this.clientKeyPath = clientKeyPath; - return this; - } - - /** - * Sets the token used in the token authentication. - * - * @param token the token value. - * @return this builder instance. - */ - public Builder withToken(final String token) { - this.token = token; - return this; - } - - /** - * Sets the {@link BufferAllocator} to use in this handler. - * - * @param allocator the allocator. - * @return this instance. - */ - public Builder withBufferAllocator(final BufferAllocator allocator) { - this.allocator = - allocator.newChildAllocator("ArrowFlightSqlClientHandler", 0, allocator.getLimit()); - return this; - } - - /** - * Indicates if cookies should be re-used by connections spawned for getStreams() calls. - * - * @param retainCookies The flag indicating if cookies should be re-used. - * @return this builder instance. - */ - public Builder withRetainCookies(boolean retainCookies) { - this.retainCookies = retainCookies; - return this; - } - - /** - * Indicates if bearer tokens negotiated should be re-used by connections spawned for - * getStreams() calls. - * - * @param retainAuth The flag indicating if auth tokens should be re-used. - * @return this builder instance. - */ - public Builder withRetainAuth(boolean retainAuth) { - this.retainAuth = retainAuth; - return this; - } - - /** - * Adds the provided {@code factories} to the list of {@link #middlewareFactories} of this - * handler. 
- * - * @param factories the factories to add. - * @return this instance. - */ - public Builder withMiddlewareFactories(final FlightClientMiddleware.Factory... factories) { - return withMiddlewareFactories(Arrays.asList(factories)); - } - - /** - * Adds the provided {@code factories} to the list of {@link #middlewareFactories} of this - * handler. - * - * @param factories the factories to add. - * @return this instance. - */ - public Builder withMiddlewareFactories( - final Collection factories) { - this.middlewareFactories.addAll(factories); - return this; - } - - /** - * Adds the provided {@link CallOption}s to this handler. - * - * @param options the options - * @return this instance. - */ - public Builder withCallOptions(final CallOption... options) { - return withCallOptions(Arrays.asList(options)); - } - - /** - * Adds the provided {@link CallOption}s to this handler. - * - * @param options the options - * @return this instance. - */ - public Builder withCallOptions(final Collection options) { - this.options.addAll(options); - return this; - } - - /** - * Sets the catalog for this handler if it is not null. - * - * @param catalog the catalog - * @return this instance. - */ - public Builder withCatalog(@Nullable final String catalog) { - this.catalog = Optional.ofNullable(catalog); - return this; - } - - /** - * Builds a new {@link ArrowFlightSqlClientHandler} from the provided fields. - * - * @return a new client handler. - * @throws SQLException on error. - */ - public ArrowFlightSqlClientHandler build() throws SQLException { - // Copy middleware so that the build method doesn't change the state of the builder fields - // itself. 
- Set buildTimeMiddlewareFactories = - new HashSet<>(this.middlewareFactories); - FlightClient client = null; - boolean isUsingUserPasswordAuth = username != null && token == null; - - try { - // Token should take priority since some apps pass in a username/password even when a token - // is provided - if (isUsingUserPasswordAuth) { - buildTimeMiddlewareFactories.add(authFactory); - } - final FlightClient.Builder clientBuilder = FlightClient.builder().allocator(allocator); - - buildTimeMiddlewareFactories.add(new ClientCookieMiddleware.Factory()); - buildTimeMiddlewareFactories.forEach(clientBuilder::intercept); - Location location; - if (useEncryption) { - location = Location.forGrpcTls(host, port); - clientBuilder.useTls(); - } else { - location = Location.forGrpcInsecure(host, port); - } - clientBuilder.location(location); - - if (useEncryption) { - if (disableCertificateVerification) { - clientBuilder.verifyServer(false); - } else { - if (tlsRootCertificatesPath != null) { - clientBuilder.trustedCertificates( - ClientAuthenticationUtils.getTlsRootCertificatesStream(tlsRootCertificatesPath)); - } else if (useSystemTrustStore) { - clientBuilder.trustedCertificates( - ClientAuthenticationUtils.getCertificateInputStreamFromSystem( - trustStorePassword)); - } else if (trustStorePath != null) { - clientBuilder.trustedCertificates( - ClientAuthenticationUtils.getCertificateStream( - trustStorePath, trustStorePassword)); - } - } - - if (clientCertificatePath != null && clientKeyPath != null) { - clientBuilder.clientCertificate( - ClientAuthenticationUtils.getClientCertificateStream(clientCertificatePath), - ClientAuthenticationUtils.getClientKeyStream(clientKeyPath)); - } - } - - client = clientBuilder.build(); - final ArrayList credentialOptions = new ArrayList<>(); - if (isUsingUserPasswordAuth) { - // If the authFactory has already been used for a handshake, use the existing token. 
- // This can occur if the authFactory is being re-used for a new connection spawned for - // getStream(). - if (authFactory.getCredentialCallOption() != null) { - credentialOptions.add(authFactory.getCredentialCallOption()); - } else { - // Otherwise do the handshake and get the token if possible. - credentialOptions.add( - ClientAuthenticationUtils.getAuthenticate( - client, username, password, authFactory, options.toArray(new CallOption[0]))); - } - } else if (token != null) { - credentialOptions.add( - ClientAuthenticationUtils.getAuthenticate( - client, - new CredentialCallOption(new BearerCredentialWriter(token)), - options.toArray(new CallOption[0]))); - } - return ArrowFlightSqlClientHandler.createNewHandler( - client, this, credentialOptions, catalog); - - } catch (final IllegalArgumentException - | GeneralSecurityException - | IOException - | FlightRuntimeException e) { - final SQLException originalException = new SQLException(e); - if (client != null) { - try { - client.close(); - } catch (final InterruptedException interruptedException) { - originalException.addSuppressed(interruptedException); - } - } - throw originalException; - } - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/CloseableEndpointStreamPair.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/CloseableEndpointStreamPair.java deleted file mode 100644 index 9a5f4db468d5c..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/CloseableEndpointStreamPair.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.client; - -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.sql.FlightSqlClient; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Preconditions; - -/** Represents a connection to a {@link org.apache.arrow.flight.FlightEndpoint}. */ -public class CloseableEndpointStreamPair implements AutoCloseable { - - private final FlightStream stream; - private final FlightSqlClient client; - - public CloseableEndpointStreamPair(FlightStream stream, FlightSqlClient client) { - this.stream = Preconditions.checkNotNull(stream); - this.client = client; - } - - public FlightStream getStream() { - return stream; - } - - @Override - public void close() throws Exception { - AutoCloseables.close(stream, client); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java deleted file mode 100644 index 8b3c3a9ed350c..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java +++ /dev/null @@ -1,310 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.client.utils; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringWriter; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.security.GeneralSecurityException; -import java.security.KeyStore; -import java.security.KeyStoreException; -import java.security.NoSuchAlgorithmException; -import java.security.cert.Certificate; -import java.security.cert.CertificateException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Enumeration; -import java.util.List; -import org.apache.arrow.flight.CallOption; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.auth2.BasicAuthCredentialWriter; -import org.apache.arrow.flight.auth2.ClientIncomingAuthHeaderMiddleware; -import org.apache.arrow.flight.grpc.CredentialCallOption; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.util.VisibleForTesting; -import org.bouncycastle.openssl.jcajce.JcaPEMWriter; - -/** Utils for {@link FlightClientHandler} authentication. 
*/ -public final class ClientAuthenticationUtils { - - private ClientAuthenticationUtils() { - // Prevent instantiation. - } - - /** - * Gets the {@link CredentialCallOption} for the provided authentication info. - * - * @param client the client. - * @param credential the credential as CallOptions. - * @param options the {@link CallOption}s to use. - * @return the credential call option. - */ - public static CredentialCallOption getAuthenticate( - final FlightClient client, - final CredentialCallOption credential, - final CallOption... options) { - - final List theseOptions = new ArrayList<>(); - theseOptions.add(credential); - theseOptions.addAll(Arrays.asList(options)); - client.handshake(theseOptions.toArray(new CallOption[0])); - - return (CredentialCallOption) theseOptions.get(0); - } - - /** - * Gets the {@link CredentialCallOption} for the provided authentication info. - * - * @param client the client. - * @param username the username. - * @param password the password. - * @param factory the {@link ClientIncomingAuthHeaderMiddleware.Factory} to use. - * @param options the {@link CallOption}s to use. - * @return the credential call option. - */ - public static CredentialCallOption getAuthenticate( - final FlightClient client, - final String username, - final String password, - final ClientIncomingAuthHeaderMiddleware.Factory factory, - final CallOption... options) { - - return getAuthenticate( - client, - new CredentialCallOption(new BasicAuthCredentialWriter(username, password)), - factory, - options); - } - - private static CredentialCallOption getAuthenticate( - final FlightClient client, - final CredentialCallOption token, - final ClientIncomingAuthHeaderMiddleware.Factory factory, - final CallOption... 
options) { - final List theseOptions = new ArrayList<>(); - theseOptions.add(token); - theseOptions.addAll(Arrays.asList(options)); - client.handshake(theseOptions.toArray(new CallOption[0])); - return factory.getCredentialCallOption(); - } - - @VisibleForTesting - static KeyStore getKeyStoreInstance(String instance) - throws KeyStoreException, CertificateException, IOException, NoSuchAlgorithmException { - KeyStore keyStore = KeyStore.getInstance(instance); - keyStore.load(null, null); - - return keyStore; - } - - @VisibleForTesting - static KeyStore getDefaultKeyStoreInstance(String password) - throws KeyStoreException, CertificateException, NoSuchAlgorithmException, IOException { - try (InputStream fileInputStream = getKeystoreInputStream()) { - KeyStore keyStore = KeyStore.getInstance(KeyStore.getDefaultType()); - keyStore.load(fileInputStream, password == null ? null : password.toCharArray()); - return keyStore; - } - } - - static String getOperatingSystem() { - return System.getProperty("os.name"); - } - - /** - * Check if the operating system running the software is Windows. - * - * @return whether is the windows system. - */ - public static boolean isWindows() { - return getOperatingSystem().contains("Windows"); - } - - /** - * Check if the operating system running the software is Mac. - * - * @return whether is the mac system. - */ - public static boolean isMac() { - return getOperatingSystem().contains("Mac"); - } - - /** - * It gets the trusted certificate based on the operating system and loads all the certificate - * into a {@link InputStream}. - * - * @return An input stream with all the certificates. - * @throws KeyStoreException if a key store could not be loaded. - * @throws CertificateException if a certificate could not be found. - * @throws IOException if it fails reading the file. 
- */ - public static InputStream getCertificateInputStreamFromSystem(String password) - throws KeyStoreException, CertificateException, IOException, NoSuchAlgorithmException { - - List keyStoreList = new ArrayList<>(); - if (isWindows()) { - keyStoreList.add(getKeyStoreInstance("Windows-ROOT")); - keyStoreList.add(getKeyStoreInstance("Windows-MY")); - } else if (isMac()) { - keyStoreList.add(getKeyStoreInstance("KeychainStore")); - keyStoreList.add(getDefaultKeyStoreInstance(password)); - } else { - keyStoreList.add(getDefaultKeyStoreInstance(password)); - } - - return getCertificatesInputStream(keyStoreList); - } - - @VisibleForTesting - static InputStream getKeystoreInputStream() throws IOException { - Path path = Paths.get(System.getProperty("java.home"), "lib", "security", "cacerts"); - if (Files.notExists(path)) { - // for JDK8 - path = Paths.get(System.getProperty("java.home"), "jre", "lib", "security", "cacerts"); - } - return Files.newInputStream(path); - } - - @VisibleForTesting - static void getCertificatesInputStream(KeyStore keyStore, JcaPEMWriter pemWriter) - throws IOException, KeyStoreException { - Enumeration aliases = keyStore.aliases(); - while (aliases.hasMoreElements()) { - String alias = aliases.nextElement(); - if (keyStore.isCertificateEntry(alias)) { - pemWriter.writeObject(keyStore.getCertificate(alias)); - } - } - pemWriter.flush(); - } - - @VisibleForTesting - static InputStream getCertificatesInputStream(Collection keyStores) - throws IOException, KeyStoreException { - try (final StringWriter writer = new StringWriter(); - final JcaPEMWriter pemWriter = new JcaPEMWriter(writer)) { - - for (KeyStore keyStore : keyStores) { - getCertificatesInputStream(keyStore, pemWriter); - } - - return new ByteArrayInputStream(writer.toString().getBytes(StandardCharsets.UTF_8)); - } - } - - /** - * Generates an {@link InputStream} that contains certificates for a private key. - * - * @param keyStorePath The path of the KeyStore. 
- * @param keyStorePass The password of the KeyStore. - * @return a new {code InputStream} containing the certificates. - * @throws GeneralSecurityException on error. - * @throws IOException on error. - */ - public static InputStream getCertificateStream( - final String keyStorePath, final String keyStorePass) - throws GeneralSecurityException, IOException { - Preconditions.checkNotNull(keyStorePath, "KeyStore path cannot be null!"); - Preconditions.checkNotNull(keyStorePass, "KeyStorePass cannot be null!"); - final KeyStore keyStore = KeyStore.getInstance(KeyStore.getDefaultType()); - - try (final InputStream keyStoreStream = Files.newInputStream(Paths.get(keyStorePath))) { - keyStore.load(keyStoreStream, keyStorePass.toCharArray()); - } - - return getSingleCertificateInputStream(keyStore); - } - - /** - * Generates an {@link InputStream} that contains certificates for path-based TLS Root - * Certificates. - * - * @param tlsRootsCertificatesPath The path of the TLS Root Certificates. - * @return a new {code InputStream} containing the certificates. - * @throws GeneralSecurityException on error. - * @throws IOException on error. - */ - public static InputStream getTlsRootCertificatesStream(final String tlsRootsCertificatesPath) - throws GeneralSecurityException, IOException { - Preconditions.checkNotNull( - tlsRootsCertificatesPath, "TLS Root certificates path cannot be null!"); - - return Files.newInputStream(Paths.get(tlsRootsCertificatesPath)); - } - - /** - * Generates an {@link InputStream} that contains certificates for a path-based mTLS Client - * Certificate. - * - * @param clientCertificatePath The path of the mTLS Client Certificate. - * @return a new {code InputStream} containing the certificates. - * @throws GeneralSecurityException on error. - * @throws IOException on error. 
- */ - public static InputStream getClientCertificateStream(final String clientCertificatePath) - throws GeneralSecurityException, IOException { - Preconditions.checkNotNull(clientCertificatePath, "Client certificate path cannot be null!"); - - return Files.newInputStream(Paths.get(clientCertificatePath)); - } - - /** - * Generates an {@link InputStream} that contains certificates for a path-based mTLS Client Key. - * - * @param clientKeyPath The path of the mTLS Client Key. - * @return a new {code InputStream} containing the certificates. - * @throws GeneralSecurityException on error. - * @throws IOException on error. - */ - public static InputStream getClientKeyStream(final String clientKeyPath) - throws GeneralSecurityException, IOException { - Preconditions.checkNotNull(clientKeyPath, "Client key path cannot be null!"); - - return Files.newInputStream(Paths.get(clientKeyPath)); - } - - private static InputStream getSingleCertificateInputStream(KeyStore keyStore) - throws KeyStoreException, IOException, CertificateException { - final Enumeration aliases = keyStore.aliases(); - - while (aliases.hasMoreElements()) { - final String alias = aliases.nextElement(); - if (keyStore.isCertificateEntry(alias)) { - return toInputStream(keyStore.getCertificate(alias)); - } - } - - throw new CertificateException("Keystore did not have a certificate."); - } - - private static InputStream toInputStream(final Certificate certificate) throws IOException { - - try (final StringWriter writer = new StringWriter(); - final JcaPEMWriter pemWriter = new JcaPEMWriter(writer)) { - - pemWriter.writeObject(certificate); - pemWriter.flush(); - return new ByteArrayInputStream(writer.toString().getBytes(StandardCharsets.UTF_8)); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/AvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/AvaticaParameterConverter.java deleted 
file mode 100644 index 0bc38a2772026..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/AvaticaParameterConverter.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** - * Interface for a class in charge of converting between AvaticaParameters and TypedValues and - * Arrow. - */ -public interface AvaticaParameterConverter { - - /** - * Bind a TypedValue to a FieldVector at the given index. - * - * @param vector FieldVector that the parameter should be bound to. - * @param typedValue TypedValue to bind as a parameter. - * @param index Vector index (0-indexed) that the TypedValue should be bound to. - * @return Whether the value was set successfully. - */ - boolean bindParameter(FieldVector vector, TypedValue typedValue, int index); - - /** - * Create an AvaticaParameter from the given Field. - * - * @param field Arrow Field to generate an AvaticaParameter from. 
- */ - AvaticaParameter createParameter(Field field); -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BaseAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BaseAvaticaParameterConverter.java deleted file mode 100644 index 6dd4c965c2a2b..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BaseAvaticaParameterConverter.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.driver.jdbc.converter.AvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.utils.SqlTypes; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.SqlType; - -/** - * Base AvaticaParameterConverter with a generic createParameter method that can be used by most - * Arrow types. 
- */ -abstract class BaseAvaticaParameterConverter implements AvaticaParameterConverter { - protected AvaticaParameter createParameter(Field field, boolean signed) { - final String name = field.getName(); - final ArrowType arrowType = field.getType(); - final String typeName = arrowType.toString(); - final int precision = 0; // Would have to know about the actual number - final int scale = - 0; // According to https://www.postgresql.org/docs/current/datatype-numeric.html - final int jdbcType = SqlTypes.getSqlTypeIdFromArrowType(arrowType); - final String className = SqlType.valueOf(jdbcType).clazz.getCanonicalName(); - return new AvaticaParameter(signed, precision, scale, jdbcType, typeName, className, name); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BinaryAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BinaryAvaticaParameterConverter.java deleted file mode 100644 index ce13cdbc93d31..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BinaryAvaticaParameterConverter.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Binary Arrow types. */ -public class BinaryAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public BinaryAvaticaParameterConverter(ArrowType.Binary type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - byte[] value = (byte[]) typedValue.toJdbc(null); - if (vector instanceof VarBinaryVector) { - ((VarBinaryVector) vector).setSafe(index, value); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BinaryViewAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BinaryViewAvaticaParameterConverter.java deleted file mode 100644 index a035bbba491d2..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BinaryViewAvaticaParameterConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for BinaryView Arrow types. */ -public class BinaryViewAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public BinaryViewAvaticaParameterConverter(ArrowType.BinaryView type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - throw new UnsupportedOperationException("Not implemented"); - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BoolAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BoolAvaticaParameterConverter.java deleted file mode 100644 index e26c0ad73d84c..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BoolAvaticaParameterConverter.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Bool Arrow types. */ -public class BoolAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public BoolAvaticaParameterConverter(ArrowType.Bool type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - boolean value = (boolean) typedValue.toLocal(); - if (vector instanceof BitVector) { - ((BitVector) vector).setSafe(index, value ? 
1 : 0); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/DateAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/DateAvaticaParameterConverter.java deleted file mode 100644 index 8795213530358..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/DateAvaticaParameterConverter.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Date Arrow types. 
*/ -public class DateAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public DateAvaticaParameterConverter(ArrowType.Date type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - int value = (int) typedValue.toLocal(); - if (vector instanceof DateMilliVector) { - ((DateMilliVector) vector).setSafe(index, value); - return true; - } else if (vector instanceof DateDayVector) { - ((DateDayVector) vector).setSafe(index, value); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/DecimalAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/DecimalAvaticaParameterConverter.java deleted file mode 100644 index 92faa119bdd76..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/DecimalAvaticaParameterConverter.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import java.math.BigDecimal; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Decimal Arrow types. */ -public class DecimalAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public DecimalAvaticaParameterConverter(ArrowType.Decimal type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - BigDecimal value = (BigDecimal) typedValue.toLocal(); - if (vector instanceof DecimalVector) { - ((DecimalVector) vector).setSafe(index, value); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, true); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/DurationAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/DurationAvaticaParameterConverter.java deleted file mode 100644 index 5c985d544a2b6..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/DurationAvaticaParameterConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Duration Arrow types. */ -public class DurationAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public DurationAvaticaParameterConverter(ArrowType.Duration type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/FixedSizeBinaryAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/FixedSizeBinaryAvaticaParameterConverter.java deleted file mode 100644 index 6fc58e36813be..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/FixedSizeBinaryAvaticaParameterConverter.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for FixedSizeBinary Arrow types. 
*/ -public class FixedSizeBinaryAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public FixedSizeBinaryAvaticaParameterConverter(ArrowType.FixedSizeBinary type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - byte[] value = (byte[]) typedValue.toJdbc(null); - if (vector instanceof FixedSizeBinaryVector) { - ((FixedSizeBinaryVector) vector).setSafe(index, value); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/FixedSizeListAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/FixedSizeListAvaticaParameterConverter.java deleted file mode 100644 index 05c82a932ed93..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/FixedSizeListAvaticaParameterConverter.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import java.util.List; -import org.apache.arrow.driver.jdbc.utils.AvaticaParameterBinder; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for FixedSizeList Arrow types. */ -public class FixedSizeListAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public FixedSizeListAvaticaParameterConverter(ArrowType.FixedSizeList type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - final List values = (List) typedValue.value; - final int arraySize = values.size(); - - if (vector instanceof FixedSizeListVector) { - FixedSizeListVector listVector = ((FixedSizeListVector) vector); - FieldVector childVector = listVector.getDataVector(); - int maxArraySize = listVector.getListSize(); - - if (arraySize != maxArraySize) { - if (!childVector.getField().isNullable()) { - throw new UnsupportedOperationException( - "Each array must contain " + maxArraySize + " elements"); - } else if (arraySize > maxArraySize) { - throw new UnsupportedOperationException( - "Each array must contain at most " + maxArraySize + " elements"); - } - } - - int startPos = listVector.startNewValue(index); - for (int i = 0; i < arraySize; i++) { - Object val = values.get(i); - int childIndex = startPos + i; - if (val == null) { - if (childVector.getField().isNullable()) { - childVector.setNull(childIndex); - } else { - throw new UnsupportedOperationException("Can't set null on non-nullable child list"); - } - } else { - childVector - .getField() - .getType() - .accept( - new AvaticaParameterBinder.BinderVisitor( - childVector, TypedValue.ofSerial(typedValue.componentType, val), 
childIndex)); - } - } - listVector.setValueCount(index + 1); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/FloatingPointAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/FloatingPointAvaticaParameterConverter.java deleted file mode 100644 index 13d3e9ad8f663..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/FloatingPointAvaticaParameterConverter.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for FloatingPoint Arrow types. */ -public class FloatingPointAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public FloatingPointAvaticaParameterConverter(ArrowType.FloatingPoint type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - Number value = (Number) typedValue.value; - if (vector instanceof Float4Vector) { - ((Float4Vector) vector).setSafe(index, value.floatValue()); - return true; - } else if (vector instanceof Float8Vector) { - ((Float8Vector) vector).setSafe(index, value.doubleValue()); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, true); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/IntAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/IntAvaticaParameterConverter.java deleted file mode 100644 index b77c33325fc68..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/IntAvaticaParameterConverter.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Int Arrow types. 
*/ -public class IntAvaticaParameterConverter extends BaseAvaticaParameterConverter { - private final ArrowType.Int type; - - public IntAvaticaParameterConverter(ArrowType.Int type) { - this.type = type; - } - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - Number value = (Number) typedValue.value; - if (vector instanceof TinyIntVector) { - ((TinyIntVector) vector).setSafe(index, value.intValue()); - return true; - } else if (vector instanceof SmallIntVector) { - ((SmallIntVector) vector).setSafe(index, value.intValue()); - return true; - } else if (vector instanceof IntVector) { - ((IntVector) vector).setSafe(index, value.intValue()); - return true; - } else if (vector instanceof BigIntVector) { - ((BigIntVector) vector).setSafe(index, value.longValue()); - return true; - } else if (vector instanceof UInt1Vector) { - ((UInt1Vector) vector).setSafe(index, value.intValue()); - return true; - } else if (vector instanceof UInt2Vector) { - ((UInt2Vector) vector).setSafe(index, value.intValue()); - return true; - } else if (vector instanceof UInt4Vector) { - ((UInt4Vector) vector).setSafe(index, value.intValue()); - return true; - } else if (vector instanceof UInt8Vector) { - ((UInt8Vector) vector).setSafe(index, value.longValue()); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, type.getIsSigned()); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/IntervalAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/IntervalAvaticaParameterConverter.java deleted file mode 100644 index 18de8d6b7523f..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/IntervalAvaticaParameterConverter.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the 
Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Interval Arrow types. 
*/ -public class IntervalAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public IntervalAvaticaParameterConverter(ArrowType.Interval type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - // Object value = typedValue.toLocal(); - // if (vector instanceof IntervalDayVector) { - // ((IntervalDayVector) vector).setSafe(index, () value); - // } else if (vector instanceof IntervalYearVector) { - // ((IntervalYearVector) vector).setSafe(index, () value); - // } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/LargeBinaryAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/LargeBinaryAvaticaParameterConverter.java deleted file mode 100644 index 2d707d0ea488a..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/LargeBinaryAvaticaParameterConverter.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for LargeBinary Arrow types. */ -public class LargeBinaryAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public LargeBinaryAvaticaParameterConverter(ArrowType.LargeBinary type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - byte[] value = (byte[]) typedValue.toJdbc(null); - if (vector instanceof LargeVarBinaryVector) { - ((LargeVarBinaryVector) vector).setSafe(index, value); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/LargeListAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/LargeListAvaticaParameterConverter.java deleted file mode 100644 index 3d03e93b1ff1a..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/LargeListAvaticaParameterConverter.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import java.util.List; -import org.apache.arrow.driver.jdbc.utils.AvaticaParameterBinder; -import org.apache.arrow.memory.util.LargeMemoryUtil; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for LargeList Arrow types. */ -public class LargeListAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public LargeListAvaticaParameterConverter(ArrowType.LargeList type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - final List values = (List) typedValue.value; - - if (vector instanceof LargeListVector) { - LargeListVector listVector = ((LargeListVector) vector); - FieldVector childVector = listVector.getDataVector(); - - long startPos = listVector.startNewValue(index); - for (int i = 0; i < values.size(); i++) { - Object val = values.get(i); - int childIndex = LargeMemoryUtil.checkedCastToInt(startPos) + i; - if (val == null) { - if (childVector.getField().isNullable()) { - childVector.setNull(childIndex); - } else { - throw new UnsupportedOperationException("Can't set null on non-nullable child list"); - } - } else { - childVector - .getField() - .getType() - .accept( - new AvaticaParameterBinder.BinderVisitor( - childVector, 
TypedValue.ofSerial(typedValue.componentType, val), childIndex)); - } - } - listVector.endValue(index, values.size()); - listVector.setValueCount(index + 1); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/LargeUtf8AvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/LargeUtf8AvaticaParameterConverter.java deleted file mode 100644 index 4a89f5ccc0005..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/LargeUtf8AvaticaParameterConverter.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.util.Text; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for LargeUtf8 Arrow types. */ -public class LargeUtf8AvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public LargeUtf8AvaticaParameterConverter(ArrowType.LargeUtf8 type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - String value = (String) typedValue.toLocal(); - if (vector instanceof LargeVarCharVector) { - ((LargeVarCharVector) vector).setSafe(index, new Text(value)); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/ListAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/ListAvaticaParameterConverter.java deleted file mode 100644 index f4f9faaa22093..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/ListAvaticaParameterConverter.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import java.util.List; -import org.apache.arrow.driver.jdbc.utils.AvaticaParameterBinder; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for List Arrow types. */ -public class ListAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public ListAvaticaParameterConverter(ArrowType.List type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - final List values = (List) typedValue.value; - - if (vector instanceof ListVector) { - ListVector listVector = ((ListVector) vector); - FieldVector childVector = listVector.getDataVector(); - - int startPos = listVector.startNewValue(index); - for (int i = 0; i < values.size(); i++) { - Object val = values.get(i); - int childIndex = startPos + i; - if (val == null) { - if (childVector.getField().isNullable()) { - childVector.setNull(childIndex); - } else { - throw new UnsupportedOperationException("Can't set null on non-nullable child list"); - } - } else { - childVector - .getField() - .getType() - .accept( - new AvaticaParameterBinder.BinderVisitor( - childVector, TypedValue.ofSerial(typedValue.componentType, val), childIndex)); - } - } - listVector.endValue(index, values.size()); - 
listVector.setValueCount(index + 1); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/MapAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/MapAvaticaParameterConverter.java deleted file mode 100644 index 002c9d5a04fcd..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/MapAvaticaParameterConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Map Arrow types. 
*/ -public class MapAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public MapAvaticaParameterConverter(ArrowType.Map type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/NullAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/NullAvaticaParameterConverter.java deleted file mode 100644 index c92b950d3bd9d..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/NullAvaticaParameterConverter.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Null Arrow types. */ -public class NullAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public NullAvaticaParameterConverter(ArrowType.Null type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - Object value = typedValue.toLocal(); - if (vector instanceof NullVector) { - if (value != null) { - throw new RuntimeException("Can't set non-null value on NullVector"); - } - vector.setNull(index); - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/StructAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/StructAvaticaParameterConverter.java deleted file mode 100644 index b3613447d2c62..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/StructAvaticaParameterConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Struct Arrow types. */ -public class StructAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public StructAvaticaParameterConverter(ArrowType.Struct type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/TimeAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/TimeAvaticaParameterConverter.java deleted file mode 100644 index 0340eb6099ec5..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/TimeAvaticaParameterConverter.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Time Arrow types. 
*/ -public class TimeAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public TimeAvaticaParameterConverter(ArrowType.Time type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - int value = (int) typedValue.toLocal(); - if (vector instanceof TimeMicroVector) { - ((TimeMicroVector) vector).setSafe(index, value); - return true; - } else if (vector instanceof TimeMilliVector) { - ((TimeMilliVector) vector).setSafe(index, value); - return true; - } else if (vector instanceof TimeNanoVector) { - ((TimeNanoVector) vector).setSafe(index, value); - return true; - } else if (vector instanceof TimeSecVector) { - ((TimeSecVector) vector).setSafe(index, value); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/TimestampAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/TimestampAvaticaParameterConverter.java deleted file mode 100644 index add3e305984da..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/TimestampAvaticaParameterConverter.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Timestamp Arrow types. 
*/ -public class TimestampAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public TimestampAvaticaParameterConverter(ArrowType.Timestamp type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - long value = (long) typedValue.toLocal(); - if (vector instanceof TimeStampSecVector) { - ((TimeStampSecVector) vector).setSafe(index, value); - return true; - } else if (vector instanceof TimeStampMicroVector) { - ((TimeStampMicroVector) vector).setSafe(index, value); - return true; - } else if (vector instanceof TimeStampMilliVector) { - ((TimeStampMilliVector) vector).setSafe(index, value); - return true; - } else if (vector instanceof TimeStampNanoVector) { - ((TimeStampNanoVector) vector).setSafe(index, value); - return true; - } else if (vector instanceof TimeStampSecTZVector) { - ((TimeStampSecTZVector) vector).setSafe(index, value); - return true; - } else if (vector instanceof TimeStampMicroTZVector) { - ((TimeStampMicroTZVector) vector).setSafe(index, value); - return true; - } else if (vector instanceof TimeStampMilliTZVector) { - ((TimeStampMilliTZVector) vector).setSafe(index, value); - return true; - } else if (vector instanceof TimeStampNanoTZVector) { - ((TimeStampNanoTZVector) vector).setSafe(index, value); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/UnionAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/UnionAvaticaParameterConverter.java deleted file mode 100644 index e7a3b6eed2a9f..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/UnionAvaticaParameterConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software 
Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Union Arrow types. 
*/ -public class UnionAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public UnionAvaticaParameterConverter(ArrowType.Union type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/Utf8AvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/Utf8AvaticaParameterConverter.java deleted file mode 100644 index 8b868b44a507b..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/Utf8AvaticaParameterConverter.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.util.Text; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Utf8 Arrow types. */ -public class Utf8AvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public Utf8AvaticaParameterConverter(ArrowType.Utf8 type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - String value = (String) typedValue.toLocal(); - if (vector instanceof VarCharVector) { - ((VarCharVector) vector).setSafe(index, new Text(value)); - return true; - } - return false; - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/Utf8ViewAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/Utf8ViewAvaticaParameterConverter.java deleted file mode 100644 index 076fefc42a39c..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/Utf8ViewAvaticaParameterConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.converter.impl; - -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.remote.TypedValue; - -/** AvaticaParameterConverter for Utf8View Arrow types. */ -public class Utf8ViewAvaticaParameterConverter extends BaseAvaticaParameterConverter { - - public Utf8ViewAvaticaParameterConverter(ArrowType.Utf8View type) {} - - @Override - public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { - throw new UnsupportedOperationException("Utf8View not supported"); - } - - @Override - public AvaticaParameter createParameter(Field field) { - return createParameter(field, false); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImpl.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImpl.java deleted file mode 100644 index e8bae2a207346..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImpl.java +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.Objects; -import java.util.Properties; -import org.apache.arrow.driver.jdbc.ArrowFlightConnection; -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.CallOption; -import org.apache.arrow.flight.FlightCallHeaders; -import org.apache.arrow.flight.HeaderCallOption; -import org.apache.arrow.util.Preconditions; -import org.apache.calcite.avatica.ConnectionConfig; -import org.apache.calcite.avatica.ConnectionConfigImpl; -import org.apache.calcite.avatica.ConnectionProperty; - -/** A {@link ConnectionConfig} for the {@link ArrowFlightConnection}. */ -public final class ArrowFlightConnectionConfigImpl extends ConnectionConfigImpl { - public ArrowFlightConnectionConfigImpl(final Properties properties) { - super(properties); - } - - /** - * Gets the host. - * - * @return the host. - */ - public String getHost() { - return ArrowFlightConnectionProperty.HOST.getString(properties); - } - - /** - * Gets the port. - * - * @return the port. - */ - public int getPort() { - return ArrowFlightConnectionProperty.PORT.getInteger(properties); - } - - /** - * Gets the host. - * - * @return the host. - */ - public String getUser() { - return ArrowFlightConnectionProperty.USER.getString(properties); - } - - /** - * Gets the host. 
- * - * @return the host. - */ - public String getPassword() { - return ArrowFlightConnectionProperty.PASSWORD.getString(properties); - } - - public String getToken() { - return ArrowFlightConnectionProperty.TOKEN.getString(properties); - } - - /** - * Gets the KeyStore path. - * - * @return the path. - */ - public String getTrustStorePath() { - return ArrowFlightConnectionProperty.TRUST_STORE.getString(properties); - } - - /** - * Gets the KeyStore password. - * - * @return the password. - */ - public String getTrustStorePassword() { - return ArrowFlightConnectionProperty.TRUST_STORE_PASSWORD.getString(properties); - } - - /** - * Check if the JDBC should use the trusted store files from the operating system. - * - * @return whether to use system trusted store certificates. - */ - public boolean useSystemTrustStore() { - return ArrowFlightConnectionProperty.USE_SYSTEM_TRUST_STORE.getBoolean(properties); - } - - public String getTlsRootCertificatesPath() { - return ArrowFlightConnectionProperty.TLS_ROOT_CERTS.getString(properties); - } - - public String getClientCertificatePath() { - return ArrowFlightConnectionProperty.CLIENT_CERTIFICATE.getString(properties); - } - - public String getClientKeyPath() { - return ArrowFlightConnectionProperty.CLIENT_KEY.getString(properties); - } - - /** - * Whether to use TLS encryption. - * - * @return whether to use TLS encryption. - */ - public boolean useEncryption() { - return ArrowFlightConnectionProperty.USE_ENCRYPTION.getBoolean(properties); - } - - public boolean getDisableCertificateVerification() { - return ArrowFlightConnectionProperty.CERTIFICATE_VERIFICATION.getBoolean(properties); - } - - /** - * Gets the thread pool size. - * - * @return the thread pool size. - */ - public int threadPoolSize() { - return ArrowFlightConnectionProperty.THREAD_POOL_SIZE.getInteger(properties); - } - - /** - * Indicates if sub-connections created for stream retrieval should reuse cookies from the main - * connection. 
- */ - public boolean retainCookies() { - return ArrowFlightConnectionProperty.RETAIN_COOKIES.getBoolean(properties); - } - - /** - * Indicates if sub-connections created for stream retrieval should reuse bearer tokens created - * from the main connection. - */ - public boolean retainAuth() { - return ArrowFlightConnectionProperty.RETAIN_AUTH.getBoolean(properties); - } - - /** - * The catalog to which a connection is made. - * - * @return the catalog. - */ - public String getCatalog() { - return ArrowFlightConnectionProperty.CATALOG.getString(properties); - } - - /** - * Gets the {@link CallOption}s from this {@link ConnectionConfig}. - * - * @return the call options. - */ - public CallOption toCallOption() { - final CallHeaders headers = new FlightCallHeaders(); - Map headerAttributes = getHeaderAttributes(); - headerAttributes.forEach(headers::insert); - return new HeaderCallOption(headers); - } - - /** - * Gets which properties should be added as headers. - * - * @return {@link Map} - */ - public Map getHeaderAttributes() { - Map headers = new HashMap<>(); - ArrowFlightConnectionProperty[] builtInProperties = ArrowFlightConnectionProperty.values(); - properties.forEach( - (key, val) -> { - // For built-in properties before adding new headers - if (Arrays.stream(builtInProperties) - .noneMatch( - builtInProperty -> builtInProperty.camelName.equalsIgnoreCase(key.toString()))) { - headers.put(key.toString(), val.toString()); - } - }); - return headers; - } - - /** Custom {@link ConnectionProperty} for the {@link ArrowFlightConnectionConfigImpl}. 
*/ - public enum ArrowFlightConnectionProperty implements ConnectionProperty { - HOST("host", null, Type.STRING, true), - PORT("port", null, Type.NUMBER, true), - USER("user", null, Type.STRING, false), - PASSWORD("password", null, Type.STRING, false), - USE_ENCRYPTION("useEncryption", true, Type.BOOLEAN, false), - CERTIFICATE_VERIFICATION("disableCertificateVerification", false, Type.BOOLEAN, false), - TRUST_STORE("trustStore", null, Type.STRING, false), - TRUST_STORE_PASSWORD("trustStorePassword", null, Type.STRING, false), - USE_SYSTEM_TRUST_STORE("useSystemTrustStore", true, Type.BOOLEAN, false), - TLS_ROOT_CERTS("tlsRootCerts", null, Type.STRING, false), - CLIENT_CERTIFICATE("clientCertificate", null, Type.STRING, false), - CLIENT_KEY("clientKey", null, Type.STRING, false), - THREAD_POOL_SIZE("threadPoolSize", 1, Type.NUMBER, false), - TOKEN("token", null, Type.STRING, false), - RETAIN_COOKIES("retainCookies", true, Type.BOOLEAN, false), - RETAIN_AUTH("retainAuth", true, Type.BOOLEAN, false), - CATALOG("catalog", null, Type.STRING, false); - - private final String camelName; - private final Object defaultValue; - private final Type type; - private final boolean required; - - ArrowFlightConnectionProperty( - final String camelName, - final Object defaultValue, - final Type type, - final boolean required) { - this.camelName = Preconditions.checkNotNull(camelName); - this.defaultValue = defaultValue; - this.type = Preconditions.checkNotNull(type); - this.required = required; - } - - /** - * Gets the property. - * - * @param properties the properties from which to fetch this property. - * @return the property. 
- */ - public Object get(final Properties properties) { - Preconditions.checkNotNull(properties, "Properties cannot be null."); - Object value = properties.get(camelName); - if (value == null) { - value = properties.get(camelName.toLowerCase()); - } - if (required) { - if (value == null) { - throw new IllegalStateException( - String.format("Required property not provided: <%s>.", this)); - } - return value; - } else { - return value != null ? value : defaultValue; - } - } - - /** - * Gets the property as Boolean. - * - * @param properties the properties from which to fetch this property. - * @return the property. - */ - public Boolean getBoolean(final Properties properties) { - final String valueFromProperties = String.valueOf(get(properties)); - return valueFromProperties.equals("1") || valueFromProperties.equals("true"); - } - - /** - * Gets the property as Integer. - * - * @param properties the properties from which to fetch this property. - * @return the property. - */ - public Integer getInteger(final Properties properties) { - final String valueFromProperties = String.valueOf(get(properties)); - return valueFromProperties.equals("null") ? null : Integer.parseInt(valueFromProperties); - } - - /** - * Gets the property as String. - * - * @param properties the properties from which to fetch this property. - * @return the property. 
- */ - public String getString(final Properties properties) { - return Objects.toString(get(properties), null); - } - - @Override - public String camelName() { - return camelName; - } - - @Override - public Object defaultValue() { - return defaultValue; - } - - @Override - public Type type() { - return type; - } - - @Override - public PropEnv wrap(final Properties properties) { - throw new UnsupportedOperationException("Operation unsupported."); - } - - @Override - public boolean required() { - return required; - } - - @Override - public Class valueClass() { - return type.defaultValueClass(); - } - - /** - * Replaces the semicolons in the URL to the proper format. - * - * @param url the current connection string - * @return the formatted url - */ - public static String replaceSemiColons(String url) { - if (url != null) { - url = url.replaceFirst(";", "?"); - url = url.replaceAll(";", "&"); - } - return url; - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java deleted file mode 100644 index 4c2a9b865f141..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import java.util.List; -import org.apache.arrow.driver.jdbc.client.ArrowFlightSqlClientHandler.PreparedStatement; -import org.apache.arrow.driver.jdbc.converter.impl.BinaryAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.BoolAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.DateAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.DecimalAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.DurationAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.FixedSizeBinaryAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.FixedSizeListAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.FloatingPointAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.IntAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.IntervalAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.LargeBinaryAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.LargeListAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.LargeUtf8AvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.ListAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.MapAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.NullAvaticaParameterConverter; -import 
org.apache.arrow.driver.jdbc.converter.impl.StructAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.TimeAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.TimestampAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.UnionAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.Utf8AvaticaParameterConverter; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.calcite.avatica.remote.TypedValue; - -/** - * Convert Avatica PreparedStatement parameters from a list of TypedValue to Arrow and bind them to - * the VectorSchemaRoot representing the PreparedStatement parameters. - * - *

    NOTE: Make sure to close the parameters VectorSchemaRoot once we're done with them. - */ -public class AvaticaParameterBinder { - private final PreparedStatement preparedStatement; - private final VectorSchemaRoot parameters; - - /** - * Instantiate a new AvaticaParameterBinder. - * - * @param preparedStatement The PreparedStatement to bind parameters to. - * @param bufferAllocator The BufferAllocator to use for allocating memory. - */ - public AvaticaParameterBinder( - PreparedStatement preparedStatement, BufferAllocator bufferAllocator) { - this.parameters = - VectorSchemaRoot.create(preparedStatement.getParameterSchema(), bufferAllocator); - this.preparedStatement = preparedStatement; - } - - /** - * Bind the given Avatica values to the prepared statement. - * - * @param typedValues The parameter values. - */ - public void bind(List typedValues) { - bind(typedValues, 0); - } - - /** - * Bind the given Avatica values to the prepared statement at the given index. - * - * @param typedValues The parameter values. - * @param index index for parameter. - */ - public void bind(List typedValues, int index) { - if (preparedStatement.getParameterSchema().getFields().size() != typedValues.size()) { - throw new IllegalStateException( - String.format( - "Prepared statement has %s parameters, but only received %s", - preparedStatement.getParameterSchema().getFields().size(), typedValues.size())); - } - - for (int i = 0; i < typedValues.size(); i++) { - bind(parameters.getVector(i), typedValues.get(i), index); - } - - if (!typedValues.isEmpty()) { - parameters.setRowCount(index + 1); - preparedStatement.setParameters(parameters); - } - } - - /** - * Bind a TypedValue to the given index on the FieldVector. - * - * @param vector FieldVector to bind to. - * @param typedValue TypedValue to bind to the vector. - * @param index Vector index to bind the value at. 
- */ - private void bind(FieldVector vector, TypedValue typedValue, int index) { - try { - if (typedValue.value == null) { - if (vector.getField().isNullable()) { - vector.setNull(index); - } else { - throw new UnsupportedOperationException("Can't set null on non-nullable parameter"); - } - } else if (!vector - .getField() - .getType() - .accept(new BinderVisitor(vector, typedValue, index))) { - throw new UnsupportedOperationException( - String.format("Binding to vector type %s is not yet supported", vector.getClass())); - } - } catch (ClassCastException e) { - throw new UnsupportedOperationException( - String.format( - "Binding value of type %s is not yet supported for expected Arrow type %s", - typedValue.type, vector.getField().getType())); - } - } - - /** - * ArrowTypeVisitor that binds Avatica TypedValues to the given FieldVector at the specified - * index. - */ - public static class BinderVisitor implements ArrowType.ArrowTypeVisitor { - private final FieldVector vector; - private final TypedValue typedValue; - private final int index; - - /** - * Instantiate a new BinderVisitor. - * - * @param vector FieldVector to bind values to. - * @param value TypedValue to bind. - * @param index Vector index (0-based) to bind the value to. 
- */ - public BinderVisitor(FieldVector vector, TypedValue value, int index) { - this.vector = vector; - this.typedValue = value; - this.index = index; - } - - @Override - public Boolean visit(ArrowType.Null type) { - return new NullAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Struct type) { - return new StructAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.List type) { - return new ListAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.LargeList type) { - return new LargeListAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.FixedSizeList type) { - return new FixedSizeListAvaticaParameterConverter(type) - .bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Union type) { - return new UnionAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Map type) { - return new MapAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Int type) { - return new IntAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.FloatingPoint type) { - return new FloatingPointAvaticaParameterConverter(type) - .bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Utf8 type) { - return new Utf8AvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Utf8View type) { - throw new UnsupportedOperationException("Utf8View is unsupported"); - } - - @Override - public Boolean visit(ArrowType.LargeUtf8 type) { - return new 
LargeUtf8AvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Binary type) { - return new BinaryAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.BinaryView type) { - throw new UnsupportedOperationException("BinaryView is unsupported"); - } - - @Override - public Boolean visit(ArrowType.LargeBinary type) { - return new LargeBinaryAvaticaParameterConverter(type) - .bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.FixedSizeBinary type) { - return new FixedSizeBinaryAvaticaParameterConverter(type) - .bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Bool type) { - return new BoolAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Decimal type) { - return new DecimalAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Date type) { - return new DateAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Time type) { - return new TimeAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Timestamp type) { - return new TimestampAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Interval type) { - return new IntervalAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.Duration type) { - return new DurationAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); - } - - @Override - public Boolean visit(ArrowType.ListView type) { - throw new UnsupportedOperationException("Binding is not yet supported for 
type " + type); - } - - @Override - public Boolean visit(ArrowType.LargeListView type) { - throw new UnsupportedOperationException("Binding is not yet supported for type " + type); - } - - @Override - public Boolean visit(ArrowType.RunEndEncoded type) { - throw new UnsupportedOperationException( - "No Avatica parameter binder implemented for type " + type); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapper.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapper.java deleted file mode 100644 index 9a17c97c9d64d..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapper.java +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.utils; - -import static com.google.common.base.Preconditions.checkNotNull; - -import java.sql.Array; -import java.sql.Blob; -import java.sql.CallableStatement; -import java.sql.Clob; -import java.sql.Connection; -import java.sql.DatabaseMetaData; -import java.sql.NClob; -import java.sql.PreparedStatement; -import java.sql.SQLClientInfoException; -import java.sql.SQLException; -import java.sql.SQLWarning; -import java.sql.SQLXML; -import java.sql.Savepoint; -import java.sql.Statement; -import java.sql.Struct; -import java.util.Map; -import java.util.Properties; -import java.util.concurrent.Executor; -import org.apache.arrow.driver.jdbc.ArrowFlightJdbcPooledConnection; - -/** - * Auxiliary wrapper class for {@link Connection}, used on {@link ArrowFlightJdbcPooledConnection}. - */ -public class ConnectionWrapper implements Connection { - private final Connection realConnection; - - public ConnectionWrapper(final Connection connection) { - realConnection = checkNotNull(connection); - } - - @Override - public T unwrap(final Class type) { - return type.cast(realConnection); - } - - @Override - public boolean isWrapperFor(final Class type) { - return realConnection.getClass().isAssignableFrom(type); - } - - @Override - public Statement createStatement() throws SQLException { - return realConnection.createStatement(); - } - - @Override - public PreparedStatement prepareStatement(final String sqlQuery) throws SQLException { - return realConnection.prepareStatement(sqlQuery); - } - - @Override - public CallableStatement prepareCall(final String sqlQuery) throws SQLException { - return realConnection.prepareCall(sqlQuery); - } - - @Override - public String nativeSQL(final String sqlStatement) throws SQLException { - return realConnection.nativeSQL(sqlStatement); - } - - @Override - public void setAutoCommit(boolean autoCommit) throws SQLException { - realConnection.setAutoCommit(autoCommit); - } - - @Override - public boolean 
getAutoCommit() throws SQLException { - return realConnection.getAutoCommit(); - } - - @Override - public void commit() throws SQLException { - realConnection.commit(); - } - - @Override - public void rollback() throws SQLException { - realConnection.rollback(); - } - - @Override - public void close() throws SQLException { - realConnection.close(); - } - - @Override - public boolean isClosed() throws SQLException { - return realConnection.isClosed(); - } - - @Override - public DatabaseMetaData getMetaData() throws SQLException { - return realConnection.getMetaData(); - } - - @Override - public void setReadOnly(final boolean readOnly) throws SQLException { - realConnection.setReadOnly(readOnly); - } - - @Override - public boolean isReadOnly() throws SQLException { - return realConnection.isReadOnly(); - } - - @Override - public void setCatalog(final String catalogName) throws SQLException { - realConnection.setCatalog(catalogName); - } - - @Override - public String getCatalog() throws SQLException { - return realConnection.getCatalog(); - } - - @Override - public void setTransactionIsolation(final int transactionIsolationId) throws SQLException { - realConnection.setTransactionIsolation(transactionIsolationId); - } - - @Override - public int getTransactionIsolation() throws SQLException { - return realConnection.getTransactionIsolation(); - } - - @Override - public SQLWarning getWarnings() throws SQLException { - return realConnection.getWarnings(); - } - - @Override - public void clearWarnings() throws SQLException { - realConnection.clearWarnings(); - } - - @Override - public Statement createStatement(final int resultSetTypeId, final int resultSetConcurrencyId) - throws SQLException { - return realConnection.createStatement(resultSetTypeId, resultSetConcurrencyId); - } - - @Override - public PreparedStatement prepareStatement( - final String sqlQuery, final int resultSetTypeId, final int resultSetConcurrencyId) - throws SQLException { - return 
realConnection.prepareStatement(sqlQuery, resultSetTypeId, resultSetConcurrencyId); - } - - @Override - public CallableStatement prepareCall( - final String query, final int resultSetTypeId, final int resultSetConcurrencyId) - throws SQLException { - return realConnection.prepareCall(query, resultSetTypeId, resultSetConcurrencyId); - } - - @Override - public Map> getTypeMap() throws SQLException { - return realConnection.getTypeMap(); - } - - @Override - public void setTypeMap(final Map> typeNameToClass) throws SQLException { - realConnection.setTypeMap(typeNameToClass); - } - - @Override - public void setHoldability(final int holdabilityId) throws SQLException { - realConnection.setHoldability(holdabilityId); - } - - @Override - public int getHoldability() throws SQLException { - return realConnection.getHoldability(); - } - - @Override - public Savepoint setSavepoint() throws SQLException { - return realConnection.setSavepoint(); - } - - @Override - public Savepoint setSavepoint(final String savepointName) throws SQLException { - return realConnection.setSavepoint(savepointName); - } - - @Override - public void rollback(final Savepoint savepoint) throws SQLException { - realConnection.rollback(savepoint); - } - - @Override - public void releaseSavepoint(final Savepoint savepoint) throws SQLException { - realConnection.releaseSavepoint(savepoint); - } - - @Override - public Statement createStatement( - final int resultSetType, final int resultSetConcurrency, final int resultSetHoldability) - throws SQLException { - return realConnection.createStatement( - resultSetType, resultSetConcurrency, resultSetHoldability); - } - - @Override - public PreparedStatement prepareStatement( - final String sqlQuery, - final int resultSetType, - final int resultSetConcurrency, - final int resultSetHoldability) - throws SQLException { - return realConnection.prepareStatement( - sqlQuery, resultSetType, resultSetConcurrency, resultSetHoldability); - } - - @Override - public 
CallableStatement prepareCall( - final String sqlQuery, - final int resultSetType, - final int resultSetConcurrency, - final int resultSetHoldability) - throws SQLException { - return realConnection.prepareCall( - sqlQuery, resultSetType, resultSetConcurrency, resultSetHoldability); - } - - @Override - public PreparedStatement prepareStatement(final String sqlQuery, final int autoGeneratedKeysId) - throws SQLException { - return realConnection.prepareStatement(sqlQuery, autoGeneratedKeysId); - } - - @Override - public PreparedStatement prepareStatement(final String sqlQuery, final int[] columnIndices) - throws SQLException { - return realConnection.prepareStatement(sqlQuery, columnIndices); - } - - @Override - public PreparedStatement prepareStatement(final String sqlQuery, final String[] columnNames) - throws SQLException { - return realConnection.prepareStatement(sqlQuery, columnNames); - } - - @Override - public Clob createClob() throws SQLException { - return realConnection.createClob(); - } - - @Override - public Blob createBlob() throws SQLException { - return realConnection.createBlob(); - } - - @Override - public NClob createNClob() throws SQLException { - return realConnection.createNClob(); - } - - @Override - public SQLXML createSQLXML() throws SQLException { - return realConnection.createSQLXML(); - } - - @Override - public boolean isValid(final int timeout) throws SQLException { - return realConnection.isValid(timeout); - } - - @Override - public void setClientInfo(final String propertyName, final String propertyValue) - throws SQLClientInfoException { - realConnection.setClientInfo(propertyName, propertyValue); - } - - @Override - public void setClientInfo(final Properties properties) throws SQLClientInfoException { - realConnection.setClientInfo(properties); - } - - @Override - public String getClientInfo(final String propertyName) throws SQLException { - return realConnection.getClientInfo(propertyName); - } - - @Override - public Properties 
getClientInfo() throws SQLException { - return realConnection.getClientInfo(); - } - - @Override - public Array createArrayOf(final String typeName, final Object[] elements) throws SQLException { - return realConnection.createArrayOf(typeName, elements); - } - - @Override - public Struct createStruct(final String typeName, final Object[] attributes) throws SQLException { - return realConnection.createStruct(typeName, attributes); - } - - @Override - public void setSchema(final String schemaName) throws SQLException { - realConnection.setSchema(schemaName); - } - - @Override - public String getSchema() throws SQLException { - return realConnection.getSchema(); - } - - @Override - public void abort(final Executor executor) throws SQLException { - realConnection.abort(executor); - } - - @Override - public void setNetworkTimeout(final Executor executor, final int timeoutInMillis) - throws SQLException { - realConnection.setNetworkTimeout(executor, timeoutInMillis); - } - - @Override - public int getNetworkTimeout() throws SQLException { - return realConnection.getNetworkTimeout(); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java deleted file mode 100644 index 17b0f42dc7111..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.arrow.driver.jdbc.converter.impl.BinaryAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.BinaryViewAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.BoolAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.DateAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.DecimalAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.DurationAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.FixedSizeBinaryAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.FixedSizeListAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.FloatingPointAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.IntAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.IntervalAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.LargeBinaryAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.LargeListAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.LargeUtf8AvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.ListAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.MapAvaticaParameterConverter; -import 
org.apache.arrow.driver.jdbc.converter.impl.NullAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.StructAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.TimeAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.TimestampAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.UnionAvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.Utf8AvaticaParameterConverter; -import org.apache.arrow.driver.jdbc.converter.impl.Utf8ViewAvaticaParameterConverter; -import org.apache.arrow.flight.sql.FlightSqlColumnMetadata; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.calcite.avatica.AvaticaParameter; -import org.apache.calcite.avatica.ColumnMetaData; -import org.apache.calcite.avatica.proto.Common; - -/** Convert objects between Arrow and Avatica. */ -public final class ConvertUtils { - - private ConvertUtils() {} - - /** - * Convert Fields To Column MetaData List functions. - * - * @param fields list of {@link Field}. - * @return list of {@link ColumnMetaData}. 
- */ - public static List convertArrowFieldsToColumnMetaDataList( - final List fields) { - return Stream.iterate(0, Math::incrementExact) - .limit(fields.size()) - .map( - index -> { - final Field field = fields.get(index); - final ArrowType fieldType = field.getType(); - - final Common.ColumnMetaData.Builder builder = - Common.ColumnMetaData.newBuilder() - .setOrdinal(index) - .setColumnName(field.getName()) - .setLabel(field.getName()); - - setOnColumnMetaDataBuilder(builder, field.getMetadata()); - - builder.setType( - Common.AvaticaType.newBuilder() - .setId(SqlTypes.getSqlTypeIdFromArrowType(fieldType)) - .setName(SqlTypes.getSqlTypeNameFromArrowType(fieldType)) - .build()); - - return ColumnMetaData.fromProto(builder.build()); - }) - .collect(Collectors.toList()); - } - - /** - * Set on Column MetaData Builder. - * - * @param builder {@link Common.ColumnMetaData.Builder} - * @param metadataMap {@link Map} - */ - public static void setOnColumnMetaDataBuilder( - final Common.ColumnMetaData.Builder builder, final Map metadataMap) { - final FlightSqlColumnMetadata columnMetadata = new FlightSqlColumnMetadata(metadataMap); - final String catalogName = columnMetadata.getCatalogName(); - if (catalogName != null) { - builder.setCatalogName(catalogName); - } - final String schemaName = columnMetadata.getSchemaName(); - if (schemaName != null) { - builder.setSchemaName(schemaName); - } - final String tableName = columnMetadata.getTableName(); - if (tableName != null) { - builder.setTableName(tableName); - } - - final Integer precision = columnMetadata.getPrecision(); - if (precision != null) { - builder.setPrecision(precision); - } - final Integer scale = columnMetadata.getScale(); - if (scale != null) { - builder.setScale(scale); - } - - final Boolean isAutoIncrement = columnMetadata.isAutoIncrement(); - if (isAutoIncrement != null) { - builder.setAutoIncrement(isAutoIncrement); - } - final Boolean caseSensitive = columnMetadata.isCaseSensitive(); - if (caseSensitive 
!= null) { - builder.setCaseSensitive(caseSensitive); - } - final Boolean readOnly = columnMetadata.isReadOnly(); - if (readOnly != null) { - builder.setReadOnly(readOnly); - } - final Boolean searchable = columnMetadata.isSearchable(); - if (searchable != null) { - builder.setSearchable(searchable); - } - } - - /** - * Convert Fields To Avatica Parameters. - * - * @param fields list of {@link Field}. - * @return list of {@link AvaticaParameter}. - */ - public static List convertArrowFieldsToAvaticaParameters( - final List fields) { - return fields.stream() - .map(field -> field.getType().accept(new ConverterVisitor(field))) - .collect(Collectors.toList()); - } - - private static class ConverterVisitor implements ArrowType.ArrowTypeVisitor { - private final Field field; - - private ConverterVisitor(Field field) { - this.field = field; - } - - @Override - public AvaticaParameter visit(ArrowType.Null type) { - return new NullAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Struct type) { - return new StructAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.List type) { - return new ListAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.LargeList type) { - return new LargeListAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.FixedSizeList type) { - return new FixedSizeListAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Union type) { - return new UnionAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Map type) { - return new MapAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Int type) { - return new 
IntAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.FloatingPoint type) { - return new FloatingPointAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Utf8 type) { - return new Utf8AvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Utf8View type) { - return new Utf8ViewAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.LargeUtf8 type) { - return new LargeUtf8AvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Binary type) { - return new BinaryAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.BinaryView type) { - return new BinaryViewAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.LargeBinary type) { - return new LargeBinaryAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.FixedSizeBinary type) { - return new FixedSizeBinaryAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Bool type) { - return new BoolAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Decimal type) { - return new DecimalAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Date type) { - return new DateAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Time type) { - return new TimeAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Timestamp type) { - return new 
TimestampAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Interval type) { - return new IntervalAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.Duration type) { - return new DurationAvaticaParameterConverter(type).createParameter(field); - } - - @Override - public AvaticaParameter visit(ArrowType.ListView type) { - throw new UnsupportedOperationException( - "AvaticaParameter not yet supported for type " + type); - } - - @Override - public AvaticaParameter visit(ArrowType.LargeListView type) { - throw new UnsupportedOperationException( - "AvaticaParameter not yet supported for type " + type); - } - - @Override - public AvaticaParameter visit(ArrowType.RunEndEncoded type) { - throw new UnsupportedOperationException( - "No Avatica parameter binder implemented for type " + type); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtils.java deleted file mode 100644 index 9363e3486c560..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtils.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import static org.apache.calcite.avatica.util.DateTimeUtils.MILLIS_PER_DAY; - -import java.sql.Timestamp; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.LocalTime; -import java.util.Calendar; -import java.util.TimeZone; -import java.util.concurrent.TimeUnit; - -/** Datetime utility functions. */ -public class DateTimeUtils { - private DateTimeUtils() { - // Prevent instantiation. - } - - /** Subtracts given Calendar's TimeZone offset from epoch milliseconds. */ - public static long applyCalendarOffset(long milliseconds, Calendar calendar) { - if (calendar == null) { - calendar = Calendar.getInstance(); - } - - final TimeZone tz = calendar.getTimeZone(); - final TimeZone defaultTz = TimeZone.getDefault(); - - if (tz != defaultTz) { - milliseconds -= tz.getOffset(milliseconds) - defaultTz.getOffset(milliseconds); - } - - return milliseconds; - } - - /** - * Converts Epoch millis to a {@link Timestamp} object. 
- * - * @param millisWithCalendar the Timestamp in Epoch millis - * @return a {@link Timestamp} object representing the given Epoch millis - */ - public static Timestamp getTimestampValue(long millisWithCalendar) { - long milliseconds = millisWithCalendar; - if (milliseconds < 0) { - // LocalTime#ofNanoDay only accepts positive values - milliseconds -= ((milliseconds / MILLIS_PER_DAY) - 1) * MILLIS_PER_DAY; - } - - return Timestamp.valueOf( - LocalDateTime.of( - LocalDate.ofEpochDay(millisWithCalendar / MILLIS_PER_DAY), - LocalTime.ofNanoOfDay(TimeUnit.MILLISECONDS.toNanos(milliseconds % MILLIS_PER_DAY)))); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/FlightEndpointDataQueue.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/FlightEndpointDataQueue.java deleted file mode 100644 index 7c1c569fda729..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/FlightEndpointDataQueue.java +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.utils; - -import static java.lang.String.format; -import static java.util.Collections.synchronizedSet; -import static org.apache.arrow.util.Preconditions.checkNotNull; -import static org.apache.arrow.util.Preconditions.checkState; - -import java.sql.SQLException; -import java.sql.SQLTimeoutException; -import java.util.Collection; -import java.util.HashSet; -import java.util.Set; -import java.util.concurrent.CancellationException; -import java.util.concurrent.CompletionService; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorCompletionService; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.arrow.driver.jdbc.client.CloseableEndpointStreamPair; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.flight.FlightStream; -import org.apache.calcite.avatica.AvaticaConnection; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Auxiliary class used to handle consuming of multiple {@link FlightStream}. - * - *

    The usage follows this routine: - * - *

      - *
    1. Create a FlightStreamQueue; - *
    2. Call enqueue(FlightStream) for all streams to be consumed; - *
    3. Call next() to get a FlightStream that is ready to consume - *
    4. Consume the given FlightStream and add it back to the queue - call - * enqueue(FlightStream) - *
    5. Repeat from (3) until next() returns null. - *
    - */ -public class FlightEndpointDataQueue implements AutoCloseable { - private static final Logger LOGGER = LoggerFactory.getLogger(FlightEndpointDataQueue.class); - private final CompletionService completionService; - private final Set> futures = synchronizedSet(new HashSet<>()); - private final Set endpointsToClose = - synchronizedSet(new HashSet<>()); - private final AtomicBoolean closed = new AtomicBoolean(); - - /** Instantiate a new FlightStreamQueue. */ - protected FlightEndpointDataQueue( - final CompletionService executorService) { - completionService = checkNotNull(executorService); - } - - /** - * Creates a new {@link FlightEndpointDataQueue} from the provided {@link ExecutorService}. - * - * @param service the service from which to create a new queue. - * @return a new queue. - */ - public static FlightEndpointDataQueue createNewQueue(final ExecutorService service) { - return new FlightEndpointDataQueue(new ExecutorCompletionService<>(service)); - } - - /** - * Gets whether this queue is closed. - * - * @return a boolean indicating whether this resource is closed. - */ - public boolean isClosed() { - return closed.get(); - } - - /** Auxiliary functional interface for getting ready-to-consume FlightStreams. */ - @FunctionalInterface - interface EndpointStreamSupplier { - Future get() throws SQLException; - } - - private CloseableEndpointStreamPair next(final EndpointStreamSupplier endpointStreamSupplier) - throws SQLException { - checkOpen(); - while (!futures.isEmpty()) { - final Future future = endpointStreamSupplier.get(); - futures.remove(future); - try { - final CloseableEndpointStreamPair endpoint = future.get(); - // Get the next FlightStream that has a root with content. 
- if (endpoint != null) { - return endpoint; - } - } catch (final ExecutionException e) { - // Unwrap one layer - final Throwable cause = e.getCause(); - if (cause instanceof FlightRuntimeException) { - throw (FlightRuntimeException) cause; - } - throw AvaticaConnection.HELPER.wrap(e.getMessage(), e); - } catch (InterruptedException | CancellationException e) { - throw AvaticaConnection.HELPER.wrap(e.getMessage(), e); - } - } - return null; - } - - /** - * Blocking request with timeout to get the next ready FlightStream in queue. - * - * @param timeoutValue the amount of time to be waited - * @param timeoutUnit the timeoutValue time unit - * @return a FlightStream that is ready to consume or null if all FlightStreams are ended. - */ - public CloseableEndpointStreamPair next(final long timeoutValue, final TimeUnit timeoutUnit) - throws SQLException { - return next( - () -> { - try { - final Future future = - completionService.poll(timeoutValue, timeoutUnit); - if (future != null) { - return future; - } - } catch (final InterruptedException e) { - throw new SQLTimeoutException("Query was interrupted", e); - } - - throw new SQLTimeoutException( - String.format("Query timed out after %d %s", timeoutValue, timeoutUnit)); - }); - } - - /** - * Blocking request to get the next ready FlightStream in queue. - * - * @return a FlightStream that is ready to consume or null if all FlightStreams are ended. - */ - public CloseableEndpointStreamPair next() throws SQLException { - return next( - () -> { - try { - return completionService.take(); - } catch (final InterruptedException e) { - throw AvaticaConnection.HELPER.wrap(e.getMessage(), e); - } - }); - } - - /** Checks if this queue is open. */ - public synchronized void checkOpen() { - checkState(!isClosed(), format("%s closed", this.getClass().getSimpleName())); - } - - /** Readily adds given {@link FlightStream}s to the queue. 
*/ - public void enqueue(final Collection endpointRequests) { - endpointRequests.forEach(this::enqueue); - } - - /** Adds given {@link FlightStream} to the queue. */ - public synchronized void enqueue(final CloseableEndpointStreamPair endpointRequest) { - checkNotNull(endpointRequest); - checkOpen(); - endpointsToClose.add(endpointRequest); - futures.add( - completionService.submit( - () -> { - // `FlightStream#next` will block until new data can be read or stream is over. - while (endpointRequest.getStream().next()) { - if (endpointRequest.getStream().getRoot().getRowCount() > 0) { - return endpointRequest; - } - } - return null; - })); - } - - private static boolean isCallStatusCancelled(final Exception e) { - return e.getCause() instanceof FlightRuntimeException - && ((FlightRuntimeException) e.getCause()).status().code() == CallStatus.CANCELLED.code(); - } - - @Override - public synchronized void close() throws SQLException { - if (isClosed()) { - return; - } - - final Set exceptions = new HashSet<>(); - try { - for (final CloseableEndpointStreamPair endpointToClose : endpointsToClose) { - try { - endpointToClose.getStream().cancel("Cancelling this FlightStream.", null); - } catch (final Exception e) { - final String errorMsg = "Failed to cancel a FlightStream."; - LOGGER.error(errorMsg, e); - exceptions.add(new SQLException(errorMsg, e)); - } - } - futures.forEach( - future -> { - try { - // TODO: Consider adding a hardcoded timeout? 
- future.get(); - } catch (final InterruptedException | ExecutionException e) { - // Ignore if future is already cancelled - if (!isCallStatusCancelled(e)) { - final String errorMsg = "Failed consuming a future during close."; - LOGGER.error(errorMsg, e); - exceptions.add(new SQLException(errorMsg, e)); - } - } - }); - for (final CloseableEndpointStreamPair endpointToClose : endpointsToClose) { - try { - endpointToClose.close(); - } catch (final Exception e) { - final String errorMsg = "Failed to close a FlightStream."; - LOGGER.error(errorMsg, e); - exceptions.add(new SQLException(errorMsg, e)); - } - } - } finally { - endpointsToClose.clear(); - futures.clear(); - closed.set(true); - } - if (!exceptions.isEmpty()) { - final SQLException sqlException = new SQLException("Failed to close streams."); - exceptions.forEach(sqlException::setNextException); - throw sqlException; - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java deleted file mode 100644 index 1aed6cd2cb2cc..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/IntervalStringUtils.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import java.time.Duration; -import java.time.Period; -import org.apache.arrow.vector.util.DateUtility; - -/** - * Utility class to format periods similar to Oracle's representation of "INTERVAL * to *" data - * type. - */ -public final class IntervalStringUtils { - - /** Constructor Method of class. */ - private IntervalStringUtils() {} - - /** - * Formats a period similar to Oracle INTERVAL YEAR TO MONTH data type
    - * . For example, the string "+21-02" defines an interval of 21 years and 2 months. - */ - public static String formatIntervalYear(final Period p) { - long months = p.toTotalMonths(); - boolean neg = false; - if (months < 0) { - months = -months; - neg = true; - } - final int years = (int) (months / DateUtility.yearsToMonths); - months = months % DateUtility.yearsToMonths; - - return String.format("%c%03d-%02d", neg ? '-' : '+', years, months); - } - - /** - * Formats a period similar to Oracle INTERVAL DAY TO SECOND data type.
    - * . For example, the string "-001 18:25:16.766" defines an interval of - 1 day 18 hours 25 - * minutes 16 seconds and 766 milliseconds. - */ - public static String formatIntervalDay(final Duration d) { - long millis = d.toMillis(); - - boolean neg = false; - if (millis < 0) { - millis = -millis; - neg = true; - } - - final int days = (int) (millis / DateUtility.daysToStandardMillis); - millis = millis % DateUtility.daysToStandardMillis; - - final int hours = (int) (millis / DateUtility.hoursToMillis); - millis = millis % DateUtility.hoursToMillis; - - final int minutes = (int) (millis / DateUtility.minutesToMillis); - millis = millis % DateUtility.minutesToMillis; - - final int seconds = (int) (millis / DateUtility.secondsToMillis); - millis = millis % DateUtility.secondsToMillis; - - return String.format( - "%c%03d %02d:%02d:%02d.%03d", neg ? '-' : '+', days, hours, minutes, seconds, millis); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/SqlTypes.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/SqlTypes.java deleted file mode 100644 index 96cb056db2bf4..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/SqlTypes.java +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import java.sql.Types; -import java.util.HashMap; -import java.util.Map; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** SQL Types utility functions. */ -public class SqlTypes { - private static final Map typeIdToName = new HashMap<>(); - - static { - typeIdToName.put(Types.BIT, "BIT"); - typeIdToName.put(Types.TINYINT, "TINYINT"); - typeIdToName.put(Types.SMALLINT, "SMALLINT"); - typeIdToName.put(Types.INTEGER, "INTEGER"); - typeIdToName.put(Types.BIGINT, "BIGINT"); - typeIdToName.put(Types.FLOAT, "FLOAT"); - typeIdToName.put(Types.REAL, "REAL"); - typeIdToName.put(Types.DOUBLE, "DOUBLE"); - typeIdToName.put(Types.NUMERIC, "NUMERIC"); - typeIdToName.put(Types.DECIMAL, "DECIMAL"); - typeIdToName.put(Types.CHAR, "CHAR"); - typeIdToName.put(Types.VARCHAR, "VARCHAR"); - typeIdToName.put(Types.LONGVARCHAR, "LONGVARCHAR"); - typeIdToName.put(Types.DATE, "DATE"); - typeIdToName.put(Types.TIME, "TIME"); - typeIdToName.put(Types.TIMESTAMP, "TIMESTAMP"); - typeIdToName.put(Types.BINARY, "BINARY"); - typeIdToName.put(Types.VARBINARY, "VARBINARY"); - typeIdToName.put(Types.LONGVARBINARY, "LONGVARBINARY"); - typeIdToName.put(Types.NULL, "NULL"); - typeIdToName.put(Types.OTHER, "OTHER"); - typeIdToName.put(Types.JAVA_OBJECT, "JAVA_OBJECT"); - typeIdToName.put(Types.DISTINCT, "DISTINCT"); - typeIdToName.put(Types.STRUCT, "STRUCT"); - typeIdToName.put(Types.ARRAY, "ARRAY"); - typeIdToName.put(Types.BLOB, "BLOB"); - 
typeIdToName.put(Types.CLOB, "CLOB"); - typeIdToName.put(Types.REF, "REF"); - typeIdToName.put(Types.DATALINK, "DATALINK"); - typeIdToName.put(Types.BOOLEAN, "BOOLEAN"); - typeIdToName.put(Types.ROWID, "ROWID"); - typeIdToName.put(Types.NCHAR, "NCHAR"); - typeIdToName.put(Types.NVARCHAR, "NVARCHAR"); - typeIdToName.put(Types.LONGNVARCHAR, "LONGNVARCHAR"); - typeIdToName.put(Types.NCLOB, "NCLOB"); - typeIdToName.put(Types.SQLXML, "SQLXML"); - typeIdToName.put(Types.REF_CURSOR, "REF_CURSOR"); - typeIdToName.put(Types.TIME_WITH_TIMEZONE, "TIME_WITH_TIMEZONE"); - typeIdToName.put(Types.TIMESTAMP_WITH_TIMEZONE, "TIMESTAMP_WITH_TIMEZONE"); - } - - /** - * Convert given {@link ArrowType} to its corresponding SQL type name. - * - * @param arrowType type to convert from - * @return corresponding SQL type name. - * @see java.sql.Types - */ - public static String getSqlTypeNameFromArrowType(ArrowType arrowType) { - final int typeId = getSqlTypeIdFromArrowType(arrowType); - return typeIdToName.get(typeId); - } - - /** - * Convert given {@link ArrowType} to its corresponding SQL type ID. - * - * @param arrowType type to convert from - * @return corresponding SQL type ID. 
- * @see java.sql.Types - */ - public static int getSqlTypeIdFromArrowType(ArrowType arrowType) { - final ArrowType.ArrowTypeID typeID = arrowType.getTypeID(); - switch (typeID) { - case Int: - final int bitWidth = ((ArrowType.Int) arrowType).getBitWidth(); - switch (bitWidth) { - case 8: - return Types.TINYINT; - case 16: - return Types.SMALLINT; - case 32: - return Types.INTEGER; - case 64: - return Types.BIGINT; - default: - break; - } - break; - case Binary: - return Types.VARBINARY; - case FixedSizeBinary: - return Types.BINARY; - case LargeBinary: - return Types.LONGVARBINARY; - case Utf8: - return Types.VARCHAR; - case LargeUtf8: - return Types.LONGVARCHAR; - case Date: - return Types.DATE; - case Time: - return Types.TIME; - case Timestamp: - return Types.TIMESTAMP; - case Bool: - return Types.BOOLEAN; - case Decimal: - return Types.DECIMAL; - case FloatingPoint: - final FloatingPointPrecision floatingPointPrecision = - ((ArrowType.FloatingPoint) arrowType).getPrecision(); - switch (floatingPointPrecision) { - case DOUBLE: - return Types.DOUBLE; - case SINGLE: - return Types.FLOAT; - default: - break; - } - break; - case List: - case FixedSizeList: - case LargeList: - return Types.ARRAY; - case Struct: - case Duration: - case Interval: - case Map: - case Union: - return Types.JAVA_OBJECT; - case NONE: - case Null: - return Types.NULL; - default: - break; - } - - throw new IllegalArgumentException("Unsupported ArrowType " + arrowType); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/UrlParser.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/UrlParser.java deleted file mode 100644 index 5d4d9ce1aa37f..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/UrlParser.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import java.io.UnsupportedEncodingException; -import java.net.URLDecoder; -import java.util.HashMap; -import java.util.Map; - -/** URL Parser for extracting key values from a connection string. */ -public final class UrlParser { - private UrlParser() {} - - /** - * Parse URL key value parameters. - * - *

    URL-decodes keys and values. - * - * @param url {@link String} - * @return {@link Map} - */ - @SuppressWarnings("StringSplitter") - public static Map parse(String url, String separator) { - Map resultMap = new HashMap<>(); - if (url != null) { - String[] keyValues = url.split(separator); - - for (String keyValue : keyValues) { - try { - int separatorKey = - keyValue.indexOf("="); // Find the first equal sign to split key and value. - if (separatorKey - != -1) { // Avoid crashes when not finding an equal sign in the property value. - String key = keyValue.substring(0, separatorKey); - key = URLDecoder.decode(key, "UTF-8"); - String value = ""; - if (!keyValue.endsWith("=")) { // Avoid crashes for empty values. - value = keyValue.substring(separatorKey + 1); - } - value = URLDecoder.decode(value, "UTF-8"); - resultMap.put(key, value); - } - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } - } - } - return resultMap; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformer.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformer.java deleted file mode 100644 index b3c7a1ee5c6c1..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformer.java +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; - -/** Converts Arrow's {@link VectorSchemaRoot} format to one JDBC would expect. */ -@FunctionalInterface -public interface VectorSchemaRootTransformer { - VectorSchemaRoot transform(VectorSchemaRoot originalRoot, VectorSchemaRoot transformedRoot) - throws Exception; - - /** Transformer's helper class; builds a new {@link VectorSchemaRoot}. */ - class Builder { - - private final Schema schema; - private final BufferAllocator bufferAllocator; - private final List newFields = new ArrayList<>(); - private final Collection tasks = new ArrayList<>(); - - /** - * Constructor for the VectorSchemaRootTransformer's Builder. - * - * @param schema The Arrow schema. - * @param bufferAllocator The BufferAllocator to use for allocating memory. 
- */ - public Builder(final Schema schema, final BufferAllocator bufferAllocator) { - this.schema = schema; - this.bufferAllocator = - bufferAllocator.newChildAllocator( - "VectorSchemaRootTransformer", 0, bufferAllocator.getLimit()); - } - - /** - * Add task to transform a vector to a new vector renaming it. This also adds - * transformedVectorName to the transformed {@link VectorSchemaRoot} schema. - * - * @param originalVectorName Name of the original vector to be transformed. - * @param transformedVectorName Name of the vector that is the result of the transformation. - * @return a VectorSchemaRoot instance with a task to rename a field vector. - */ - public Builder renameFieldVector( - final String originalVectorName, final String transformedVectorName) { - tasks.add( - (originalRoot, transformedRoot) -> { - final FieldVector originalVector = originalRoot.getVector(originalVectorName); - final FieldVector transformedVector = transformedRoot.getVector(transformedVectorName); - - final ArrowType originalType = originalVector.getField().getType(); - final ArrowType transformedType = transformedVector.getField().getType(); - if (!originalType.equals(transformedType)) { - throw new IllegalArgumentException( - String.format( - "Cannot transfer vector with field type %s to %s", - originalType, transformedType)); - } - - if (originalVector instanceof BaseVariableWidthVector) { - ((BaseVariableWidthVector) originalVector) - .transferTo(((BaseVariableWidthVector) transformedVector)); - } else if (originalVector instanceof BaseFixedWidthVector) { - ((BaseFixedWidthVector) originalVector) - .transferTo(((BaseFixedWidthVector) transformedVector)); - } else { - throw new IllegalStateException( - String.format("Cannot transfer vector of type %s", originalVector.getClass())); - } - }); - - final Field originalField = schema.findField(originalVectorName); - newFields.add( - new Field( - transformedVectorName, - new FieldType( - originalField.isNullable(), - 
originalField.getType(), - originalField.getDictionary(), - originalField.getMetadata()), - originalField.getChildren())); - - return this; - } - - /** - * Adds an empty field to the transformed {@link VectorSchemaRoot} schema. - * - * @param fieldName Name of the field to be added. - * @param fieldType Type of the field to be added. - * @return a VectorSchemaRoot instance with the current tasks. - */ - public Builder addEmptyField(final String fieldName, final Types.MinorType fieldType) { - newFields.add(Field.nullable(fieldName, fieldType.getType())); - - return this; - } - - /** - * Adds an empty field to the transformed {@link VectorSchemaRoot} schema. - * - * @param fieldName Name of the field to be added. - * @param fieldType Type of the field to be added. - * @return a VectorSchemaRoot instance with the current tasks. - */ - public Builder addEmptyField(final String fieldName, final ArrowType fieldType) { - newFields.add(Field.nullable(fieldName, fieldType)); - - return this; - } - - /** - * Build the {@link VectorSchemaRoot} with applied transformation tasks. - * - * @return The built {@link VectorSchemaRoot}. - */ - public VectorSchemaRootTransformer build() { - return (originalRoot, transformedRoot) -> { - if (transformedRoot == null) { - transformedRoot = VectorSchemaRoot.create(new Schema(newFields), bufferAllocator); - } - - for (final Task task : tasks) { - task.run(originalRoot, transformedRoot); - } - - transformedRoot.setRowCount(originalRoot.getRowCount()); - - originalRoot.clear(); - return transformedRoot; - }; - } - - /** - * Functional interface used to a task to transform a VectorSchemaRoot into a new - * VectorSchemaRoot. 
- */ - @FunctionalInterface - interface Task { - void run(VectorSchemaRoot originalRoot, VectorSchemaRoot transformedRoot); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/main/resources/META-INF/services/java.sql.Driver b/java/flight/flight-sql-jdbc-core/src/main/resources/META-INF/services/java.sql.Driver deleted file mode 100644 index 83cfb23427f71..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/resources/META-INF/services/java.sql.Driver +++ /dev/null @@ -1,15 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -org.apache.arrow.driver.jdbc.ArrowFlightJdbcDriver \ No newline at end of file diff --git a/java/flight/flight-sql-jdbc-core/src/main/resources/properties/flight.properties b/java/flight/flight-sql-jdbc-core/src/main/resources/properties/flight.properties deleted file mode 100644 index 4ba818d81bb28..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/main/resources/properties/flight.properties +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -org.apache.arrow.flight.jdbc-driver.name=${project.name} -org.apache.arrow.flight.jdbc-driver.version=${project.version} -org.apache.arrow.flight.name=${project.groupId}\:${project.artifactId} -org.apache.arrow.flight.version=${project.version} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadataTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadataTest.java deleted file mode 100644 index 88a172e4f2b3f..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadataTest.java +++ /dev/null @@ -1,1523 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static com.google.protobuf.ByteString.copyFrom; -import static java.lang.String.format; -import static java.sql.Types.BIGINT; -import static java.sql.Types.BIT; -import static java.sql.Types.INTEGER; -import static java.sql.Types.JAVA_OBJECT; -import static java.util.Collections.singletonList; -import static java.util.stream.Collectors.toList; -import static java.util.stream.IntStream.range; -import static org.apache.arrow.driver.jdbc.utils.MockFlightSqlProducer.serializeSchema; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetCrossReference; -import static org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportsConvert.SQL_CONVERT_BIGINT_VALUE; -import static org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportsConvert.SQL_CONVERT_BIT_VALUE; -import static org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportsConvert.SQL_CONVERT_INTEGER_VALUE; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.protobuf.Message; -import java.sql.Connection; -import java.sql.DatabaseMetaData; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.function.Consumer; -import org.apache.arrow.driver.jdbc.utils.MockFlightSqlProducer; -import org.apache.arrow.driver.jdbc.utils.ResultSetTestUtils; -import 
org.apache.arrow.driver.jdbc.utils.ThrowableAssertionUtils; -import org.apache.arrow.flight.FlightProducer.ServerStreamListener; -import org.apache.arrow.flight.sql.FlightSqlProducer.Schemas; -import org.apache.arrow.flight.sql.impl.FlightSql; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetCatalogs; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetDbSchemas; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetExportedKeys; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetImportedKeys; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetPrimaryKeys; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetTableTypes; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetTables; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedSubqueries; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.Text; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -/** Class containing the tests from the {@link ArrowDatabaseMetadata}. 
*/ -@SuppressWarnings("DoubleBraceInitialization") -public class ArrowDatabaseMetadataTest { - public static final boolean EXPECTED_MAX_ROW_SIZE_INCLUDES_BLOBS = false; - private static final MockFlightSqlProducer FLIGHT_SQL_PRODUCER = new MockFlightSqlProducer(); - private static final MockFlightSqlProducer FLIGHT_SQL_PRODUCER_EMPTY_SQLINFO = - new MockFlightSqlProducer(); - - @RegisterExtension - public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION = - FlightServerTestExtension.createStandardTestExtension(FLIGHT_SQL_PRODUCER); - - @RegisterExtension - public static final FlightServerTestExtension FLIGHT_SERVER_EMPTY_SQLINFO_TEST_RULE = - FlightServerTestExtension.createStandardTestExtension(FLIGHT_SQL_PRODUCER_EMPTY_SQLINFO); - - private static final int ROW_COUNT = 10; - private static final List> EXPECTED_GET_CATALOGS_RESULTS = - range(0, ROW_COUNT) - .mapToObj(i -> format("catalog #%d", i)) - .map(Object.class::cast) - .map(Collections::singletonList) - .collect(toList()); - private static final List> EXPECTED_GET_TABLE_TYPES_RESULTS = - range(0, ROW_COUNT) - .mapToObj(i -> format("table_type #%d", i)) - .map(Object.class::cast) - .map(Collections::singletonList) - .collect(toList()); - private static final List> EXPECTED_GET_TABLES_RESULTS = - range(0, ROW_COUNT) - .mapToObj( - i -> - new Object[] { - format("catalog_name #%d", i), - format("db_schema_name #%d", i), - format("table_name #%d", i), - format("table_type #%d", i), - // TODO Add these fields to FlightSQL, as it's currently not possible to fetch - // them. 
- null, - null, - null, - null, - null, - null - }) - .map(Arrays::asList) - .collect(toList()); - private static final List> EXPECTED_GET_SCHEMAS_RESULTS = - range(0, ROW_COUNT) - .mapToObj( - i -> new Object[] {format("db_schema_name #%d", i), format("catalog_name #%d", i)}) - .map(Arrays::asList) - .collect(toList()); - private static final List> EXPECTED_GET_EXPORTED_AND_IMPORTED_KEYS_RESULTS = - range(0, ROW_COUNT) - .mapToObj( - i -> - new Object[] { - format("pk_catalog_name #%d", i), - format("pk_db_schema_name #%d", i), - format("pk_table_name #%d", i), - format("pk_column_name #%d", i), - format("fk_catalog_name #%d", i), - format("fk_db_schema_name #%d", i), - format("fk_table_name #%d", i), - format("fk_column_name #%d", i), - i, - format("fk_key_name #%d", i), - format("pk_key_name #%d", i), - (byte) i, - (byte) i, - // TODO Add this field to FlightSQL, as it's currently not possible to fetch it. - null - }) - .map(Arrays::asList) - .collect(toList()); - private static final List> EXPECTED_CROSS_REFERENCE_RESULTS = - EXPECTED_GET_EXPORTED_AND_IMPORTED_KEYS_RESULTS; - private static final List> EXPECTED_PRIMARY_KEYS_RESULTS = - range(0, ROW_COUNT) - .mapToObj( - i -> - new Object[] { - format("catalog_name #%d", i), - format("db_schema_name #%d", i), - format("table_name #%d", i), - format("column_name #%d", i), - i, - format("key_name #%d", i) - }) - .map(Arrays::asList) - .collect(toList()); - private static final List FIELDS_GET_IMPORTED_EXPORTED_KEYS = - ImmutableList.of( - "PKTABLE_CAT", - "PKTABLE_SCHEM", - "PKTABLE_NAME", - "PKCOLUMN_NAME", - "FKTABLE_CAT", - "FKTABLE_SCHEM", - "FKTABLE_NAME", - "FKCOLUMN_NAME", - "KEY_SEQ", - "FK_NAME", - "PK_NAME", - "UPDATE_RULE", - "DELETE_RULE", - "DEFERRABILITY"); - private static final List FIELDS_GET_CROSS_REFERENCE = FIELDS_GET_IMPORTED_EXPORTED_KEYS; - private static final String TARGET_TABLE = "TARGET_TABLE"; - private static final String TARGET_FOREIGN_TABLE = "FOREIGN_TABLE"; - private static final 
String EXPECTED_DATABASE_PRODUCT_NAME = "Test Server Name"; - private static final String EXPECTED_DATABASE_PRODUCT_VERSION = "v0.0.1-alpha"; - private static final String EXPECTED_IDENTIFIER_QUOTE_STRING = "\""; - private static final boolean EXPECTED_IS_READ_ONLY = true; - private static final String EXPECTED_SQL_KEYWORDS = - "ADD, ADD CONSTRAINT, ALTER, ALTER TABLE, ANY, USER, TABLE"; - private static final String EXPECTED_NUMERIC_FUNCTIONS = - "ABS(), ACOS(), ASIN(), ATAN(), CEIL(), CEILING(), COT()"; - private static final String EXPECTED_STRING_FUNCTIONS = - "ASCII, CHAR, CHARINDEX, CONCAT, CONCAT_WS, FORMAT, LEFT"; - private static final String EXPECTED_SYSTEM_FUNCTIONS = - "CAST, CONVERT, CHOOSE, ISNULL, IS_NUMERIC, IIF, TRY_CAST"; - private static final String EXPECTED_TIME_DATE_FUNCTIONS = - "GETDATE(), DATEPART(), DATEADD(), DATEDIFF()"; - private static final String EXPECTED_SEARCH_STRING_ESCAPE = "\\"; - private static final String EXPECTED_EXTRA_NAME_CHARACTERS = ""; - private static final boolean EXPECTED_SUPPORTS_COLUMN_ALIASING = true; - private static final boolean EXPECTED_NULL_PLUS_NULL_IS_NULL = true; - private static final boolean EXPECTED_SQL_SUPPORTS_CONVERT = true; - private static final boolean EXPECTED_INVALID_SQL_SUPPORTS_CONVERT = false; - private static final boolean EXPECTED_SUPPORTS_TABLE_CORRELATION_NAMES = true; - private static final boolean EXPECTED_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES = false; - private static final boolean EXPECTED_EXPRESSIONS_IN_ORDER_BY = true; - private static final boolean EXPECTED_SUPPORTS_ORDER_BY_UNRELATED = true; - private static final boolean EXPECTED_SUPPORTS_GROUP_BY = true; - private static final boolean EXPECTED_SUPPORTS_GROUP_BY_UNRELATED = true; - private static final boolean EXPECTED_SUPPORTS_LIKE_ESCAPE_CLAUSE = true; - private static final boolean EXPECTED_NON_NULLABLE_COLUMNS = true; - private static final boolean EXPECTED_MINIMUM_SQL_GRAMMAR = true; - private static final boolean 
EXPECTED_CORE_SQL_GRAMMAR = true; - private static final boolean EXPECTED_EXTEND_SQL_GRAMMAR = false; - private static final boolean EXPECTED_ANSI92_ENTRY_LEVEL_SQL = true; - private static final boolean EXPECTED_ANSI92_INTERMEDIATE_SQL = true; - private static final boolean EXPECTED_ANSI92_FULL_SQL = false; - private static final String EXPECTED_SCHEMA_TERM = "schema"; - private static final String EXPECTED_PROCEDURE_TERM = "procedure"; - private static final String EXPECTED_CATALOG_TERM = "catalog"; - private static final boolean EXPECTED_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY = true; - private static final boolean EXPECTED_SUPPORTS_OUTER_JOINS = true; - private static final boolean EXPECTED_SUPPORTS_FULL_OUTER_JOINS = true; - private static final boolean EXPECTED_SUPPORTS_LIMITED_JOINS = false; - private static final boolean EXPECTED_CATALOG_AT_START = true; - private static final boolean EXPECTED_SCHEMAS_IN_PROCEDURE_CALLS = true; - private static final boolean EXPECTED_SCHEMAS_IN_INDEX_DEFINITIONS = true; - private static final boolean EXPECTED_SCHEMAS_IN_PRIVILEGE_DEFINITIONS = false; - private static final boolean EXPECTED_CATALOGS_IN_INDEX_DEFINITIONS = true; - private static final boolean EXPECTED_CATALOGS_IN_PRIVILEGE_DEFINITIONS = false; - private static final boolean EXPECTED_POSITIONED_DELETE = true; - private static final boolean EXPECTED_POSITIONED_UPDATE = false; - private static final boolean EXPECTED_TYPE_FORWARD_ONLY = true; - private static final boolean EXPECTED_TYPE_SCROLL_INSENSITIVE = true; - private static final boolean EXPECTED_TYPE_SCROLL_SENSITIVE = false; - private static final boolean EXPECTED_SELECT_FOR_UPDATE_SUPPORTED = false; - private static final boolean EXPECTED_STORED_PROCEDURES_SUPPORTED = false; - private static final boolean EXPECTED_SUBQUERIES_IN_COMPARISON = true; - private static final boolean EXPECTED_SUBQUERIES_IN_EXISTS = false; - private static final boolean EXPECTED_SUBQUERIES_IN_INS = false; - private static final 
boolean EXPECTED_SUBQUERIES_IN_QUANTIFIEDS = false; - private static final SqlSupportedSubqueries[] EXPECTED_SUPPORTED_SUBQUERIES = - new SqlSupportedSubqueries[] {SqlSupportedSubqueries.SQL_SUBQUERIES_IN_COMPARISONS}; - private static final boolean EXPECTED_CORRELATED_SUBQUERIES_SUPPORTED = true; - private static final boolean EXPECTED_SUPPORTS_UNION = true; - private static final boolean EXPECTED_SUPPORTS_UNION_ALL = true; - private static final int EXPECTED_MAX_BINARY_LITERAL_LENGTH = 0; - private static final int EXPECTED_MAX_CHAR_LITERAL_LENGTH = 0; - private static final int EXPECTED_MAX_COLUMN_NAME_LENGTH = 1024; - private static final int EXPECTED_MAX_COLUMNS_IN_GROUP_BY = 0; - private static final int EXPECTED_MAX_COLUMNS_IN_INDEX = 0; - private static final int EXPECTED_MAX_COLUMNS_IN_ORDER_BY = 0; - private static final int EXPECTED_MAX_COLUMNS_IN_SELECT = 0; - private static final int EXPECTED_MAX_CONNECTIONS = 0; - private static final int EXPECTED_MAX_CURSOR_NAME_LENGTH = 1024; - private static final int EXPECTED_MAX_INDEX_LENGTH = 0; - private static final int EXPECTED_SCHEMA_NAME_LENGTH = 1024; - private static final int EXPECTED_MAX_PROCEDURE_NAME_LENGTH = 0; - private static final int EXPECTED_MAX_CATALOG_NAME_LENGTH = 1024; - private static final int EXPECTED_MAX_ROW_SIZE = 0; - private static final int EXPECTED_MAX_STATEMENT_LENGTH = 0; - private static final int EXPECTED_MAX_STATEMENTS = 0; - private static final int EXPECTED_MAX_TABLE_NAME_LENGTH = 1024; - private static final int EXPECTED_MAX_TABLES_IN_SELECT = 0; - private static final int EXPECTED_MAX_USERNAME_LENGTH = 1024; - private static final int EXPECTED_DEFAULT_TRANSACTION_ISOLATION = 0; - private static final boolean EXPECTED_TRANSACTIONS_SUPPORTED = false; - private static final boolean EXPECTED_TRANSACTION_NONE = false; - private static final boolean EXPECTED_TRANSACTION_READ_UNCOMMITTED = false; - private static final boolean EXPECTED_TRANSACTION_READ_COMMITTED = true; - private 
static final boolean EXPECTED_TRANSACTION_REPEATABLE_READ = false; - private static final boolean EXPECTED_TRANSACTION_SERIALIZABLE = true; - private static final boolean EXPECTED_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT = true; - private static final boolean EXPECTED_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED = false; - private static final boolean EXPECTED_BATCH_UPDATES_SUPPORTED = true; - private static final boolean EXPECTED_SAVEPOINTS_SUPPORTED = false; - private static final boolean EXPECTED_NAMED_PARAMETERS_SUPPORTED = false; - private static final boolean EXPECTED_LOCATORS_UPDATE_COPY = true; - private static final boolean EXPECTED_STORED_FUNCTIONS_USING_CALL_SYNTAX_SUPPORTED = false; - private static final List> EXPECTED_GET_COLUMNS_RESULTS; - private static Connection connection; - - static { - List expectedGetColumnsDataTypes = Arrays.asList(3, 93, 4); - List expectedGetColumnsTypeName = Arrays.asList("DECIMAL", "TIMESTAMP", "INTEGER"); - List expectedGetColumnsRadix = Arrays.asList(10, null, 10); - List expectedGetColumnsColumnSize = Arrays.asList(5, 29, 10); - List expectedGetColumnsDecimalDigits = Arrays.asList(2, 9, 0); - List expectedGetColumnsIsNullable = Arrays.asList("YES", "YES", "NO"); - EXPECTED_GET_COLUMNS_RESULTS = - range(0, ROW_COUNT * 3) - .mapToObj( - i -> - new Object[] { - format("catalog_name #%d", i / 3), - format("db_schema_name #%d", i / 3), - format("table_name%d", i / 3), - format("column_%d", (i % 3) + 1), - expectedGetColumnsDataTypes.get(i % 3), - expectedGetColumnsTypeName.get(i % 3), - expectedGetColumnsColumnSize.get(i % 3), - null, - expectedGetColumnsDecimalDigits.get(i % 3), - expectedGetColumnsRadix.get(i % 3), - !Objects.equals(expectedGetColumnsIsNullable.get(i % 3), "NO") ? 
1 : 0, - null, - null, - null, - null, - null, - (i % 3) + 1, - expectedGetColumnsIsNullable.get(i % 3), - null, - null, - null, - null, - "", - "" - }) - .map(Arrays::asList) - .collect(toList()); - } - - public final ResultSetTestUtils resultSetTestUtils = new ResultSetTestUtils(); - - @BeforeAll - public static void setUpBeforeClass() throws SQLException { - connection = FLIGHT_SERVER_TEST_EXTENSION.getConnection(false); - - final Message commandGetCatalogs = CommandGetCatalogs.getDefaultInstance(); - final Consumer commandGetCatalogsResultProducer = - listener -> { - try (final BufferAllocator allocator = new RootAllocator(); - final VectorSchemaRoot root = - VectorSchemaRoot.create(Schemas.GET_CATALOGS_SCHEMA, allocator)) { - final VarCharVector catalogName = (VarCharVector) root.getVector("catalog_name"); - range(0, ROW_COUNT) - .forEach(i -> catalogName.setSafe(i, new Text(format("catalog #%d", i)))); - root.setRowCount(ROW_COUNT); - listener.start(root); - listener.putNext(); - } catch (final Throwable throwable) { - listener.error(throwable); - } finally { - listener.completed(); - } - }; - FLIGHT_SQL_PRODUCER.addCatalogQuery(commandGetCatalogs, commandGetCatalogsResultProducer); - - final Message commandGetTableTypes = CommandGetTableTypes.getDefaultInstance(); - final Consumer commandGetTableTypesResultProducer = - listener -> { - try (final BufferAllocator allocator = new RootAllocator(); - final VectorSchemaRoot root = - VectorSchemaRoot.create(Schemas.GET_TABLE_TYPES_SCHEMA, allocator)) { - final VarCharVector tableType = (VarCharVector) root.getVector("table_type"); - range(0, ROW_COUNT) - .forEach(i -> tableType.setSafe(i, new Text(format("table_type #%d", i)))); - root.setRowCount(ROW_COUNT); - listener.start(root); - listener.putNext(); - } catch (final Throwable throwable) { - listener.error(throwable); - } finally { - listener.completed(); - } - }; - FLIGHT_SQL_PRODUCER.addCatalogQuery(commandGetTableTypes, commandGetTableTypesResultProducer); - 
- final Message commandGetTables = CommandGetTables.getDefaultInstance(); - final Consumer commandGetTablesResultProducer = - listener -> { - try (final BufferAllocator allocator = new RootAllocator(); - final VectorSchemaRoot root = - VectorSchemaRoot.create(Schemas.GET_TABLES_SCHEMA_NO_SCHEMA, allocator)) { - final VarCharVector catalogName = (VarCharVector) root.getVector("catalog_name"); - final VarCharVector schemaName = (VarCharVector) root.getVector("db_schema_name"); - final VarCharVector tableName = (VarCharVector) root.getVector("table_name"); - final VarCharVector tableType = (VarCharVector) root.getVector("table_type"); - range(0, ROW_COUNT) - .peek(i -> catalogName.setSafe(i, new Text(format("catalog_name #%d", i)))) - .peek(i -> schemaName.setSafe(i, new Text(format("db_schema_name #%d", i)))) - .peek(i -> tableName.setSafe(i, new Text(format("table_name #%d", i)))) - .forEach(i -> tableType.setSafe(i, new Text(format("table_type #%d", i)))); - root.setRowCount(ROW_COUNT); - listener.start(root); - listener.putNext(); - } catch (final Throwable throwable) { - listener.error(throwable); - } finally { - listener.completed(); - } - }; - FLIGHT_SQL_PRODUCER.addCatalogQuery(commandGetTables, commandGetTablesResultProducer); - - final Message commandGetTablesWithSchema = - CommandGetTables.newBuilder().setIncludeSchema(true).build(); - final Consumer commandGetTablesWithSchemaResultProducer = - listener -> { - try (final BufferAllocator allocator = new RootAllocator(); - final VectorSchemaRoot root = - VectorSchemaRoot.create(Schemas.GET_TABLES_SCHEMA, allocator)) { - final byte[] filledTableSchemaBytes = - copyFrom( - serializeSchema( - new Schema( - Arrays.asList( - Field.nullable( - "column_1", ArrowType.Decimal.createDecimal(5, 2, 128)), - Field.nullable( - "column_2", - new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC")), - Field.notNullable("column_3", Types.MinorType.INT.getType()))))) - .toByteArray(); - final VarCharVector catalogName = 
(VarCharVector) root.getVector("catalog_name"); - final VarCharVector schemaName = (VarCharVector) root.getVector("db_schema_name"); - final VarCharVector tableName = (VarCharVector) root.getVector("table_name"); - final VarCharVector tableType = (VarCharVector) root.getVector("table_type"); - final VarBinaryVector tableSchema = (VarBinaryVector) root.getVector("table_schema"); - range(0, ROW_COUNT) - .peek(i -> catalogName.setSafe(i, new Text(format("catalog_name #%d", i)))) - .peek(i -> schemaName.setSafe(i, new Text(format("db_schema_name #%d", i)))) - .peek(i -> tableName.setSafe(i, new Text(format("table_name%d", i)))) - .peek(i -> tableType.setSafe(i, new Text(format("table_type #%d", i)))) - .forEach(i -> tableSchema.setSafe(i, filledTableSchemaBytes)); - root.setRowCount(ROW_COUNT); - listener.start(root); - listener.putNext(); - } catch (final Throwable throwable) { - listener.error(throwable); - } finally { - listener.completed(); - } - }; - FLIGHT_SQL_PRODUCER.addCatalogQuery( - commandGetTablesWithSchema, commandGetTablesWithSchemaResultProducer); - - final Message commandGetDbSchemas = CommandGetDbSchemas.getDefaultInstance(); - final Consumer commandGetSchemasResultProducer = - listener -> { - try (final BufferAllocator allocator = new RootAllocator(); - final VectorSchemaRoot root = - VectorSchemaRoot.create(Schemas.GET_SCHEMAS_SCHEMA, allocator)) { - final VarCharVector catalogName = (VarCharVector) root.getVector("catalog_name"); - final VarCharVector schemaName = (VarCharVector) root.getVector("db_schema_name"); - range(0, ROW_COUNT) - .peek(i -> catalogName.setSafe(i, new Text(format("catalog_name #%d", i)))) - .forEach(i -> schemaName.setSafe(i, new Text(format("db_schema_name #%d", i)))); - root.setRowCount(ROW_COUNT); - listener.start(root); - listener.putNext(); - } catch (final Throwable throwable) { - listener.error(throwable); - } finally { - listener.completed(); - } - }; - FLIGHT_SQL_PRODUCER.addCatalogQuery(commandGetDbSchemas, 
commandGetSchemasResultProducer); - - final Message commandGetExportedKeys = - CommandGetExportedKeys.newBuilder().setTable(TARGET_TABLE).build(); - final Message commandGetImportedKeys = - CommandGetImportedKeys.newBuilder().setTable(TARGET_TABLE).build(); - final Message commandGetCrossReference = - CommandGetCrossReference.newBuilder() - .setPkTable(TARGET_TABLE) - .setFkTable(TARGET_FOREIGN_TABLE) - .build(); - final Consumer commandGetExportedAndImportedKeysResultProducer = - listener -> { - try (final BufferAllocator allocator = new RootAllocator(); - final VectorSchemaRoot root = - VectorSchemaRoot.create(Schemas.GET_IMPORTED_KEYS_SCHEMA, allocator)) { - final VarCharVector pkCatalogName = (VarCharVector) root.getVector("pk_catalog_name"); - final VarCharVector pkSchemaName = (VarCharVector) root.getVector("pk_db_schema_name"); - final VarCharVector pkTableName = (VarCharVector) root.getVector("pk_table_name"); - final VarCharVector pkColumnName = (VarCharVector) root.getVector("pk_column_name"); - final VarCharVector fkCatalogName = (VarCharVector) root.getVector("fk_catalog_name"); - final VarCharVector fkSchemaName = (VarCharVector) root.getVector("fk_db_schema_name"); - final VarCharVector fkTableName = (VarCharVector) root.getVector("fk_table_name"); - final VarCharVector fkColumnName = (VarCharVector) root.getVector("fk_column_name"); - final IntVector keySequence = (IntVector) root.getVector("key_sequence"); - final VarCharVector fkKeyName = (VarCharVector) root.getVector("fk_key_name"); - final VarCharVector pkKeyName = (VarCharVector) root.getVector("pk_key_name"); - final UInt1Vector updateRule = (UInt1Vector) root.getVector("update_rule"); - final UInt1Vector deleteRule = (UInt1Vector) root.getVector("delete_rule"); - range(0, ROW_COUNT) - .peek(i -> pkCatalogName.setSafe(i, new Text(format("pk_catalog_name #%d", i)))) - .peek(i -> pkSchemaName.setSafe(i, new Text(format("pk_db_schema_name #%d", i)))) - .peek(i -> pkTableName.setSafe(i, new 
Text(format("pk_table_name #%d", i)))) - .peek(i -> pkColumnName.setSafe(i, new Text(format("pk_column_name #%d", i)))) - .peek(i -> fkCatalogName.setSafe(i, new Text(format("fk_catalog_name #%d", i)))) - .peek(i -> fkSchemaName.setSafe(i, new Text(format("fk_db_schema_name #%d", i)))) - .peek(i -> fkTableName.setSafe(i, new Text(format("fk_table_name #%d", i)))) - .peek(i -> fkColumnName.setSafe(i, new Text(format("fk_column_name #%d", i)))) - .peek(i -> keySequence.setSafe(i, i)) - .peek(i -> fkKeyName.setSafe(i, new Text(format("fk_key_name #%d", i)))) - .peek(i -> pkKeyName.setSafe(i, new Text(format("pk_key_name #%d", i)))) - .peek(i -> updateRule.setSafe(i, i)) - .forEach(i -> deleteRule.setSafe(i, i)); - root.setRowCount(ROW_COUNT); - listener.start(root); - listener.putNext(); - } catch (final Throwable throwable) { - listener.error(throwable); - } finally { - listener.completed(); - } - }; - FLIGHT_SQL_PRODUCER.addCatalogQuery( - commandGetExportedKeys, commandGetExportedAndImportedKeysResultProducer); - FLIGHT_SQL_PRODUCER.addCatalogQuery( - commandGetImportedKeys, commandGetExportedAndImportedKeysResultProducer); - FLIGHT_SQL_PRODUCER.addCatalogQuery( - commandGetCrossReference, commandGetExportedAndImportedKeysResultProducer); - - final Message commandGetPrimaryKeys = - CommandGetPrimaryKeys.newBuilder().setTable(TARGET_TABLE).build(); - final Consumer commandGetPrimaryKeysResultProducer = - listener -> { - try (final BufferAllocator allocator = new RootAllocator(); - final VectorSchemaRoot root = - VectorSchemaRoot.create(Schemas.GET_PRIMARY_KEYS_SCHEMA, allocator)) { - final VarCharVector catalogName = (VarCharVector) root.getVector("catalog_name"); - final VarCharVector schemaName = (VarCharVector) root.getVector("db_schema_name"); - final VarCharVector tableName = (VarCharVector) root.getVector("table_name"); - final VarCharVector columnName = (VarCharVector) root.getVector("column_name"); - final IntVector keySequence = (IntVector) 
root.getVector("key_sequence"); - final VarCharVector keyName = (VarCharVector) root.getVector("key_name"); - range(0, ROW_COUNT) - .peek(i -> catalogName.setSafe(i, new Text(format("catalog_name #%d", i)))) - .peek(i -> schemaName.setSafe(i, new Text(format("db_schema_name #%d", i)))) - .peek(i -> tableName.setSafe(i, new Text(format("table_name #%d", i)))) - .peek(i -> columnName.setSafe(i, new Text(format("column_name #%d", i)))) - .peek(i -> keySequence.setSafe(i, i)) - .forEach(i -> keyName.setSafe(i, new Text(format("key_name #%d", i)))); - root.setRowCount(ROW_COUNT); - listener.start(root); - listener.putNext(); - } catch (final Throwable throwable) { - listener.error(throwable); - } finally { - listener.completed(); - } - }; - FLIGHT_SQL_PRODUCER.addCatalogQuery(commandGetPrimaryKeys, commandGetPrimaryKeysResultProducer); - - FLIGHT_SQL_PRODUCER - .getSqlInfoBuilder() - .withSqlOuterJoinSupportLevel(FlightSql.SqlOuterJoinsSupportLevel.SQL_FULL_OUTER_JOINS) - .withFlightSqlServerName(EXPECTED_DATABASE_PRODUCT_NAME) - .withFlightSqlServerVersion(EXPECTED_DATABASE_PRODUCT_VERSION) - .withSqlIdentifierQuoteChar(EXPECTED_IDENTIFIER_QUOTE_STRING) - .withFlightSqlServerReadOnly(EXPECTED_IS_READ_ONLY) - .withSqlKeywords(EXPECTED_SQL_KEYWORDS.split("\\s*,\\s*")) - .withSqlNumericFunctions(EXPECTED_NUMERIC_FUNCTIONS.split("\\s*,\\s*")) - .withSqlStringFunctions(EXPECTED_STRING_FUNCTIONS.split("\\s*,\\s*")) - .withSqlSystemFunctions(EXPECTED_SYSTEM_FUNCTIONS.split("\\s*,\\s*")) - .withSqlDatetimeFunctions(EXPECTED_TIME_DATE_FUNCTIONS.split("\\s*,\\s*")) - .withSqlSearchStringEscape(EXPECTED_SEARCH_STRING_ESCAPE) - .withSqlExtraNameCharacters(EXPECTED_EXTRA_NAME_CHARACTERS) - .withSqlSupportsColumnAliasing(EXPECTED_SUPPORTS_COLUMN_ALIASING) - .withSqlNullPlusNullIsNull(EXPECTED_NULL_PLUS_NULL_IS_NULL) - .withSqlSupportsConvert( - ImmutableMap.of( - SQL_CONVERT_BIT_VALUE, - Arrays.asList(SQL_CONVERT_INTEGER_VALUE, SQL_CONVERT_BIGINT_VALUE))) - 
.withSqlSupportsTableCorrelationNames(EXPECTED_SUPPORTS_TABLE_CORRELATION_NAMES) - .withSqlSupportsDifferentTableCorrelationNames( - EXPECTED_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES) - .withSqlSupportsExpressionsInOrderBy(EXPECTED_EXPRESSIONS_IN_ORDER_BY) - .withSqlSupportsOrderByUnrelated(EXPECTED_SUPPORTS_ORDER_BY_UNRELATED) - .withSqlSupportedGroupBy(FlightSql.SqlSupportedGroupBy.SQL_GROUP_BY_UNRELATED) - .withSqlSupportsLikeEscapeClause(EXPECTED_SUPPORTS_LIKE_ESCAPE_CLAUSE) - .withSqlSupportsNonNullableColumns(EXPECTED_NON_NULLABLE_COLUMNS) - .withSqlSupportedGrammar( - FlightSql.SupportedSqlGrammar.SQL_CORE_GRAMMAR, - FlightSql.SupportedSqlGrammar.SQL_MINIMUM_GRAMMAR) - .withSqlAnsi92SupportedLevel( - FlightSql.SupportedAnsi92SqlGrammarLevel.ANSI92_ENTRY_SQL, - FlightSql.SupportedAnsi92SqlGrammarLevel.ANSI92_INTERMEDIATE_SQL) - .withSqlSupportsIntegrityEnhancementFacility( - EXPECTED_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY) - .withSqlSchemaTerm(EXPECTED_SCHEMA_TERM) - .withSqlCatalogTerm(EXPECTED_CATALOG_TERM) - .withSqlProcedureTerm(EXPECTED_PROCEDURE_TERM) - .withSqlCatalogAtStart(EXPECTED_CATALOG_AT_START) - .withSqlSchemasSupportedActions( - FlightSql.SqlSupportedElementActions.SQL_ELEMENT_IN_PROCEDURE_CALLS, - FlightSql.SqlSupportedElementActions.SQL_ELEMENT_IN_INDEX_DEFINITIONS) - .withSqlCatalogsSupportedActions( - FlightSql.SqlSupportedElementActions.SQL_ELEMENT_IN_INDEX_DEFINITIONS) - .withSqlSupportedPositionedCommands( - FlightSql.SqlSupportedPositionedCommands.SQL_POSITIONED_DELETE) - .withSqlSelectForUpdateSupported(EXPECTED_SELECT_FOR_UPDATE_SUPPORTED) - .withSqlStoredProceduresSupported(EXPECTED_STORED_PROCEDURES_SUPPORTED) - .withSqlSubQueriesSupported(EXPECTED_SUPPORTED_SUBQUERIES) - .withSqlCorrelatedSubqueriesSupported(EXPECTED_CORRELATED_SUBQUERIES_SUPPORTED) - .withSqlSupportedUnions(FlightSql.SqlSupportedUnions.SQL_UNION_ALL) - .withSqlMaxBinaryLiteralLength(EXPECTED_MAX_BINARY_LITERAL_LENGTH) - 
.withSqlMaxCharLiteralLength(EXPECTED_MAX_CHAR_LITERAL_LENGTH) - .withSqlMaxColumnNameLength(EXPECTED_MAX_COLUMN_NAME_LENGTH) - .withSqlMaxColumnsInGroupBy(EXPECTED_MAX_COLUMNS_IN_GROUP_BY) - .withSqlMaxColumnsInIndex(EXPECTED_MAX_COLUMNS_IN_INDEX) - .withSqlMaxColumnsInOrderBy(EXPECTED_MAX_COLUMNS_IN_ORDER_BY) - .withSqlMaxColumnsInSelect(EXPECTED_MAX_COLUMNS_IN_SELECT) - .withSqlMaxConnections(EXPECTED_MAX_CONNECTIONS) - .withSqlMaxCursorNameLength(EXPECTED_MAX_CURSOR_NAME_LENGTH) - .withSqlMaxIndexLength(EXPECTED_MAX_INDEX_LENGTH) - .withSqlDbSchemaNameLength(EXPECTED_SCHEMA_NAME_LENGTH) - .withSqlMaxProcedureNameLength(EXPECTED_MAX_PROCEDURE_NAME_LENGTH) - .withSqlMaxCatalogNameLength(EXPECTED_MAX_CATALOG_NAME_LENGTH) - .withSqlMaxRowSize(EXPECTED_MAX_ROW_SIZE) - .withSqlMaxRowSizeIncludesBlobs(EXPECTED_MAX_ROW_SIZE_INCLUDES_BLOBS) - .withSqlMaxStatementLength(EXPECTED_MAX_STATEMENT_LENGTH) - .withSqlMaxStatements(EXPECTED_MAX_STATEMENTS) - .withSqlMaxTableNameLength(EXPECTED_MAX_TABLE_NAME_LENGTH) - .withSqlMaxTablesInSelect(EXPECTED_MAX_TABLES_IN_SELECT) - .withSqlMaxUsernameLength(EXPECTED_MAX_USERNAME_LENGTH) - .withSqlDefaultTransactionIsolation(EXPECTED_DEFAULT_TRANSACTION_ISOLATION) - .withSqlTransactionsSupported(EXPECTED_TRANSACTIONS_SUPPORTED) - .withSqlSupportedTransactionsIsolationLevels( - FlightSql.SqlTransactionIsolationLevel.SQL_TRANSACTION_SERIALIZABLE, - FlightSql.SqlTransactionIsolationLevel.SQL_TRANSACTION_READ_COMMITTED) - .withSqlDataDefinitionCausesTransactionCommit( - EXPECTED_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT) - .withSqlDataDefinitionsInTransactionsIgnored( - EXPECTED_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED) - .withSqlSupportedResultSetTypes( - FlightSql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_FORWARD_ONLY, - FlightSql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE) - .withSqlBatchUpdatesSupported(EXPECTED_BATCH_UPDATES_SUPPORTED) - .withSqlSavepointsSupported(EXPECTED_SAVEPOINTS_SUPPORTED) - 
.withSqlNamedParametersSupported(EXPECTED_NAMED_PARAMETERS_SUPPORTED) - .withSqlLocatorsUpdateCopy(EXPECTED_LOCATORS_UPDATE_COPY) - .withSqlStoredFunctionsUsingCallSyntaxSupported( - EXPECTED_STORED_FUNCTIONS_USING_CALL_SYNTAX_SUPPORTED); - } - - @AfterAll - public static void tearDown() throws Exception { - AutoCloseables.close(connection, FLIGHT_SQL_PRODUCER, FLIGHT_SQL_PRODUCER_EMPTY_SQLINFO); - } - - @Test - public void testGetCatalogsCanBeAccessedByIndices() throws SQLException { - try (final ResultSet resultSet = connection.getMetaData().getCatalogs()) { - resultSetTestUtils.testData(resultSet, EXPECTED_GET_CATALOGS_RESULTS); - } - } - - @Test - public void testGetCatalogsCanBeAccessedByNames() throws SQLException { - try (final ResultSet resultSet = connection.getMetaData().getCatalogs()) { - resultSetTestUtils.testData( - resultSet, singletonList("TABLE_CAT"), EXPECTED_GET_CATALOGS_RESULTS); - } - } - - @Test - public void testTableTypesCanBeAccessedByIndices() throws SQLException { - try (final ResultSet resultSet = connection.getMetaData().getTableTypes()) { - resultSetTestUtils.testData(resultSet, EXPECTED_GET_TABLE_TYPES_RESULTS); - } - } - - @Test - public void testTableTypesCanBeAccessedByNames() throws SQLException { - try (final ResultSet resultSet = connection.getMetaData().getTableTypes()) { - resultSetTestUtils.testData( - resultSet, singletonList("TABLE_TYPE"), EXPECTED_GET_TABLE_TYPES_RESULTS); - } - } - - @Test - public void testGetTablesCanBeAccessedByIndices() throws SQLException { - try (final ResultSet resultSet = connection.getMetaData().getTables(null, null, null, null)) { - resultSetTestUtils.testData(resultSet, EXPECTED_GET_TABLES_RESULTS); - } - } - - @Test - public void testGetTablesCanBeAccessedByNames() throws SQLException { - try (final ResultSet resultSet = connection.getMetaData().getTables(null, null, null, null)) { - resultSetTestUtils.testData( - resultSet, - ImmutableList.of( - "TABLE_CAT", - "TABLE_SCHEM", - "TABLE_NAME", - 
"TABLE_TYPE", - "REMARKS", - "TYPE_CAT", - "TYPE_SCHEM", - "TYPE_NAME", - "SELF_REFERENCING_COL_NAME", - "REF_GENERATION"), - EXPECTED_GET_TABLES_RESULTS); - } - } - - @Test - public void testGetSchemasCanBeAccessedByIndices() throws SQLException { - try (final ResultSet resultSet = connection.getMetaData().getSchemas()) { - resultSetTestUtils.testData(resultSet, EXPECTED_GET_SCHEMAS_RESULTS); - } - } - - @Test - public void testGetSchemasCanBeAccessedByNames() throws SQLException { - try (final ResultSet resultSet = connection.getMetaData().getSchemas()) { - resultSetTestUtils.testData( - resultSet, - ImmutableList.of("TABLE_SCHEM", "TABLE_CATALOG"), - EXPECTED_GET_SCHEMAS_RESULTS); - } - } - - @Test - public void testGetExportedKeysCanBeAccessedByIndices() throws SQLException { - try (final ResultSet resultSet = - connection.getMetaData().getExportedKeys(null, null, TARGET_TABLE)) { - resultSetTestUtils.testData(resultSet, EXPECTED_GET_EXPORTED_AND_IMPORTED_KEYS_RESULTS); - } - } - - @Test - public void testGetExportedKeysCanBeAccessedByNames() throws SQLException { - try (final ResultSet resultSet = - connection.getMetaData().getExportedKeys(null, null, TARGET_TABLE)) { - resultSetTestUtils.testData( - resultSet, - FIELDS_GET_IMPORTED_EXPORTED_KEYS, - EXPECTED_GET_EXPORTED_AND_IMPORTED_KEYS_RESULTS); - } - } - - @Test - public void testGetImportedKeysCanBeAccessedByIndices() throws SQLException { - try (final ResultSet resultSet = - connection.getMetaData().getImportedKeys(null, null, TARGET_TABLE)) { - resultSetTestUtils.testData(resultSet, EXPECTED_GET_EXPORTED_AND_IMPORTED_KEYS_RESULTS); - } - } - - @Test - public void testGetImportedKeysCanBeAccessedByNames() throws SQLException { - try (final ResultSet resultSet = - connection.getMetaData().getImportedKeys(null, null, TARGET_TABLE)) { - resultSetTestUtils.testData( - resultSet, - FIELDS_GET_IMPORTED_EXPORTED_KEYS, - EXPECTED_GET_EXPORTED_AND_IMPORTED_KEYS_RESULTS); - } - } - - @Test - public void 
testGetCrossReferenceCanBeAccessedByIndices() throws SQLException { - try (final ResultSet resultSet = - connection - .getMetaData() - .getCrossReference(null, null, TARGET_TABLE, null, null, TARGET_FOREIGN_TABLE)) { - resultSetTestUtils.testData(resultSet, EXPECTED_CROSS_REFERENCE_RESULTS); - } - } - - @Test - public void testGetGetCrossReferenceCanBeAccessedByNames() throws SQLException { - try (final ResultSet resultSet = - connection - .getMetaData() - .getCrossReference(null, null, TARGET_TABLE, null, null, TARGET_FOREIGN_TABLE)) { - resultSetTestUtils.testData( - resultSet, FIELDS_GET_CROSS_REFERENCE, EXPECTED_CROSS_REFERENCE_RESULTS); - } - } - - @Test - public void testPrimaryKeysCanBeAccessedByIndices() throws SQLException { - try (final ResultSet resultSet = - connection.getMetaData().getPrimaryKeys(null, null, TARGET_TABLE)) { - resultSetTestUtils.testData(resultSet, EXPECTED_PRIMARY_KEYS_RESULTS); - } - } - - @Test - public void testPrimaryKeysCanBeAccessedByNames() throws SQLException { - try (final ResultSet resultSet = - connection.getMetaData().getPrimaryKeys(null, null, TARGET_TABLE)) { - resultSetTestUtils.testData( - resultSet, - ImmutableList.of( - "TABLE_CAT", "TABLE_SCHEM", "TABLE_NAME", "COLUMN_NAME", "KEY_SEQ", "PK_NAME"), - EXPECTED_PRIMARY_KEYS_RESULTS); - } - } - - @Test - public void testGetColumnsCanBeAccessedByIndices() throws SQLException { - try (final ResultSet resultSet = connection.getMetaData().getColumns(null, null, null, null)) { - resultSetTestUtils.testData(resultSet, EXPECTED_GET_COLUMNS_RESULTS); - } - } - - @Test - public void testGetColumnsCanByIndicesFilteringColumnNames() throws SQLException { - try (final ResultSet resultSet = - connection.getMetaData().getColumns(null, null, null, "column_1")) { - resultSetTestUtils.testData( - resultSet, - EXPECTED_GET_COLUMNS_RESULTS.stream() - .filter(insideList -> Objects.equals(insideList.get(3), "column_1")) - .collect(toList())); - } - } - - @Test - public void testGetSqlInfo() 
throws SQLException { - final DatabaseMetaData metaData = connection.getMetaData(); - assertThat(metaData.getDatabaseProductName(), is(EXPECTED_DATABASE_PRODUCT_NAME)); - assertThat(metaData.getDatabaseProductVersion(), is(EXPECTED_DATABASE_PRODUCT_VERSION)); - assertThat(metaData.getIdentifierQuoteString(), is(EXPECTED_IDENTIFIER_QUOTE_STRING)); - assertThat(metaData.isReadOnly(), is(EXPECTED_IS_READ_ONLY)); - assertThat(metaData.getSQLKeywords(), is(EXPECTED_SQL_KEYWORDS)); - assertThat(metaData.getNumericFunctions(), is(EXPECTED_NUMERIC_FUNCTIONS)); - assertThat(metaData.getStringFunctions(), is(EXPECTED_STRING_FUNCTIONS)); - assertThat(metaData.getSystemFunctions(), is(EXPECTED_SYSTEM_FUNCTIONS)); - assertThat(metaData.getTimeDateFunctions(), is(EXPECTED_TIME_DATE_FUNCTIONS)); - assertThat(metaData.getSearchStringEscape(), is(EXPECTED_SEARCH_STRING_ESCAPE)); - assertThat(metaData.getExtraNameCharacters(), is(EXPECTED_EXTRA_NAME_CHARACTERS)); - assertThat(metaData.supportsConvert(), is(EXPECTED_SQL_SUPPORTS_CONVERT)); - assertThat(metaData.supportsConvert(BIT, INTEGER), is(EXPECTED_SQL_SUPPORTS_CONVERT)); - assertThat(metaData.supportsConvert(BIT, BIGINT), is(EXPECTED_SQL_SUPPORTS_CONVERT)); - assertThat( - metaData.supportsConvert(BIGINT, INTEGER), is(EXPECTED_INVALID_SQL_SUPPORTS_CONVERT)); - assertThat( - metaData.supportsConvert(JAVA_OBJECT, INTEGER), is(EXPECTED_INVALID_SQL_SUPPORTS_CONVERT)); - assertThat( - metaData.supportsTableCorrelationNames(), is(EXPECTED_SUPPORTS_TABLE_CORRELATION_NAMES)); - assertThat(metaData.supportsExpressionsInOrderBy(), is(EXPECTED_EXPRESSIONS_IN_ORDER_BY)); - assertThat(metaData.supportsOrderByUnrelated(), is(EXPECTED_SUPPORTS_ORDER_BY_UNRELATED)); - assertThat(metaData.supportsGroupBy(), is(EXPECTED_SUPPORTS_GROUP_BY)); - assertThat(metaData.supportsGroupByUnrelated(), is(EXPECTED_SUPPORTS_GROUP_BY_UNRELATED)); - assertThat(metaData.supportsLikeEscapeClause(), is(EXPECTED_SUPPORTS_LIKE_ESCAPE_CLAUSE)); - 
assertThat(metaData.supportsNonNullableColumns(), is(EXPECTED_NON_NULLABLE_COLUMNS)); - assertThat(metaData.supportsMinimumSQLGrammar(), is(EXPECTED_MINIMUM_SQL_GRAMMAR)); - assertThat(metaData.supportsCoreSQLGrammar(), is(EXPECTED_CORE_SQL_GRAMMAR)); - assertThat(metaData.supportsExtendedSQLGrammar(), is(EXPECTED_EXTEND_SQL_GRAMMAR)); - assertThat(metaData.supportsANSI92EntryLevelSQL(), is(EXPECTED_ANSI92_ENTRY_LEVEL_SQL)); - assertThat(metaData.supportsANSI92IntermediateSQL(), is(EXPECTED_ANSI92_INTERMEDIATE_SQL)); - assertThat(metaData.supportsANSI92FullSQL(), is(EXPECTED_ANSI92_FULL_SQL)); - assertThat(metaData.supportsOuterJoins(), is(EXPECTED_SUPPORTS_OUTER_JOINS)); - assertThat(metaData.supportsFullOuterJoins(), is(EXPECTED_SUPPORTS_FULL_OUTER_JOINS)); - assertThat(metaData.supportsLimitedOuterJoins(), is(EXPECTED_SUPPORTS_LIMITED_JOINS)); - assertThat(metaData.getSchemaTerm(), is(EXPECTED_SCHEMA_TERM)); - assertThat(metaData.getProcedureTerm(), is(EXPECTED_PROCEDURE_TERM)); - assertThat(metaData.getCatalogTerm(), is(EXPECTED_CATALOG_TERM)); - assertThat(metaData.isCatalogAtStart(), is(EXPECTED_CATALOG_AT_START)); - assertThat(metaData.supportsSchemasInProcedureCalls(), is(EXPECTED_SCHEMAS_IN_PROCEDURE_CALLS)); - assertThat( - metaData.supportsSchemasInIndexDefinitions(), is(EXPECTED_SCHEMAS_IN_INDEX_DEFINITIONS)); - assertThat( - metaData.supportsCatalogsInIndexDefinitions(), is(EXPECTED_CATALOGS_IN_INDEX_DEFINITIONS)); - assertThat(metaData.supportsPositionedDelete(), is(EXPECTED_POSITIONED_DELETE)); - assertThat(metaData.supportsPositionedUpdate(), is(EXPECTED_POSITIONED_UPDATE)); - assertThat( - metaData.supportsResultSetType(ResultSet.TYPE_FORWARD_ONLY), - is(EXPECTED_TYPE_FORWARD_ONLY)); - assertThat(metaData.supportsSelectForUpdate(), is(EXPECTED_SELECT_FOR_UPDATE_SUPPORTED)); - assertThat(metaData.supportsStoredProcedures(), is(EXPECTED_STORED_PROCEDURES_SUPPORTED)); - assertThat(metaData.supportsSubqueriesInComparisons(), 
is(EXPECTED_SUBQUERIES_IN_COMPARISON)); - assertThat(metaData.supportsSubqueriesInExists(), is(EXPECTED_SUBQUERIES_IN_EXISTS)); - assertThat(metaData.supportsSubqueriesInIns(), is(EXPECTED_SUBQUERIES_IN_INS)); - assertThat(metaData.supportsSubqueriesInQuantifieds(), is(EXPECTED_SUBQUERIES_IN_QUANTIFIEDS)); - assertThat( - metaData.supportsCorrelatedSubqueries(), is(EXPECTED_CORRELATED_SUBQUERIES_SUPPORTED)); - assertThat(metaData.supportsUnion(), is(EXPECTED_SUPPORTS_UNION)); - assertThat(metaData.supportsUnionAll(), is(EXPECTED_SUPPORTS_UNION_ALL)); - assertThat(metaData.getMaxBinaryLiteralLength(), is(EXPECTED_MAX_BINARY_LITERAL_LENGTH)); - assertThat(metaData.getMaxCharLiteralLength(), is(EXPECTED_MAX_CHAR_LITERAL_LENGTH)); - assertThat(metaData.getMaxColumnsInGroupBy(), is(EXPECTED_MAX_COLUMNS_IN_GROUP_BY)); - assertThat(metaData.getMaxColumnsInIndex(), is(EXPECTED_MAX_COLUMNS_IN_INDEX)); - assertThat(metaData.getMaxColumnsInOrderBy(), is(EXPECTED_MAX_COLUMNS_IN_ORDER_BY)); - assertThat(metaData.getMaxColumnsInSelect(), is(EXPECTED_MAX_COLUMNS_IN_SELECT)); - assertThat(metaData.getMaxConnections(), is(EXPECTED_MAX_CONNECTIONS)); - assertThat(metaData.getMaxCursorNameLength(), is(EXPECTED_MAX_CURSOR_NAME_LENGTH)); - assertThat(metaData.getMaxIndexLength(), is(EXPECTED_MAX_INDEX_LENGTH)); - assertThat(metaData.getMaxSchemaNameLength(), is(EXPECTED_SCHEMA_NAME_LENGTH)); - assertThat(metaData.getMaxProcedureNameLength(), is(EXPECTED_MAX_PROCEDURE_NAME_LENGTH)); - assertThat(metaData.getMaxCatalogNameLength(), is(EXPECTED_MAX_CATALOG_NAME_LENGTH)); - assertThat(metaData.getMaxRowSize(), is(EXPECTED_MAX_ROW_SIZE)); - assertThat(metaData.doesMaxRowSizeIncludeBlobs(), is(EXPECTED_MAX_ROW_SIZE_INCLUDES_BLOBS)); - assertThat(metaData.getMaxStatementLength(), is(EXPECTED_MAX_STATEMENT_LENGTH)); - assertThat(metaData.getMaxStatements(), is(EXPECTED_MAX_STATEMENTS)); - assertThat(metaData.getMaxTableNameLength(), is(EXPECTED_MAX_TABLE_NAME_LENGTH)); - 
assertThat(metaData.getMaxTablesInSelect(), is(EXPECTED_MAX_TABLES_IN_SELECT)); - assertThat(metaData.getMaxUserNameLength(), is(EXPECTED_MAX_USERNAME_LENGTH)); - assertThat( - metaData.getDefaultTransactionIsolation(), is(EXPECTED_DEFAULT_TRANSACTION_ISOLATION)); - assertThat(metaData.supportsTransactions(), is(EXPECTED_TRANSACTIONS_SUPPORTED)); - assertThat(metaData.supportsBatchUpdates(), is(EXPECTED_BATCH_UPDATES_SUPPORTED)); - assertThat(metaData.supportsSavepoints(), is(EXPECTED_SAVEPOINTS_SUPPORTED)); - assertThat(metaData.supportsNamedParameters(), is(EXPECTED_NAMED_PARAMETERS_SUPPORTED)); - assertThat(metaData.locatorsUpdateCopy(), is(EXPECTED_LOCATORS_UPDATE_COPY)); - - assertThat( - metaData.supportsResultSetType(ResultSet.TYPE_SCROLL_INSENSITIVE), - is(EXPECTED_TYPE_SCROLL_INSENSITIVE)); - assertThat( - metaData.supportsResultSetType(ResultSet.TYPE_SCROLL_SENSITIVE), - is(EXPECTED_TYPE_SCROLL_SENSITIVE)); - assertThat( - metaData.supportsSchemasInPrivilegeDefinitions(), - is(EXPECTED_SCHEMAS_IN_PRIVILEGE_DEFINITIONS)); - assertThat( - metaData.supportsCatalogsInPrivilegeDefinitions(), - is(EXPECTED_CATALOGS_IN_PRIVILEGE_DEFINITIONS)); - assertThat( - metaData.supportsTransactionIsolationLevel(Connection.TRANSACTION_NONE), - is(EXPECTED_TRANSACTION_NONE)); - assertThat( - metaData.supportsTransactionIsolationLevel(Connection.TRANSACTION_READ_COMMITTED), - is(EXPECTED_TRANSACTION_READ_COMMITTED)); - assertThat( - metaData.supportsTransactionIsolationLevel(Connection.TRANSACTION_READ_UNCOMMITTED), - is(EXPECTED_TRANSACTION_READ_UNCOMMITTED)); - assertThat( - metaData.supportsTransactionIsolationLevel(Connection.TRANSACTION_REPEATABLE_READ), - is(EXPECTED_TRANSACTION_REPEATABLE_READ)); - assertThat( - metaData.supportsTransactionIsolationLevel(Connection.TRANSACTION_SERIALIZABLE), - is(EXPECTED_TRANSACTION_SERIALIZABLE)); - assertThat( - metaData.dataDefinitionCausesTransactionCommit(), - is(EXPECTED_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT)); - 
assertThat( - metaData.dataDefinitionIgnoredInTransactions(), - is(EXPECTED_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED)); - assertThat( - metaData.supportsStoredFunctionsUsingCallSyntax(), - is(EXPECTED_STORED_FUNCTIONS_USING_CALL_SYNTAX_SUPPORTED)); - assertThat( - metaData.supportsIntegrityEnhancementFacility(), - is(EXPECTED_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY)); - assertThat( - metaData.supportsDifferentTableCorrelationNames(), - is(EXPECTED_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES)); - - ThrowableAssertionUtils.simpleAssertThrowableClass( - SQLException.class, - () -> metaData.supportsTransactionIsolationLevel(Connection.TRANSACTION_SERIALIZABLE + 1)); - ThrowableAssertionUtils.simpleAssertThrowableClass( - SQLException.class, - () -> metaData.supportsResultSetType(ResultSet.HOLD_CURSORS_OVER_COMMIT)); - } - - @Test - public void testGetColumnsCanBeAccessedByNames() throws SQLException { - try (final ResultSet resultSet = connection.getMetaData().getColumns(null, null, null, null)) { - resultSetTestUtils.testData( - resultSet, - ImmutableList.of( - "TABLE_CAT", - "TABLE_SCHEM", - "TABLE_NAME", - "COLUMN_NAME", - "DATA_TYPE", - "TYPE_NAME", - "COLUMN_SIZE", - "BUFFER_LENGTH", - "DECIMAL_DIGITS", - "NUM_PREC_RADIX", - "NULLABLE", - "REMARKS", - "COLUMN_DEF", - "SQL_DATA_TYPE", - "SQL_DATETIME_SUB", - "CHAR_OCTET_LENGTH", - "ORDINAL_POSITION", - "IS_NULLABLE", - "SCOPE_CATALOG", - "SCOPE_SCHEMA", - "SCOPE_TABLE", - "SOURCE_DATA_TYPE", - "IS_AUTOINCREMENT", - "IS_GENERATEDCOLUMN"), - EXPECTED_GET_COLUMNS_RESULTS); - } - } - - @Test - public void testGetProcedures() throws SQLException { - try (final ResultSet resultSet = connection.getMetaData().getProcedures(null, null, null)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetProceduresSchema = - new HashMap() { - { - put(1, "PROCEDURE_CAT"); - put(2, "PROCEDURE_SCHEM"); - put(3, "PROCEDURE_NAME"); - put(4, "FUTURE_USE1"); - put(5, "FUTURE_USE2"); - put(6, 
"FUTURE_USE3"); - put(7, "REMARKS"); - put(8, "PROCEDURE_TYPE"); - put(9, "SPECIFIC_NAME"); - } - }; - testEmptyResultSet(resultSet, expectedGetProceduresSchema); - } - } - - @Test - public void testGetProcedureColumns() throws SQLException { - try (ResultSet resultSet = - connection.getMetaData().getProcedureColumns(null, null, null, null)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetProcedureColumnsSchema = - new HashMap() { - { - put(1, "PROCEDURE_CAT"); - put(2, "PROCEDURE_SCHEM"); - put(3, "PROCEDURE_NAME"); - put(4, "COLUMN_NAME"); - put(5, "COLUMN_TYPE"); - put(6, "DATA_TYPE"); - put(7, "TYPE_NAME"); - put(8, "PRECISION"); - put(9, "LENGTH"); - put(10, "SCALE"); - put(11, "RADIX"); - put(12, "NULLABLE"); - put(13, "REMARKS"); - put(14, "COLUMN_DEF"); - put(15, "SQL_DATA_TYPE"); - put(16, "SQL_DATETIME_SUB"); - put(17, "CHAR_OCTET_LENGTH"); - put(18, "ORDINAL_POSITION"); - put(19, "IS_NULLABLE"); - put(20, "SPECIFIC_NAME"); - } - }; - testEmptyResultSet(resultSet, expectedGetProcedureColumnsSchema); - } - } - - @Test - public void testGetColumnPrivileges() throws SQLException { - try (ResultSet resultSet = - connection.getMetaData().getColumnPrivileges(null, null, null, null)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetColumnPrivilegesSchema = - new HashMap() { - { - put(1, "TABLE_CAT"); - put(2, "TABLE_SCHEM"); - put(3, "TABLE_NAME"); - put(4, "COLUMN_NAME"); - put(5, "GRANTOR"); - put(6, "GRANTEE"); - put(7, "PRIVILEGE"); - put(8, "IS_GRANTABLE"); - } - }; - testEmptyResultSet(resultSet, expectedGetColumnPrivilegesSchema); - } - } - - @Test - public void testGetTablePrivileges() throws SQLException { - try (ResultSet resultSet = connection.getMetaData().getTablePrivileges(null, null, null)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetTablePrivilegesSchema = - new HashMap() { - { - put(1, "TABLE_CAT"); - 
put(2, "TABLE_SCHEM"); - put(3, "TABLE_NAME"); - put(4, "GRANTOR"); - put(5, "GRANTEE"); - put(6, "PRIVILEGE"); - put(7, "IS_GRANTABLE"); - } - }; - testEmptyResultSet(resultSet, expectedGetTablePrivilegesSchema); - } - } - - @Test - public void testGetBestRowIdentifier() throws SQLException { - try (ResultSet resultSet = - connection.getMetaData().getBestRowIdentifier(null, null, null, 0, true)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetBestRowIdentifierSchema = - new HashMap() { - { - put(1, "SCOPE"); - put(2, "COLUMN_NAME"); - put(3, "DATA_TYPE"); - put(4, "TYPE_NAME"); - put(5, "COLUMN_SIZE"); - put(6, "BUFFER_LENGTH"); - put(7, "DECIMAL_DIGITS"); - put(8, "PSEUDO_COLUMN"); - } - }; - testEmptyResultSet(resultSet, expectedGetBestRowIdentifierSchema); - } - } - - @Test - public void testGetVersionColumns() throws SQLException { - try (ResultSet resultSet = connection.getMetaData().getVersionColumns(null, null, null)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetVersionColumnsSchema = - new HashMap() { - { - put(1, "SCOPE"); - put(2, "COLUMN_NAME"); - put(3, "DATA_TYPE"); - put(4, "TYPE_NAME"); - put(5, "COLUMN_SIZE"); - put(6, "BUFFER_LENGTH"); - put(7, "DECIMAL_DIGITS"); - put(8, "PSEUDO_COLUMN"); - } - }; - testEmptyResultSet(resultSet, expectedGetVersionColumnsSchema); - } - } - - @Test - public void testGetTypeInfo() throws SQLException { - try (ResultSet resultSet = connection.getMetaData().getTypeInfo()) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetTypeInfoSchema = - new HashMap() { - { - put(1, "TYPE_NAME"); - put(2, "DATA_TYPE"); - put(3, "PRECISION"); - put(4, "LITERAL_PREFIX"); - put(5, "LITERAL_SUFFIX"); - put(6, "CREATE_PARAMS"); - put(7, "NULLABLE"); - put(8, "CASE_SENSITIVE"); - put(9, "SEARCHABLE"); - put(10, "UNSIGNED_ATTRIBUTE"); - put(11, "FIXED_PREC_SCALE"); - put(12, "AUTO_INCREMENT"); - 
put(13, "LOCAL_TYPE_NAME"); - put(14, "MINIMUM_SCALE"); - put(15, "MAXIMUM_SCALE"); - put(16, "SQL_DATA_TYPE"); - put(17, "SQL_DATETIME_SUB"); - put(18, "NUM_PREC_RADIX"); - } - }; - testEmptyResultSet(resultSet, expectedGetTypeInfoSchema); - } - } - - @Test - public void testGetIndexInfo() throws SQLException { - try (ResultSet resultSet = - connection.getMetaData().getIndexInfo(null, null, null, false, true)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetIndexInfoSchema = - new HashMap() { - { - put(1, "TABLE_CAT"); - put(2, "TABLE_SCHEM"); - put(3, "TABLE_NAME"); - put(4, "NON_UNIQUE"); - put(5, "INDEX_QUALIFIER"); - put(6, "INDEX_NAME"); - put(7, "TYPE"); - put(8, "ORDINAL_POSITION"); - put(9, "COLUMN_NAME"); - put(10, "ASC_OR_DESC"); - put(11, "CARDINALITY"); - put(12, "PAGES"); - put(13, "FILTER_CONDITION"); - } - }; - testEmptyResultSet(resultSet, expectedGetIndexInfoSchema); - } - } - - @Test - public void testGetUDTs() throws SQLException { - try (ResultSet resultSet = connection.getMetaData().getUDTs(null, null, null, null)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetUDTsSchema = - new HashMap() { - { - put(1, "TYPE_CAT"); - put(2, "TYPE_SCHEM"); - put(3, "TYPE_NAME"); - put(4, "CLASS_NAME"); - put(5, "DATA_TYPE"); - put(6, "REMARKS"); - put(7, "BASE_TYPE"); - } - }; - testEmptyResultSet(resultSet, expectedGetUDTsSchema); - } - } - - @Test - public void testGetSuperTypes() throws SQLException { - try (ResultSet resultSet = connection.getMetaData().getSuperTypes(null, null, null)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetSuperTypesSchema = - new HashMap() { - { - put(1, "TYPE_CAT"); - put(2, "TYPE_SCHEM"); - put(3, "TYPE_NAME"); - put(4, "SUPERTYPE_CAT"); - put(5, "SUPERTYPE_SCHEM"); - put(6, "SUPERTYPE_NAME"); - } - }; - testEmptyResultSet(resultSet, expectedGetSuperTypesSchema); - } - } - - @Test - 
public void testGetSuperTables() throws SQLException { - try (ResultSet resultSet = connection.getMetaData().getSuperTables(null, null, null)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetSuperTablesSchema = - new HashMap() { - { - put(1, "TABLE_CAT"); - put(2, "TABLE_SCHEM"); - put(3, "TABLE_NAME"); - put(4, "SUPERTABLE_NAME"); - } - }; - testEmptyResultSet(resultSet, expectedGetSuperTablesSchema); - } - } - - @Test - public void testGetAttributes() throws SQLException { - try (ResultSet resultSet = connection.getMetaData().getAttributes(null, null, null, null)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetAttributesSchema = - new HashMap() { - { - put(1, "TYPE_CAT"); - put(2, "TYPE_SCHEM"); - put(3, "TYPE_NAME"); - put(4, "ATTR_NAME"); - put(5, "DATA_TYPE"); - put(6, "ATTR_TYPE_NAME"); - put(7, "ATTR_SIZE"); - put(8, "DECIMAL_DIGITS"); - put(9, "NUM_PREC_RADIX"); - put(10, "NULLABLE"); - put(11, "REMARKS"); - put(12, "ATTR_DEF"); - put(13, "SQL_DATA_TYPE"); - put(14, "SQL_DATETIME_SUB"); - put(15, "CHAR_OCTET_LENGTH"); - put(16, "ORDINAL_POSITION"); - put(17, "IS_NULLABLE"); - put(18, "SCOPE_CATALOG"); - put(19, "SCOPE_SCHEMA"); - put(20, "SCOPE_TABLE"); - put(21, "SOURCE_DATA_TYPE"); - } - }; - testEmptyResultSet(resultSet, expectedGetAttributesSchema); - } - } - - @Test - public void testGetClientInfoProperties() throws SQLException { - try (ResultSet resultSet = connection.getMetaData().getClientInfoProperties()) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetClientInfoPropertiesSchema = - new HashMap() { - { - put(1, "NAME"); - put(2, "MAX_LEN"); - put(3, "DEFAULT_VALUE"); - put(4, "DESCRIPTION"); - } - }; - testEmptyResultSet(resultSet, expectedGetClientInfoPropertiesSchema); - } - } - - @Test - public void testGetFunctions() throws SQLException { - try (ResultSet resultSet = 
connection.getMetaData().getFunctions(null, null, null)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetFunctionsSchema = - new HashMap() { - { - put(1, "FUNCTION_CAT"); - put(2, "FUNCTION_SCHEM"); - put(3, "FUNCTION_NAME"); - put(4, "REMARKS"); - put(5, "FUNCTION_TYPE"); - put(6, "SPECIFIC_NAME"); - } - }; - testEmptyResultSet(resultSet, expectedGetFunctionsSchema); - } - } - - @Test - public void testGetFunctionColumns() throws SQLException { - try (ResultSet resultSet = - connection.getMetaData().getFunctionColumns(null, null, null, null)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetFunctionColumnsSchema = - new HashMap() { - { - put(1, "FUNCTION_CAT"); - put(2, "FUNCTION_SCHEM"); - put(3, "FUNCTION_NAME"); - put(4, "COLUMN_NAME"); - put(5, "COLUMN_TYPE"); - put(6, "DATA_TYPE"); - put(7, "TYPE_NAME"); - put(8, "PRECISION"); - put(9, "LENGTH"); - put(10, "SCALE"); - put(11, "RADIX"); - put(12, "NULLABLE"); - put(13, "REMARKS"); - put(14, "CHAR_OCTET_LENGTH"); - put(15, "ORDINAL_POSITION"); - put(16, "IS_NULLABLE"); - put(17, "SPECIFIC_NAME"); - } - }; - testEmptyResultSet(resultSet, expectedGetFunctionColumnsSchema); - } - } - - @Test - public void testGetPseudoColumns() throws SQLException { - try (ResultSet resultSet = connection.getMetaData().getPseudoColumns(null, null, null, null)) { - // Maps ordinal index to column name according to JDBC documentation - final Map expectedGetPseudoColumnsSchema = - new HashMap() { - { - put(1, "TABLE_CAT"); - put(2, "TABLE_SCHEM"); - put(3, "TABLE_NAME"); - put(4, "COLUMN_NAME"); - put(5, "DATA_TYPE"); - put(6, "COLUMN_SIZE"); - put(7, "DECIMAL_DIGITS"); - put(8, "NUM_PREC_RADIX"); - put(9, "COLUMN_USAGE"); - put(10, "REMARKS"); - put(11, "CHAR_OCTET_LENGTH"); - put(12, "IS_NULLABLE"); - } - }; - testEmptyResultSet(resultSet, expectedGetPseudoColumnsSchema); - } - } - - private void testEmptyResultSet( - final ResultSet 
resultSet, final Map expectedResultSetSchema) - throws SQLException { - assertFalse(resultSet.next()); - final ResultSetMetaData resultSetMetaData = resultSet.getMetaData(); - for (final Map.Entry entry : expectedResultSetSchema.entrySet()) { - assertEquals(entry.getValue(), resultSetMetaData.getColumnLabel(entry.getKey())); - } - } - - @Test - public void testGetColumnSize() { - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_BYTE), - ArrowDatabaseMetadata.getColumnSize(new ArrowType.Int(Byte.SIZE, true))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_SHORT), - ArrowDatabaseMetadata.getColumnSize(new ArrowType.Int(Short.SIZE, true))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_INT), - ArrowDatabaseMetadata.getColumnSize(new ArrowType.Int(Integer.SIZE, true))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_LONG), - ArrowDatabaseMetadata.getColumnSize(new ArrowType.Int(Long.SIZE, true))); - - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_VARCHAR_AND_BINARY), - ArrowDatabaseMetadata.getColumnSize(new ArrowType.Utf8())); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_VARCHAR_AND_BINARY), - ArrowDatabaseMetadata.getColumnSize(new ArrowType.Binary())); - - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_TIMESTAMP_SECONDS), - ArrowDatabaseMetadata.getColumnSize(new ArrowType.Timestamp(TimeUnit.SECOND, null))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_TIMESTAMP_MILLISECONDS), - ArrowDatabaseMetadata.getColumnSize(new ArrowType.Timestamp(TimeUnit.MILLISECOND, null))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_TIMESTAMP_MICROSECONDS), - ArrowDatabaseMetadata.getColumnSize(new ArrowType.Timestamp(TimeUnit.MICROSECOND, null))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_TIMESTAMP_NANOSECONDS), - ArrowDatabaseMetadata.getColumnSize(new 
ArrowType.Timestamp(TimeUnit.NANOSECOND, null))); - - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_TIME), - ArrowDatabaseMetadata.getColumnSize(new ArrowType.Time(TimeUnit.SECOND, Integer.SIZE))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_TIME_MILLISECONDS), - ArrowDatabaseMetadata.getColumnSize( - new ArrowType.Time(TimeUnit.MILLISECOND, Integer.SIZE))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_TIME_MICROSECONDS), - ArrowDatabaseMetadata.getColumnSize( - new ArrowType.Time(TimeUnit.MICROSECOND, Integer.SIZE))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_TIME_NANOSECONDS), - ArrowDatabaseMetadata.getColumnSize(new ArrowType.Time(TimeUnit.NANOSECOND, Integer.SIZE))); - - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.COLUMN_SIZE_DATE), - ArrowDatabaseMetadata.getColumnSize(new ArrowType.Date(DateUnit.DAY))); - - assertNull( - ArrowDatabaseMetadata.getColumnSize( - new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE))); - } - - @Test - public void testGetDecimalDigits() { - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.NO_DECIMAL_DIGITS), - ArrowDatabaseMetadata.getDecimalDigits(new ArrowType.Int(Byte.SIZE, true))); - - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.NO_DECIMAL_DIGITS), - ArrowDatabaseMetadata.getDecimalDigits(new ArrowType.Timestamp(TimeUnit.SECOND, null))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.DECIMAL_DIGITS_TIME_MILLISECONDS), - ArrowDatabaseMetadata.getDecimalDigits( - new ArrowType.Timestamp(TimeUnit.MILLISECOND, null))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.DECIMAL_DIGITS_TIME_MICROSECONDS), - ArrowDatabaseMetadata.getDecimalDigits( - new ArrowType.Timestamp(TimeUnit.MICROSECOND, null))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.DECIMAL_DIGITS_TIME_NANOSECONDS), - ArrowDatabaseMetadata.getDecimalDigits(new ArrowType.Timestamp(TimeUnit.NANOSECOND, null))); - - 
assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.NO_DECIMAL_DIGITS), - ArrowDatabaseMetadata.getDecimalDigits(new ArrowType.Time(TimeUnit.SECOND, Integer.SIZE))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.DECIMAL_DIGITS_TIME_MILLISECONDS), - ArrowDatabaseMetadata.getDecimalDigits( - new ArrowType.Time(TimeUnit.MILLISECOND, Integer.SIZE))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.DECIMAL_DIGITS_TIME_MICROSECONDS), - ArrowDatabaseMetadata.getDecimalDigits( - new ArrowType.Time(TimeUnit.MICROSECOND, Integer.SIZE))); - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.DECIMAL_DIGITS_TIME_NANOSECONDS), - ArrowDatabaseMetadata.getDecimalDigits( - new ArrowType.Time(TimeUnit.NANOSECOND, Integer.SIZE))); - - assertEquals( - Integer.valueOf(ArrowDatabaseMetadata.NO_DECIMAL_DIGITS), - ArrowDatabaseMetadata.getDecimalDigits(new ArrowType.Date(DateUnit.DAY))); - - assertNull(ArrowDatabaseMetadata.getDecimalDigits(new ArrowType.Utf8())); - } - - @Test - public void testSqlToRegexLike() { - assertEquals(".*", ArrowDatabaseMetadata.sqlToRegexLike("%")); - assertEquals(".", ArrowDatabaseMetadata.sqlToRegexLike("_")); - assertEquals("\\*", ArrowDatabaseMetadata.sqlToRegexLike("*")); - assertEquals("T\\*E.S.*T", ArrowDatabaseMetadata.sqlToRegexLike("T*E_S%T")); - } - - @Test - public void testEmptySqlInfo() throws Exception { - try (final Connection testConnection = - FLIGHT_SERVER_EMPTY_SQLINFO_TEST_RULE.getConnection(false)) { - final DatabaseMetaData metaData = testConnection.getMetaData(); - assertThat(metaData.getSQLKeywords(), is("")); - assertThat(metaData.getNumericFunctions(), is("")); - assertThat(metaData.getStringFunctions(), is("")); - assertThat(metaData.getSystemFunctions(), is("")); - assertThat(metaData.getTimeDateFunctions(), is("")); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArrayTest.java 
b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArrayTest.java deleted file mode 100644 index 06d101724cd0d..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcArrayTest.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.SQLFeatureNotSupportedException; -import java.sql.Types; -import java.util.Arrays; -import java.util.HashMap; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.jupiter.api.extension.RegisterExtension; -import org.mockito.junit.jupiter.MockitoExtension; - -@ExtendWith(MockitoExtension.class) -public class ArrowFlightJdbcArrayTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - IntVector dataVector; - - @BeforeEach - public void setup() { - dataVector = rootAllocatorTestExtension.createIntVector(); - } - - @AfterEach - public void tearDown() { - this.dataVector.close(); - } - - @Test - public void testShouldGetBaseTypeNameReturnCorrectTypeName() { - ArrowFlightJdbcArray arrowFlightJdbcArray = - new ArrowFlightJdbcArray(dataVector, 0, dataVector.getValueCount()); - assertEquals("INTEGER", arrowFlightJdbcArray.getBaseTypeName()); - } - - @Test - public void testShouldGetBaseTypeReturnCorrectType() { - ArrowFlightJdbcArray arrowFlightJdbcArray = - new ArrowFlightJdbcArray(dataVector, 0, dataVector.getValueCount()); - assertEquals(Types.INTEGER, arrowFlightJdbcArray.getBaseType()); - } - - @Test - public void testShouldGetArrayReturnValidArray() throws SQLException { - ArrowFlightJdbcArray arrowFlightJdbcArray = - new ArrowFlightJdbcArray(dataVector, 0, 
dataVector.getValueCount()); - Object[] array = (Object[]) arrowFlightJdbcArray.getArray(); - - Object[] expected = new Object[dataVector.getValueCount()]; - for (int i = 0; i < expected.length; i++) { - expected[i] = dataVector.getObject(i); - } - assertArrayEquals(array, expected); - } - - @Test - public void testShouldGetArrayReturnValidArrayWithOffsets() throws SQLException { - ArrowFlightJdbcArray arrowFlightJdbcArray = - new ArrowFlightJdbcArray(dataVector, 0, dataVector.getValueCount()); - Object[] array = (Object[]) arrowFlightJdbcArray.getArray(1, 5); - - Object[] expected = new Object[5]; - for (int i = 0; i < expected.length; i++) { - expected[i] = dataVector.getObject(i + 1); - } - assertArrayEquals(array, expected); - } - - @Test - public void testShouldGetArrayWithOffsetsThrowArrayIndexOutOfBoundsException() - throws SQLException { - ArrowFlightJdbcArray arrowFlightJdbcArray = - new ArrowFlightJdbcArray(dataVector, 0, dataVector.getValueCount()); - assertThrows( - ArrayIndexOutOfBoundsException.class, - () -> arrowFlightJdbcArray.getArray(0, dataVector.getValueCount() + 1)); - } - - @Test - public void testShouldGetArrayWithMapNotBeSupported() throws SQLException { - ArrowFlightJdbcArray arrowFlightJdbcArray = - new ArrowFlightJdbcArray(dataVector, 0, dataVector.getValueCount()); - HashMap> map = new HashMap<>(); - assertThrows(SQLFeatureNotSupportedException.class, () -> arrowFlightJdbcArray.getArray(map)); - } - - @Test - public void testShouldGetArrayWithOffsetsAndMapNotBeSupported() throws SQLException { - ArrowFlightJdbcArray arrowFlightJdbcArray = - new ArrowFlightJdbcArray(dataVector, 0, dataVector.getValueCount()); - HashMap> map = new HashMap<>(); - assertThrows( - SQLFeatureNotSupportedException.class, () -> arrowFlightJdbcArray.getArray(0, 5, map)); - } - - @Test - public void testShouldGetResultSetReturnValidResultSet() throws SQLException { - ArrowFlightJdbcArray arrowFlightJdbcArray = - new ArrowFlightJdbcArray(dataVector, 0, 
dataVector.getValueCount()); - try (ResultSet resultSet = arrowFlightJdbcArray.getResultSet()) { - int count = 0; - while (resultSet.next()) { - assertEquals((Object) resultSet.getInt(1), dataVector.getObject(count)); - count++; - } - } - } - - @Test - public void testShouldGetResultSetReturnValidResultSetWithOffsets() throws SQLException { - ArrowFlightJdbcArray arrowFlightJdbcArray = - new ArrowFlightJdbcArray(dataVector, 0, dataVector.getValueCount()); - try (ResultSet resultSet = arrowFlightJdbcArray.getResultSet(3, 5)) { - int count = 0; - while (resultSet.next()) { - assertEquals((Object) resultSet.getInt(1), dataVector.getObject(count + 3)); - count++; - } - assertEquals(5, count); - } - } - - @Test - public void testToString() throws SQLException { - ArrowFlightJdbcArray arrowFlightJdbcArray = - new ArrowFlightJdbcArray(dataVector, 0, dataVector.getValueCount()); - - JsonStringArrayList array = new JsonStringArrayList<>(); - array.addAll(Arrays.asList((Object[]) arrowFlightJdbcArray.getArray())); - - assertEquals(array.toString(), arrowFlightJdbcArray.toString()); - } - - @Test - public void testShouldGetResultSetWithMapNotBeSupported() throws SQLException { - ArrowFlightJdbcArray arrowFlightJdbcArray = - new ArrowFlightJdbcArray(dataVector, 0, dataVector.getValueCount()); - HashMap> map = new HashMap<>(); - assertThrows( - SQLFeatureNotSupportedException.class, () -> arrowFlightJdbcArray.getResultSet(map)); - } - - @Test - public void testShouldGetResultSetWithOffsetsAndMapNotBeSupported() throws SQLException { - ArrowFlightJdbcArray arrowFlightJdbcArray = - new ArrowFlightJdbcArray(dataVector, 0, dataVector.getValueCount()); - HashMap> map = new HashMap<>(); - assertThrows( - SQLFeatureNotSupportedException.class, () -> arrowFlightJdbcArray.getResultSet(0, 5, map)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionCookieTest.java 
b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionCookieTest.java deleted file mode 100644 index 1977b61392eb4..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionCookieTest.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; - -import java.sql.Connection; -import java.sql.SQLException; -import java.sql.Statement; -import org.apache.arrow.driver.jdbc.utils.CoreMockedSqlProducers; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class ArrowFlightJdbcConnectionCookieTest { - - @RegisterExtension - public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION = - FlightServerTestExtension.createStandardTestExtension( - CoreMockedSqlProducers.getLegacyProducer()); - - @Test - public void testCookies() throws SQLException { - try (Connection connection = FLIGHT_SERVER_TEST_EXTENSION.getConnection(false); - Statement statement = connection.createStatement()) { - - // Expect client didn't receive cookies before any operation - assertNull(FLIGHT_SERVER_TEST_EXTENSION.getMiddlewareCookieFactory().getCookie()); - - // Run another action for check if the cookies was sent by the server. - statement.execute(CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD); - assertEquals("k=v", FLIGHT_SERVER_TEST_EXTENSION.getMiddlewareCookieFactory().getCookie()); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSourceTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSourceTest.java deleted file mode 100644 index 18ff45bbde6f6..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcConnectionPoolDataSourceTest.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotSame; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.sql.Connection; -import javax.sql.PooledConnection; -import org.apache.arrow.driver.jdbc.authentication.UserPasswordAuthentication; -import org.apache.arrow.driver.jdbc.utils.ConnectionWrapper; -import org.apache.arrow.driver.jdbc.utils.MockFlightSqlProducer; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class ArrowFlightJdbcConnectionPoolDataSourceTest { - - @RegisterExtension public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION; - - private static final MockFlightSqlProducer PRODUCER = new MockFlightSqlProducer(); - - static { - UserPasswordAuthentication authentication = - new UserPasswordAuthentication.Builder() - .user("user1", "pass1") - .user("user2", "pass2") - .build(); - - FLIGHT_SERVER_TEST_EXTENSION = - new FlightServerTestExtension.Builder() - .authentication(authentication) - .producer(PRODUCER) - .build(); - } - - private ArrowFlightJdbcConnectionPoolDataSource dataSource; - - @BeforeEach - public void setUp() { - 
dataSource = FLIGHT_SERVER_TEST_EXTENSION.createConnectionPoolDataSource(false); - } - - @AfterEach - public void tearDown() throws Exception { - dataSource.close(); - } - - @Test - public void testShouldInnerConnectionIsClosedReturnCorrectly() throws Exception { - PooledConnection pooledConnection = dataSource.getPooledConnection(); - Connection connection = pooledConnection.getConnection(); - assertFalse(connection.isClosed()); - connection.close(); - assertTrue(connection.isClosed()); - } - - @Test - public void testShouldInnerConnectionShouldIgnoreDoubleClose() throws Exception { - PooledConnection pooledConnection = dataSource.getPooledConnection(); - Connection connection = pooledConnection.getConnection(); - assertFalse(connection.isClosed()); - connection.close(); - assertTrue(connection.isClosed()); - } - - @Test - public void testShouldInnerConnectionIsClosedReturnTrueIfPooledConnectionCloses() - throws Exception { - PooledConnection pooledConnection = dataSource.getPooledConnection(); - Connection connection = pooledConnection.getConnection(); - assertFalse(connection.isClosed()); - pooledConnection.close(); - assertTrue(connection.isClosed()); - } - - @Test - public void testShouldReuseConnectionsOnPool() throws Exception { - PooledConnection pooledConnection = dataSource.getPooledConnection("user1", "pass1"); - ConnectionWrapper connection = ((ConnectionWrapper) pooledConnection.getConnection()); - assertFalse(connection.isClosed()); - connection.close(); - assertTrue(connection.isClosed()); - assertFalse(connection.unwrap(ArrowFlightConnection.class).isClosed()); - - PooledConnection pooledConnection2 = dataSource.getPooledConnection("user1", "pass1"); - ConnectionWrapper connection2 = ((ConnectionWrapper) pooledConnection2.getConnection()); - assertFalse(connection2.isClosed()); - connection2.close(); - assertTrue(connection2.isClosed()); - assertFalse(connection2.unwrap(ArrowFlightConnection.class).isClosed()); - - assertSame(pooledConnection, 
pooledConnection2); - assertNotSame(connection, connection2); - assertSame( - connection.unwrap(ArrowFlightConnection.class), - connection2.unwrap(ArrowFlightConnection.class)); - } - - @Test - public void testShouldNotMixConnectionsForDifferentUsers() throws Exception { - PooledConnection pooledConnection = dataSource.getPooledConnection("user1", "pass1"); - ConnectionWrapper connection = ((ConnectionWrapper) pooledConnection.getConnection()); - assertFalse(connection.isClosed()); - connection.close(); - assertTrue(connection.isClosed()); - assertFalse(connection.unwrap(ArrowFlightConnection.class).isClosed()); - - PooledConnection pooledConnection2 = dataSource.getPooledConnection("user2", "pass2"); - ConnectionWrapper connection2 = ((ConnectionWrapper) pooledConnection2.getConnection()); - assertFalse(connection2.isClosed()); - connection2.close(); - assertTrue(connection2.isClosed()); - assertFalse(connection2.unwrap(ArrowFlightConnection.class).isClosed()); - - assertNotSame(pooledConnection, pooledConnection2); - assertNotSame(connection, connection2); - assertNotSame( - connection.unwrap(ArrowFlightConnection.class), - connection2.unwrap(ArrowFlightConnection.class)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursorTest.java deleted file mode 100644 index 5cfe8ae74e567..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcCursorTest.java +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static org.junit.jupiter.api.Assertions.assertTrue; - -import com.google.common.collect.ImmutableList; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.IntervalUnit; -import 
org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.calcite.avatica.util.Cursor; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Test; - -/** Tests for {@link ArrowFlightJdbcCursor}. */ -public class ArrowFlightJdbcCursorTest { - - ArrowFlightJdbcCursor cursor; - BufferAllocator allocator; - - @AfterEach - public void cleanUp() { - allocator.close(); - cursor.close(); - } - - @Test - public void testBinaryVectorNullTrue() throws SQLException { - final VectorSchemaRoot root = getVectorSchemaRoot("Binary", new ArrowType.Binary(), null); - ((VarBinaryVector) root.getVector("Binary")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testDateVectorNullTrue() throws SQLException { - final VectorSchemaRoot root = - getVectorSchemaRoot("Date", new ArrowType.Date(DateUnit.DAY), null); - ((DateDayVector) root.getVector("Date")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testDurationVectorNullTrue() throws SQLException { - final VectorSchemaRoot root = - getVectorSchemaRoot("Duration", new ArrowType.Duration(TimeUnit.MILLISECOND), null); - ((DurationVector) root.getVector("Duration")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testDateInternalNullTrue() throws SQLException { - final VectorSchemaRoot root = - getVectorSchemaRoot("Interval", new ArrowType.Interval(IntervalUnit.DAY_TIME), null); - ((IntervalDayVector) root.getVector("Interval")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testTimeStampVectorNullTrue() throws SQLException { - final VectorSchemaRoot root = - getVectorSchemaRoot("TimeStamp", new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), null); - ((TimeStampMilliVector) root.getVector("TimeStamp")).setNull(0); - testCursorWasNull(root); 
- } - - @Test - public void testTimeVectorNullTrue() throws SQLException { - final VectorSchemaRoot root = - getVectorSchemaRoot("Time", new ArrowType.Time(TimeUnit.MILLISECOND, 32), null); - ((TimeMilliVector) root.getVector("Time")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testFixedSizeListVectorNullTrue() throws SQLException { - List fieldList = new ArrayList<>(); - fieldList.add(new Field("Null", new FieldType(true, new ArrowType.Null(), null), null)); - final VectorSchemaRoot root = - getVectorSchemaRoot("FixedSizeList", new ArrowType.FixedSizeList(10), fieldList); - ((FixedSizeListVector) root.getVector("FixedSizeList")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testLargeListVectorNullTrue() throws SQLException { - List fieldList = new ArrayList<>(); - fieldList.add(new Field("Null", new FieldType(true, new ArrowType.Null(), null), null)); - final VectorSchemaRoot root = - getVectorSchemaRoot("LargeList", new ArrowType.LargeList(), fieldList); - ((LargeListVector) root.getVector("LargeList")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testListVectorNullTrue() throws SQLException { - List fieldList = new ArrayList<>(); - fieldList.add(new Field("Null", new FieldType(true, new ArrowType.Null(), null), null)); - final VectorSchemaRoot root = getVectorSchemaRoot("List", new ArrowType.List(), fieldList); - ((ListVector) root.getVector("List")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testMapVectorNullTrue() throws SQLException { - List structChildren = new ArrayList<>(); - structChildren.add(new Field("Key", new FieldType(false, new ArrowType.Utf8(), null), null)); - structChildren.add(new Field("Value", new FieldType(false, new ArrowType.Utf8(), null), null)); - List fieldList = new ArrayList<>(); - fieldList.add( - new Field("Struct", new FieldType(false, new ArrowType.Struct(), null), structChildren)); - final VectorSchemaRoot root = getVectorSchemaRoot("Map", 
new ArrowType.Map(false), fieldList); - ((MapVector) root.getVector("Map")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testStructVectorNullTrue() throws SQLException { - final VectorSchemaRoot root = getVectorSchemaRoot("Struct", new ArrowType.Struct(), null); - ((StructVector) root.getVector("Struct")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testBaseIntVectorNullTrue() throws SQLException { - final VectorSchemaRoot root = - getVectorSchemaRoot("BaseInt", new ArrowType.Int(32, false), null); - ((UInt4Vector) root.getVector("BaseInt")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testBitVectorNullTrue() throws SQLException { - final VectorSchemaRoot root = getVectorSchemaRoot("Bit", new ArrowType.Bool(), null); - ((BitVector) root.getVector("Bit")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testDecimalVectorNullTrue() throws SQLException { - final VectorSchemaRoot root = - getVectorSchemaRoot("Decimal", new ArrowType.Decimal(2, 2, 128), null); - ((DecimalVector) root.getVector("Decimal")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testFloat4VectorNullTrue() throws SQLException { - final VectorSchemaRoot root = - getVectorSchemaRoot( - "Float4", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null); - ((Float4Vector) root.getVector("Float4")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testFloat8VectorNullTrue() throws SQLException { - final VectorSchemaRoot root = - getVectorSchemaRoot( - "Float8", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null); - ((Float8Vector) root.getVector("Float8")).setNull(0); - testCursorWasNull(root); - } - - @Test - public void testVarCharVectorNullTrue() throws SQLException { - final VectorSchemaRoot root = getVectorSchemaRoot("VarChar", new ArrowType.Utf8(), null); - ((VarCharVector) root.getVector("VarChar")).setNull(0); - testCursorWasNull(root); - } - - @Test - 
public void testNullVectorNullTrue() throws SQLException { - final VectorSchemaRoot root = getVectorSchemaRoot("Null", new ArrowType.Null(), null); - testCursorWasNull(root); - } - - private VectorSchemaRoot getVectorSchemaRoot( - String name, ArrowType arrowType, List children) { - final Schema schema = - new Schema( - ImmutableList.of(new Field(name, new FieldType(true, arrowType, null), children))); - allocator = new RootAllocator(Long.MAX_VALUE); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator); - root.allocateNew(); - return root; - } - - private void testCursorWasNull(VectorSchemaRoot root) throws SQLException { - root.setRowCount(1); - cursor = new ArrowFlightJdbcCursor(root); - cursor.next(); - List accessorList = cursor.createAccessors(null, null, null); - accessorList.get(0).getObject(); - assertTrue(cursor.wasNull()); - root.close(); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriverTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriverTest.java deleted file mode 100644 index ae355829d7ecc..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriverTest.java +++ /dev/null @@ -1,433 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.sql.Connection; -import java.sql.Driver; -import java.sql.DriverManager; -import java.sql.SQLException; -import java.util.Collection; -import java.util.Map; -import java.util.Properties; -import org.apache.arrow.driver.jdbc.authentication.UserPasswordAuthentication; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty; -import org.apache.arrow.driver.jdbc.utils.MockFlightSqlProducer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -/** Tests for {@link ArrowFlightJdbcDriver}. 
*/ -public class ArrowFlightJdbcDriverTest { - - @RegisterExtension public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION; - private static final MockFlightSqlProducer PRODUCER = new MockFlightSqlProducer(); - - static { - UserPasswordAuthentication authentication = - new UserPasswordAuthentication.Builder() - .user("user1", "pass1") - .user("user2", "pass2") - .build(); - - FLIGHT_SERVER_TEST_EXTENSION = - new FlightServerTestExtension.Builder() - .authentication(authentication) - .producer(PRODUCER) - .build(); - } - - private BufferAllocator allocator; - private ArrowFlightJdbcConnectionPoolDataSource dataSource; - - @BeforeEach - public void setUp() throws Exception { - allocator = new RootAllocator(Long.MAX_VALUE); - dataSource = FLIGHT_SERVER_TEST_EXTENSION.createConnectionPoolDataSource(); - } - - @AfterEach - public void tearDown() throws Exception { - Collection childAllocators = allocator.getChildAllocators(); - AutoCloseables.close(childAllocators.toArray(new AutoCloseable[0])); - AutoCloseables.close(dataSource, allocator); - } - - /** - * Tests whether the {@link ArrowFlightJdbcDriver} is registered in the {@link DriverManager}. - * - * @throws SQLException If an error occurs. (This is not supposed to happen.) - */ - @Test - public void testDriverIsRegisteredInDriverManager() throws Exception { - assertTrue( - DriverManager.getDriver("jdbc:arrow-flight://localhost:32010") - instanceof ArrowFlightJdbcDriver); - assertTrue( - DriverManager.getDriver("jdbc:arrow-flight-sql://localhost:32010") - instanceof ArrowFlightJdbcDriver); - } - - /** - * Tests whether the {@link ArrowFlightJdbcDriver} returns null when provided with an unsupported - * URL prefix. 
- */ - @Test - public void testShouldDeclineUrlWithUnsupportedPrefix() throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - - assertNull( - driver.connect( - "jdbc:mysql://localhost:32010", dataSource.getProperties("flight", "flight123"))); - } - - /** - * Tests whether the {@link ArrowFlightJdbcDriver} can establish a successful connection to the - * Arrow Flight client. - * - * @throws Exception If the connection fails to be established. - */ - @Test - public void testShouldConnectWhenProvidedWithValidUrl() throws Exception { - // Get the Arrow Flight JDBC driver by providing a URL with a valid prefix. - final Driver driver = new ArrowFlightJdbcDriver(); - - try (Connection connection = - driver.connect( - "jdbc:arrow-flight://" - + dataSource.getConfig().getHost() - + ":" - + dataSource.getConfig().getPort() - + "?" - + "useEncryption=false", - dataSource.getProperties( - dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()))) { - assertTrue(connection.isValid(300)); - } - try (Connection connection = - driver.connect( - "jdbc:arrow-flight-sql://" - + dataSource.getConfig().getHost() - + ":" - + dataSource.getConfig().getPort() - + "?" - + "useEncryption=false", - dataSource.getProperties( - dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()))) { - assertTrue(connection.isValid(300)); - } - } - - @Test - public void testConnectWithInsensitiveCasePropertyKeys() throws Exception { - // Get the Arrow Flight JDBC driver by providing a URL with insensitive case property keys. - final Driver driver = new ArrowFlightJdbcDriver(); - - try (Connection connection = - driver.connect( - "jdbc:arrow-flight://" - + dataSource.getConfig().getHost() - + ":" - + dataSource.getConfig().getPort() - + "?" 
- + "UseEncryptIon=false", - dataSource.getProperties( - dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()))) { - assertTrue(connection.isValid(300)); - } - try (Connection connection = - driver.connect( - "jdbc:arrow-flight-sql://" - + dataSource.getConfig().getHost() - + ":" - + dataSource.getConfig().getPort() - + "?" - + "UseEncryptIon=false", - dataSource.getProperties( - dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()))) { - assertTrue(connection.isValid(300)); - } - } - - @Test - public void testConnectWithInsensitiveCasePropertyKeys2() throws Exception { - // Get the Arrow Flight JDBC driver by providing a property object with insensitive case keys. - final Driver driver = new ArrowFlightJdbcDriver(); - Properties properties = - dataSource.getProperties( - dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()); - properties.put("UseEncryptIon", "false"); - - try (Connection connection = - driver.connect( - "jdbc:arrow-flight://" - + dataSource.getConfig().getHost() - + ":" - + dataSource.getConfig().getPort(), - properties)) { - assertTrue(connection.isValid(300)); - } - try (Connection connection = - driver.connect( - "jdbc:arrow-flight-sql://" - + dataSource.getConfig().getHost() - + ":" - + dataSource.getConfig().getPort(), - properties)) { - assertTrue(connection.isValid(300)); - } - } - - /** - * Tests whether an exception is thrown upon attempting to connect to a malformed URI. - * - * @throws SQLException If an error occurs. - */ - @Test - public void testShouldThrowExceptionWhenAttemptingToConnectToMalformedUrl() throws SQLException { - final Driver driver = new ArrowFlightJdbcDriver(); - final String malformedUri = "yes:??/chainsaw.i=T333"; - - assertThrows( - SQLException.class, - () -> driver.connect(malformedUri, dataSource.getProperties("flight", "flight123"))); - } - - /** - * Tests whether an exception is thrown upon attempting to connect to a malformed URI. 
- * - * @throws SQLException If an error occurs. - */ - @Test - public void testShouldThrowExceptionWhenAttemptingToConnectToUrlNoPrefix() throws SQLException { - final Driver driver = new ArrowFlightJdbcDriver(); - final String malformedUri = "localhost:32010"; - - assertThrows( - SQLException.class, - () -> - driver.connect( - malformedUri, - dataSource.getProperties( - dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()))); - } - - /** Tests whether an exception is thrown upon attempting to connect to a malformed URI. */ - @Test - public void testShouldThrowExceptionWhenAttemptingToConnectToUrlNoPort() { - final Driver driver = new ArrowFlightJdbcDriver(); - SQLException e = - assertThrows( - SQLException.class, - () -> { - Properties properties = - dataSource.getProperties( - dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()); - Connection conn = driver.connect("jdbc:arrow-flight://localhost", properties); - conn.close(); - }); - assertTrue(e.getMessage().contains("URL must have a port")); - e = - assertThrows( - SQLException.class, - () -> { - Properties properties = - dataSource.getProperties( - dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()); - Connection conn = driver.connect("jdbc:arrow-flight-sql://localhost", properties); - conn.close(); - }); - assertTrue(e.getMessage().contains("URL must have a port")); - } - - /** Tests whether an exception is thrown upon attempting to connect to a malformed URI. 
*/ - @Test - public void testShouldThrowExceptionWhenAttemptingToConnectToUrlNoHost() { - final Driver driver = new ArrowFlightJdbcDriver(); - SQLException e = - assertThrows( - SQLException.class, - () -> { - Properties properties = - dataSource.getProperties( - dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()); - Connection conn = driver.connect("jdbc:arrow-flight://32010:localhost", properties); - conn.close(); - }); - assertTrue(e.getMessage().contains("URL must have a host")); - - e = - assertThrows( - SQLException.class, - () -> { - Properties properties = - dataSource.getProperties( - dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()); - Connection conn = - driver.connect("jdbc:arrow-flight-sql://32010:localhost", properties); - conn.close(); - }); - assertTrue(e.getMessage().contains("URL must have a host")); - } - - /** - * Tests whether {@link ArrowFlightJdbcDriver#getUrlsArgs} returns the correct URL parameters. - * - * @throws Exception If an error occurs. 
- */ - @Test - public void testDriverUrlParsingMechanismShouldReturnTheDesiredArgsFromUrl() throws Exception { - final ArrowFlightJdbcDriver driver = new ArrowFlightJdbcDriver(); - - final Map parsedArgs = - driver - .getUrlsArgs("jdbc:arrow-flight-sql://localhost:2222/?key1=value1&key2=value2&a=b") - .orElseThrow(() -> new RuntimeException("URL was rejected")); - - // Check size == the amount of args provided (scheme not included) - assertEquals(5, parsedArgs.size()); - - // Check host == the provided host - assertEquals(parsedArgs.get(ArrowFlightConnectionProperty.HOST.camelName()), "localhost"); - - // Check port == the provided port - assertEquals(parsedArgs.get(ArrowFlightConnectionProperty.PORT.camelName()), 2222); - - // Check all other non-default arguments - assertEquals(parsedArgs.get("key1"), "value1"); - assertEquals(parsedArgs.get("key2"), "value2"); - assertEquals(parsedArgs.get("a"), "b"); - } - - @Test - public void testDriverUrlParsingMechanismShouldReturnTheDesiredArgsFromUrlWithSemicolon() - throws Exception { - final ArrowFlightJdbcDriver driver = new ArrowFlightJdbcDriver(); - final Map parsedArgs = - driver - .getUrlsArgs("jdbc:arrow-flight-sql://localhost:2222/;key1=value1;key2=value2;a=b") - .orElseThrow(() -> new RuntimeException("URL was rejected")); - - // Check size == the amount of args provided (scheme not included) - assertEquals(5, parsedArgs.size()); - - // Check host == the provided host - assertEquals(parsedArgs.get(ArrowFlightConnectionProperty.HOST.camelName()), "localhost"); - - // Check port == the provided port - assertEquals(parsedArgs.get(ArrowFlightConnectionProperty.PORT.camelName()), 2222); - - // Check all other non-default arguments - assertEquals(parsedArgs.get("key1"), "value1"); - assertEquals(parsedArgs.get("key2"), "value2"); - assertEquals(parsedArgs.get("a"), "b"); - } - - @Test - public void testDriverUrlParsingMechanismShouldReturnTheDesiredArgsFromUrlWithOneSemicolon() - throws Exception { - final 
ArrowFlightJdbcDriver driver = new ArrowFlightJdbcDriver(); - final Map parsedArgs = - driver - .getUrlsArgs("jdbc:arrow-flight-sql://localhost:2222/;key1=value1") - .orElseThrow(() -> new RuntimeException("URL was rejected")); - - // Check size == the amount of args provided (scheme not included) - assertEquals(3, parsedArgs.size()); - - // Check host == the provided host - assertEquals(parsedArgs.get(ArrowFlightConnectionProperty.HOST.camelName()), "localhost"); - - // Check port == the provided port - assertEquals(parsedArgs.get(ArrowFlightConnectionProperty.PORT.camelName()), 2222); - - // Check all other non-default arguments - assertEquals(parsedArgs.get("key1"), "value1"); - } - - @Test - public void testDriverUrlParsingMechanismShouldReturnEmptyOptionalForUnknownScheme() - throws SQLException { - final ArrowFlightJdbcDriver driver = new ArrowFlightJdbcDriver(); - assertFalse(driver.getUrlsArgs("jdbc:malformed-url-flight://localhost:2222").isPresent()); - } - - /** - * Tests whether {@code ArrowFlightJdbcDriverTest#getUrlsArgs} returns the correct URL parameters - * when the host is an IP Address. - * - * @throws Exception If an error occurs. 
- */ - @Test - public void testDriverUrlParsingMechanismShouldWorkWithIPAddress() throws Exception { - final ArrowFlightJdbcDriver driver = new ArrowFlightJdbcDriver(); - final Map parsedArgs = - driver - .getUrlsArgs("jdbc:arrow-flight-sql://0.0.0.0:2222") - .orElseThrow(() -> new RuntimeException("URL was rejected")); - - // Check size == the amount of args provided (scheme not included) - assertEquals(2, parsedArgs.size()); - - // Check host == the provided host - assertEquals(parsedArgs.get(ArrowFlightConnectionProperty.HOST.camelName()), "0.0.0.0"); - - // Check port == the provided port - assertEquals(parsedArgs.get(ArrowFlightConnectionProperty.PORT.camelName()), 2222); - } - - /** - * Tests whether {@code ArrowFlightJdbcDriverTest#getUrlsArgs} escape especial characters and - * returns the correct URL parameters when the especial character '&' is embedded in the query - * parameters values. - * - * @throws Exception If an error occurs. - */ - @Test - public void testDriverUrlParsingMechanismShouldWorkWithEmbeddedEspecialCharacter() - throws Exception { - final ArrowFlightJdbcDriver driver = new ArrowFlightJdbcDriver(); - final Map parsedArgs = - driver - .getUrlsArgs( - "jdbc:arrow-flight-sql://0.0.0.0:2222?test1=test1value&test2%26continue=test2value&test3=test3value") - .orElseThrow(() -> new RuntimeException("URL was rejected")); - - // Check size == the amount of args provided (scheme not included) - assertEquals(5, parsedArgs.size()); - - // Check host == the provided host - assertEquals(parsedArgs.get(ArrowFlightConnectionProperty.HOST.camelName()), "0.0.0.0"); - - // Check port == the provided port - assertEquals(parsedArgs.get(ArrowFlightConnectionProperty.PORT.camelName()), 2222); - - // Check all other non-default arguments - assertEquals(parsedArgs.get("test1"), "test1value"); - assertEquals(parsedArgs.get("test2&continue"), "test2value"); - assertEquals(parsedArgs.get("test3"), "test3value"); - } -} diff --git 
a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactoryTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactoryTest.java deleted file mode 100644 index d3b0c3700846a..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcFactoryTest.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import com.google.common.collect.ImmutableMap; -import java.lang.reflect.Constructor; -import java.sql.Connection; -import java.util.Properties; -import org.apache.arrow.driver.jdbc.authentication.UserPasswordAuthentication; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty; -import org.apache.arrow.driver.jdbc.utils.MockFlightSqlProducer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.calcite.avatica.UnregisteredDriver; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -/** Tests for {@link ArrowFlightJdbcDriver}. */ -public class ArrowFlightJdbcFactoryTest { - - @RegisterExtension public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION; - private static final MockFlightSqlProducer PRODUCER = new MockFlightSqlProducer(); - - static { - UserPasswordAuthentication authentication = - new UserPasswordAuthentication.Builder() - .user("user1", "pass1") - .user("user2", "pass2") - .build(); - - FLIGHT_SERVER_TEST_EXTENSION = - new FlightServerTestExtension.Builder() - .authentication(authentication) - .producer(PRODUCER) - .build(); - } - - private BufferAllocator allocator; - private ArrowFlightJdbcConnectionPoolDataSource dataSource; - - @BeforeEach - public void setUp() throws Exception { - allocator = new RootAllocator(Long.MAX_VALUE); - dataSource = FLIGHT_SERVER_TEST_EXTENSION.createConnectionPoolDataSource(); - } - - @AfterEach - public void tearDown() throws Exception { - AutoCloseables.close(dataSource, allocator); - } - - @Test - public void testShouldBeAbleToEstablishAConnectionSuccessfully() throws Exception { - UnregisteredDriver driver = new ArrowFlightJdbcDriver(); - Constructor constructor = 
ArrowFlightJdbcFactory.class.getConstructor(); - constructor.setAccessible(true); - ArrowFlightJdbcFactory factory = constructor.newInstance(); - - final Properties properties = new Properties(); - properties.putAll( - ImmutableMap.of( - ArrowFlightConnectionProperty.HOST.camelName(), - "localhost", - ArrowFlightConnectionProperty.PORT.camelName(), - 32010, - ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), - false)); - - try (Connection connection = - factory.newConnection( - driver, - constructor.newInstance(), - "jdbc:arrow-flight-sql://localhost:32010", - properties)) { - assert connection.isValid(300); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTimeTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTimeTest.java deleted file mode 100644 index aa499d805bcf2..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcTimeTest.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import static org.hamcrest.CoreMatchers.endsWith; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; - -import java.time.LocalTime; -import java.util.concurrent.TimeUnit; -import org.junit.jupiter.api.Test; - -public class ArrowFlightJdbcTimeTest { - - final int hour = 5; - final int minute = 6; - final int second = 7; - - @Test - public void testPrintingMillisNoLeadingZeroes() { - // testing the regular case where the precision of the millisecond is 3 - LocalTime dateTime = - LocalTime.of(hour, minute, second, (int) TimeUnit.MILLISECONDS.toNanos(999)); - ArrowFlightJdbcTime time = new ArrowFlightJdbcTime(dateTime); - assertThat(time.toString(), endsWith(".999")); - assertThat(time.getHours(), is(hour)); - assertThat(time.getMinutes(), is(minute)); - assertThat(time.getSeconds(), is(second)); - } - - @Test - public void testPrintingMillisOneLeadingZeroes() { - // test case where one leading zero needs to be added - LocalTime dateTime = - LocalTime.of(hour, minute, second, (int) TimeUnit.MILLISECONDS.toNanos(99)); - ArrowFlightJdbcTime time = new ArrowFlightJdbcTime(dateTime); - assertThat(time.toString(), endsWith(".099")); - assertThat(time.getHours(), is(hour)); - assertThat(time.getMinutes(), is(minute)); - assertThat(time.getSeconds(), is(second)); - } - - @Test - public void testPrintingMillisTwoLeadingZeroes() { - // test case where two leading zeroes needs to be added - LocalTime dateTime = LocalTime.of(hour, minute, second, (int) TimeUnit.MILLISECONDS.toNanos(1)); - ArrowFlightJdbcTime time = new ArrowFlightJdbcTime(dateTime); - assertThat(time.toString(), endsWith(".001")); - assertThat(time.getHours(), is(hour)); - assertThat(time.getMinutes(), is(minute)); - assertThat(time.getSeconds(), is(second)); - } - - @Test - public void testEquality() { - // tests #equals and #hashCode for coverage checks - LocalTime dateTime = LocalTime.of(hour, minute, second, (int) 
TimeUnit.MILLISECONDS.toNanos(1)); - ArrowFlightJdbcTime time1 = new ArrowFlightJdbcTime(dateTime); - ArrowFlightJdbcTime time2 = new ArrowFlightJdbcTime(dateTime); - assertThat(time1, is(time2)); - assertThat(time1.hashCode(), is(time2.hashCode())); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatementTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatementTest.java deleted file mode 100644 index 774ad0081e846..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightPreparedStatementTest.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertAll; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.nio.charset.StandardCharsets; -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.driver.jdbc.utils.CoreMockedSqlProducers; -import org.apache.arrow.driver.jdbc.utils.MockFlightSqlProducer; -import org.apache.arrow.flight.sql.FlightSqlUtils; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.Text; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class ArrowFlightPreparedStatementTest { - - public static final MockFlightSqlProducer PRODUCER = CoreMockedSqlProducers.getLegacyProducer(); - - @RegisterExtension - public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION = - FlightServerTestExtension.createStandardTestExtension(PRODUCER); - - private static Connection connection; - - @BeforeAll - public static void setup() throws SQLException { - connection = FLIGHT_SERVER_TEST_EXTENSION.getConnection(false); - } - - @AfterAll - public static void tearDown() 
throws SQLException { - connection.close(); - } - - @BeforeEach - public void before() { - PRODUCER.clearActionTypeCounter(); - } - - @Test - public void testSimpleQueryNoParameterBinding() throws SQLException { - final String query = CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD; - try (final PreparedStatement preparedStatement = connection.prepareStatement(query); - final ResultSet resultSet = preparedStatement.executeQuery()) { - CoreMockedSqlProducers.assertLegacyRegularSqlResultSet(resultSet); - } - } - - @Test - public void testQueryWithParameterBinding() throws SQLException { - final String query = "Fake query with parameters"; - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("", Types.MinorType.INT.getType()))); - final Schema parameterSchema = - new Schema( - Arrays.asList( - Field.nullable("", ArrowType.Utf8.INSTANCE), - new Field( - "", - FieldType.nullable(ArrowType.List.INSTANCE), - Collections.singletonList(Field.nullable("", Types.MinorType.INT.getType()))))); - final List> expected = - Collections.singletonList(Arrays.asList(new Text("foo"), new Integer[] {1, 2, null})); - - PRODUCER.addSelectQuery( - query, - schema, - Collections.singletonList( - listener -> { - try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - ((IntVector) root.getVector(0)).setSafe(0, 10); - root.setRowCount(1); - listener.start(root); - listener.putNext(); - } catch (final Throwable throwable) { - listener.error(throwable); - } finally { - listener.completed(); - } - })); - - PRODUCER.addExpectedParameters(query, parameterSchema, expected); - - try (final PreparedStatement preparedStatement = connection.prepareStatement(query)) { - preparedStatement.setString(1, "foo"); - preparedStatement.setArray( - 2, connection.createArrayOf("INTEGER", new Integer[] {1, 2, null})); - - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - 
resultSet.next(); - assert true; - } - } - } - - @Test - @Disabled("https://github.com/apache/arrow/issues/34741: flaky test") - public void testPreparedStatementExecutionOnce() throws SQLException { - final PreparedStatement statement = - connection.prepareStatement(CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD); - // Expect that there is one entry in the map -- {prepared statement action type, invocation - // count}. - assertEquals(PRODUCER.getActionTypeCounter().size(), 1); - // Expect that the prepared statement was executed exactly once. - assertEquals( - PRODUCER - .getActionTypeCounter() - .get(FlightSqlUtils.FLIGHT_SQL_CREATE_PREPARED_STATEMENT.getType()), - 1); - statement.close(); - } - - @Test - public void testReturnColumnCount() throws SQLException { - final String query = CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD; - try (final PreparedStatement psmt = connection.prepareStatement(query)) { - assertAll( - "Column count is as expected", - () -> assertThat("ID", equalTo(psmt.getMetaData().getColumnName(1))), - () -> assertThat("Name", equalTo(psmt.getMetaData().getColumnName(2))), - () -> assertThat("Age", equalTo(psmt.getMetaData().getColumnName(3))), - () -> assertThat("Salary", equalTo(psmt.getMetaData().getColumnName(4))), - () -> assertThat("Hire Date", equalTo(psmt.getMetaData().getColumnName(5))), - () -> assertThat("Last Sale", equalTo(psmt.getMetaData().getColumnName(6))), - () -> assertThat(6, equalTo(psmt.getMetaData().getColumnCount()))); - } - } - - @Test - public void testUpdateQuery() throws SQLException { - String query = "Fake update"; - PRODUCER.addUpdateQuery(query, /*updatedRows*/ 42); - try (final PreparedStatement stmt = connection.prepareStatement(query)) { - int updated = stmt.executeUpdate(); - assertEquals(42, updated); - } - } - - @Test - public void testUpdateQueryWithParameters() throws SQLException { - String query = "Fake update with parameters"; - PRODUCER.addUpdateQuery(query, /*updatedRows*/ 42); - 
PRODUCER.addExpectedParameters( - query, - new Schema(Collections.singletonList(Field.nullable("", ArrowType.Utf8.INSTANCE))), - Collections.singletonList( - Collections.singletonList(new Text("foo".getBytes(StandardCharsets.UTF_8))))); - try (final PreparedStatement stmt = connection.prepareStatement(query)) { - // TODO: make sure this is validated on the server too - stmt.setString(1, "foo"); - int updated = stmt.executeUpdate(); - assertEquals(42, updated); - } - } - - @Test - public void testUpdateQueryWithBatchedParameters() throws SQLException { - String query = "Fake update with batched parameters"; - Schema parameterSchema = - new Schema( - Arrays.asList( - Field.nullable("", ArrowType.Utf8.INSTANCE), - new Field( - "", - FieldType.nullable(ArrowType.List.INSTANCE), - Collections.singletonList(Field.nullable("", Types.MinorType.INT.getType()))))); - List> expected = - Arrays.asList( - Arrays.asList(new Text("foo"), new Integer[] {1, 2, null}), - Arrays.asList(new Text("bar"), new Integer[] {0, -1, 100000})); - - PRODUCER.addUpdateQuery(query, /*updatedRows*/ 42); - PRODUCER.addExpectedParameters(query, parameterSchema, expected); - - try (final PreparedStatement stmt = connection.prepareStatement(query)) { - // TODO: make sure this is validated on the server too - stmt.setString(1, "foo"); - stmt.setArray(2, connection.createArrayOf("INTEGER", new Integer[] {1, 2, null})); - stmt.addBatch(); - stmt.setString(1, "bar"); - stmt.setArray(2, connection.createArrayOf("INTEGER", new Integer[] {0, -1, 100000})); - stmt.addBatch(); - int[] updated = stmt.executeBatch(); - assertEquals(42, updated[0]); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteTest.java deleted file mode 100644 index 632cb0ba5619b..0000000000000 --- 
a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteTest.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static org.hamcrest.CoreMatchers.allOf; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.CoreMatchers.not; -import static org.hamcrest.CoreMatchers.nullValue; -import static org.hamcrest.MatcherAssert.assertThat; - -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.Collections; -import java.util.HashSet; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import org.apache.arrow.driver.jdbc.utils.MockFlightSqlProducer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import 
org.apache.arrow.vector.types.pojo.Schema; -import org.apache.calcite.avatica.AvaticaUtils; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -/** Tests for {@link ArrowFlightStatement#execute}. */ -public class ArrowFlightStatementExecuteTest { - private static final String SAMPLE_QUERY_CMD = "SELECT * FROM this_test"; - private static final int SAMPLE_QUERY_ROWS = Byte.MAX_VALUE; - private static final String VECTOR_NAME = "Unsigned Byte"; - private static final Schema SAMPLE_QUERY_SCHEMA = - new Schema(Collections.singletonList(Field.nullable(VECTOR_NAME, MinorType.UINT1.getType()))); - private static final String SAMPLE_UPDATE_QUERY = - "UPDATE this_table SET this_field = that_field FROM this_test WHERE this_condition"; - private static final long SAMPLE_UPDATE_COUNT = 100L; - private static final String SAMPLE_LARGE_UPDATE_QUERY = - "UPDATE this_large_table SET this_large_field = that_large_field FROM this_large_test WHERE this_large_condition"; - private static final long SAMPLE_LARGE_UPDATE_COUNT = Long.MAX_VALUE; - private static final MockFlightSqlProducer PRODUCER = new MockFlightSqlProducer(); - - @RegisterExtension - public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION = - FlightServerTestExtension.createStandardTestExtension(PRODUCER); - - private Connection connection; - private Statement statement; - - @BeforeAll - public static void setUpBeforeClass() { - PRODUCER.addSelectQuery( - SAMPLE_QUERY_CMD, - SAMPLE_QUERY_SCHEMA, - Collections.singletonList( - listener -> { - try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final VectorSchemaRoot root = - VectorSchemaRoot.create(SAMPLE_QUERY_SCHEMA, allocator)) { - final UInt1Vector vector = (UInt1Vector) root.getVector(VECTOR_NAME); - 
IntStream.range(0, SAMPLE_QUERY_ROWS) - .forEach(index -> vector.setSafe(index, index)); - vector.setValueCount(SAMPLE_QUERY_ROWS); - root.setRowCount(SAMPLE_QUERY_ROWS); - listener.start(root); - listener.putNext(); - } catch (final Throwable throwable) { - listener.error(throwable); - } finally { - listener.completed(); - } - })); - PRODUCER.addUpdateQuery(SAMPLE_UPDATE_QUERY, SAMPLE_UPDATE_COUNT); - PRODUCER.addUpdateQuery(SAMPLE_LARGE_UPDATE_QUERY, SAMPLE_LARGE_UPDATE_COUNT); - } - - @BeforeEach - public void setUp() throws SQLException { - connection = FLIGHT_SERVER_TEST_EXTENSION.getConnection(false); - statement = connection.createStatement(); - } - - @AfterEach - public void tearDown() throws Exception { - AutoCloseables.close(statement, connection); - } - - @AfterAll - public static void tearDownAfterClass() throws Exception { - AutoCloseables.close(PRODUCER); - } - - @Test - public void testExecuteShouldRunSelectQuery() throws SQLException { - assertThat(statement.execute(SAMPLE_QUERY_CMD), is(true)); // Means this is a SELECT query. - final Set numbers = - IntStream.range(0, SAMPLE_QUERY_ROWS) - .boxed() - .map(Integer::byteValue) - .collect(Collectors.toCollection(HashSet::new)); - try (final ResultSet resultSet = statement.getResultSet()) { - final int columnCount = resultSet.getMetaData().getColumnCount(); - assertThat(columnCount, is(1)); - int rowCount = 0; - for (; resultSet.next(); rowCount++) { - assertThat(numbers.remove(resultSet.getByte(1)), is(true)); - } - assertThat(rowCount, is(equalTo(SAMPLE_QUERY_ROWS))); - } - assertThat(numbers, is(Collections.emptySet())); - assertThat( - (long) statement.getUpdateCount(), - is(allOf(equalTo(statement.getLargeUpdateCount()), equalTo(-1L)))); - } - - @Test - public void testExecuteShouldRunUpdateQueryForSmallUpdate() throws SQLException { - assertThat(statement.execute(SAMPLE_UPDATE_QUERY), is(false)); // Means this is an UPDATE query. 
- assertThat( - (long) statement.getUpdateCount(), - is(allOf(equalTo(statement.getLargeUpdateCount()), equalTo(SAMPLE_UPDATE_COUNT)))); - assertThat(statement.getResultSet(), is(nullValue())); - } - - @Test - public void testExecuteShouldRunUpdateQueryForLargeUpdate() throws SQLException { - assertThat(statement.execute(SAMPLE_LARGE_UPDATE_QUERY), is(false)); // UPDATE query. - final long updateCountSmall = statement.getUpdateCount(); - final long updateCountLarge = statement.getLargeUpdateCount(); - assertThat(updateCountLarge, is(equalTo(SAMPLE_LARGE_UPDATE_COUNT))); - assertThat( - updateCountSmall, - is( - allOf( - equalTo((long) AvaticaUtils.toSaturatedInt(updateCountLarge)), - not(equalTo(updateCountLarge))))); - assertThat(statement.getResultSet(), is(nullValue())); - } - - @Test - public void testUpdateCountShouldStartOnZero() throws SQLException { - assertThat( - (long) statement.getUpdateCount(), - is(allOf(equalTo(statement.getLargeUpdateCount()), equalTo(0L)))); - assertThat(statement.getResultSet(), is(nullValue())); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteUpdateTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteUpdateTest.java deleted file mode 100644 index f7c31c590c556..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightStatementExecuteUpdateTest.java +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static java.lang.String.format; -import static org.hamcrest.CoreMatchers.allOf; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.instanceOf; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.CoreMatchers.not; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.sql.Connection; -import java.sql.SQLException; -import java.sql.SQLFeatureNotSupportedException; -import java.sql.Statement; -import java.util.Collections; -import org.apache.arrow.driver.jdbc.utils.MockFlightSqlProducer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.calcite.avatica.AvaticaUtils; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -/** Tests for {@link ArrowFlightStatement#executeUpdate}. 
*/ -public class ArrowFlightStatementExecuteUpdateTest { - private static final String UPDATE_SAMPLE_QUERY = - "UPDATE sample_table SET sample_col = sample_val WHERE sample_condition"; - private static final int UPDATE_SAMPLE_QUERY_AFFECTED_COLS = 10; - private static final String LARGE_UPDATE_SAMPLE_QUERY = - "UPDATE large_sample_table SET large_sample_col = large_sample_val WHERE large_sample_condition"; - private static final long LARGE_UPDATE_SAMPLE_QUERY_AFFECTED_COLS = (long) Integer.MAX_VALUE + 1; - private static final String REGULAR_QUERY_SAMPLE = "SELECT * FROM NOT_UPDATE_QUERY"; - private static final Schema REGULAR_QUERY_SCHEMA = - new Schema( - Collections.singletonList(Field.nullable("placeholder", MinorType.VARCHAR.getType()))); - private static final MockFlightSqlProducer PRODUCER = new MockFlightSqlProducer(); - - @RegisterExtension - public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION = - FlightServerTestExtension.createStandardTestExtension(PRODUCER); - - public Connection connection; - public Statement statement; - - @BeforeAll - public static void setUpBeforeClass() { - PRODUCER.addUpdateQuery(UPDATE_SAMPLE_QUERY, UPDATE_SAMPLE_QUERY_AFFECTED_COLS); - PRODUCER.addUpdateQuery(LARGE_UPDATE_SAMPLE_QUERY, LARGE_UPDATE_SAMPLE_QUERY_AFFECTED_COLS); - PRODUCER.addSelectQuery( - REGULAR_QUERY_SAMPLE, - REGULAR_QUERY_SCHEMA, - Collections.singletonList( - listener -> { - try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final VectorSchemaRoot root = - VectorSchemaRoot.create(REGULAR_QUERY_SCHEMA, allocator)) { - listener.start(root); - listener.putNext(); - } catch (final Throwable throwable) { - listener.error(throwable); - } finally { - listener.completed(); - } - })); - } - - @BeforeEach - public void setUp() throws SQLException { - connection = FLIGHT_SERVER_TEST_EXTENSION.getConnection(false); - statement = connection.createStatement(); - } - - @AfterEach - public void tearDown() throws Exception { - 
AutoCloseables.close(statement, connection); - } - - @AfterAll - public static void tearDownAfterClass() throws Exception { - AutoCloseables.close(PRODUCER); - } - - @Test - public void testExecuteUpdateShouldReturnNumColsAffectedForNumRowsFittingInt() - throws SQLException { - assertThat(statement.executeUpdate(UPDATE_SAMPLE_QUERY), is(UPDATE_SAMPLE_QUERY_AFFECTED_COLS)); - } - - @Test - public void testExecuteUpdateShouldReturnSaturatedNumColsAffectedIfDoesNotFitInInt() - throws SQLException { - final long result = statement.executeUpdate(LARGE_UPDATE_SAMPLE_QUERY); - final long expectedRowCountRaw = LARGE_UPDATE_SAMPLE_QUERY_AFFECTED_COLS; - assertThat( - result, - is( - allOf( - not(equalTo(expectedRowCountRaw)), - equalTo( - (long) - AvaticaUtils.toSaturatedInt( - expectedRowCountRaw))))); // Because of long-to-integer overflow. - } - - @Test - public void testExecuteLargeUpdateShouldReturnNumColsAffected() throws SQLException { - assertThat( - statement.executeLargeUpdate(LARGE_UPDATE_SAMPLE_QUERY), - is(LARGE_UPDATE_SAMPLE_QUERY_AFFECTED_COLS)); - } - - @Test - // TODO Implement `Statement#executeUpdate(String, int)` - public void testExecuteUpdateUnsupportedWithDriverFlag() throws SQLException { - assertThrows( - SQLFeatureNotSupportedException.class, - () -> { - assertThat( - statement.executeUpdate(UPDATE_SAMPLE_QUERY, Statement.NO_GENERATED_KEYS), - is(UPDATE_SAMPLE_QUERY_AFFECTED_COLS)); - }); - } - - @Test - // TODO Implement `Statement#executeUpdate(String, int[])` - public void testExecuteUpdateUnsupportedWithArrayOfInts() throws SQLException { - assertThrows( - SQLFeatureNotSupportedException.class, - () -> { - assertThat( - statement.executeUpdate(UPDATE_SAMPLE_QUERY, new int[0]), - is(UPDATE_SAMPLE_QUERY_AFFECTED_COLS)); - }); - } - - @Test - // TODO Implement `Statement#executeUpdate(String, String[])` - public void testExecuteUpdateUnsupportedWithArraysOfStrings() throws SQLException { - assertThrows( - SQLFeatureNotSupportedException.class, - 
() -> { - assertThat( - statement.executeUpdate(UPDATE_SAMPLE_QUERY, new String[0]), - is(UPDATE_SAMPLE_QUERY_AFFECTED_COLS)); - }); - } - - @Test - public void testExecuteShouldExecuteUpdateQueryAutomatically() throws SQLException { - assertThat( - statement.execute(UPDATE_SAMPLE_QUERY), is(false)); // Meaning there was an update query. - assertThat( - statement.execute(REGULAR_QUERY_SAMPLE), is(true)); // Meaning there was a select query. - } - - @Test - public void testShouldFailToPrepareStatementForNullQuery() { - int count = 0; - try { - assertThat(statement.execute(null), is(false)); - } catch (final SQLException e) { - count++; - assertThat(e.getCause(), is(instanceOf(NullPointerException.class))); - } - assertThat(count, is(1)); - } - - @Test - public void testShouldFailToPrepareStatementForClosedStatement() throws SQLException { - statement.close(); - assertThat(statement.isClosed(), is(true)); - int count = 0; - try { - statement.execute(UPDATE_SAMPLE_QUERY); - } catch (final SQLException e) { - count++; - assertThat(e.getMessage(), is("Statement closed")); - } - assertThat(count, is(1)); - } - - @Test - public void testShouldFailToPrepareStatementForBadStatement() { - final String badQuery = "BAD INVALID STATEMENT"; - int count = 0; - try { - statement.execute(badQuery); - } catch (final SQLException e) { - count++; - /* - * The error message is up to whatever implementation of `FlightSqlProducer` - * the driver is communicating with. However, for the purpose of this test, - * we simply throw an `IllegalArgumentException` for queries not registered - * in our `MockFlightSqlProducer`. 
- */ - assertThat( - e.getMessage(), - is(format("Error while executing SQL \"%s\": Query not found", badQuery))); - } - assertThat(count, is(1)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionMutualTlsTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionMutualTlsTest.java deleted file mode 100644 index cc95115b9330b..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionMutualTlsTest.java +++ /dev/null @@ -1,442 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.io.File; -import java.net.URLEncoder; -import java.sql.Connection; -import java.sql.Driver; -import java.sql.DriverManager; -import java.sql.SQLException; -import java.util.Properties; -import org.apache.arrow.driver.jdbc.authentication.UserPasswordAuthentication; -import org.apache.arrow.driver.jdbc.client.ArrowFlightSqlClientHandler; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty; -import org.apache.arrow.driver.jdbc.utils.FlightSqlTestCertificates; -import org.apache.arrow.driver.jdbc.utils.MockFlightSqlProducer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -/** Tests encrypted connections. 
*/ -public class ConnectionMutualTlsTest { - - @RegisterExtension public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION; - private static final String tlsRootCertsPath; - private static final String clientMTlsCertPath; - private static final String badClientMTlsCertPath; - private static final String clientMTlsKeyPath; - private static final String badClientMTlsKeyPath; - private static final MockFlightSqlProducer PRODUCER = new MockFlightSqlProducer(); - private static final String userTest = "user1"; - private static final String passTest = "pass1"; - - static { - final FlightSqlTestCertificates.CertKeyPair certKey = - FlightSqlTestCertificates.exampleTlsCerts().get(0); - - tlsRootCertsPath = certKey.cert.getPath(); - - final File serverMTlsCACert = FlightSqlTestCertificates.exampleCACert(); - - final FlightSqlTestCertificates.CertKeyPair clientMTlsCertKey = - FlightSqlTestCertificates.exampleTlsCerts().get(1); - - clientMTlsCertPath = clientMTlsCertKey.cert.getPath(); - clientMTlsKeyPath = clientMTlsCertKey.key.getPath(); - - badClientMTlsCertPath = clientMTlsCertPath + ".bad"; - badClientMTlsKeyPath = clientMTlsKeyPath + ".bad"; - - UserPasswordAuthentication authentication = - new UserPasswordAuthentication.Builder().user(userTest, passTest).build(); - - FLIGHT_SERVER_TEST_EXTENSION = - new FlightServerTestExtension.Builder() - .authentication(authentication) - .useEncryption(certKey.cert, certKey.key) - .useMTlsClientVerification(serverMTlsCACert) - .producer(PRODUCER) - .build(); - } - - private BufferAllocator allocator; - - @BeforeEach - public void setUp() throws Exception { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void tearDown() throws Exception { - allocator.getChildAllocators().forEach(BufferAllocator::close); - AutoCloseables.close(allocator); - } - - /** - * Try to instantiate an encrypted FlightClient. - * - * @throws Exception on error. 
- */ - @Test - public void testGetEncryptedClientAuthenticated() throws Exception { - - try (ArrowFlightSqlClientHandler client = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withPort(FLIGHT_SERVER_TEST_EXTENSION.getPort()) - .withUsername(userTest) - .withPassword(passTest) - .withTlsRootCertificates(tlsRootCertsPath) - .withClientCertificate(clientMTlsCertPath) - .withClientKey(clientMTlsKeyPath) - .withBufferAllocator(allocator) - .withEncryption(true) - .build()) { - assertNotNull(client); - } - } - - /** - * Try to instantiate an encrypted FlightClient providing a bad mTLS Cert Path. It's expected to - * receive the SQLException. - */ - @Test - public void testGetEncryptedClientWithBadMTlsCertPath() { - - assertThrows( - SQLException.class, - () -> { - try (ArrowFlightSqlClientHandler handler = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withPort(FLIGHT_SERVER_TEST_EXTENSION.getPort()) - .withUsername(userTest) - .withPassword(passTest) - .withTlsRootCertificates(tlsRootCertsPath) - .withClientCertificate(badClientMTlsCertPath) - .withClientKey(clientMTlsKeyPath) - .withBufferAllocator(allocator) - .withEncryption(true) - .build()) { - fail(); - } - }); - } - - /** - * Try to instantiate an encrypted FlightClient providing a bad mTLS Key Path. It's expected to - * receive the SQLException. 
- */ - @Test - public void testGetEncryptedClientWithBadMTlsKeyPath() { - - assertThrows( - SQLException.class, - () -> { - try (ArrowFlightSqlClientHandler handler = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withPort(FLIGHT_SERVER_TEST_EXTENSION.getPort()) - .withUsername(userTest) - .withPassword(passTest) - .withTlsRootCertificates(tlsRootCertsPath) - .withClientCertificate(clientMTlsCertPath) - .withClientKey(badClientMTlsKeyPath) - .withBufferAllocator(allocator) - .withEncryption(true) - .build()) { - fail(); - } - }); - } - - /** - * Try to instantiate an encrypted FlightClient without credentials. - * - * @throws Exception on error. - */ - @Test - public void testGetNonAuthenticatedEncryptedClientNoAuth() throws Exception { - try (ArrowFlightSqlClientHandler client = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withTlsRootCertificates(tlsRootCertsPath) - .withClientCertificate(clientMTlsCertPath) - .withClientKey(clientMTlsKeyPath) - .withBufferAllocator(allocator) - .withEncryption(true) - .build()) { - assertNotNull(client); - } - } - - /** - * Check if an encrypted connection can be established successfully when the provided valid - * credentials and a valid TLS Root Certs path. - * - * @throws Exception on error. 
- */ - @Test - public void testGetEncryptedConnectionWithValidCredentialsAndTlsRootsPath() throws Exception { - final Properties properties = new Properties(); - - properties.put(ArrowFlightConnectionProperty.HOST.camelName(), "localhost"); - properties.put( - ArrowFlightConnectionProperty.PORT.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getPort()); - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), tlsRootCertsPath); - properties.put( - ArrowFlightConnectionProperty.CLIENT_CERTIFICATE.camelName(), clientMTlsCertPath); - properties.put(ArrowFlightConnectionProperty.CLIENT_KEY.camelName(), clientMTlsKeyPath); - - final ArrowFlightJdbcDataSource dataSource = - ArrowFlightJdbcDataSource.createNewDataSource(properties); - try (final Connection connection = dataSource.getConnection()) { - assertTrue(connection.isValid(300)); - } - } - - /** - * Check if an encrypted connection can be established successfully when not providing - * authentication. - * - * @throws Exception on error. 
- */ - @Test - public void testGetNonAuthenticatedEncryptedConnection() throws Exception { - final Properties properties = new Properties(); - - properties.put( - ArrowFlightConnectionProperty.HOST.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getHost()); - properties.put( - ArrowFlightConnectionProperty.PORT.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getPort()); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), true); - properties.put(ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), tlsRootCertsPath); - properties.put( - ArrowFlightConnectionProperty.CLIENT_CERTIFICATE.camelName(), clientMTlsCertPath); - properties.put(ArrowFlightConnectionProperty.CLIENT_KEY.camelName(), clientMTlsKeyPath); - - final ArrowFlightJdbcDataSource dataSource = - ArrowFlightJdbcDataSource.createNewDataSource(properties); - try (final Connection connection = dataSource.getConnection()) { - assertTrue(connection.isValid(300)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using just a connection url. - * - * @throws Exception on error. 
- */ - @Test - public void testTLSConnectionPropertyTrueCorrectCastUrlWithDriverManager() throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - final String jdbcUrl = - String.format( - "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s" - + "&useEncryption=true&%s=%s&%s=%s&%s=%s", - FLIGHT_SERVER_TEST_EXTENSION.getPort(), - userTest, - passTest, - ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), - URLEncoder.encode(tlsRootCertsPath, "UTF-8"), - ArrowFlightConnectionProperty.CLIENT_CERTIFICATE.camelName(), - URLEncoder.encode(clientMTlsCertPath, "UTF-8"), - ArrowFlightConnectionProperty.CLIENT_KEY.camelName(), - URLEncoder.encode(clientMTlsKeyPath, "UTF-8")); - - try (Connection connection = DriverManager.getConnection(jdbcUrl)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using a connection url and properties with String K-V pairs. - * - * @throws Exception on error. 
- */ - @Test - public void - testTLSConnectionPropertyTrueCorrectCastUrlAndPropertiesUsingSetPropertyWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - - properties.setProperty(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.setProperty(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.setProperty( - ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), tlsRootCertsPath); - properties.setProperty(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), "true"); - properties.setProperty( - ArrowFlightConnectionProperty.CLIENT_CERTIFICATE.camelName(), clientMTlsCertPath); - properties.setProperty(ArrowFlightConnectionProperty.CLIENT_KEY.camelName(), clientMTlsKeyPath); - - final String jdbcUrl = - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()); - - try (Connection connection = DriverManager.getConnection(jdbcUrl, properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using a connection url and properties with Object K-V pairs. - * - * @throws Exception on error. 
- */ - @Test - public void testTLSConnectionPropertyTrueCorrectCastUrlAndPropertiesUsingPutWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), true); - properties.put(ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), tlsRootCertsPath); - properties.put( - ArrowFlightConnectionProperty.CLIENT_CERTIFICATE.camelName(), clientMTlsCertPath); - properties.put(ArrowFlightConnectionProperty.CLIENT_KEY.camelName(), clientMTlsKeyPath); - - final String jdbcUrl = - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()); - - try (Connection connection = DriverManager.getConnection(jdbcUrl, properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using just a connection url and using 0 and 1 as ssl values. - * - * @throws Exception on error. 
- */ - @Test - public void testTLSConnectionPropertyTrueIntegerCorrectCastUrlWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - final String jdbcUrl = - String.format( - "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s" - + "&useEncryption=1&useSystemTrustStore=0&%s=%s&%s=%s&%s=%s", - FLIGHT_SERVER_TEST_EXTENSION.getPort(), - userTest, - passTest, - ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), - URLEncoder.encode(tlsRootCertsPath, "UTF-8"), - ArrowFlightConnectionProperty.CLIENT_CERTIFICATE.camelName(), - URLEncoder.encode(clientMTlsCertPath, "UTF-8"), - ArrowFlightConnectionProperty.CLIENT_KEY.camelName(), - URLEncoder.encode(clientMTlsKeyPath, "UTF-8")); - - try (Connection connection = DriverManager.getConnection(jdbcUrl)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using a connection url and properties with String K-V pairs and using 0 and 1 as - * ssl values. - * - * @throws Exception on error. 
- */ - @Test - public void - testTLSConnectionPropertyTrueIntegerCorrectCastUrlAndPropertiesUsingSetPropertyWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - - properties.setProperty(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.setProperty(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.setProperty( - ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), tlsRootCertsPath); - properties.setProperty(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), "1"); - properties.setProperty( - ArrowFlightConnectionProperty.CLIENT_CERTIFICATE.camelName(), clientMTlsCertPath); - properties.setProperty(ArrowFlightConnectionProperty.CLIENT_KEY.camelName(), clientMTlsKeyPath); - - final String jdbcUrl = - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()); - - try (Connection connection = DriverManager.getConnection(jdbcUrl, properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using a connection url and properties with Object K-V pairs and using 0 and 1 as - * ssl values. - * - * @throws Exception on error. 
- */ - @Test - public void - testTLSConnectionPropertyTrueIntegerCorrectCastUrlAndPropertiesUsingPutWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), 1); - properties.put(ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), tlsRootCertsPath); - properties.put( - ArrowFlightConnectionProperty.CLIENT_CERTIFICATE.camelName(), clientMTlsCertPath); - properties.put(ArrowFlightConnectionProperty.CLIENT_KEY.camelName(), clientMTlsKeyPath); - - final String jdbcUrl = - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()); - - try (Connection connection = DriverManager.getConnection(jdbcUrl, properties)) { - assertTrue(connection.isValid(0)); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTest.java deleted file mode 100644 index 8e872a11671c5..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTest.java +++ /dev/null @@ -1,579 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.net.URISyntaxException; -import java.sql.Connection; -import java.sql.Driver; -import java.sql.DriverManager; -import java.sql.SQLException; -import java.util.Properties; -import org.apache.arrow.driver.jdbc.authentication.UserPasswordAuthentication; -import org.apache.arrow.driver.jdbc.client.ArrowFlightSqlClientHandler; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty; -import org.apache.arrow.driver.jdbc.utils.MockFlightSqlProducer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -/** Tests for {@link Connection}. 
*/ -public class ConnectionTest { - - @RegisterExtension public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION; - private static final MockFlightSqlProducer PRODUCER = new MockFlightSqlProducer(); - private static final String userTest = "user1"; - private static final String passTest = "pass1"; - - static { - UserPasswordAuthentication authentication = - new UserPasswordAuthentication.Builder().user(userTest, passTest).build(); - - FLIGHT_SERVER_TEST_EXTENSION = - new FlightServerTestExtension.Builder() - .authentication(authentication) - .producer(PRODUCER) - .build(); - } - - private BufferAllocator allocator; - - @BeforeEach - public void setUp() throws Exception { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void tearDown() throws Exception { - allocator.getChildAllocators().forEach(BufferAllocator::close); - AutoCloseables.close(allocator); - } - - /** - * Checks if an unencrypted connection can be established successfully when the provided valid - * credentials. - * - * @throws SQLException on error. - */ - @Test - public void testUnencryptedConnectionShouldOpenSuccessfullyWhenProvidedValidCredentials() - throws Exception { - final Properties properties = new Properties(); - - properties.put(ArrowFlightConnectionProperty.HOST.camelName(), "localhost"); - properties.put( - ArrowFlightConnectionProperty.PORT.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getPort()); - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put("useEncryption", false); - - try (Connection connection = - DriverManager.getConnection( - "jdbc:arrow-flight-sql://" - + FLIGHT_SERVER_TEST_EXTENSION.getHost() - + ":" - + FLIGHT_SERVER_TEST_EXTENSION.getPort(), - properties)) { - assertTrue(connection.isValid(300)); - } - } - - /** - * Checks if a token is provided it takes precedence over username/pass. 
In this case, the - * connection should fail if a token is passed in. - */ - @Test - public void testTokenOverridesUsernameAndPasswordAuth() { - final Properties properties = new Properties(); - - properties.put(ArrowFlightConnectionProperty.HOST.camelName(), "localhost"); - properties.put( - ArrowFlightConnectionProperty.PORT.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getPort()); - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.TOKEN.camelName(), "token"); - properties.put("useEncryption", false); - - SQLException e = - assertThrows( - SQLException.class, - () -> { - try (Connection conn = - DriverManager.getConnection( - "jdbc:arrow-flight-sql://" - + FLIGHT_SERVER_TEST_EXTENSION.getHost() - + ":" - + FLIGHT_SERVER_TEST_EXTENSION.getPort(), - properties)) { - fail(); - } - }); - assertTrue(e.getMessage().contains("UNAUTHENTICATED")); - } - - /** - * Checks if the exception SQLException is thrown when trying to establish a connection without a - * host. - * - * @throws SQLException on error. - */ - @Test - public void testUnencryptedConnectionWithEmptyHost() throws Exception { - final Properties properties = new Properties(); - - properties.put("user", userTest); - properties.put("password", passTest); - final String invalidUrl = "jdbc:arrow-flight-sql://"; - - assertThrows( - SQLException.class, - () -> { - try (Connection conn = DriverManager.getConnection(invalidUrl, properties)) { - fail("Expected SQLException."); - } - }); - } - - /** - * Try to instantiate a basic FlightClient. - * - * @throws URISyntaxException on error. 
- */ - @Test - public void testGetBasicClientAuthenticatedShouldOpenConnection() throws Exception { - - try (ArrowFlightSqlClientHandler client = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withPort(FLIGHT_SERVER_TEST_EXTENSION.getPort()) - .withEncryption(false) - .withUsername(userTest) - .withPassword(passTest) - .withBufferAllocator(allocator) - .build()) { - - assertNotNull(client); - } - } - - /** - * Checks if the exception IllegalArgumentException is thrown when trying to establish an - * unencrypted connection providing with an invalid port. - * - * @throws SQLException on error. - */ - @Test - public void testUnencryptedConnectionProvidingInvalidPort() throws Exception { - final Properties properties = new Properties(); - - properties.put(ArrowFlightConnectionProperty.HOST.camelName(), "localhost"); - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), false); - final String invalidUrl = - "jdbc:arrow-flight-sql://" + FLIGHT_SERVER_TEST_EXTENSION.getHost() + ":" + 65537; - - assertThrows( - SQLException.class, - () -> { - try (Connection conn = DriverManager.getConnection(invalidUrl, properties)) { - fail("Expected SQLException"); - } - }); - } - - /** - * Try to instantiate a basic FlightClient. - * - * @throws URISyntaxException on error. - */ - @Test - public void testGetBasicClientNoAuthShouldOpenConnection() throws Exception { - - try (ArrowFlightSqlClientHandler client = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withBufferAllocator(allocator) - .withEncryption(false) - .build()) { - assertNotNull(client); - } - } - - /** - * Checks if an unencrypted connection can be established successfully when not providing - * credentials. - * - * @throws SQLException on error. 
- */ - @Test - public void testUnencryptedConnectionShouldOpenSuccessfullyWithoutAuthentication() - throws Exception { - final Properties properties = new Properties(); - properties.put(ArrowFlightConnectionProperty.HOST.camelName(), "localhost"); - properties.put( - ArrowFlightConnectionProperty.PORT.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getPort()); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), false); - try (Connection connection = - DriverManager.getConnection("jdbc:arrow-flight-sql://localhost:32010", properties)) { - assertTrue(connection.isValid(300)); - } - } - - /** - * Check if an unencrypted connection throws an exception when provided with invalid credentials. - * - * @throws SQLException The exception expected to be thrown. - */ - @Test - public void testUnencryptedConnectionShouldThrowExceptionWhenProvidedWithInvalidCredentials() - throws Exception { - - final Properties properties = new Properties(); - - properties.put(ArrowFlightConnectionProperty.HOST.camelName(), "localhost"); - properties.put(ArrowFlightConnectionProperty.USER.camelName(), "invalidUser"); - properties.put( - ArrowFlightConnectionProperty.PORT.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getPort()); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), false); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), "invalidPassword"); - - assertThrows( - SQLException.class, - () -> { - try (Connection ignored = - DriverManager.getConnection("jdbc:arrow-flight-sql://localhost:32010", properties)) { - fail(); - } - }); - } - - /** - * Check if an non-encrypted connection can be established successfully when connecting through - * the DriverManager using just a connection url. - * - * @throws Exception on error. 
- */ - @Test - public void testTLSConnectionPropertyFalseCorrectCastUrlWithDriverManager() throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - try (Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s&useEncryption=false", - FLIGHT_SERVER_TEST_EXTENSION.getPort(), userTest, passTest))) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an non-encrypted connection can be established successfully when connecting through - * the DriverManager using a connection url and properties with String K-V pairs. - * - * @throws Exception on error. - */ - @Test - public void - testTLSConnectionPropertyFalseCorrectCastUrlAndPropertiesUsingSetPropertyWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - - properties.setProperty(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.setProperty(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.setProperty(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), "false"); - - try (Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an non-encrypted connection can be established successfully when connecting through - * the DriverManager using a connection url and properties with Object K-V pairs. - * - * @throws Exception on error. 
- */ - @Test - public void testTLSConnectionPropertyFalseCorrectCastUrlAndPropertiesUsingPutWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), false); - - try (Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an non-encrypted connection can be established successfully when connecting through - * the DriverManager using just a connection url and using 0 and 1 as ssl values. - * - * @throws Exception on error. - */ - @Test - public void testTLSConnectionPropertyFalseIntegerCorrectCastUrlWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - try (Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s&useEncryption=0", - FLIGHT_SERVER_TEST_EXTENSION.getPort(), userTest, passTest))) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an non-encrypted connection can be established successfully when connecting through - * the DriverManager using a connection url and properties with String K-V pairs and using 0 and 1 - * as ssl values. - * - * @throws Exception on error. 
- */ - @Test - public void - testTLSConnectionPropertyFalseIntegerCorrectCastUrlAndPropertiesUsingSetPropertyWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - Properties properties = new Properties(); - - properties.setProperty(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.setProperty(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.setProperty(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), "0"); - - try (Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an non-encrypted connection can be established successfully when connecting through - * the DriverManager using a connection url and properties with Object K-V pairs and using 0 and 1 - * as ssl values. - * - * @throws Exception on error. - */ - @Test - public void - testTLSConnectionPropertyFalseIntegerCorrectCastUrlAndPropertiesUsingPutWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), 0); - - try (Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an non-encrypted connection can be established successfully when connecting through - * the DriverManager using just a connection url. - * - * @throws Exception on error. 
- */ - @Test - public void testThreadPoolSizeConnectionPropertyCorrectCastUrlWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - try (Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s&threadPoolSize=1&useEncryption=%s", - FLIGHT_SERVER_TEST_EXTENSION.getPort(), userTest, passTest, false))) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an non-encrypted connection can be established successfully when connecting through - * the DriverManager using a connection url and properties with String K-V pairs and using 0 and 1 - * as ssl values. - * - * @throws Exception on error. - */ - @Test - public void - testThreadPoolSizeConnectionPropertyCorrectCastUrlAndPropertiesUsingSetPropertyWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - Properties properties = new Properties(); - - properties.setProperty(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.setProperty(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.setProperty(ArrowFlightConnectionProperty.THREAD_POOL_SIZE.camelName(), "1"); - properties.put("useEncryption", false); - - try (Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an non-encrypted connection can be established successfully when connecting through - * the DriverManager using a connection url and properties with Object K-V pairs and using 0 and 1 - * as ssl values. - * - * @throws Exception on error. 
- */ - @Test - public void - testThreadPoolSizeConnectionPropertyCorrectCastUrlAndPropertiesUsingPutWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.THREAD_POOL_SIZE.camelName(), 1); - properties.put("useEncryption", false); - - try (Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an non-encrypted connection can be established successfully when connecting through - * the DriverManager using just a connection url. - * - * @throws Exception on error. - */ - @Test - public void testPasswordConnectionPropertyIntegerCorrectCastUrlWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - try (Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s&useEncryption=%s", - FLIGHT_SERVER_TEST_EXTENSION.getPort(), userTest, passTest, false))) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an non-encrypted connection can be established successfully when connecting through - * the DriverManager using a connection url and properties with String K-V pairs and using 0 and 1 - * as ssl values. - * - * @throws Exception on error. 
- */ - @Test - public void - testPasswordConnectionPropertyIntegerCorrectCastUrlAndPropertiesUsingSetPropertyWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - Properties properties = new Properties(); - - properties.setProperty(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.setProperty(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put("useEncryption", false); - - try (Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an non-encrypted connection can be established successfully when connecting through - * the DriverManager using a connection url and properties with Object K-V pairs and using 0 and 1 - * as ssl values. - * - * @throws Exception on error. - */ - @Test - public void - testPasswordConnectionPropertyIntegerCorrectCastUrlAndPropertiesUsingPutWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put("useEncryption", false); - - try (Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsRootCertsTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsRootCertsTest.java deleted file mode 100644 index f46ab1fa1c373..0000000000000 --- 
a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsRootCertsTest.java +++ /dev/null @@ -1,363 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.net.URLEncoder; -import java.sql.Connection; -import java.sql.Driver; -import java.sql.DriverManager; -import java.sql.SQLException; -import java.util.Properties; -import org.apache.arrow.driver.jdbc.authentication.UserPasswordAuthentication; -import org.apache.arrow.driver.jdbc.client.ArrowFlightSqlClientHandler; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty; -import org.apache.arrow.driver.jdbc.utils.FlightSqlTestCertificates; -import org.apache.arrow.driver.jdbc.utils.MockFlightSqlProducer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.junit.jupiter.api.AfterEach; -import 
org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -/** Tests encrypted connections. */ -public class ConnectionTlsRootCertsTest { - - @RegisterExtension public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION; - private static final String tlsRootCertsPath; - private static final String badTlsRootCertsPath; - private static final MockFlightSqlProducer PRODUCER = new MockFlightSqlProducer(); - private static final String userTest = "user1"; - private static final String passTest = "pass1"; - - static { - final FlightSqlTestCertificates.CertKeyPair certKey = - FlightSqlTestCertificates.exampleTlsCerts().get(0); - - tlsRootCertsPath = certKey.cert.getPath(); - - badTlsRootCertsPath = certKey.cert.getPath() + ".bad"; - - UserPasswordAuthentication authentication = - new UserPasswordAuthentication.Builder().user(userTest, passTest).build(); - - FLIGHT_SERVER_TEST_EXTENSION = - new FlightServerTestExtension.Builder() - .authentication(authentication) - .useEncryption(certKey.cert, certKey.key) - .producer(PRODUCER) - .build(); - } - - private BufferAllocator allocator; - - @BeforeEach - public void setUp() throws Exception { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void tearDown() throws Exception { - allocator.getChildAllocators().forEach(BufferAllocator::close); - AutoCloseables.close(allocator); - } - - /** - * Try to instantiate an encrypted FlightClient. - * - * @throws Exception on error. 
- */ - @Test - public void testGetEncryptedClientAuthenticated() throws Exception { - - try (ArrowFlightSqlClientHandler client = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withPort(FLIGHT_SERVER_TEST_EXTENSION.getPort()) - .withUsername(userTest) - .withPassword(passTest) - .withTlsRootCertificates(tlsRootCertsPath) - .withBufferAllocator(allocator) - .withEncryption(true) - .build()) { - assertNotNull(client); - } - } - - /** - * Try to instantiate an encrypted FlightClient providing a bad TLS Root Certs Path. It's expected - * to receive the SQLException. - */ - @Test - public void testGetEncryptedClientWithNoCertificateOnKeyStore() { - assertThrows( - SQLException.class, - () -> { - try (ArrowFlightSqlClientHandler handler = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withTlsRootCertificates(badTlsRootCertsPath) - .withBufferAllocator(allocator) - .withEncryption(true) - .build()) { - fail(); - } - }); - } - - /** - * Try to instantiate an encrypted FlightClient without credentials. - * - * @throws Exception on error. - */ - @Test - public void testGetNonAuthenticatedEncryptedClientNoAuth() throws Exception { - try (ArrowFlightSqlClientHandler client = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withTlsRootCertificates(tlsRootCertsPath) - .withBufferAllocator(allocator) - .withEncryption(true) - .build()) { - assertNotNull(client); - } - } - - /** - * Check if an encrypted connection can be established successfully when the provided valid - * credentials and a valid TLS Root Certs path. - * - * @throws Exception on error. 
- */ - @Test - public void testGetEncryptedConnectionWithValidCredentialsAndTlsRootsPath() throws Exception { - final Properties properties = new Properties(); - - properties.put(ArrowFlightConnectionProperty.HOST.camelName(), "localhost"); - properties.put( - ArrowFlightConnectionProperty.PORT.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getPort()); - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), tlsRootCertsPath); - - final ArrowFlightJdbcDataSource dataSource = - ArrowFlightJdbcDataSource.createNewDataSource(properties); - try (final Connection connection = dataSource.getConnection()) { - assertTrue(connection.isValid(300)); - } - } - - /** - * Check if an encrypted connection can be established successfully when not providing - * authentication. - * - * @throws Exception on error. - */ - @Test - public void testGetNonAuthenticatedEncryptedConnection() throws Exception { - final Properties properties = new Properties(); - - properties.put( - ArrowFlightConnectionProperty.HOST.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getHost()); - properties.put( - ArrowFlightConnectionProperty.PORT.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getPort()); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), true); - properties.put(ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), tlsRootCertsPath); - - final ArrowFlightJdbcDataSource dataSource = - ArrowFlightJdbcDataSource.createNewDataSource(properties); - try (final Connection connection = dataSource.getConnection()) { - assertTrue(connection.isValid(300)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using just a connection url. - * - * @throws Exception on error. 
- */ - @Test - public void testTLSConnectionPropertyTrueCorrectCastUrlWithDriverManager() throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - try (final Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s" - + "&useEncryption=true&%s=%s", - FLIGHT_SERVER_TEST_EXTENSION.getPort(), - userTest, - passTest, - ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), - URLEncoder.encode(tlsRootCertsPath, "UTF-8")))) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using a connection url and properties with String K-V pairs. - * - * @throws Exception on error. - */ - @Test - public void - testTLSConnectionPropertyTrueCorrectCastUrlAndPropertiesUsingSetPropertyWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - - properties.setProperty(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.setProperty(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.setProperty( - ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), tlsRootCertsPath); - properties.setProperty(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), "true"); - - try (final Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using a connection url and properties with Object K-V pairs. - * - * @throws Exception on error. 
- */ - @Test - public void testTLSConnectionPropertyTrueCorrectCastUrlAndPropertiesUsingPutWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), true); - properties.put(ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), tlsRootCertsPath); - - try (final Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using just a connection url and using 0 and 1 as ssl values. - * - * @throws Exception on error. - */ - @Test - public void testTLSConnectionPropertyTrueIntegerCorrectCastUrlWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - try (final Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s" - + "&useEncryption=1&useSystemTrustStore=0&%s=%s", - FLIGHT_SERVER_TEST_EXTENSION.getPort(), - userTest, - passTest, - ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), - URLEncoder.encode(tlsRootCertsPath, "UTF-8")))) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using a connection url and properties with String K-V pairs and using 0 and 1 as - * ssl values. - * - * @throws Exception on error. 
- */ - @Test - public void - testTLSConnectionPropertyTrueIntegerCorrectCastUrlAndPropertiesUsingSetPropertyWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - - properties.setProperty(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.setProperty(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.setProperty( - ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), tlsRootCertsPath); - properties.setProperty(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), "1"); - - try (final Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using a connection url and properties with Object K-V pairs and using 0 and 1 as - * ssl values. - * - * @throws Exception on error. 
- */ - @Test - public void - testTLSConnectionPropertyTrueIntegerCorrectCastUrlAndPropertiesUsingPutWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), 1); - properties.put(ArrowFlightConnectionProperty.TLS_ROOT_CERTS.camelName(), tlsRootCertsPath); - - try (final Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsTest.java deleted file mode 100644 index 387436afe99d2..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsTest.java +++ /dev/null @@ -1,478 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.net.URLEncoder; -import java.nio.file.Paths; -import java.sql.Connection; -import java.sql.Driver; -import java.sql.DriverManager; -import java.sql.SQLException; -import java.util.Properties; -import org.apache.arrow.driver.jdbc.authentication.UserPasswordAuthentication; -import org.apache.arrow.driver.jdbc.client.ArrowFlightSqlClientHandler; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty; -import org.apache.arrow.driver.jdbc.utils.FlightSqlTestCertificates; -import org.apache.arrow.driver.jdbc.utils.MockFlightSqlProducer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Preconditions; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -/** Tests encrypted connections. 
*/ -public class ConnectionTlsTest { - - @RegisterExtension public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION; - private static final MockFlightSqlProducer PRODUCER = new MockFlightSqlProducer(); - private static final String userTest = "user1"; - private static final String passTest = "pass1"; - - static { - final FlightSqlTestCertificates.CertKeyPair certKey = - FlightSqlTestCertificates.exampleTlsCerts().get(0); - - UserPasswordAuthentication authentication = - new UserPasswordAuthentication.Builder().user(userTest, passTest).build(); - - FLIGHT_SERVER_TEST_EXTENSION = - new FlightServerTestExtension.Builder() - .authentication(authentication) - .useEncryption(certKey.cert, certKey.key) - .producer(PRODUCER) - .build(); - } - - private String trustStorePath; - private String noCertificateKeyStorePath; - private final String trustStorePass = "flight"; - private BufferAllocator allocator; - - @BeforeEach - public void setUp() throws Exception { - trustStorePath = - Paths.get(Preconditions.checkNotNull(getClass().getResource("/keys/keyStore.jks")).toURI()) - .toString(); - noCertificateKeyStorePath = - Paths.get( - Preconditions.checkNotNull(getClass().getResource("/keys/noCertificate.jks")) - .toURI()) - .toString(); - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void tearDown() throws Exception { - allocator.getChildAllocators().forEach(BufferAllocator::close); - AutoCloseables.close(allocator); - } - - /** - * Try to instantiate an encrypted FlightClient. - * - * @throws Exception on error. 
- */ - @Test - public void testGetEncryptedClientAuthenticatedWithDisableCertVerification() throws Exception { - - try (ArrowFlightSqlClientHandler client = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withPort(FLIGHT_SERVER_TEST_EXTENSION.getPort()) - .withUsername(userTest) - .withPassword(passTest) - .withDisableCertificateVerification(true) - .withBufferAllocator(allocator) - .withEncryption(true) - .build()) { - assertNotNull(client); - } - } - - /** - * Try to instantiate an encrypted FlightClient. - * - * @throws Exception on error. - */ - @Test - public void testGetEncryptedClientAuthenticated() throws Exception { - - try (ArrowFlightSqlClientHandler client = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withPort(FLIGHT_SERVER_TEST_EXTENSION.getPort()) - .withSystemTrustStore(false) - .withUsername(userTest) - .withPassword(passTest) - .withTrustStorePath(trustStorePath) - .withTrustStorePassword(trustStorePass) - .withBufferAllocator(allocator) - .withEncryption(true) - .build()) { - assertNotNull(client); - } - } - - /** - * Try to instantiate an encrypted FlightClient providing a keystore without certificate. It's - * expected to receive the SQLException. - * - * @throws Exception on error. - */ - @Test - public void testGetEncryptedClientWithNoCertificateOnKeyStore() throws Exception { - final String noCertificateKeyStorePassword = "flight1"; - - assertThrows( - SQLException.class, - () -> { - try (ArrowFlightSqlClientHandler ignored = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withTrustStorePath(noCertificateKeyStorePath) - .withTrustStorePassword(noCertificateKeyStorePassword) - .withSystemTrustStore(false) - .withBufferAllocator(allocator) - .withEncryption(true) - .build()) { - fail(); - } - }); - } - - /** - * Try to instantiate an encrypted FlightClient without credentials. 
- * - * @throws Exception on error. - */ - @Test - public void testGetNonAuthenticatedEncryptedClientNoAuth() throws Exception { - try (ArrowFlightSqlClientHandler client = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withSystemTrustStore(false) - .withTrustStorePath(trustStorePath) - .withTrustStorePassword(trustStorePass) - .withBufferAllocator(allocator) - .withEncryption(true) - .build()) { - assertNotNull(client); - } - } - - /** - * Try to instantiate an encrypted FlightClient with an invalid password to the keystore file. - * It's expected to receive the SQLException. - * - * @throws Exception on error. - */ - @Test - public void testGetEncryptedClientWithKeyStoreBadPasswordAndNoAuth() throws Exception { - String keyStoreBadPassword = "badPassword"; - - assertThrows( - SQLException.class, - () -> { - try (ArrowFlightSqlClientHandler ignored = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withSystemTrustStore(false) - .withTrustStorePath(trustStorePath) - .withTrustStorePassword(keyStoreBadPassword) - .withBufferAllocator(allocator) - .withEncryption(true) - .build()) { - fail(); - } - }); - } - - /** - * Check if an encrypted connection can be established successfully when the provided valid - * credentials and a valid Keystore. - * - * @throws Exception on error. 
- */ - @Test - public void testGetEncryptedConnectionWithValidCredentialsAndKeyStore() throws Exception { - final Properties properties = new Properties(); - - properties.put(ArrowFlightConnectionProperty.HOST.camelName(), "localhost"); - properties.put( - ArrowFlightConnectionProperty.PORT.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getPort()); - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.TRUST_STORE.camelName(), trustStorePath); - properties.put(ArrowFlightConnectionProperty.USE_SYSTEM_TRUST_STORE.camelName(), false); - properties.put(ArrowFlightConnectionProperty.TRUST_STORE_PASSWORD.camelName(), trustStorePass); - - final ArrowFlightJdbcDataSource dataSource = - ArrowFlightJdbcDataSource.createNewDataSource(properties); - try (final Connection connection = dataSource.getConnection()) { - assertTrue(connection.isValid(300)); - } - } - - /** - * Check if the SQLException is thrown when trying to establish an encrypted connection providing - * valid credentials but invalid password to the Keystore. - * - * @throws SQLException on error. 
- */ - @Test - public void testGetAuthenticatedEncryptedConnectionWithKeyStoreBadPassword() throws Exception { - final Properties properties = new Properties(); - - properties.put( - ArrowFlightConnectionProperty.HOST.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getHost()); - properties.put( - ArrowFlightConnectionProperty.PORT.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getPort()); - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), true); - properties.put(ArrowFlightConnectionProperty.TRUST_STORE.camelName(), trustStorePath); - properties.put(ArrowFlightConnectionProperty.TRUST_STORE_PASSWORD.camelName(), "badpassword"); - - final ArrowFlightJdbcDataSource dataSource = - ArrowFlightJdbcDataSource.createNewDataSource(properties); - - assertThrows( - SQLException.class, - () -> { - try (final Connection ignored = dataSource.getConnection()) { - fail(); - } - }); - } - - /** - * Check if an encrypted connection can be established successfully when not providing - * authentication. - * - * @throws Exception on error. 
- */ - @Test - public void testGetNonAuthenticatedEncryptedConnection() throws Exception { - final Properties properties = new Properties(); - - properties.put( - ArrowFlightConnectionProperty.HOST.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getHost()); - properties.put( - ArrowFlightConnectionProperty.PORT.camelName(), FLIGHT_SERVER_TEST_EXTENSION.getPort()); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), true); - properties.put(ArrowFlightConnectionProperty.USE_SYSTEM_TRUST_STORE.camelName(), false); - properties.put(ArrowFlightConnectionProperty.TRUST_STORE.camelName(), trustStorePath); - properties.put(ArrowFlightConnectionProperty.TRUST_STORE_PASSWORD.camelName(), trustStorePass); - - final ArrowFlightJdbcDataSource dataSource = - ArrowFlightJdbcDataSource.createNewDataSource(properties); - try (final Connection connection = dataSource.getConnection()) { - assertTrue(connection.isValid(300)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using just a connection url. - * - * @throws Exception on error. 
- */ - @Test - public void testTLSConnectionPropertyTrueCorrectCastUrlWithDriverManager() throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - try (final Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s" - + "&useEncryption=true&useSystemTrustStore=false&%s=%s&%s=%s", - FLIGHT_SERVER_TEST_EXTENSION.getPort(), - userTest, - passTest, - ArrowFlightConnectionProperty.TRUST_STORE.camelName(), - URLEncoder.encode(trustStorePath, "UTF-8"), - ArrowFlightConnectionProperty.TRUST_STORE_PASSWORD.camelName(), - URLEncoder.encode(trustStorePass, "UTF-8")))) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using a connection url and properties with String K-V pairs. - * - * @throws Exception on error. - */ - @Test - public void - testTLSConnectionPropertyTrueCorrectCastUrlAndPropertiesUsingSetPropertyWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - - properties.setProperty(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.setProperty(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.setProperty(ArrowFlightConnectionProperty.TRUST_STORE.camelName(), trustStorePath); - properties.setProperty( - ArrowFlightConnectionProperty.TRUST_STORE_PASSWORD.camelName(), trustStorePass); - properties.setProperty(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), "true"); - properties.setProperty( - ArrowFlightConnectionProperty.USE_SYSTEM_TRUST_STORE.camelName(), "false"); - - try (final Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - 
properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using a connection url and properties with Object K-V pairs. - * - * @throws Exception on error. - */ - @Test - public void testTLSConnectionPropertyTrueCorrectCastUrlAndPropertiesUsingPutWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), true); - properties.put(ArrowFlightConnectionProperty.USE_SYSTEM_TRUST_STORE.camelName(), false); - properties.put(ArrowFlightConnectionProperty.TRUST_STORE.camelName(), trustStorePath); - properties.put(ArrowFlightConnectionProperty.TRUST_STORE_PASSWORD.camelName(), trustStorePass); - - try (final Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using just a connection url and using 0 and 1 as ssl values. - * - * @throws Exception on error. 
- */ - @Test - public void testTLSConnectionPropertyTrueIntegerCorrectCastUrlWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - try (final Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s?user=%s&password=%s" - + "&useEncryption=1&useSystemTrustStore=0&%s=%s&%s=%s", - FLIGHT_SERVER_TEST_EXTENSION.getPort(), - userTest, - passTest, - ArrowFlightConnectionProperty.TRUST_STORE.camelName(), - URLEncoder.encode(trustStorePath, "UTF-8"), - ArrowFlightConnectionProperty.TRUST_STORE_PASSWORD.camelName(), - URLEncoder.encode(trustStorePass, "UTF-8")))) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using a connection url and properties with String K-V pairs and using 0 and 1 as - * ssl values. - * - * @throws Exception on error. 
- */ - @Test - public void - testTLSConnectionPropertyTrueIntegerCorrectCastUrlAndPropertiesUsingSetPropertyWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - - properties.setProperty(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.setProperty(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.setProperty(ArrowFlightConnectionProperty.TRUST_STORE.camelName(), trustStorePath); - properties.setProperty( - ArrowFlightConnectionProperty.TRUST_STORE_PASSWORD.camelName(), trustStorePass); - properties.setProperty(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), "1"); - properties.setProperty(ArrowFlightConnectionProperty.USE_SYSTEM_TRUST_STORE.camelName(), "0"); - - try (final Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } - - /** - * Check if an encrypted connection can be established successfully when connecting through the - * DriverManager using a connection url and properties with Object K-V pairs and using 0 and 1 as - * ssl values. - * - * @throws Exception on error. 
- */ - @Test - public void - testTLSConnectionPropertyTrueIntegerCorrectCastUrlAndPropertiesUsingPutWithDriverManager() - throws Exception { - final Driver driver = new ArrowFlightJdbcDriver(); - DriverManager.registerDriver(driver); - - Properties properties = new Properties(); - - properties.put(ArrowFlightConnectionProperty.USER.camelName(), userTest); - properties.put(ArrowFlightConnectionProperty.PASSWORD.camelName(), passTest); - properties.put(ArrowFlightConnectionProperty.USE_ENCRYPTION.camelName(), 1); - properties.put(ArrowFlightConnectionProperty.USE_SYSTEM_TRUST_STORE.camelName(), 0); - properties.put(ArrowFlightConnectionProperty.TRUST_STORE.camelName(), trustStorePath); - properties.put(ArrowFlightConnectionProperty.TRUST_STORE_PASSWORD.camelName(), trustStorePass); - - try (final Connection connection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://localhost:%s", FLIGHT_SERVER_TEST_EXTENSION.getPort()), - properties)) { - assertTrue(connection.isValid(0)); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/FlightServerTestExtension.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/FlightServerTestExtension.java deleted file mode 100644 index aa586651f569f..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/FlightServerTestExtension.java +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static org.apache.arrow.driver.jdbc.utils.FlightSqlTestCertificates.CertKeyPair; - -import java.io.File; -import java.io.IOException; -import java.lang.reflect.Method; -import java.sql.Connection; -import java.sql.SQLException; -import java.util.ArrayDeque; -import java.util.Deque; -import java.util.Properties; -import org.apache.arrow.driver.jdbc.authentication.Authentication; -import org.apache.arrow.driver.jdbc.authentication.TokenAuthentication; -import org.apache.arrow.driver.jdbc.authentication.UserPasswordAuthentication; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl; -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.CallInfo; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightServerMiddleware; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.RequestContext; -import org.apache.arrow.flight.sql.FlightSqlProducer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Preconditions; -import org.junit.jupiter.api.extension.AfterAllCallback; -import org.junit.jupiter.api.extension.BeforeAllCallback; -import org.junit.jupiter.api.extension.ExtensionContext; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Utility class for unit tests that need to instantiate a {@link FlightServer} and interact with - * it. 
- */ -public class FlightServerTestExtension - implements BeforeAllCallback, AfterAllCallback, AutoCloseable { - public static final String DEFAULT_USER = "flight-test-user"; - public static final String DEFAULT_PASSWORD = "flight-test-password"; - - private static final Logger LOGGER = LoggerFactory.getLogger(FlightServerTestExtension.class); - - private final Properties properties; - private final ArrowFlightConnectionConfigImpl config; - private final BufferAllocator allocator; - private final FlightSqlProducer producer; - private final Authentication authentication; - private final CertKeyPair certKeyPair; - private final File mTlsCACert; - - private final MiddlewareCookie.Factory middlewareCookieFactory = new MiddlewareCookie.Factory(); - - private FlightServerTestExtension( - final Properties properties, - final ArrowFlightConnectionConfigImpl config, - final BufferAllocator allocator, - final FlightSqlProducer producer, - final Authentication authentication, - final CertKeyPair certKeyPair, - final File mTlsCACert) { - this.properties = Preconditions.checkNotNull(properties); - this.config = Preconditions.checkNotNull(config); - this.allocator = Preconditions.checkNotNull(allocator); - this.producer = Preconditions.checkNotNull(producer); - this.authentication = authentication; - this.certKeyPair = certKeyPair; - this.mTlsCACert = mTlsCACert; - } - - /** - * Create a {@link FlightServerTestExtension} with standard values such as: user, password, - * localhost. - * - * @param producer the producer used to create the FlightServerTestExtension. - * @return the FlightServerTestExtension. 
- */ - public static FlightServerTestExtension createStandardTestExtension( - final FlightSqlProducer producer) { - UserPasswordAuthentication authentication = - new UserPasswordAuthentication.Builder().user(DEFAULT_USER, DEFAULT_PASSWORD).build(); - - return new Builder().authentication(authentication).producer(producer).build(); - } - - ArrowFlightJdbcDataSource createDataSource() { - return ArrowFlightJdbcDataSource.createNewDataSource(properties); - } - - public ArrowFlightJdbcConnectionPoolDataSource createConnectionPoolDataSource() { - return ArrowFlightJdbcConnectionPoolDataSource.createNewDataSource(properties); - } - - public ArrowFlightJdbcConnectionPoolDataSource createConnectionPoolDataSource( - boolean useEncryption) { - setUseEncryption(useEncryption); - return ArrowFlightJdbcConnectionPoolDataSource.createNewDataSource(properties); - } - - public Connection getConnection(boolean useEncryption, String token) throws SQLException { - properties.put("token", token); - - return getConnection(useEncryption); - } - - public Connection getConnection(boolean useEncryption) throws SQLException { - setUseEncryption(useEncryption); - return this.createDataSource().getConnection(); - } - - private void setUseEncryption(boolean useEncryption) { - properties.put("useEncryption", useEncryption); - } - - public MiddlewareCookie.Factory getMiddlewareCookieFactory() { - return middlewareCookieFactory; - } - - @FunctionalInterface - public interface CheckedFunction { - R apply(T t) throws IOException; - } - - private FlightServer initiateServer(Location location) throws IOException { - FlightServer.Builder builder = - FlightServer.builder(allocator, location, producer) - .headerAuthenticator(authentication.authenticate()) - .middleware(FlightServerMiddleware.Key.of("KEY"), middlewareCookieFactory); - if (certKeyPair != null) { - builder.useTls(certKeyPair.cert, certKeyPair.key); - } - if (mTlsCACert != null) { - builder.useMTlsClientVerification(mTlsCACert); - } - 
return builder.build(); - } - - @Override - public void beforeAll(ExtensionContext context) throws Exception { - try { - FlightServer flightServer = getStartServer(this::initiateServer, 3); - properties.put("port", flightServer.getPort()); - LOGGER.info("Started " + FlightServer.class.getName() + " as " + flightServer); - context.getStore(ExtensionContext.Namespace.GLOBAL).put("flightServer", flightServer); - } catch (Exception e) { - LOGGER.error("Failed to start FlightServer", e); - throw e; - } - } - - @Override - public void afterAll(ExtensionContext context) throws Exception { - FlightServer flightServer = - context.getStore(ExtensionContext.Namespace.GLOBAL).get("flightServer", FlightServer.class); - if (flightServer != null) { - flightServer.close(); - } - close(); - } - - private FlightServer getStartServer( - CheckedFunction newServerFromLocation, int retries) - throws IOException { - final Deque exceptions = new ArrayDeque<>(); - for (; retries > 0; retries--) { - final FlightServer server = - newServerFromLocation.apply(Location.forGrpcInsecure("localhost", 0)); - try { - Method start = server.getClass().getMethod("start"); - start.setAccessible(true); - start.invoke(server); - return server; - } catch (ReflectiveOperationException e) { - exceptions.add(e); - } - } - exceptions.forEach(e -> LOGGER.error("Failed to start FlightServer", e)); - throw new IOException(exceptions.pop().getCause()); - } - - /** - * Sets a port to be used. - * - * @return the port value. - */ - public int getPort() { - return config.getPort(); - } - - /** - * Sets a host to be used. - * - * @return the host value. - */ - public String getHost() { - return config.getHost(); - } - - @Override - public void close() throws Exception { - allocator.getChildAllocators().forEach(BufferAllocator::close); - AutoCloseables.close(allocator); - } - - /** Builder for {@link FlightServerTestExtension}. 
*/ - public static final class Builder { - private final Properties properties; - private FlightSqlProducer producer; - private Authentication authentication; - private CertKeyPair certKeyPair; - private File mTlsCACert; - - public Builder() { - this.properties = new Properties(); - this.properties.put("host", "localhost"); - } - - /** - * Sets the producer that will be used in the server rule. - * - * @param producer the flight sql producer. - * @return the Builder. - */ - public Builder producer(final FlightSqlProducer producer) { - this.producer = producer; - return this; - } - - /** - * Sets the type of the authentication that will be used in the server rules. There are two - * types of authentication: {@link UserPasswordAuthentication} and {@link TokenAuthentication}. - * - * @param authentication the type of authentication. - * @return the Builder. - */ - public Builder authentication(final Authentication authentication) { - this.authentication = authentication; - return this; - } - - /** - * Enable TLS on the server. - * - * @param certChain The certificate chain to use. - * @param key The private key to use. - * @return the Builder. - */ - public Builder useEncryption(final File certChain, final File key) { - certKeyPair = new CertKeyPair(certChain, key); - return this; - } - - /** - * Enable Client Verification via mTLS on the server. - * - * @param mTlsCACert The CA certificate to use for client verification. - * @return the Builder. - */ - public Builder useMTlsClientVerification(final File mTlsCACert) { - this.mTlsCACert = mTlsCACert; - return this; - } - - /** - * Builds the {@link FlightServerTestExtension} using the provided values. - * - * @return a {@link FlightServerTestExtension}. 
- */ - public FlightServerTestExtension build() { - authentication.populateProperties(properties); - return new FlightServerTestExtension( - properties, - new ArrowFlightConnectionConfigImpl(properties), - new RootAllocator(Long.MAX_VALUE), - producer, - authentication, - certKeyPair, - mTlsCACert); - } - } - - /** - * A middleware to handle with the cookies in the server. It is used to test if cookies are being - * sent properly. - */ - static class MiddlewareCookie implements FlightServerMiddleware { - - private final Factory factory; - - public MiddlewareCookie(Factory factory) { - this.factory = factory; - } - - @Override - public void onBeforeSendingHeaders(CallHeaders callHeaders) { - if (!factory.receivedCookieHeader) { - callHeaders.insert("Set-Cookie", "k=v"); - } - } - - @Override - public void onCallCompleted(CallStatus callStatus) {} - - @Override - public void onCallErrored(Throwable throwable) {} - - /** A factory for the MiddlewareCookie. */ - static class Factory implements FlightServerMiddleware.Factory { - - private boolean receivedCookieHeader = false; - private String cookie; - - @Override - public MiddlewareCookie onCallStarted( - CallInfo callInfo, CallHeaders callHeaders, RequestContext requestContext) { - cookie = callHeaders.get("Cookie"); - receivedCookieHeader = null != cookie; - return new MiddlewareCookie(this); - } - - public String getCookie() { - return cookie; - } - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ResultSetMetadataTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ResultSetMetadataTest.java deleted file mode 100644 index 4583194f50897..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ResultSetMetadataTest.java +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.notNullValue; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Statement; -import java.sql.Types; -import org.apache.arrow.driver.jdbc.utils.CoreMockedSqlProducers; -import org.hamcrest.CoreMatchers; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class ResultSetMetadataTest { - private static ResultSetMetaData metadata; - - private static Connection connection; - - @RegisterExtension - public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION = - FlightServerTestExtension.createStandardTestExtension( - CoreMockedSqlProducers.getLegacyProducer()); - - @BeforeAll - public static void setup() throws SQLException { - connection = FLIGHT_SERVER_TEST_EXTENSION.getConnection(false); - - try (Statement statement = connection.createStatement(); - ResultSet resultSet = - 
statement.executeQuery(CoreMockedSqlProducers.LEGACY_METADATA_SQL_CMD)) { - metadata = resultSet.getMetaData(); - } - } - - @AfterAll - public static void teardown() throws SQLException { - connection.close(); - } - - /** Test if {@link ResultSetMetaData} object is not null. */ - @Test - public void testShouldGetResultSetMetadata() { - assertThat(metadata, CoreMatchers.is(notNullValue())); - } - - /** - * Test if {@link ResultSetMetaData#getColumnCount()} returns the correct values. - * - * @throws SQLException in case of error. - */ - @Test - public void testShouldGetColumnCount() throws SQLException { - final int columnCount = metadata.getColumnCount(); - - assert columnCount == 3; - } - - /** - * Test if {@link ResultSetMetaData#getColumnTypeName(int)} returns the correct type name for each - * column. - * - * @throws SQLException in case of error. - */ - @Test - public void testShouldGetColumnTypesName() throws SQLException { - final String firstColumn = metadata.getColumnTypeName(1); - final String secondColumn = metadata.getColumnTypeName(2); - final String thirdColumn = metadata.getColumnTypeName(3); - - assertThat(firstColumn, equalTo("BIGINT")); - assertThat(secondColumn, equalTo("VARCHAR")); - assertThat(thirdColumn, equalTo("FLOAT")); - } - - /** - * Test if {@link ResultSetMetaData#getColumnTypeName(int)} passing an column index that does not - * exist. - */ - @Test - public void testShouldGetColumnTypesNameFromOutOfBoundIndex() { - assertThrows( - IndexOutOfBoundsException.class, - () -> { - metadata.getColumnTypeName(4); - }); - } - - /** - * Test if {@link ResultSetMetaData#getColumnName(int)} returns the correct name for each column. - * - * @throws SQLException in case of error. 
- */ - @Test - public void testShouldGetColumnNames() throws SQLException { - final String firstColumn = metadata.getColumnName(1); - final String secondColumn = metadata.getColumnName(2); - final String thirdColumn = metadata.getColumnName(3); - - assertThat(firstColumn, equalTo("integer0")); - assertThat(secondColumn, equalTo("string1")); - assertThat(thirdColumn, equalTo("float2")); - } - - /** - * Test {@link ResultSetMetaData#getColumnTypeName(int)} passing an column index that does not - * exist. - */ - @Test - public void testShouldGetColumnNameFromOutOfBoundIndex() { - assertThrows(IndexOutOfBoundsException.class, () -> metadata.getColumnName(4)); - } - - /** - * Test if {@link ResultSetMetaData#getColumnType(int)}returns the correct values. - * - * @throws SQLException in case of error. - */ - @Test - public void testShouldGetColumnType() throws SQLException { - final int firstColumn = metadata.getColumnType(1); - final int secondColumn = metadata.getColumnType(2); - final int thirdColumn = metadata.getColumnType(3); - - assertThat(firstColumn, equalTo(Types.BIGINT)); - assertThat(secondColumn, equalTo(Types.VARCHAR)); - assertThat(thirdColumn, equalTo(Types.FLOAT)); - } - - @Test - public void testShouldGetPrecision() throws SQLException { - assertThat(metadata.getPrecision(1), equalTo(10)); - assertThat(metadata.getPrecision(2), equalTo(65535)); - assertThat(metadata.getPrecision(3), equalTo(15)); - } - - @Test - public void testShouldGetScale() throws SQLException { - assertThat(metadata.getScale(1), equalTo(0)); - assertThat(metadata.getScale(2), equalTo(0)); - assertThat(metadata.getScale(3), equalTo(20)); - } - - @Test - public void testShouldGetCatalogName() throws SQLException { - assertThat(metadata.getCatalogName(1), equalTo("CATALOG_NAME_1")); - assertThat(metadata.getCatalogName(2), equalTo("CATALOG_NAME_2")); - assertThat(metadata.getCatalogName(3), equalTo("CATALOG_NAME_3")); - } - - @Test - public void testShouldGetSchemaName() throws 
SQLException { - assertThat(metadata.getSchemaName(1), equalTo("SCHEMA_NAME_1")); - assertThat(metadata.getSchemaName(2), equalTo("SCHEMA_NAME_2")); - assertThat(metadata.getSchemaName(3), equalTo("SCHEMA_NAME_3")); - } - - @Test - public void testShouldGetTableName() throws SQLException { - assertThat(metadata.getTableName(1), equalTo("TABLE_NAME_1")); - assertThat(metadata.getTableName(2), equalTo("TABLE_NAME_2")); - assertThat(metadata.getTableName(3), equalTo("TABLE_NAME_3")); - } - - @Test - public void testShouldIsAutoIncrement() throws SQLException { - assertThat(metadata.isAutoIncrement(1), equalTo(true)); - assertThat(metadata.isAutoIncrement(2), equalTo(false)); - assertThat(metadata.isAutoIncrement(3), equalTo(false)); - } - - @Test - public void testShouldIsCaseSensitive() throws SQLException { - assertThat(metadata.isCaseSensitive(1), equalTo(false)); - assertThat(metadata.isCaseSensitive(2), equalTo(true)); - assertThat(metadata.isCaseSensitive(3), equalTo(false)); - } - - @Test - public void testShouldIsReadonly() throws SQLException { - assertThat(metadata.isReadOnly(1), equalTo(true)); - assertThat(metadata.isReadOnly(2), equalTo(false)); - assertThat(metadata.isReadOnly(3), equalTo(false)); - } - - @Test - public void testShouldIsSearchable() throws SQLException { - assertThat(metadata.isSearchable(1), equalTo(true)); - assertThat(metadata.isSearchable(2), equalTo(true)); - assertThat(metadata.isSearchable(3), equalTo(true)); - } - - /** - * Test if {@link ResultSetMetaData#getColumnTypeName(int)} passing an column index that does not - * exist. 
- */ - @Test - public void testShouldGetColumnTypesFromOutOfBoundIndex() { - assertThrows(IndexOutOfBoundsException.class, () -> metadata.getColumnType(4)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ResultSetTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ResultSetTest.java deleted file mode 100644 index a8d04dfc83ac0..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ResultSetTest.java +++ /dev/null @@ -1,671 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import static java.lang.String.format; -import static java.util.Collections.synchronizedSet; -import static org.apache.arrow.flight.Location.forGrpcInsecure; -import static org.hamcrest.CoreMatchers.allOf; -import static org.hamcrest.CoreMatchers.anyOf; -import static org.hamcrest.CoreMatchers.containsString; -import static org.hamcrest.CoreMatchers.instanceOf; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import com.google.common.collect.ImmutableSet; -import java.nio.charset.StandardCharsets; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.SQLTimeoutException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Random; -import java.util.Set; -import java.util.concurrent.CountDownLatch; -import org.apache.arrow.driver.jdbc.utils.CoreMockedSqlProducers; -import org.apache.arrow.driver.jdbc.utils.FallbackFlightSqlProducer; -import org.apache.arrow.driver.jdbc.utils.PartitionedFlightSqlProducer; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightStatusCode; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.memory.BufferAllocator; -import 
org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class ResultSetTest { - private static final Random RANDOM = new Random(10); - - @RegisterExtension - public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION = - FlightServerTestExtension.createStandardTestExtension( - CoreMockedSqlProducers.getLegacyProducer()); - - private static Connection connection; - - @BeforeAll - public static void setup() throws SQLException { - connection = FLIGHT_SERVER_TEST_EXTENSION.getConnection(false); - } - - @AfterAll - public static void tearDown() throws SQLException { - connection.close(); - } - - private static void resultSetNextUntilDone(ResultSet resultSet) throws SQLException { - while (resultSet.next()) { - // TODO: implement resultSet.last() - // Pass to the next until resultSet is done - } - } - - /** - * Tests whether the {@link ArrowFlightJdbcDriver} can run a query successfully. - * - * @throws Exception If the connection fails to be established. 
- */ - @Test - public void testShouldRunSelectQuery() throws Exception { - try (Statement statement = connection.createStatement(); - ResultSet resultSet = - statement.executeQuery(CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD)) { - CoreMockedSqlProducers.assertLegacyRegularSqlResultSet(resultSet); - } - } - - @Test - public void testShouldExecuteQueryNotBlockIfClosedBeforeEnd() throws Exception { - try (Statement statement = connection.createStatement(); - ResultSet resultSet = - statement.executeQuery(CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD)) { - - for (int i = 0; i < 7500; i++) { - assertTrue(resultSet.next()); - } - } - } - - /** - * Tests whether the {@link ArrowFlightJdbcDriver} query only returns only the amount of value set - * by {@link org.apache.calcite.avatica.AvaticaStatement#setMaxRows(int)}. - * - * @throws Exception If the connection fails to be established. - */ - @Test - public void testShouldRunSelectQuerySettingMaxRowLimit() throws Exception { - try (Statement statement = connection.createStatement(); - ResultSet resultSet = - statement.executeQuery(CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD)) { - - final int maxRowsLimit = 3; - statement.setMaxRows(maxRowsLimit); - - assertThat(statement.getMaxRows(), is(maxRowsLimit)); - - int count = 0; - int columns = 6; - for (; resultSet.next(); count++) { - for (int column = 1; column <= columns; column++) { - resultSet.getObject(column); - } - assertThat("Test Name #" + count, is(resultSet.getString(2))); - } - - assertThat(maxRowsLimit, is(count)); - } - } - - /** - * Tests whether the {@link ArrowFlightJdbcDriver} fails upon attempting to run an invalid query. - * - * @throws Exception If the connection fails to be established. 
- */ - @Test - public void testShouldThrowExceptionUponAttemptingToExecuteAnInvalidSelectQuery() { - assertThrows( - SQLException.class, - () -> { - try (Statement statement = connection.createStatement(); - ResultSet resultSet = statement.executeQuery("SELECT * FROM SHOULD-FAIL")) { - fail(); - } - }); - } - - /** - * Tests whether the {@link ArrowFlightJdbcDriver} query only returns only the amount of value set - * by {@link org.apache.calcite.avatica.AvaticaStatement#setLargeMaxRows(long)} (int)}. - * - * @throws Exception If the connection fails to be established. - */ - @Test - public void testShouldRunSelectQuerySettingLargeMaxRowLimit() throws Exception { - try (Statement statement = connection.createStatement(); - ResultSet resultSet = - statement.executeQuery(CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD)) { - final long maxRowsLimit = 3; - statement.setLargeMaxRows(maxRowsLimit); - - assertThat(statement.getLargeMaxRows(), is(maxRowsLimit)); - - int count = 0; - int columns = resultSet.getMetaData().getColumnCount(); - for (; resultSet.next(); count++) { - for (int column = 1; column <= columns; column++) { - resultSet.getObject(column); - } - assertEquals("Test Name #" + count, resultSet.getString(2)); - } - - assertEquals(maxRowsLimit, count); - } - } - - @Test - public void testColumnCountShouldRemainConsistentForResultSetThroughoutEntireDuration() - throws SQLException { - final Set counts = new HashSet<>(); - try (final Statement statement = connection.createStatement(); - final ResultSet resultSet = - statement.executeQuery(CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD)) { - while (resultSet.next()) { - counts.add(resultSet.getMetaData().getColumnCount()); - } - } - assertThat(counts, is(ImmutableSet.of(6))); - } - - /** - * Tests whether the {@link ArrowFlightJdbcDriver} close the statement after complete ResultSet - * when call {@link org.apache.calcite.avatica.AvaticaStatement#closeOnCompletion()}. 
- * - * @throws Exception If the connection fails to be established. - */ - @Test - public void testShouldCloseStatementWhenIsCloseOnCompletion() throws Exception { - try (Statement statement = connection.createStatement(); - ResultSet resultSet = - statement.executeQuery(CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD)) { - - statement.closeOnCompletion(); - - resultSetNextUntilDone(resultSet); - - assertThat(statement.isClosed(), is(true)); - } - } - - /** - * Tests whether the {@link ArrowFlightJdbcDriver} close the statement after complete ResultSet - * with max rows limit when call {@link - * org.apache.calcite.avatica.AvaticaStatement#closeOnCompletion()}. - * - * @throws Exception If the connection fails to be established. - */ - @Test - public void testShouldCloseStatementWhenIsCloseOnCompletionWithMaxRowsLimit() throws Exception { - try (Statement statement = connection.createStatement(); - ResultSet resultSet = - statement.executeQuery(CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD)) { - - final long maxRowsLimit = 3; - statement.setLargeMaxRows(maxRowsLimit); - statement.closeOnCompletion(); - - resultSetNextUntilDone(resultSet); - - assertThat(statement.isClosed(), is(true)); - } - } - - /** - * Tests whether the {@link ArrowFlightJdbcDriver} not close the statement after complete - * ResultSet with max rows limit when call {@link - * org.apache.calcite.avatica.AvaticaStatement#closeOnCompletion()}. - * - * @throws Exception If the connection fails to be established. 
- */ - @Test - public void testShouldNotCloseStatementWhenIsNotCloseOnCompletionWithMaxRowsLimit() - throws Exception { - try (Statement statement = connection.createStatement(); - ResultSet resultSet = - statement.executeQuery(CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD)) { - - final long maxRowsLimit = 3; - statement.setLargeMaxRows(maxRowsLimit); - - assertThat(statement.isClosed(), is(false)); - resultSetNextUntilDone(resultSet); - assertThat(resultSet.isClosed(), is(false)); - assertThat(resultSet, is(instanceOf(ArrowFlightJdbcFlightStreamResultSet.class))); - } - } - - @Test - public void testShouldCancelQueryUponCancelAfterQueryingResultSet() throws SQLException { - try (final Statement statement = connection.createStatement(); - final ResultSet resultSet = - statement.executeQuery(CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD)) { - final int column = RANDOM.nextInt(resultSet.getMetaData().getColumnCount()) + 1; - assertThat(resultSet.isClosed(), is(false)); - assertThat(resultSet.next(), is(true)); - assertDoesNotThrow(() -> resultSet.getObject(column)); - statement.cancel(); - // Should reset `ResultSet`; keep both `ResultSet` and `Connection` open. 
- assertThat(statement.isClosed(), is(false)); - assertThat(resultSet.isClosed(), is(false)); - assertThat(resultSet.getMetaData().getColumnCount(), is(0)); - } - } - - @Test - public void testShouldInterruptFlightStreamsIfQueryIsCancelledMidQuerying() - throws SQLException, InterruptedException { - try (final Statement statement = connection.createStatement()) { - final CountDownLatch latch = new CountDownLatch(1); - final Set exceptions = synchronizedSet(new HashSet<>(1)); - final Thread thread = - new Thread( - () -> { - try (final ResultSet resultSet = - statement.executeQuery(CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD)) { - final int cachedColumnCount = resultSet.getMetaData().getColumnCount(); - Thread.sleep(300); - while (resultSet.next()) { - resultSet.getObject(RANDOM.nextInt(cachedColumnCount) + 1); - } - } catch (final SQLException | InterruptedException e) { - exceptions.add(e); - } finally { - latch.countDown(); - } - }); - thread.setName("Test Case: interrupt query execution before first retrieval"); - thread.start(); - statement.cancel(); - thread.join(); - assertThat( - exceptions.stream() - .map(Exception::getMessage) - .map(StringBuilder::new) - .reduce(StringBuilder::append) - .orElseThrow(IllegalArgumentException::new) - .toString(), - is("Statement canceled")); - } - } - - @Test - public void - testShouldInterruptFlightStreamsIfQueryIsCancelledMidProcessingForTimeConsumingQueries() - throws SQLException, InterruptedException { - final String query = CoreMockedSqlProducers.LEGACY_CANCELLATION_SQL_CMD; - try (final Statement statement = connection.createStatement()) { - final Set exceptions = synchronizedSet(new HashSet<>(1)); - final Thread thread = - new Thread( - () -> { - try (final ResultSet ignored = statement.executeQuery(query)) { - fail(); - } catch (final SQLException e) { - exceptions.add(e); - } - }); - thread.setName("Test Case: interrupt query execution mid-process"); - thread.setPriority(Thread.MAX_PRIORITY); - thread.start(); - 
Thread.sleep(5000); // Let the other thread attempt to retrieve results. - statement.cancel(); - thread.join(); - assertThat( - exceptions.stream() - .map(Exception::getMessage) - .map(StringBuilder::new) - .reduce(StringBuilder::append) - .orElseThrow(IllegalStateException::new) - .toString(), - anyOf( - is(format("Error while executing SQL \"%s\": Query canceled", query)), - allOf( - containsString(format("Error while executing SQL \"%s\"", query)), - anyOf(containsString("CANCELLED"), containsString("Cancelling"))))); - } - } - - @Test - public void testShouldInterruptFlightStreamsIfQueryTimeoutIsOver() throws SQLException { - final String query = CoreMockedSqlProducers.LEGACY_CANCELLATION_SQL_CMD; - final int timeoutValue = 2; - final String timeoutUnit = "SECONDS"; - try (final Statement statement = connection.createStatement()) { - statement.setQueryTimeout(timeoutValue); - final Set exceptions = new HashSet<>(1); - try { - statement.executeQuery(query); - } catch (final Exception e) { - exceptions.add(e); - } - final Throwable comparisonCause = - exceptions.stream().findFirst().orElseThrow(RuntimeException::new).getCause().getCause(); - assertThat(comparisonCause, is(instanceOf(SQLTimeoutException.class))); - assertThat( - comparisonCause.getMessage(), - is(format("Query timed out after %d %s", timeoutValue, timeoutUnit))); - } - } - - @Test - public void testFlightStreamsQueryShouldNotTimeout() throws SQLException { - final String query = CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD; - final int timeoutValue = 5; - try (Statement statement = connection.createStatement()) { - statement.setQueryTimeout(timeoutValue); - try (ResultSet resultSet = statement.executeQuery(query)) { - CoreMockedSqlProducers.assertLegacyRegularSqlResultSet(resultSet); - } - } - } - - @Test - public void testPartitionedFlightServer() throws Exception { - // Arrange - final Schema schema = - new Schema( - Arrays.asList(Field.nullablePrimitive("int_column", new ArrowType.Int(32, 
true)))); - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - VectorSchemaRoot firstPartition = VectorSchemaRoot.create(schema, allocator); - VectorSchemaRoot secondPartition = VectorSchemaRoot.create(schema, allocator)) { - firstPartition.setRowCount(1); - ((IntVector) firstPartition.getVector(0)).set(0, 1); - secondPartition.setRowCount(1); - ((IntVector) secondPartition.getVector(0)).set(0, 2); - - // Construct the data-only nodes first. - FlightProducer firstProducer = - new PartitionedFlightSqlProducer.DataOnlyFlightSqlProducer( - new Ticket("first".getBytes(StandardCharsets.UTF_8)), firstPartition); - FlightProducer secondProducer = - new PartitionedFlightSqlProducer.DataOnlyFlightSqlProducer( - new Ticket("second".getBytes(StandardCharsets.UTF_8)), secondPartition); - - final FlightServer.Builder firstBuilder = - FlightServer.builder(allocator, forGrpcInsecure("localhost", 0), firstProducer); - - final FlightServer.Builder secondBuilder = - FlightServer.builder(allocator, forGrpcInsecure("localhost", 0), secondProducer); - - // Run the data-only nodes so that we can get the Locations they are running at. - try (FlightServer firstServer = firstBuilder.build(); - FlightServer secondServer = secondBuilder.build()) { - firstServer.start(); - secondServer.start(); - final FlightEndpoint firstEndpoint = - new FlightEndpoint( - new Ticket("first".getBytes(StandardCharsets.UTF_8)), firstServer.getLocation()); - - final FlightEndpoint secondEndpoint = - new FlightEndpoint( - new Ticket("second".getBytes(StandardCharsets.UTF_8)), secondServer.getLocation()); - - // Finally start the root node. 
- try (final PartitionedFlightSqlProducer rootProducer = - new PartitionedFlightSqlProducer(schema, firstEndpoint, secondEndpoint); - FlightServer rootServer = - FlightServer.builder(allocator, forGrpcInsecure("localhost", 0), rootProducer) - .build() - .start(); - Connection newConnection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://%s:%d/?useEncryption=false", - rootServer.getLocation().getUri().getHost(), rootServer.getPort())); - Statement newStatement = newConnection.createStatement(); - // Act - ResultSet result = newStatement.executeQuery("Select partitioned_data")) { - List resultData = new ArrayList<>(); - while (result.next()) { - resultData.add(result.getInt(1)); - } - - // Assert - assertEquals( - firstPartition.getRowCount() + secondPartition.getRowCount(), resultData.size()); - assertTrue(resultData.contains(((IntVector) firstPartition.getVector(0)).get(0))); - assertTrue(resultData.contains(((IntVector) secondPartition.getVector(0)).get(0))); - } - } - } - } - - @Test - public void testPartitionedFlightServerIgnoreFailure() throws Exception { - final Schema schema = - new Schema( - Collections.singletonList( - Field.nullablePrimitive("int_column", new ArrowType.Int(32, true)))); - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { - final FlightEndpoint firstEndpoint = - new FlightEndpoint( - new Ticket("first".getBytes(StandardCharsets.UTF_8)), - Location.forGrpcInsecure("127.0.0.2", 1234), - Location.forGrpcInsecure("127.0.0.3", 1234)); - - try (final PartitionedFlightSqlProducer rootProducer = - new PartitionedFlightSqlProducer(schema, firstEndpoint); - FlightServer rootServer = - FlightServer.builder(allocator, forGrpcInsecure("localhost", 0), rootProducer) - .build() - .start(); - Connection newConnection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://%s:%d/?useEncryption=false", - rootServer.getLocation().getUri().getHost(), rootServer.getPort())); - Statement 
newStatement = newConnection.createStatement()) { - final SQLException e = - assertThrows( - SQLException.class, - () -> { - ResultSet result = newStatement.executeQuery("Select partitioned_data"); - while (result.next()) {} - }); - final Throwable cause = e.getCause(); - assertTrue(cause instanceof FlightRuntimeException); - final FlightRuntimeException fre = (FlightRuntimeException) cause; - assertEquals(FlightStatusCode.UNAVAILABLE, fre.status().code()); - } - } - } - - @Test - public void testPartitionedFlightServerAllFailure() throws Exception { - // Arrange - final Schema schema = - new Schema( - Collections.singletonList( - Field.nullablePrimitive("int_column", new ArrowType.Int(32, true)))); - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - VectorSchemaRoot firstPartition = VectorSchemaRoot.create(schema, allocator)) { - firstPartition.setRowCount(1); - ((IntVector) firstPartition.getVector(0)).set(0, 1); - - // Construct the data-only nodes first. - FlightProducer firstProducer = - new PartitionedFlightSqlProducer.DataOnlyFlightSqlProducer( - new Ticket("first".getBytes(StandardCharsets.UTF_8)), firstPartition); - - final FlightServer.Builder firstBuilder = - FlightServer.builder(allocator, forGrpcInsecure("localhost", 0), firstProducer); - - // Run the data-only nodes so that we can get the Locations they are running at. - try (FlightServer firstServer = firstBuilder.build()) { - firstServer.start(); - final Location badLocation = Location.forGrpcInsecure("127.0.0.2", 1234); - final FlightEndpoint firstEndpoint = - new FlightEndpoint( - new Ticket("first".getBytes(StandardCharsets.UTF_8)), - badLocation, - firstServer.getLocation()); - - // Finally start the root node. 
- try (final PartitionedFlightSqlProducer rootProducer = - new PartitionedFlightSqlProducer(schema, firstEndpoint); - FlightServer rootServer = - FlightServer.builder(allocator, forGrpcInsecure("localhost", 0), rootProducer) - .build() - .start(); - Connection newConnection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://%s:%d/?useEncryption=false", - rootServer.getLocation().getUri().getHost(), rootServer.getPort())); - Statement newStatement = newConnection.createStatement(); - // Act - ResultSet result = newStatement.executeQuery("Select partitioned_data")) { - List resultData = new ArrayList<>(); - while (result.next()) { - resultData.add(result.getInt(1)); - } - - // Assert - assertEquals(firstPartition.getRowCount(), resultData.size()); - assertTrue(resultData.contains(((IntVector) firstPartition.getVector(0)).get(0))); - } - } - } - } - - @Test - public void testFallbackFlightServer() throws Exception { - final Schema schema = - new Schema( - Collections.singletonList(Field.nullable("int_column", Types.MinorType.INT.getType()))); - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - VectorSchemaRoot resultData = VectorSchemaRoot.create(schema, allocator)) { - resultData.setRowCount(1); - ((IntVector) resultData.getVector(0)).set(0, 1); - - try (final FallbackFlightSqlProducer rootProducer = - new FallbackFlightSqlProducer(resultData); - FlightServer rootServer = - FlightServer.builder(allocator, forGrpcInsecure("localhost", 0), rootProducer) - .build() - .start(); - Connection newConnection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://%s:%d/?useEncryption=false", - rootServer.getLocation().getUri().getHost(), rootServer.getPort())); - Statement newStatement = newConnection.createStatement(); - ResultSet result = newStatement.executeQuery("fallback")) { - List actualData = new ArrayList<>(); - while (result.next()) { - actualData.add(result.getInt(1)); - } - - // Assert - 
assertEquals(resultData.getRowCount(), actualData.size()); - assertTrue(actualData.contains(((IntVector) resultData.getVector(0)).get(0))); - } - } - } - - @Test - public void testFallbackSecondFlightServer() throws Exception { - final Schema schema = - new Schema( - Collections.singletonList(Field.nullable("int_column", Types.MinorType.INT.getType()))); - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - VectorSchemaRoot resultData = VectorSchemaRoot.create(schema, allocator)) { - resultData.setRowCount(1); - ((IntVector) resultData.getVector(0)).set(0, 1); - - try (final FallbackFlightSqlProducer rootProducer = - new FallbackFlightSqlProducer(resultData); - FlightServer rootServer = - FlightServer.builder(allocator, forGrpcInsecure("localhost", 0), rootProducer) - .build() - .start(); - Connection newConnection = - DriverManager.getConnection( - String.format( - "jdbc:arrow-flight-sql://%s:%d/?useEncryption=false", - rootServer.getLocation().getUri().getHost(), rootServer.getPort())); - Statement newStatement = newConnection.createStatement(); - ResultSet result = newStatement.executeQuery("fallback with error")) { - List actualData = new ArrayList<>(); - while (result.next()) { - actualData.add(result.getInt(1)); - } - - // Assert - assertEquals(resultData.getRowCount(), actualData.size()); - assertTrue(actualData.contains(((IntVector) resultData.getVector(0)).get(0))); - } - } - } - - @Test - public void testShouldRunSelectQueryWithEmptyVectorsEmbedded() throws Exception { - try (Statement statement = connection.createStatement(); - ResultSet resultSet = - statement.executeQuery(CoreMockedSqlProducers.LEGACY_REGULAR_WITH_EMPTY_SQL_CMD)) { - long rowCount = 0; - while (resultSet.next()) { - ++rowCount; - } - assertEquals(2, rowCount); - } - } - - @Test - public void testResultSetAppMetadata() throws Exception { - try (Statement statement = connection.createStatement(); - ResultSet resultSet = - 
statement.executeQuery(CoreMockedSqlProducers.LEGACY_REGULAR_SQL_CMD)) { - assertArrayEquals( - ((ArrowFlightJdbcFlightStreamResultSet) resultSet).getAppMetadata(), - "foo".getBytes(StandardCharsets.UTF_8)); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/TokenAuthenticationTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/TokenAuthenticationTest.java deleted file mode 100644 index 1dfb1efcf16ce..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/TokenAuthenticationTest.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.sql.Connection; -import java.sql.SQLException; -import org.apache.arrow.driver.jdbc.authentication.TokenAuthentication; -import org.apache.arrow.driver.jdbc.utils.MockFlightSqlProducer; -import org.apache.arrow.util.AutoCloseables; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class TokenAuthenticationTest { - private static final MockFlightSqlProducer FLIGHT_SQL_PRODUCER = new MockFlightSqlProducer(); - - @RegisterExtension public static FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION; - - static { - FLIGHT_SERVER_TEST_EXTENSION = - new FlightServerTestExtension.Builder() - .authentication(new TokenAuthentication.Builder().token("1234").build()) - .producer(FLIGHT_SQL_PRODUCER) - .build(); - } - - @AfterAll - public static void tearDownAfterClass() { - AutoCloseables.closeNoChecked(FLIGHT_SQL_PRODUCER); - } - - @Test - public void connectUsingTokenAuthenticationShouldFail() throws SQLException { - assertThrows( - SQLException.class, () -> FLIGHT_SERVER_TEST_EXTENSION.getConnection(false, "invalid")); - } - - @Test - public void connectUsingTokenAuthenticationShouldSuccess() throws SQLException { - try (Connection connection = FLIGHT_SERVER_TEST_EXTENSION.getConnection(false, "1234")) { - assertFalse(connection.isClosed()); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactoryTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactoryTest.java deleted file mode 100644 index b56bf3c63d8e3..0000000000000 --- 
a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorFactoryTest.java +++ /dev/null @@ -1,474 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor; - -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.impl.binary.ArrowFlightJdbcBinaryVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcDateVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcDurationVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcIntervalVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeStampVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcDenseUnionVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcFixedSizeListVectorAccessor; -import 
org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcLargeListVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcListVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcMapVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcStructVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.complex.ArrowFlightJdbcUnionVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.numeric.ArrowFlightJdbcBaseIntVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.numeric.ArrowFlightJdbcBitVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.numeric.ArrowFlightJdbcDecimalVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.numeric.ArrowFlightJdbcFloat4VectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.numeric.ArrowFlightJdbcFloat8VectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.text.ArrowFlightJdbcVarCharVectorAccessor; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class ArrowFlightJdbcAccessorFactoryTest { - public 
static final IntSupplier GET_CURRENT_ROW = () -> 0; - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - @Test - public void createAccessorForUInt1Vector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createUInt1Vector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcBaseIntVectorAccessor); - } - } - - @Test - public void createAccessorForUInt2Vector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createUInt2Vector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcBaseIntVectorAccessor); - } - } - - @Test - public void createAccessorForUInt4Vector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createUInt4Vector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcBaseIntVectorAccessor); - } - } - - @Test - public void createAccessorForUInt8Vector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createUInt8Vector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcBaseIntVectorAccessor); - } - } - - @Test - public void createAccessorForTinyIntVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createTinyIntVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof 
ArrowFlightJdbcBaseIntVectorAccessor); - } - } - - @Test - public void createAccessorForSmallIntVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createSmallIntVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcBaseIntVectorAccessor); - } - } - - @Test - public void createAccessorForIntVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createIntVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcBaseIntVectorAccessor); - } - } - - @Test - public void createAccessorForBigIntVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createBigIntVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcBaseIntVectorAccessor); - } - } - - @Test - public void createAccessorForFloat4Vector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createFloat4Vector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcFloat4VectorAccessor); - } - } - - @Test - public void createAccessorForFloat8Vector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createFloat8Vector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcFloat8VectorAccessor); - } - } - - @Test - public void createAccessorForBitVector() { - try (ValueVector valueVector = 
rootAllocatorTestExtension.createBitVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcBitVectorAccessor); - } - } - - @Test - public void createAccessorForDecimalVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createDecimalVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcDecimalVectorAccessor); - } - } - - @Test - public void createAccessorForDecimal256Vector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createDecimal256Vector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcDecimalVectorAccessor); - } - } - - @Test - public void createAccessorForVarBinaryVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createVarBinaryVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcBinaryVectorAccessor); - } - } - - @Test - public void createAccessorForLargeVarBinaryVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createLargeVarBinaryVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcBinaryVectorAccessor); - } - } - - @Test - public void createAccessorForFixedSizeBinaryVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createFixedSizeBinaryVector()) { - ArrowFlightJdbcAccessor accessor = - 
ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcBinaryVectorAccessor); - } - } - - @Test - public void createAccessorForTimeStampVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createTimeStampMilliVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcTimeStampVectorAccessor); - } - } - - @Test - public void createAccessorForTimeNanoVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createTimeNanoVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcTimeVectorAccessor); - } - } - - @Test - public void createAccessorForTimeMicroVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createTimeMicroVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcTimeVectorAccessor); - } - } - - @Test - public void createAccessorForTimeMilliVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createTimeMilliVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcTimeVectorAccessor); - } - } - - @Test - public void createAccessorForTimeSecVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createTimeSecVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor 
instanceof ArrowFlightJdbcTimeVectorAccessor); - } - } - - @Test - public void createAccessorForDateDayVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createDateDayVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcDateVectorAccessor); - } - } - - @Test - public void createAccessorForDateMilliVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createDateMilliVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcDateVectorAccessor); - } - } - - @Test - public void createAccessorForVarCharVector() { - try (ValueVector valueVector = - new VarCharVector("", rootAllocatorTestExtension.getRootAllocator())) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcVarCharVectorAccessor); - } - } - - @Test - public void createAccessorForLargeVarCharVector() { - try (ValueVector valueVector = - new LargeVarCharVector("", rootAllocatorTestExtension.getRootAllocator())) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcVarCharVectorAccessor); - } - } - - @Test - public void createAccessorForDurationVector() { - try (ValueVector valueVector = - new DurationVector( - "", - new FieldType(true, new ArrowType.Duration(TimeUnit.MILLISECOND), null), - rootAllocatorTestExtension.getRootAllocator())) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - 
assertTrue(accessor instanceof ArrowFlightJdbcDurationVectorAccessor); - } - } - - @Test - public void createAccessorForIntervalDayVector() { - try (ValueVector valueVector = - new IntervalDayVector("", rootAllocatorTestExtension.getRootAllocator())) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcIntervalVectorAccessor); - } - } - - @Test - public void createAccessorForIntervalYearVector() { - try (ValueVector valueVector = - new IntervalYearVector("", rootAllocatorTestExtension.getRootAllocator())) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcIntervalVectorAccessor); - } - } - - @Test - public void createAccessorForIntervalMonthDayNanoVector() { - try (ValueVector valueVector = - new IntervalMonthDayNanoVector("", rootAllocatorTestExtension.getRootAllocator())) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcIntervalVectorAccessor); - } - } - - @Test - public void createAccessorForUnionVector() { - try (ValueVector valueVector = - new UnionVector("", rootAllocatorTestExtension.getRootAllocator(), null, null)) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcUnionVectorAccessor); - } - } - - @Test - public void createAccessorForDenseUnionVector() { - try (ValueVector valueVector = - new DenseUnionVector("", rootAllocatorTestExtension.getRootAllocator(), null, null)) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, 
GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcDenseUnionVectorAccessor); - } - } - - @Test - public void createAccessorForStructVector() { - try (ValueVector valueVector = - StructVector.empty("", rootAllocatorTestExtension.getRootAllocator())) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcStructVectorAccessor); - } - } - - @Test - public void createAccessorForListVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createListVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcListVectorAccessor); - } - } - - @Test - public void createAccessorForLargeListVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createLargeListVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcLargeListVectorAccessor); - } - } - - @Test - public void createAccessorForFixedSizeListVector() { - try (ValueVector valueVector = rootAllocatorTestExtension.createFixedSizeListVector()) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof ArrowFlightJdbcFixedSizeListVectorAccessor); - } - } - - @Test - public void createAccessorForMapVector() { - try (ValueVector valueVector = - MapVector.empty("", rootAllocatorTestExtension.getRootAllocator(), true)) { - ArrowFlightJdbcAccessor accessor = - ArrowFlightJdbcAccessorFactory.createAccessor( - valueVector, GET_CURRENT_ROW, (boolean wasNull) -> {}); - - assertTrue(accessor instanceof 
ArrowFlightJdbcMapVectorAccessor); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorTest.java deleted file mode 100644 index 6a101e90d3a27..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/ArrowFlightJdbcAccessorTest.java +++ /dev/null @@ -1,356 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; -import java.sql.SQLException; -import java.util.HashMap; -import java.util.Map; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; - -@ExtendWith(MockitoExtension.class) -public class ArrowFlightJdbcAccessorTest { - - static class MockedArrowFlightJdbcAccessor extends ArrowFlightJdbcAccessor { - - protected MockedArrowFlightJdbcAccessor() { - super(() -> 0, (boolean wasNull) -> {}); - } - - @Override - public Class getObjectClass() { - return Long.class; - } - } - - @Mock MockedArrowFlightJdbcAccessor accessor; - - @Test - public void testShouldGetObjectWithByteClassReturnGetByte() throws SQLException { - byte expected = Byte.MAX_VALUE; - when(accessor.getByte()).thenReturn(expected); - - when(accessor.getObject(Byte.class)).thenCallRealMethod(); - - assertEquals(accessor.getObject(Byte.class), (Object) expected); - verify(accessor).getByte(); - } - - @Test - public void testShouldGetObjectWithShortClassReturnGetShort() throws SQLException { - short expected = Short.MAX_VALUE; - when(accessor.getShort()).thenReturn(expected); - - when(accessor.getObject(Short.class)).thenCallRealMethod(); - - assertEquals(accessor.getObject(Short.class), (Object) expected); - verify(accessor).getShort(); - } - - @Test - public void testShouldGetObjectWithIntegerClassReturnGetInt() throws SQLException { - int expected = Integer.MAX_VALUE; - when(accessor.getInt()).thenReturn(expected); - - when(accessor.getObject(Integer.class)).thenCallRealMethod(); - - 
assertEquals(accessor.getObject(Integer.class), (Object) expected); - verify(accessor).getInt(); - } - - @Test - public void testShouldGetObjectWithLongClassReturnGetLong() throws SQLException { - long expected = Long.MAX_VALUE; - when(accessor.getLong()).thenReturn(expected); - - when(accessor.getObject(Long.class)).thenCallRealMethod(); - - assertEquals(accessor.getObject(Long.class), (Object) expected); - verify(accessor).getLong(); - } - - @Test - public void testShouldGetObjectWithFloatClassReturnGetFloat() throws SQLException { - float expected = Float.MAX_VALUE; - when(accessor.getFloat()).thenReturn(expected); - - when(accessor.getObject(Float.class)).thenCallRealMethod(); - - assertEquals(accessor.getObject(Float.class), (Object) expected); - verify(accessor).getFloat(); - } - - @Test - public void testShouldGetObjectWithDoubleClassReturnGetDouble() throws SQLException { - double expected = Double.MAX_VALUE; - when(accessor.getDouble()).thenReturn(expected); - - when(accessor.getObject(Double.class)).thenCallRealMethod(); - - assertEquals(accessor.getObject(Double.class), (Object) expected); - verify(accessor).getDouble(); - } - - @Test - public void testShouldGetObjectWithBooleanClassReturnGetBoolean() throws SQLException { - when(accessor.getBoolean()).thenReturn(true); - - when(accessor.getObject(Boolean.class)).thenCallRealMethod(); - - assertEquals(true, accessor.getObject(Boolean.class)); - verify(accessor).getBoolean(); - } - - @Test - public void testShouldGetObjectWithBigDecimalClassReturnGetBigDecimal() throws SQLException { - BigDecimal expected = BigDecimal.TEN; - when(accessor.getBigDecimal()).thenReturn(expected); - - when(accessor.getObject(BigDecimal.class)).thenCallRealMethod(); - - assertEquals(expected, accessor.getObject(BigDecimal.class)); - verify(accessor).getBigDecimal(); - } - - @Test - public void testShouldGetObjectWithStringClassReturnGetString() throws SQLException { - String expected = "STRING_VALUE"; - 
when(accessor.getString()).thenReturn(expected); - - when(accessor.getObject(String.class)).thenCallRealMethod(); - - assertEquals(expected, accessor.getObject(String.class)); - verify(accessor).getString(); - } - - @Test - public void testShouldGetObjectWithByteArrayClassReturnGetBytes() throws SQLException { - byte[] expected = "STRING_VALUE".getBytes(StandardCharsets.UTF_8); - when(accessor.getBytes()).thenReturn(expected); - - when(accessor.getObject(byte[].class)).thenCallRealMethod(); - - assertArrayEquals(accessor.getObject(byte[].class), expected); - verify(accessor).getBytes(); - } - - @Test - public void testShouldGetObjectWithObjectClassReturnGetObject() throws SQLException { - Object expected = new Object(); - when(accessor.getObject()).thenReturn(expected); - - when(accessor.getObject(Object.class)).thenCallRealMethod(); - - assertEquals(expected, accessor.getObject(Object.class)); - verify(accessor).getObject(); - } - - @Test - public void testShouldGetObjectWithAccessorsObjectClassReturnGetObject() throws SQLException { - Class objectClass = Long.class; - - when(accessor.getObject(objectClass)).thenCallRealMethod(); - - accessor.getObject(objectClass); - verify(accessor).getObject(objectClass); - } - - @Test - public void testShouldFailToGetBoolean() throws SQLException { - when(accessor.getBoolean()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getBoolean()); - } - - @Test - public void testShouldFailToGetByte() throws SQLException { - when(accessor.getByte()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getByte()); - } - - @Test - public void testShouldFailToGetShort() throws SQLException { - when(accessor.getShort()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getShort()); - } - - @Test - public void testShouldFailToGetInt() throws SQLException { - when(accessor.getInt()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getInt()); - } - - @Test 
- public void testShouldFailToGetLong() throws SQLException { - when(accessor.getLong()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getLong()); - } - - @Test - public void testShouldFailToGetFloat() throws SQLException { - when(accessor.getFloat()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getFloat()); - } - - @Test - public void testShouldFailToGetDouble() throws SQLException { - when(accessor.getDouble()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getDouble()); - } - - @Test - public void testShouldFailToGetBigDecimal() throws SQLException { - when(accessor.getBigDecimal()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getBigDecimal()); - } - - @Test - public void testShouldFailToGetBytes() throws SQLException { - when(accessor.getBytes()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getBytes()); - } - - @Test - public void testShouldFailToGetAsciiStream() throws SQLException { - when(accessor.getAsciiStream()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getAsciiStream()); - } - - @Test - public void testShouldFailToGetUnicodeStream() throws SQLException { - when(accessor.getUnicodeStream()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getUnicodeStream()); - } - - @Test - public void testShouldFailToGetBinaryStream() throws SQLException { - when(accessor.getBinaryStream()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getBinaryStream()); - } - - @Test - public void testShouldFailToGetObject() throws SQLException { - when(accessor.getObject()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getObject()); - } - - @Test - public void testShouldFailToGetObjectMap() throws SQLException { - Map> map = new HashMap<>(); - when(accessor.getObject(map)).thenCallRealMethod(); - assertThrows(SQLException.class, () -> 
accessor.getObject(map)); - } - - @Test - public void testShouldFailToGetCharacterStream() throws SQLException { - when(accessor.getCharacterStream()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getCharacterStream()); - } - - @Test - public void testShouldFailToGetRef() throws SQLException { - when(accessor.getRef()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getRef()); - } - - @Test - public void testShouldFailToGetBlob() throws SQLException { - when(accessor.getBlob()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getBlob()); - } - - @Test - public void testShouldFailToGetClob() throws SQLException { - when(accessor.getClob()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getClob()); - } - - @Test - public void testShouldFailToGetArray() throws SQLException { - when(accessor.getArray()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getArray()); - } - - @Test - public void testShouldFailToGetStruct() throws SQLException { - when(accessor.getStruct()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getStruct()); - } - - @Test - public void testShouldFailToGetURL() throws SQLException { - when(accessor.getURL()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getURL()); - } - - @Test - public void testShouldFailToGetNClob() throws SQLException { - when(accessor.getNClob()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getNClob()); - } - - @Test - public void testShouldFailToGetSQLXML() throws SQLException { - when(accessor.getSQLXML()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getSQLXML()); - } - - @Test - public void testShouldFailToGetNString() throws SQLException { - when(accessor.getNString()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getNString()); - } - - @Test - public void 
testShouldFailToGetNCharacterStream() throws SQLException { - when(accessor.getNCharacterStream()).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getNCharacterStream()); - } - - @Test - public void testShouldFailToGetDate() throws SQLException { - when(accessor.getDate(null)).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getDate(null)); - } - - @Test - public void testShouldFailToGetTime() throws SQLException { - when(accessor.getTime(null)).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getTime(null)); - } - - @Test - public void testShouldFailToGetTimestamp() throws SQLException { - when(accessor.getTimestamp(null)).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getTimestamp(null)); - } - - @Test - public void testShouldFailToGetBigDecimalWithValue() throws SQLException { - when(accessor.getBigDecimal(0)).thenCallRealMethod(); - assertThrows(SQLException.class, () -> accessor.getBigDecimal(0)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessorTest.java deleted file mode 100644 index 2db957badf02d..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/ArrowFlightJdbcNullVectorAccessorTest.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl; - -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.junit.jupiter.api.Test; - -public class ArrowFlightJdbcNullVectorAccessorTest { - - ArrowFlightJdbcNullVectorAccessor accessor = - new ArrowFlightJdbcNullVectorAccessor((boolean wasNull) -> {}); - - @Test - public void testShouldWasNullReturnTrue() { - assertTrue(accessor.wasNull()); - } - - @Test - public void testShouldGetObjectReturnNull() { - assertNull(accessor.getObject()); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessorTest.java deleted file mode 100644 index f445fb15a46ae..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/binary/ArrowFlightJdbcBinaryVectorAccessorTest.java +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.binary; - -import static java.nio.charset.StandardCharsets.US_ASCII; -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; - -import java.io.InputStream; -import java.io.Reader; -import java.util.function.Supplier; -import java.util.stream.Stream; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.commons.io.IOUtils; -import org.hamcrest.CoreMatchers; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.extension.RegisterExtension; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -public class ArrowFlightJdbcBinaryVectorAccessorTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private ValueVector vector; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> { - ArrowFlightJdbcAccessorFactory.WasNullConsumer noOpWasNullConsumer = - (boolean wasNull) -> {}; 
- if (vector instanceof VarBinaryVector) { - return new ArrowFlightJdbcBinaryVectorAccessor( - ((VarBinaryVector) vector), getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof LargeVarBinaryVector) { - return new ArrowFlightJdbcBinaryVectorAccessor( - ((LargeVarBinaryVector) vector), getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof FixedSizeBinaryVector) { - return new ArrowFlightJdbcBinaryVectorAccessor( - ((FixedSizeBinaryVector) vector), getCurrentRow, noOpWasNullConsumer); - } - return null; - }; - - private final AccessorTestUtils.AccessorIterator - accessorIterator = new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - public static Stream data() { - return Stream.of( - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createVarBinaryVector(), - "VarBinaryVector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createLargeVarBinaryVector(), - "LargeVarBinaryVector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createFixedSizeBinaryVector(), - "FixedSizeBinaryVector")); - } - - public void setup(Supplier vectorSupplier) { - this.vector = vectorSupplier.get(); - } - - @AfterEach - public void tearDown() { - this.vector.close(); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetStringReturnExpectedString(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcBinaryVectorAccessor::getString, - (accessor) -> is(new String(accessor.getBytes(), UTF_8))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetStringReturnNull(Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - vector.reset(); - vector.setValueCount(5); - - accessorIterator.assertAccessorGetter( - vector, ArrowFlightJdbcBinaryVectorAccessor::getString, CoreMatchers.nullValue()); - } - - @ParameterizedTest - @MethodSource("data") - public void 
testShouldGetBytesReturnExpectedByteArray(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcBinaryVectorAccessor::getBytes, - (accessor, currentRow) -> { - if (vector instanceof VarBinaryVector) { - return is(((VarBinaryVector) vector).get(currentRow)); - } else if (vector instanceof LargeVarBinaryVector) { - return is(((LargeVarBinaryVector) vector).get(currentRow)); - } else if (vector instanceof FixedSizeBinaryVector) { - return is(((FixedSizeBinaryVector) vector).get(currentRow)); - } - return null; - }); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetBytesReturnNull(Supplier vectorSupplier) { - setup(vectorSupplier); - vector.reset(); - vector.setValueCount(5); - - ArrowFlightJdbcBinaryVectorAccessor accessor = accessorSupplier.supply(vector, () -> 0); - assertThat(accessor.getBytes(), CoreMatchers.equalTo(null)); - assertThat(accessor.wasNull(), is(true)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectReturnAsGetBytes(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcBinaryVectorAccessor::getObject, - (accessor) -> is(accessor.getBytes())); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectReturnNull(Supplier vectorSupplier) { - setup(vectorSupplier); - vector.reset(); - vector.setValueCount(5); - - ArrowFlightJdbcBinaryVectorAccessor accessor = accessorSupplier.supply(vector, () -> 0); - assertThat(accessor.getObject(), CoreMatchers.equalTo(null)); - assertThat(accessor.wasNull(), is(true)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetUnicodeStreamReturnCorrectInputStream( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - InputStream inputStream = 
accessor.getUnicodeStream(); - String actualString = IOUtils.toString(inputStream, UTF_8); - assertThat(accessor.wasNull(), is(false)); - assertThat(actualString, is(accessor.getString())); - }); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetUnicodeStreamReturnNull(Supplier vectorSupplier) { - setup(vectorSupplier); - vector.reset(); - vector.setValueCount(5); - - ArrowFlightJdbcBinaryVectorAccessor accessor = accessorSupplier.supply(vector, () -> 0); - assertThat(accessor.getUnicodeStream(), CoreMatchers.equalTo(null)); - assertThat(accessor.wasNull(), is(true)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetAsciiStreamReturnCorrectInputStream(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - InputStream inputStream = accessor.getAsciiStream(); - String actualString = IOUtils.toString(inputStream, US_ASCII); - assertThat(accessor.wasNull(), is(false)); - assertThat(actualString, is(accessor.getString())); - }); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetAsciiStreamReturnNull(Supplier vectorSupplier) { - setup(vectorSupplier); - vector.reset(); - vector.setValueCount(5); - - ArrowFlightJdbcBinaryVectorAccessor accessor = accessorSupplier.supply(vector, () -> 0); - assertThat(accessor.getAsciiStream(), CoreMatchers.equalTo(null)); - assertThat(accessor.wasNull(), is(true)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetBinaryStreamReturnCurrentInputStream( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - InputStream inputStream = accessor.getBinaryStream(); - String actualString = IOUtils.toString(inputStream, UTF_8); - assertThat(accessor.wasNull(), is(false)); - assertThat(actualString, is(accessor.getString())); - }); - } - - @ParameterizedTest - 
@MethodSource("data") - public void testShouldGetBinaryStreamReturnNull(Supplier vectorSupplier) { - setup(vectorSupplier); - vector.reset(); - vector.setValueCount(5); - - ArrowFlightJdbcBinaryVectorAccessor accessor = accessorSupplier.supply(vector, () -> 0); - assertThat(accessor.getBinaryStream(), CoreMatchers.equalTo(null)); - assertThat(accessor.wasNull(), is(true)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetCharacterStreamReturnCorrectReader(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - Reader characterStream = accessor.getCharacterStream(); - String actualString = IOUtils.toString(characterStream); - assertThat(accessor.wasNull(), is(false)); - assertThat(actualString, is(accessor.getString())); - }); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetCharacterStreamReturnNull(Supplier vectorSupplier) { - setup(vectorSupplier); - vector.reset(); - vector.setValueCount(5); - - ArrowFlightJdbcBinaryVectorAccessor accessor = accessorSupplier.supply(vector, () -> 0); - assertThat(accessor.getCharacterStream(), CoreMatchers.equalTo(null)); - assertThat(accessor.wasNull(), is(true)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessorTest.java deleted file mode 100644 index 3bbe3e1711124..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDateVectorAccessorTest.java +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.calendar; - -import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcDateVectorAccessor.getTimeUnitForVector; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.CoreMatchers.not; -import static org.hamcrest.MatcherAssert.assertThat; - -import java.sql.Date; -import java.sql.Timestamp; -import java.time.LocalDateTime; -import java.util.Calendar; -import java.util.TimeZone; -import java.util.concurrent.TimeUnit; -import java.util.function.Supplier; -import java.util.stream.Stream; -import org.apache.arrow.driver.jdbc.accessor.impl.text.ArrowFlightJdbcVarCharVectorAccessor; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.util.Text; -import org.hamcrest.CoreMatchers; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.extension.RegisterExtension; -import org.junit.jupiter.params.ParameterizedTest; -import 
org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -public class ArrowFlightJdbcDateVectorAccessorTest { - - public static final String AMERICA_VANCOUVER = "America/Vancouver"; - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private BaseFixedWidthVector vector; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> { - if (vector instanceof DateDayVector) { - return new ArrowFlightJdbcDateVectorAccessor( - (DateDayVector) vector, getCurrentRow, (boolean wasNull) -> {}); - } else if (vector instanceof DateMilliVector) { - return new ArrowFlightJdbcDateVectorAccessor( - (DateMilliVector) vector, getCurrentRow, (boolean wasNull) -> {}); - } - return null; - }; - - private final AccessorTestUtils.AccessorIterator - accessorIterator = new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - public static Stream data() { - return Stream.of( - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createDateDayVector(), - "DateDayVector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createDateMilliVector(), - "DateMilliVector")); - } - - public void setup(Supplier vectorSupplier) { - this.vector = vectorSupplier.get(); - } - - @AfterEach - public void tearDown() { - this.vector.close(); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetTimestampReturnValidTimestampWithoutCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - accessor -> accessor.getTimestamp(null), - (accessor, currentRow) -> is(getTimestampForVector(currentRow))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectWithDateClassReturnValidDateWithoutCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - 
accessorIterator.assertAccessorGetter( - vector, - accessor -> accessor.getObject(Date.class), - (accessor, currentRow) -> is(new Date(getTimestampForVector(currentRow).getTime()))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetTimestampReturnValidTimestampWithCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - TimeZone timeZone = TimeZone.getTimeZone(AMERICA_VANCOUVER); - Calendar calendar = Calendar.getInstance(timeZone); - - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - final Timestamp resultWithoutCalendar = accessor.getTimestamp(null); - final Timestamp result = accessor.getTimestamp(calendar); - - long offset = timeZone.getOffset(resultWithoutCalendar.getTime()); - - assertThat(resultWithoutCalendar.getTime() - result.getTime(), is(offset)); - assertThat(accessor.wasNull(), is(false)); - }); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetTimestampReturnNull(Supplier vectorSupplier) { - setup(vectorSupplier); - vector.setNull(0); - ArrowFlightJdbcDateVectorAccessor accessor = accessorSupplier.supply(vector, () -> 0); - assertThat(accessor.getTimestamp(null), CoreMatchers.equalTo(null)); - assertThat(accessor.wasNull(), is(true)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetDateReturnValidDateWithoutCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - accessor -> accessor.getDate(null), - (accessor, currentRow) -> is(new Date(getTimestampForVector(currentRow).getTime()))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetDateReturnValidDateWithCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - TimeZone timeZone = TimeZone.getTimeZone(AMERICA_VANCOUVER); - Calendar calendar = Calendar.getInstance(timeZone); - - accessorIterator.iterate( - vector, - (accessor, currentRow) -> 
{ - final Date resultWithoutCalendar = accessor.getDate(null); - final Date result = accessor.getDate(calendar); - - long offset = timeZone.getOffset(resultWithoutCalendar.getTime()); - - assertThat(resultWithoutCalendar.getTime() - result.getTime(), is(offset)); - assertThat(accessor.wasNull(), is(false)); - }); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetDateReturnNull(Supplier vectorSupplier) { - setup(vectorSupplier); - vector.setNull(0); - ArrowFlightJdbcDateVectorAccessor accessor = accessorSupplier.supply(vector, () -> 0); - assertThat(accessor.getDate(null), CoreMatchers.equalTo(null)); - assertThat(accessor.wasNull(), is(true)); - } - - private Timestamp getTimestampForVector(int currentRow) { - Object object = vector.getObject(currentRow); - - Timestamp expectedTimestamp = null; - if (object instanceof LocalDateTime) { - expectedTimestamp = Timestamp.valueOf((LocalDateTime) object); - } else if (object instanceof Number) { - long value = ((Number) object).longValue(); - TimeUnit timeUnit = getTimeUnitForVector(vector); - long millis = timeUnit.toMillis(value); - expectedTimestamp = new Timestamp(millis); - } - return expectedTimestamp; - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectClass(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, ArrowFlightJdbcDateVectorAccessor::getObjectClass, equalTo(Date.class)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetStringBeConsistentWithVarCharAccessorWithoutCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - assertGetStringIsConsistentWithVarCharAccessor(null); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetStringBeConsistentWithVarCharAccessorWithCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - Calendar calendar = 
Calendar.getInstance(TimeZone.getTimeZone(AMERICA_VANCOUVER)); - assertGetStringIsConsistentWithVarCharAccessor(calendar); - } - - @ParameterizedTest - @MethodSource("data") - public void testValidateGetStringTimeZoneConsistency( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - final TimeZone defaultTz = TimeZone.getDefault(); - try { - final String string = - accessor.getString(); // Should always be UTC as no calendar is provided - - // Validate with UTC - Date date = accessor.getDate(null); - TimeZone.setDefault(TimeZone.getTimeZone("UTC")); - assertThat(date.toString(), is(string)); - - // Validate with different TZ - TimeZone.setDefault(TimeZone.getTimeZone(AMERICA_VANCOUVER)); - assertThat(date.toString(), not(string)); - - assertThat(accessor.wasNull(), is(false)); - } finally { - // Set default Tz back - TimeZone.setDefault(defaultTz); - } - }); - } - - private void assertGetStringIsConsistentWithVarCharAccessor(Calendar calendar) throws Exception { - try (VarCharVector varCharVector = - new VarCharVector("", rootAllocatorTestExtension.getRootAllocator())) { - varCharVector.allocateNew(1); - ArrowFlightJdbcVarCharVectorAccessor varCharVectorAccessor = - new ArrowFlightJdbcVarCharVectorAccessor(varCharVector, () -> 0, (boolean wasNull) -> {}); - - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - final String string = accessor.getString(); - varCharVector.set(0, new Text(string)); - varCharVector.setValueCount(1); - - Date dateFromVarChar = varCharVectorAccessor.getDate(calendar); - Date date = accessor.getDate(calendar); - - assertThat(date, is(dateFromVarChar)); - assertThat(accessor.wasNull(), is(false)); - }); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessorTest.java 
b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessorTest.java deleted file mode 100644 index 48492e21224b2..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcDurationVectorAccessorTest.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.calendar; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; - -import java.time.Duration; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class ArrowFlightJdbcDurationVectorAccessorTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private DurationVector vector; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> - new ArrowFlightJdbcDurationVectorAccessor( - (DurationVector) vector, getCurrentRow, (boolean wasNull) -> {}); - - private final AccessorTestUtils.AccessorIterator - accessorIterator = new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - @BeforeEach - public void setup() { - FieldType fieldType = new FieldType(true, new ArrowType.Duration(TimeUnit.MILLISECOND), null); - this.vector = new DurationVector("", fieldType, rootAllocatorTestExtension.getRootAllocator()); - - int valueCount = 10; - this.vector.setValueCount(valueCount); - for (int i = 0; i < valueCount; i++) { - this.vector.set(i, java.util.concurrent.TimeUnit.DAYS.toMillis(i + 1)); - } - } - - @AfterEach - public void tearDown() { - this.vector.close(); - } - - @Test - public void getObject() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - 
ArrowFlightJdbcDurationVectorAccessor::getObject, - (accessor, currentRow) -> is(Duration.ofDays(currentRow + 1))); - } - - @Test - public void getObjectForNull() throws Exception { - int valueCount = vector.getValueCount(); - for (int i = 0; i < valueCount; i++) { - vector.setNull(i); - } - - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcDurationVectorAccessor::getObject, - (accessor, currentRow) -> equalTo(null)); - } - - @Test - public void getString() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcAccessor::getString, - (accessor, currentRow) -> is(Duration.ofDays(currentRow + 1).toString())); - } - - @Test - public void getStringForNull() throws Exception { - int valueCount = vector.getValueCount(); - for (int i = 0; i < valueCount; i++) { - vector.setNull(i); - } - - accessorIterator.assertAccessorGetter( - vector, ArrowFlightJdbcAccessor::getString, (accessor, currentRow) -> equalTo(null)); - } - - @Test - public void testShouldGetObjectClass() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcAccessor::getObjectClass, - (accessor, currentRow) -> equalTo(Duration.class)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessorTest.java deleted file mode 100644 index 521debc323e83..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcIntervalVectorAccessorTest.java +++ /dev/null @@ -1,331 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.calendar; - -import static org.apache.arrow.driver.jdbc.utils.IntervalStringUtils.formatIntervalDay; -import static org.apache.arrow.driver.jdbc.utils.IntervalStringUtils.formatIntervalYear; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.time.Duration; -import java.time.Period; -import java.time.format.DateTimeParseException; -import java.util.function.Supplier; -import java.util.stream.Stream; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.PeriodDuration; -import org.apache.arrow.vector.ValueVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.extension.RegisterExtension; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -public class ArrowFlightJdbcIntervalVectorAccessorTest { - - @RegisterExtension - public static 
RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private ValueVector vector; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> { - ArrowFlightJdbcAccessorFactory.WasNullConsumer noOpWasNullConsumer = - (boolean wasNull) -> {}; - if (vector instanceof IntervalDayVector) { - return new ArrowFlightJdbcIntervalVectorAccessor( - (IntervalDayVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof IntervalYearVector) { - return new ArrowFlightJdbcIntervalVectorAccessor( - (IntervalYearVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof IntervalMonthDayNanoVector) { - return new ArrowFlightJdbcIntervalVectorAccessor( - (IntervalMonthDayNanoVector) vector, getCurrentRow, noOpWasNullConsumer); - } - return null; - }; - - final AccessorTestUtils.AccessorIterator accessorIterator = - new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - public static Stream data() { - return Stream.of( - Arguments.of( - (Supplier) - () -> { - IntervalDayVector vector = - new IntervalDayVector("", rootAllocatorTestExtension.getRootAllocator()); - - int valueCount = 10; - vector.setValueCount(valueCount); - for (int i = 0; i < valueCount; i++) { - vector.set(i, i + 1, (i + 1) * 1000); - } - return vector; - }, - "IntervalDayVector"), - Arguments.of( - (Supplier) - () -> { - IntervalYearVector vector = - new IntervalYearVector("", rootAllocatorTestExtension.getRootAllocator()); - - int valueCount = 10; - vector.setValueCount(valueCount); - for (int i = 0; i < valueCount; i++) { - vector.set(i, i + 1); - } - return vector; - }, - "IntervalYearVector"), - Arguments.of( - (Supplier) - () -> { - IntervalMonthDayNanoVector vector = - new IntervalMonthDayNanoVector( - "", rootAllocatorTestExtension.getRootAllocator()); - - int valueCount = 10; - vector.setValueCount(valueCount); - for (int i = 0; i < valueCount; i++) { - vector.set(i, 
i + 1, (i + 1) * 10, (i + 1) * 100); - } - return vector; - }, - "IntervalMonthDayNanoVector")); - } - - public void setup(Supplier vectorSupplier) { - this.vector = vectorSupplier.get(); - } - - @AfterEach - public void tearDown() { - if (this.vector != null) { - this.vector.close(); - } - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectReturnValidObject(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcIntervalVectorAccessor::getObject, - (accessor, currentRow) -> is(getExpectedObject(vector, currentRow))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectPassingObjectClassAsParameterReturnValidObject( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - Class objectClass = getExpectedObjectClassForVector(vector); - accessorIterator.assertAccessorGetter( - vector, - accessor -> accessor.getObject(objectClass), - (accessor, currentRow) -> is(getExpectedObject(vector, currentRow))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectReturnNull(Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - setAllNullOnVector(vector); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcIntervalVectorAccessor::getObject, - (accessor, currentRow) -> equalTo(null)); - } - - private String getStringOnVector(ValueVector vector, int index) { - Object object = getExpectedObject(vector, index); - if (object == null) { - return null; - } else if (vector instanceof IntervalDayVector) { - return formatIntervalDay(Duration.parse(object.toString())); - } else if (vector instanceof IntervalYearVector) { - return formatIntervalYear(Period.parse(object.toString())); - } else if (vector instanceof IntervalMonthDayNanoVector) { - String iso8601IntervalString = ((PeriodDuration) object).toISO8601IntervalString(); - String[] periodAndDuration = 
iso8601IntervalString.split("T"); - if (periodAndDuration.length == 1) { - // If there is no 'T', then either Period or Duration is zero, and the other one will - // successfully parse it - String periodOrDuration = periodAndDuration[0]; - try { - return new PeriodDuration(Period.parse(periodOrDuration), Duration.ZERO) - .toISO8601IntervalString(); - } catch (DateTimeParseException e) { - return new PeriodDuration(Period.ZERO, Duration.parse(periodOrDuration)) - .toISO8601IntervalString(); - } - } else { - // If there is a 'T', both Period and Duration are non-zero, and we just need to prepend the - // 'PT' to the - // duration for both to parse successfully - Period parse = Period.parse(periodAndDuration[0]); - Duration duration = Duration.parse("PT" + periodAndDuration[1]); - return new PeriodDuration(parse, duration).toISO8601IntervalString(); - } - } - return null; - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetIntervalYear(Supplier vectorSupplier) { - setup(vectorSupplier); - assertEquals("-002-00", formatIntervalYear(Period.parse("P-2Y"))); - assertEquals("-001-01", formatIntervalYear(Period.parse("P-1Y-1M"))); - assertEquals("-001-02", formatIntervalYear(Period.parse("P-1Y-2M"))); - assertEquals("-002-03", formatIntervalYear(Period.parse("P-2Y-3M"))); - assertEquals("-002-04", formatIntervalYear(Period.parse("P-2Y-4M"))); - assertEquals("-011-01", formatIntervalYear(Period.parse("P-11Y-1M"))); - assertEquals("+002-00", formatIntervalYear(Period.parse("P+2Y"))); - assertEquals("+001-01", formatIntervalYear(Period.parse("P+1Y1M"))); - assertEquals("+001-02", formatIntervalYear(Period.parse("P+1Y2M"))); - assertEquals("+002-03", formatIntervalYear(Period.parse("P+2Y3M"))); - assertEquals("+002-04", formatIntervalYear(Period.parse("P+2Y4M"))); - assertEquals("+011-01", formatIntervalYear(Period.parse("P+11Y1M"))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetIntervalDay(Supplier vectorSupplier) { - 
setup(vectorSupplier); - assertEquals("-001 00:00:00.000", formatIntervalDay(Duration.parse("PT-24H"))); - assertEquals("+001 00:00:00.000", formatIntervalDay(Duration.parse("PT+24H"))); - assertEquals("-000 01:00:00.000", formatIntervalDay(Duration.parse("PT-1H"))); - // "JDK-8054978: java.time.Duration.parse() fails for negative duration with 0 seconds and - // nanos" not fixed on JDK8 - // assertEquals("-000 01:00:00.001", formatIntervalDay(Duration.parse("PT-1H-0M-00.001S"))); - assertEquals("-000 01:00:00.001", formatIntervalDay(Duration.ofHours(-1).minusMillis(1))); - assertEquals("-000 01:01:01.000", formatIntervalDay(Duration.parse("PT-1H-1M-1S"))); - assertEquals("-000 02:02:02.002", formatIntervalDay(Duration.parse("PT-2H-2M-02.002S"))); - assertEquals("-000 23:59:59.999", formatIntervalDay(Duration.parse("PT-23H-59M-59.999S"))); - // "JDK-8054978: java.time.Duration.parse() fails for negative duration with 0 seconds and - // nanos" not fixed on JDK8 - // assertEquals("-000 11:59:00.100", formatIntervalDay(Duration.parse("PT-11H-59M-00.100S"))); - assertEquals( - "-000 11:59:00.100", - formatIntervalDay(Duration.ofHours(-11).minusMinutes(59).minusMillis(100))); - assertEquals("-000 05:02:03.000", formatIntervalDay(Duration.parse("PT-5H-2M-3S"))); - assertEquals("-000 22:22:22.222", formatIntervalDay(Duration.parse("PT-22H-22M-22.222S"))); - assertEquals("+000 01:00:00.000", formatIntervalDay(Duration.parse("PT+1H"))); - assertEquals("+000 01:00:00.001", formatIntervalDay(Duration.parse("PT+1H0M00.001S"))); - assertEquals("+000 01:01:01.000", formatIntervalDay(Duration.parse("PT+1H1M1S"))); - assertEquals("+000 02:02:02.002", formatIntervalDay(Duration.parse("PT+2H2M02.002S"))); - assertEquals("+000 23:59:59.999", formatIntervalDay(Duration.parse("PT+23H59M59.999S"))); - assertEquals("+000 11:59:00.100", formatIntervalDay(Duration.parse("PT+11H59M00.100S"))); - assertEquals("+000 05:02:03.000", formatIntervalDay(Duration.parse("PT+5H2M3S"))); - 
assertEquals("+000 22:22:22.222", formatIntervalDay(Duration.parse("PT+22H22M22.222S"))); - } - - @ParameterizedTest - @MethodSource("data") - public void testIntervalDayWithJodaPeriodObject(Supplier vectorSupplier) { - setup(vectorSupplier); - assertEquals("+1567 00:00:00.000", formatIntervalDay(Duration.ofDays(1567))); - assertEquals("-1567 00:00:00.000", formatIntervalDay(Duration.ofDays(-1567))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetStringReturnCorrectString(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcIntervalVectorAccessor::getString, - (accessor, currentRow) -> is(getStringOnVector(vector, currentRow))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetStringReturnNull(Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - setAllNullOnVector(vector); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcIntervalVectorAccessor::getString, - (accessor, currentRow) -> equalTo(null)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectClassReturnCorrectClass(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - Class expectedObjectClass = getExpectedObjectClassForVector(vector); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcIntervalVectorAccessor::getObjectClass, - (accessor, currentRow) -> equalTo(expectedObjectClass)); - } - - private Class getExpectedObjectClassForVector(ValueVector vector) { - if (vector instanceof IntervalDayVector) { - return Duration.class; - } else if (vector instanceof IntervalYearVector) { - return Period.class; - } else if (vector instanceof IntervalMonthDayNanoVector) { - return PeriodDuration.class; - } - return null; - } - - private void setAllNullOnVector(ValueVector vector) { - int valueCount = vector.getValueCount(); - if (vector instanceof IntervalDayVector) { - 
for (int i = 0; i < valueCount; i++) { - ((IntervalDayVector) vector).setNull(i); - } - } else if (vector instanceof IntervalYearVector) { - for (int i = 0; i < valueCount; i++) { - ((IntervalYearVector) vector).setNull(i); - } - } else if (vector instanceof IntervalMonthDayNanoVector) { - for (int i = 0; i < valueCount; i++) { - ((IntervalMonthDayNanoVector) vector).setNull(i); - } - } - } - - private Object getExpectedObject(ValueVector vector, int currentRow) { - if (vector instanceof IntervalDayVector) { - return Duration.ofDays(currentRow + 1).plusMillis((currentRow + 1) * 1000L); - } else if (vector instanceof IntervalYearVector) { - return Period.ofMonths(currentRow + 1); - } else if (vector instanceof IntervalMonthDayNanoVector) { - Period period = Period.ofMonths(currentRow + 1).plusDays((currentRow + 1) * 10L); - Duration duration = Duration.ofNanos((currentRow + 1) * 100L); - return new PeriodDuration(period, duration); - } - return null; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessorTest.java deleted file mode 100644 index 2e329f148e66c..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeStampVectorAccessorTest.java +++ /dev/null @@ -1,376 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.calendar; - -import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeStampVectorAccessor.getTimeUnitForVector; -import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeStampVectorAccessor.getTimeZoneForVector; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; - -import java.sql.Date; -import java.sql.Time; -import java.sql.Timestamp; -import java.time.LocalDateTime; -import java.util.Calendar; -import java.util.TimeZone; -import java.util.concurrent.TimeUnit; -import java.util.function.Supplier; -import java.util.stream.Stream; -import org.apache.arrow.driver.jdbc.accessor.impl.text.ArrowFlightJdbcVarCharVectorAccessor; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.util.Text; -import org.hamcrest.CoreMatchers; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assumptions; -import org.junit.jupiter.api.extension.RegisterExtension; -import org.junit.jupiter.params.ParameterizedTest; -import 
org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -public class ArrowFlightJdbcTimeStampVectorAccessorTest { - - public static final String AMERICA_VANCOUVER = "America/Vancouver"; - public static final String ASIA_BANGKOK = "Asia/Bangkok"; - public static final String AMERICA_SAO_PAULO = "America/Sao_Paulo"; - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private TimeStampVector vector; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> - new ArrowFlightJdbcTimeStampVectorAccessor( - (TimeStampVector) vector, getCurrentRow, (boolean wasNull) -> {}); - - private final AccessorTestUtils.AccessorIterator - accessorIterator = new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - public static Stream data() { - return Stream.of( - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampNanoVector(), - "TimeStampNanoVector", - null), - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampNanoTZVector("UTC"), - "TimeStampNanoTZVector", - "UTC"), - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampNanoTZVector(AMERICA_VANCOUVER), - "TimeStampNanoTZVector", - AMERICA_VANCOUVER), - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampNanoTZVector(ASIA_BANGKOK), - "TimeStampNanoTZVector", - ASIA_BANGKOK), - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampMicroVector(), - "TimeStampMicroVector", - null), - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampMicroTZVector("UTC"), - "TimeStampMicroTZVector", - "UTC"), - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampMicroTZVector(AMERICA_VANCOUVER), - "TimeStampMicroTZVector", - AMERICA_VANCOUVER), - Arguments.of( - (Supplier) - () -> 
rootAllocatorTestExtension.createTimeStampMicroTZVector(ASIA_BANGKOK), - "TimeStampMicroTZVector", - ASIA_BANGKOK), - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampMilliVector(), - "TimeStampMilliVector", - null), - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampMilliTZVector("UTC"), - "TimeStampMilliTZVector", - "UTC"), - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampMilliTZVector(AMERICA_VANCOUVER), - "TimeStampMilliTZVector", - AMERICA_VANCOUVER), - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampMilliTZVector(ASIA_BANGKOK), - "TimeStampMilliTZVector", - ASIA_BANGKOK), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createTimeStampSecVector(), - "TimeStampSecVector", - null), - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampSecTZVector("UTC"), - "TimeStampSecTZVector", - "UTC"), - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampSecTZVector(AMERICA_VANCOUVER), - "TimeStampSecTZVector", - AMERICA_VANCOUVER), - Arguments.of( - (Supplier) - () -> rootAllocatorTestExtension.createTimeStampSecTZVector(ASIA_BANGKOK), - "TimeStampSecTZVector", - ASIA_BANGKOK)); - } - - public void setup(Supplier vectorSupplier) { - this.vector = vectorSupplier.get(); - } - - @AfterEach - public void tearDown() { - this.vector.close(); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetTimestampReturnValidTimestampWithoutCalendar( - Supplier vectorSupplier, String vectorType, String timeZone) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - accessor -> accessor.getTimestamp(null), - (accessor, currentRow) -> is(getTimestampForVector(currentRow, timeZone))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetTimestampReturnValidTimestampWithCalendar( - Supplier vectorSupplier) throws Exception { 
- setup(vectorSupplier); - TimeZone timeZone = TimeZone.getTimeZone(AMERICA_SAO_PAULO); - Calendar calendar = Calendar.getInstance(timeZone); - - TimeZone timeZoneForVector = getTimeZoneForVector(vector); - - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - final Timestamp resultWithoutCalendar = accessor.getTimestamp(null); - final Timestamp result = accessor.getTimestamp(calendar); - - long offset = - (long) timeZone.getOffset(resultWithoutCalendar.getTime()) - - timeZoneForVector.getOffset(resultWithoutCalendar.getTime()); - - assertThat(resultWithoutCalendar.getTime() - result.getTime(), is(offset)); - assertThat(accessor.wasNull(), is(false)); - }); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetTimestampReturnNull(Supplier vectorSupplier) { - setup(vectorSupplier); - vector.setNull(0); - ArrowFlightJdbcTimeStampVectorAccessor accessor = accessorSupplier.supply(vector, () -> 0); - assertThat(accessor.getTimestamp(null), CoreMatchers.equalTo(null)); - assertThat(accessor.wasNull(), is(true)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetDateReturnValidDateWithoutCalendar( - Supplier vectorSupplier, String vectorType, String timeZone) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - accessor -> accessor.getDate(null), - (accessor, currentRow) -> - is(new Date(getTimestampForVector(currentRow, timeZone).getTime()))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetDateReturnValidDateWithCalendar(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - TimeZone timeZone = TimeZone.getTimeZone(AMERICA_SAO_PAULO); - Calendar calendar = Calendar.getInstance(timeZone); - - TimeZone timeZoneForVector = getTimeZoneForVector(vector); - - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - final Date resultWithoutCalendar = accessor.getDate(null); - final Date result = 
accessor.getDate(calendar); - - long offset = - (long) timeZone.getOffset(resultWithoutCalendar.getTime()) - - timeZoneForVector.getOffset(resultWithoutCalendar.getTime()); - - assertThat(resultWithoutCalendar.getTime() - result.getTime(), is(offset)); - assertThat(accessor.wasNull(), is(false)); - }); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetDateReturnNull(Supplier vectorSupplier) { - setup(vectorSupplier); - vector.setNull(0); - ArrowFlightJdbcTimeStampVectorAccessor accessor = accessorSupplier.supply(vector, () -> 0); - assertThat(accessor.getDate(null), CoreMatchers.equalTo(null)); - assertThat(accessor.wasNull(), is(true)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetTimeReturnValidTimeWithoutCalendar( - Supplier vectorSupplier, String vectorType, String timeZone) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - accessor -> accessor.getTime(null), - (accessor, currentRow) -> - is(new Time(getTimestampForVector(currentRow, timeZone).getTime()))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetTimeReturnValidTimeWithCalendar(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - TimeZone timeZone = TimeZone.getTimeZone(AMERICA_SAO_PAULO); - Calendar calendar = Calendar.getInstance(timeZone); - - TimeZone timeZoneForVector = getTimeZoneForVector(vector); - - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - final Time resultWithoutCalendar = accessor.getTime(null); - final Time result = accessor.getTime(calendar); - - long offset = - (long) timeZone.getOffset(resultWithoutCalendar.getTime()) - - timeZoneForVector.getOffset(resultWithoutCalendar.getTime()); - - assertThat(resultWithoutCalendar.getTime() - result.getTime(), is(offset)); - assertThat(accessor.wasNull(), is(false)); - }); - } - - @ParameterizedTest - @MethodSource("data") - public void 
testShouldGetTimeReturnNull(Supplier vectorSupplier) { - setup(vectorSupplier); - vector.setNull(0); - ArrowFlightJdbcTimeStampVectorAccessor accessor = accessorSupplier.supply(vector, () -> 0); - assertThat(accessor.getTime(null), CoreMatchers.equalTo(null)); - assertThat(accessor.wasNull(), is(true)); - } - - private Timestamp getTimestampForVector(int currentRow, String timeZone) { - Object object = vector.getObject(currentRow); - - Timestamp expectedTimestamp = null; - if (object instanceof LocalDateTime) { - expectedTimestamp = Timestamp.valueOf((LocalDateTime) object); - } else if (object instanceof Long) { - TimeUnit timeUnit = getTimeUnitForVector(vector); - long millis = timeUnit.toMillis((Long) object); - long offset = TimeZone.getTimeZone(timeZone).getOffset(millis); - expectedTimestamp = new Timestamp(millis + offset); - } - return expectedTimestamp; - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectClass(Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, ArrowFlightJdbcTimeStampVectorAccessor::getObjectClass, equalTo(Timestamp.class)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetStringBeConsistentWithVarCharAccessorWithoutCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - assertGetStringIsConsistentWithVarCharAccessor(null); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetStringBeConsistentWithVarCharAccessorWithCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - // Ignore for TimeStamp vectors with TZ, as VarChar accessor won't consider their TZ - Assumptions.assumeTrue( - vector instanceof TimeStampNanoVector - || vector instanceof TimeStampMicroVector - || vector instanceof TimeStampMilliVector - || vector instanceof TimeStampSecVector); - Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone(AMERICA_VANCOUVER)); - 
assertGetStringIsConsistentWithVarCharAccessor(calendar); - } - - private void assertGetStringIsConsistentWithVarCharAccessor(Calendar calendar) throws Exception { - try (VarCharVector varCharVector = - new VarCharVector("", rootAllocatorTestExtension.getRootAllocator())) { - varCharVector.allocateNew(1); - ArrowFlightJdbcVarCharVectorAccessor varCharVectorAccessor = - new ArrowFlightJdbcVarCharVectorAccessor(varCharVector, () -> 0, (boolean wasNull) -> {}); - - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - final String string = accessor.getString(); - varCharVector.set(0, new Text(string)); - varCharVector.setValueCount(1); - - Timestamp timestampFromVarChar = varCharVectorAccessor.getTimestamp(calendar); - Timestamp timestamp = accessor.getTimestamp(calendar); - - assertThat(timestamp, is(timestampFromVarChar)); - assertThat(accessor.wasNull(), is(false)); - }); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessorTest.java deleted file mode 100644 index 24971052db1be..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/calendar/ArrowFlightJdbcTimeVectorAccessorTest.java +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.calendar; - -import static org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeVectorAccessor.getTimeUnitForVector; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.CoreMatchers.not; -import static org.hamcrest.MatcherAssert.assertThat; - -import java.sql.Time; -import java.sql.Timestamp; -import java.time.LocalDateTime; -import java.util.Calendar; -import java.util.TimeZone; -import java.util.concurrent.TimeUnit; -import java.util.function.Supplier; -import java.util.stream.Stream; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.driver.jdbc.accessor.impl.text.ArrowFlightJdbcVarCharVectorAccessor; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.util.Text; -import org.hamcrest.CoreMatchers; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.extension.RegisterExtension; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - 
-public class ArrowFlightJdbcTimeVectorAccessorTest { - - public static final String AMERICA_VANCOUVER = "America/Vancouver"; - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private BaseFixedWidthVector vector; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> { - ArrowFlightJdbcAccessorFactory.WasNullConsumer noOpWasNullConsumer = - (boolean wasNull) -> {}; - if (vector instanceof TimeNanoVector) { - return new ArrowFlightJdbcTimeVectorAccessor( - (TimeNanoVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof TimeMicroVector) { - return new ArrowFlightJdbcTimeVectorAccessor( - (TimeMicroVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof TimeMilliVector) { - return new ArrowFlightJdbcTimeVectorAccessor( - (TimeMilliVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof TimeSecVector) { - return new ArrowFlightJdbcTimeVectorAccessor( - (TimeSecVector) vector, getCurrentRow, noOpWasNullConsumer); - } - return null; - }; - - private final AccessorTestUtils.AccessorIterator - accessorIterator = new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - public static Stream data() { - return Stream.of( - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createTimeNanoVector(), - "TimeNanoVector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createTimeMicroVector(), - "TimeMicroVector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createTimeMilliVector(), - "TimeMilliVector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createTimeSecVector(), - "TimeSecVector")); - } - - public void setup(Supplier vectorSupplier) { - this.vector = vectorSupplier.get(); - } - - @AfterEach - public void tearDown() { - this.vector.close(); - } - - @ParameterizedTest - @MethodSource("data") - 
public void testShouldGetTimestampReturnValidTimestampWithoutCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - accessor -> accessor.getTimestamp(null), - (accessor, currentRow) -> is(getTimestampForVector(currentRow))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetTimestampReturnValidTimestampWithCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - TimeZone timeZone = TimeZone.getTimeZone(AMERICA_VANCOUVER); - Calendar calendar = Calendar.getInstance(timeZone); - - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - final Timestamp resultWithoutCalendar = accessor.getTimestamp(null); - final Timestamp result = accessor.getTimestamp(calendar); - - long offset = timeZone.getOffset(resultWithoutCalendar.getTime()); - - assertThat(resultWithoutCalendar.getTime() - result.getTime(), is(offset)); - assertThat(accessor.wasNull(), is(false)); - }); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetTimestampReturnNull(Supplier vectorSupplier) { - setup(vectorSupplier); - vector.setNull(0); - ArrowFlightJdbcTimeVectorAccessor accessor = accessorSupplier.supply(vector, () -> 0); - assertThat(accessor.getTimestamp(null), CoreMatchers.equalTo(null)); - assertThat(accessor.wasNull(), is(true)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetTimeReturnValidTimeWithoutCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - accessor -> accessor.getTime(null), - (accessor, currentRow) -> { - Timestamp expectedTimestamp = getTimestampForVector(currentRow); - return is(new Time(expectedTimestamp.getTime())); - }); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetTimeReturnValidTimeWithCalendar( - Supplier vectorSupplier) throws Exception { - 
setup(vectorSupplier); - TimeZone timeZone = TimeZone.getTimeZone(AMERICA_VANCOUVER); - Calendar calendar = Calendar.getInstance(timeZone); - - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - final Time resultWithoutCalendar = accessor.getTime(null); - final Time result = accessor.getTime(calendar); - - long offset = timeZone.getOffset(resultWithoutCalendar.getTime()); - - assertThat(resultWithoutCalendar.getTime() - result.getTime(), is(offset)); - assertThat(accessor.wasNull(), is(false)); - }); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetTimeReturnNull(Supplier vectorSupplier) { - setup(vectorSupplier); - vector.setNull(0); - ArrowFlightJdbcTimeVectorAccessor accessor = accessorSupplier.supply(vector, () -> 0); - assertThat(accessor.getTime(null), CoreMatchers.equalTo(null)); - assertThat(accessor.wasNull(), is(true)); - } - - private Timestamp getTimestampForVector(int currentRow) { - Object object = vector.getObject(currentRow); - - Timestamp expectedTimestamp = null; - if (object instanceof LocalDateTime) { - expectedTimestamp = Timestamp.valueOf((LocalDateTime) object); - } else if (object instanceof Number) { - long value = ((Number) object).longValue(); - TimeUnit timeUnit = getTimeUnitForVector(vector); - long millis = timeUnit.toMillis(value); - expectedTimestamp = new Timestamp(millis); - } - return expectedTimestamp; - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectClass(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, ArrowFlightJdbcTimeVectorAccessor::getObjectClass, equalTo(Time.class)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetStringBeConsistentWithVarCharAccessorWithoutCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - assertGetStringIsConsistentWithVarCharAccessor(null); - } - - @ParameterizedTest - 
@MethodSource("data") - public void testShouldGetStringBeConsistentWithVarCharAccessorWithCalendar( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone(AMERICA_VANCOUVER)); - assertGetStringIsConsistentWithVarCharAccessor(calendar); - } - - @ParameterizedTest - @MethodSource("data") - public void testValidateGetStringTimeZoneConsistency( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - final TimeZone defaultTz = TimeZone.getDefault(); - try { - final String string = - accessor.getString(); // Should always be UTC as no calendar is provided - - // Validate with UTC - Time time = accessor.getTime(null); - TimeZone.setDefault(TimeZone.getTimeZone("UTC")); - assertThat(time.toString(), is(string)); - - // Validate with different TZ - TimeZone.setDefault(TimeZone.getTimeZone(AMERICA_VANCOUVER)); - assertThat(time.toString(), not(string)); - - assertThat(accessor.wasNull(), is(false)); - } finally { - // Set default Tz back - TimeZone.setDefault(defaultTz); - } - }); - } - - private void assertGetStringIsConsistentWithVarCharAccessor(Calendar calendar) throws Exception { - try (VarCharVector varCharVector = - new VarCharVector("", rootAllocatorTestExtension.getRootAllocator())) { - varCharVector.allocateNew(1); - ArrowFlightJdbcVarCharVectorAccessor varCharVectorAccessor = - new ArrowFlightJdbcVarCharVectorAccessor(varCharVector, () -> 0, (boolean wasNull) -> {}); - - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - final String string = accessor.getString(); - varCharVector.set(0, new Text(string)); - varCharVector.setValueCount(1); - - Time timeFromVarChar = varCharVectorAccessor.getTime(calendar); - Time time = accessor.getTime(calendar); - - assertThat(time, is(timeFromVarChar)); - assertThat(accessor.wasNull(), is(false)); - }); - } - } -} diff --git 
a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListAccessorTest.java deleted file mode 100644 index ad689837e2201..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcListAccessorTest.java +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.MatcherAssert.assertThat; - -import java.sql.Array; -import java.sql.ResultSet; -import java.util.Arrays; -import java.util.List; -import java.util.function.Supplier; -import java.util.stream.Stream; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.hamcrest.CoreMatchers; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.extension.RegisterExtension; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -public class AbstractArrowFlightJdbcListAccessorTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private ValueVector vector; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> { - ArrowFlightJdbcAccessorFactory.WasNullConsumer noOpWasNullConsumer = - (boolean wasNull) -> {}; - if (vector instanceof ListVector) { - return new ArrowFlightJdbcListVectorAccessor( - (ListVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof LargeListVector) { - return new ArrowFlightJdbcLargeListVectorAccessor( - (LargeListVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof FixedSizeListVector) { - return new ArrowFlightJdbcFixedSizeListVectorAccessor( - (FixedSizeListVector) vector, getCurrentRow, noOpWasNullConsumer); - } - return 
null; - }; - - final AccessorTestUtils.AccessorIterator - accessorIterator = new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - public static Stream data() { - return Stream.of( - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createListVector(), - "ListVector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createLargeListVector(), - "LargeListVector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createFixedSizeListVector(), - "FixedSizeListVector")); - } - - public void setup(Supplier vectorSupplier) { - this.vector = vectorSupplier.get(); - } - - @AfterEach - public void tearDown() { - this.vector.close(); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectClassReturnCorrectClass(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - AbstractArrowFlightJdbcListVectorAccessor::getObjectClass, - (accessor, currentRow) -> equalTo(List.class)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectReturnValidList(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - AbstractArrowFlightJdbcListVectorAccessor::getObject, - (accessor, currentRow) -> - equalTo(Arrays.asList(0, currentRow, currentRow * 2, currentRow * 3, currentRow * 4))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectReturnNull(Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - vector.clear(); - vector.allocateNewSafe(); - vector.setValueCount(5); - - accessorIterator.assertAccessorGetter( - vector, - AbstractArrowFlightJdbcListVectorAccessor::getObject, - (accessor, currentRow) -> CoreMatchers.nullValue()); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetArrayReturnValidArray(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - 
accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - Array array = accessor.getArray(); - assert array != null; - - Object[] arrayObject = (Object[]) array.getArray(); - - assertThat( - arrayObject, - equalTo( - new Object[] {0, currentRow, currentRow * 2, currentRow * 3, currentRow * 4})); - }); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetArrayReturnNull(Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - vector.clear(); - vector.allocateNewSafe(); - vector.setValueCount(5); - - accessorIterator.assertAccessorGetter( - vector, AbstractArrowFlightJdbcListVectorAccessor::getArray, CoreMatchers.nullValue()); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetArrayReturnValidArrayPassingOffsets(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - Array array = accessor.getArray(); - assert array != null; - - Object[] arrayObject = (Object[]) array.getArray(1, 3); - - assertThat( - arrayObject, equalTo(new Object[] {currentRow, currentRow * 2, currentRow * 3})); - }); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetArrayGetResultSetReturnValidResultSet( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - Array array = accessor.getArray(); - assert array != null; - - try (ResultSet rs = array.getResultSet()) { - int count = 0; - while (rs.next()) { - final int value = rs.getInt(1); - assertThat(value, equalTo(currentRow * count)); - count++; - } - assertThat(count, equalTo(5)); - } - }); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessorTest.java 
b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessorTest.java deleted file mode 100644 index 6a143d2ee888f..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/AbstractArrowFlightJdbcUnionVectorAccessorTest.java +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.sql.SQLException; -import java.util.Calendar; -import java.util.Map; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.ArrowFlightJdbcNullVectorAccessor; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.ValueVector; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.Spy; -import org.mockito.junit.jupiter.MockitoExtension; - -@ExtendWith(MockitoExtension.class) -public class AbstractArrowFlightJdbcUnionVectorAccessorTest { - - @Mock ArrowFlightJdbcAccessor innerAccessor; - @Spy AbstractArrowFlightJdbcUnionVectorAccessorMock accessor; - - @BeforeEach - public void setup() { - when(accessor.getAccessor()).thenReturn(innerAccessor); - } - - @Test - public void testGetNCharacterStreamUsesSpecificAccessor() throws SQLException { - accessor.getNCharacterStream(); - verify(innerAccessor).getNCharacterStream(); - } - - @Test - public void testGetNStringUsesSpecificAccessor() throws SQLException { - accessor.getNString(); - verify(innerAccessor).getNString(); - } - - @Test - public void testGetSQLXMLUsesSpecificAccessor() throws SQLException { - accessor.getSQLXML(); - verify(innerAccessor).getSQLXML(); - } - - @Test - public void testGetNClobUsesSpecificAccessor() throws SQLException { - accessor.getNClob(); - verify(innerAccessor).getNClob(); - } - - @Test - public void testGetURLUsesSpecificAccessor() throws SQLException { - accessor.getURL(); - verify(innerAccessor).getURL(); - } - - @Test - public void testGetStructUsesSpecificAccessor() throws SQLException { - accessor.getStruct(); - verify(innerAccessor).getStruct(); - } - - @Test - public void 
testGetArrayUsesSpecificAccessor() throws SQLException { - accessor.getArray(); - verify(innerAccessor).getArray(); - } - - @Test - public void testGetClobUsesSpecificAccessor() throws SQLException { - accessor.getClob(); - verify(innerAccessor).getClob(); - } - - @Test - public void testGetBlobUsesSpecificAccessor() throws SQLException { - accessor.getBlob(); - verify(innerAccessor).getBlob(); - } - - @Test - public void testGetRefUsesSpecificAccessor() throws SQLException { - accessor.getRef(); - verify(innerAccessor).getRef(); - } - - @Test - public void testGetCharacterStreamUsesSpecificAccessor() throws SQLException { - accessor.getCharacterStream(); - verify(innerAccessor).getCharacterStream(); - } - - @Test - public void testGetBinaryStreamUsesSpecificAccessor() throws SQLException { - accessor.getBinaryStream(); - verify(innerAccessor).getBinaryStream(); - } - - @Test - public void testGetUnicodeStreamUsesSpecificAccessor() throws SQLException { - accessor.getUnicodeStream(); - verify(innerAccessor).getUnicodeStream(); - } - - @Test - public void testGetAsciiStreamUsesSpecificAccessor() throws SQLException { - accessor.getAsciiStream(); - verify(innerAccessor).getAsciiStream(); - } - - @Test - public void testGetBytesUsesSpecificAccessor() throws SQLException { - accessor.getBytes(); - verify(innerAccessor).getBytes(); - } - - @Test - public void testGetBigDecimalUsesSpecificAccessor() throws SQLException { - accessor.getBigDecimal(); - verify(innerAccessor).getBigDecimal(); - } - - @Test - public void testGetDoubleUsesSpecificAccessor() throws SQLException { - accessor.getDouble(); - verify(innerAccessor).getDouble(); - } - - @Test - public void testGetFloatUsesSpecificAccessor() throws SQLException { - accessor.getFloat(); - verify(innerAccessor).getFloat(); - } - - @Test - public void testGetLongUsesSpecificAccessor() throws SQLException { - accessor.getLong(); - verify(innerAccessor).getLong(); - } - - @Test - public void 
testGetIntUsesSpecificAccessor() throws SQLException { - accessor.getInt(); - verify(innerAccessor).getInt(); - } - - @Test - public void testGetShortUsesSpecificAccessor() throws SQLException { - accessor.getShort(); - verify(innerAccessor).getShort(); - } - - @Test - public void testGetByteUsesSpecificAccessor() throws SQLException { - accessor.getByte(); - verify(innerAccessor).getByte(); - } - - @Test - public void testGetBooleanUsesSpecificAccessor() throws SQLException { - accessor.getBoolean(); - verify(innerAccessor).getBoolean(); - } - - @Test - public void testGetStringUsesSpecificAccessor() throws SQLException { - accessor.getString(); - verify(innerAccessor).getString(); - } - - @Test - public void testGetObjectClassUsesSpecificAccessor() { - accessor.getObjectClass(); - verify(innerAccessor).getObjectClass(); - } - - @Test - public void testGetObjectWithClassUsesSpecificAccessor() throws SQLException { - accessor.getObject(Object.class); - verify(innerAccessor).getObject(Object.class); - } - - @Test - public void testGetTimestampUsesSpecificAccessor() throws SQLException { - Calendar calendar = Calendar.getInstance(); - accessor.getTimestamp(calendar); - verify(innerAccessor).getTimestamp(calendar); - } - - @Test - public void testGetTimeUsesSpecificAccessor() throws SQLException { - Calendar calendar = Calendar.getInstance(); - accessor.getTime(calendar); - verify(innerAccessor).getTime(calendar); - } - - @Test - public void testGetDateUsesSpecificAccessor() throws SQLException { - Calendar calendar = Calendar.getInstance(); - accessor.getDate(calendar); - verify(innerAccessor).getDate(calendar); - } - - @Test - public void testGetObjectUsesSpecificAccessor() throws SQLException { - Map> map = mock(Map.class); - accessor.getObject(map); - verify(innerAccessor).getObject(map); - } - - @Test - public void testGetBigDecimalWithScaleUsesSpecificAccessor() throws SQLException { - accessor.getBigDecimal(2); - verify(innerAccessor).getBigDecimal(2); - } - - 
private static class AbstractArrowFlightJdbcUnionVectorAccessorMock - extends AbstractArrowFlightJdbcUnionVectorAccessor { - protected AbstractArrowFlightJdbcUnionVectorAccessorMock() { - super(() -> 0, (boolean wasNull) -> {}); - } - - @Override - protected ArrowFlightJdbcAccessor createAccessorForVector(ValueVector vector) { - return new ArrowFlightJdbcNullVectorAccessor((boolean wasNull) -> {}); - } - - @Override - protected byte getCurrentTypeId() { - return 0; - } - - @Override - protected ValueVector getVectorByTypeId(byte typeId) { - return new NullVector(); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessorTest.java deleted file mode 100644 index 6832ac0850d4c..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcDenseUnionVectorAccessorTest.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; - -import java.sql.Timestamp; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.holders.NullableBigIntHolder; -import org.apache.arrow.vector.holders.NullableFloat8Holder; -import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.Field; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class ArrowFlightJdbcDenseUnionVectorAccessorTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private DenseUnionVector vector; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> - new ArrowFlightJdbcDenseUnionVectorAccessor( - (DenseUnionVector) vector, - getCurrentRow, - (boolean wasNull) -> { - // No Operation - }); - - private final AccessorTestUtils.AccessorIterator - accessorIterator = new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - @BeforeEach - public void setup() throws Exception { - this.vector = DenseUnionVector.empty("", rootAllocatorTestExtension.getRootAllocator()); - this.vector.allocateNew(); - - // write some data - byte bigIntTypeId = - this.vector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType())); - byte float8TypeId = - this.vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT8.getType())); - byte 
timestampMilliTypeId = - this.vector.registerNewTypeId(Field.nullable("", Types.MinorType.TIMESTAMPMILLI.getType())); - - NullableBigIntHolder nullableBigIntHolder = new NullableBigIntHolder(); - nullableBigIntHolder.isSet = 1; - nullableBigIntHolder.value = Long.MAX_VALUE; - this.vector.setTypeId(0, bigIntTypeId); - this.vector.setSafe(0, nullableBigIntHolder); - - NullableFloat8Holder nullableFloat4Holder = new NullableFloat8Holder(); - nullableFloat4Holder.isSet = 1; - nullableFloat4Holder.value = Math.PI; - this.vector.setTypeId(1, float8TypeId); - this.vector.setSafe(1, nullableFloat4Holder); - - NullableTimeStampMilliHolder nullableTimeStampMilliHolder = new NullableTimeStampMilliHolder(); - nullableTimeStampMilliHolder.isSet = 1; - nullableTimeStampMilliHolder.value = 1625702400000L; - this.vector.setTypeId(2, timestampMilliTypeId); - this.vector.setSafe(2, nullableTimeStampMilliHolder); - - nullableBigIntHolder.isSet = 0; - this.vector.setTypeId(3, bigIntTypeId); - this.vector.setSafe(3, nullableBigIntHolder); - - this.vector.setValueCount(5); - } - - @AfterEach - public void tearDown() { - this.vector.close(); - } - - @Test - public void getObject() throws Exception { - List result = accessorIterator.toList(vector); - List expected = - Arrays.asList(Long.MAX_VALUE, Math.PI, new Timestamp(1625702400000L), null, null); - - assertThat(result, is(expected)); - } - - @Test - public void getObjectForNull() throws Exception { - vector.reset(); - vector.setValueCount(5); - accessorIterator.assertAccessorGetter( - vector, AbstractArrowFlightJdbcUnionVectorAccessor::getObject, equalTo(null)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessorTest.java deleted file mode 100644 index 696e5afb71171..0000000000000 --- 
a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcMapVectorAccessorTest.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.sql.Array; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.Map; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.impl.UnionMapWriter; -import org.apache.arrow.vector.util.JsonStringHashMap; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - 
-public class ArrowFlightJdbcMapVectorAccessorTest { - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private MapVector vector; - - @BeforeEach - public void setup() { - vector = MapVector.empty("", rootAllocatorTestExtension.getRootAllocator(), false); - UnionMapWriter writer = vector.getWriter(); - writer.allocate(); - writer.setPosition(0); // optional - writer.startMap(); - writer.startEntry(); - writer.key().integer().writeInt(1); - writer.value().integer().writeInt(11); - writer.endEntry(); - writer.startEntry(); - writer.key().integer().writeInt(2); - writer.value().integer().writeInt(22); - writer.endEntry(); - writer.startEntry(); - writer.key().integer().writeInt(3); - writer.value().integer().writeInt(33); - writer.endEntry(); - writer.endMap(); - - writer.setPosition(1); - writer.startMap(); - writer.startEntry(); - writer.key().integer().writeInt(2); - writer.endEntry(); - writer.endMap(); - - writer.setPosition(2); - writer.startMap(); - writer.startEntry(); - writer.key().integer().writeInt(0); - writer.value().integer().writeInt(2000); - writer.endEntry(); - writer.startEntry(); - writer.key().integer().writeInt(1); - writer.value().integer().writeInt(2001); - writer.endEntry(); - writer.startEntry(); - writer.key().integer().writeInt(2); - writer.value().integer().writeInt(2002); - writer.endEntry(); - writer.startEntry(); - writer.key().integer().writeInt(3); - writer.value().integer().writeInt(2003); - writer.endEntry(); - writer.endMap(); - - writer.setValueCount(3); - } - - @AfterEach - public void tearDown() { - vector.close(); - } - - @Test - public void testShouldGetObjectReturnValidMap() { - AccessorTestUtils.Cursor cursor = new AccessorTestUtils.Cursor(vector.getValueCount()); - ArrowFlightJdbcMapVectorAccessor accessor = - new ArrowFlightJdbcMapVectorAccessor( - vector, cursor::getCurrentRow, (boolean wasNull) -> {}); - - Map expected = new 
JsonStringHashMap<>(); - expected.put(1, 11); - expected.put(2, 22); - expected.put(3, 33); - assertEquals(expected, accessor.getObject()); - assertFalse(accessor.wasNull()); - - cursor.next(); - expected = new JsonStringHashMap<>(); - expected.put(2, null); - assertEquals(expected, accessor.getObject()); - assertFalse(accessor.wasNull()); - - cursor.next(); - expected = new JsonStringHashMap<>(); - expected.put(0, 2000); - expected.put(1, 2001); - expected.put(2, 2002); - expected.put(3, 2003); - assertEquals(expected, accessor.getObject()); - assertFalse(accessor.wasNull()); - } - - @Test - public void testShouldGetObjectReturnNull() { - vector.setNull(0); - ArrowFlightJdbcMapVectorAccessor accessor = - new ArrowFlightJdbcMapVectorAccessor(vector, () -> 0, (boolean wasNull) -> {}); - - assertNull(accessor.getObject()); - assertTrue(accessor.wasNull()); - } - - @Test - public void testShouldGetArrayReturnValidArray() throws SQLException { - AccessorTestUtils.Cursor cursor = new AccessorTestUtils.Cursor(vector.getValueCount()); - ArrowFlightJdbcMapVectorAccessor accessor = - new ArrowFlightJdbcMapVectorAccessor( - vector, cursor::getCurrentRow, (boolean wasNull) -> {}); - - Array array = accessor.getArray(); - assertNotNull(array); - assertFalse(accessor.wasNull()); - - try (ResultSet resultSet = array.getResultSet()) { - assertTrue(resultSet.next()); - Map entry = resultSet.getObject(1, Map.class); - assertEquals(1, entry.get("key")); - assertEquals(11, entry.get("value")); - assertTrue(resultSet.next()); - entry = resultSet.getObject(1, Map.class); - assertEquals(2, entry.get("key")); - assertEquals(22, entry.get("value")); - assertTrue(resultSet.next()); - entry = resultSet.getObject(1, Map.class); - assertEquals(3, entry.get("key")); - assertEquals(33, entry.get("value")); - assertFalse(resultSet.next()); - } - - cursor.next(); - array = accessor.getArray(); - assertNotNull(array); - assertFalse(accessor.wasNull()); - try (ResultSet resultSet = 
array.getResultSet()) { - assertTrue(resultSet.next()); - Map entry = resultSet.getObject(1, Map.class); - assertEquals(2, entry.get("key")); - assertNull(entry.get("value")); - assertFalse(resultSet.next()); - } - - cursor.next(); - array = accessor.getArray(); - assertNotNull(array); - assertFalse(accessor.wasNull()); - try (ResultSet resultSet = array.getResultSet()) { - assertTrue(resultSet.next()); - Map entry = resultSet.getObject(1, Map.class); - assertEquals(0, entry.get("key")); - assertEquals(2000, entry.get("value")); - assertTrue(resultSet.next()); - entry = resultSet.getObject(1, Map.class); - assertEquals(1, entry.get("key")); - assertEquals(2001, entry.get("value")); - assertTrue(resultSet.next()); - entry = resultSet.getObject(1, Map.class); - assertEquals(2, entry.get("key")); - assertEquals(2002, entry.get("value")); - assertTrue(resultSet.next()); - entry = resultSet.getObject(1, Map.class); - assertEquals(3, entry.get("key")); - assertEquals(2003, entry.get("value")); - assertFalse(resultSet.next()); - } - } - - @Test - public void testShouldGetArrayReturnNull() { - vector.setNull(0); - ((StructVector) vector.getDataVector()).setNull(0); - - ArrowFlightJdbcMapVectorAccessor accessor = - new ArrowFlightJdbcMapVectorAccessor(vector, () -> 0, (boolean wasNull) -> {}); - - assertNull(accessor.getArray()); - assertTrue(accessor.wasNull()); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessorTest.java deleted file mode 100644 index d3cc7f8544579..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcStructVectorAccessorTest.java +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or 
more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.nullValue; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.sql.SQLException; -import java.sql.Struct; -import java.util.HashMap; -import java.util.Map; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.holders.NullableBitHolder; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.apache.arrow.vector.util.JsonStringHashMap; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import 
org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class ArrowFlightJdbcStructVectorAccessorTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private StructVector vector; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> - new ArrowFlightJdbcStructVectorAccessor( - (StructVector) vector, getCurrentRow, (boolean wasNull) -> {}); - - private final AccessorTestUtils.AccessorIterator - accessorIterator = new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - @BeforeEach - public void setUp() throws Exception { - Map metadata = new HashMap<>(); - metadata.put("k1", "v1"); - FieldType type = new FieldType(true, ArrowType.Struct.INSTANCE, null, metadata); - vector = new StructVector("", rootAllocatorTestExtension.getRootAllocator(), type, null); - vector.allocateNew(); - - IntVector intVector = - vector.addOrGet("int", FieldType.nullable(Types.MinorType.INT.getType()), IntVector.class); - Float8Vector float8Vector = - vector.addOrGet( - "float8", FieldType.nullable(Types.MinorType.FLOAT8.getType()), Float8Vector.class); - - intVector.setSafe(0, 100); - float8Vector.setSafe(0, 100.05); - vector.setIndexDefined(0); - intVector.setSafe(1, 200); - float8Vector.setSafe(1, 200.1); - vector.setIndexDefined(1); - - vector.setValueCount(2); - } - - @AfterEach - public void tearDown() throws Exception { - vector.close(); - } - - @Test - public void testShouldGetObjectClassReturnMapClass() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcStructVectorAccessor::getObjectClass, - (accessor, currentRow) -> equalTo(Map.class)); - } - - @Test - public void testShouldGetObjectReturnValidMap() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcStructVectorAccessor::getObject, - (accessor, currentRow) -> { - Map expected = 
new HashMap<>(); - expected.put("int", 100 * (currentRow + 1)); - expected.put("float8", 100.05 * (currentRow + 1)); - - return equalTo(expected); - }); - } - - @Test - public void testShouldGetObjectReturnNull() throws Exception { - vector.setNull(0); - vector.setNull(1); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcStructVectorAccessor::getObject, - (accessor, currentRow) -> nullValue()); - } - - @Test - public void testShouldGetStructReturnValidStruct() throws Exception { - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - Struct struct = accessor.getStruct(); - assert struct != null; - - Object[] expected = new Object[] {100 * (currentRow + 1), 100.05 * (currentRow + 1)}; - - assertThat(struct.getAttributes(), equalTo(expected)); - }); - } - - @Test - public void testShouldGetStructReturnNull() throws Exception { - vector.setNull(0); - vector.setNull(1); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcStructVectorAccessor::getStruct, - (accessor, currentRow) -> nullValue()); - } - - @Test - public void testShouldGetObjectWorkWithNestedComplexData() throws SQLException { - try (StructVector rootVector = - StructVector.empty("", rootAllocatorTestExtension.getRootAllocator())) { - StructVector structVector = rootVector.addOrGetStruct("struct"); - - FieldType intFieldType = FieldType.nullable(Types.MinorType.INT.getType()); - IntVector intVector = structVector.addOrGet("int", intFieldType, IntVector.class); - FieldType float8FieldType = FieldType.nullable(Types.MinorType.FLOAT8.getType()); - Float8Vector float8Vector = - structVector.addOrGet("float8", float8FieldType, Float8Vector.class); - - ListVector listVector = rootVector.addOrGetList("list"); - UnionListWriter listWriter = listVector.getWriter(); - listWriter.allocate(); - - UnionVector unionVector = rootVector.addOrGetUnion("union"); - - intVector.setSafe(0, 100); - intVector.setValueCount(1); - float8Vector.setSafe(0, 100.05); - 
float8Vector.setValueCount(1); - structVector.setIndexDefined(0); - - listWriter.setPosition(0); - listWriter.startList(); - listWriter.bigInt().writeBigInt(Long.MAX_VALUE); - listWriter.bigInt().writeBigInt(Long.MIN_VALUE); - listWriter.endList(); - listVector.setValueCount(1); - - unionVector.setType(0, Types.MinorType.BIT); - NullableBitHolder holder = new NullableBitHolder(); - holder.isSet = 1; - holder.value = 1; - unionVector.setSafe(0, holder); - unionVector.setValueCount(1); - - rootVector.setIndexDefined(0); - rootVector.setValueCount(1); - - Map expected = new JsonStringHashMap<>(); - Map nestedStruct = new JsonStringHashMap<>(); - nestedStruct.put("int", 100); - nestedStruct.put("float8", 100.05); - expected.put("struct", nestedStruct); - JsonStringArrayList nestedList = new JsonStringArrayList<>(); - nestedList.add(Long.MAX_VALUE); - nestedList.add(Long.MIN_VALUE); - expected.put("list", nestedList); - expected.put("union", true); - - ArrowFlightJdbcStructVectorAccessor accessor = - new ArrowFlightJdbcStructVectorAccessor(rootVector, () -> 0, (boolean wasNull) -> {}); - - assertEquals(expected, accessor.getObject()); - assertEquals(expected.toString(), accessor.getString()); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessorTest.java deleted file mode 100644 index 82cd882393cfa..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/complex/ArrowFlightJdbcUnionVectorAccessorTest.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.accessor.impl.complex; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; - -import java.sql.Timestamp; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.holders.NullableBigIntHolder; -import org.apache.arrow.vector.holders.NullableFloat8Holder; -import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder; -import org.apache.arrow.vector.types.Types; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class ArrowFlightJdbcUnionVectorAccessorTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private UnionVector vector; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> - new ArrowFlightJdbcUnionVectorAccessor( - (UnionVector) vector, getCurrentRow, (boolean wasNull) -> {}); - - private final AccessorTestUtils.AccessorIterator - accessorIterator = new 
AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - @BeforeEach - public void setup() { - this.vector = UnionVector.empty("", rootAllocatorTestExtension.getRootAllocator()); - this.vector.allocateNew(); - - NullableBigIntHolder nullableBigIntHolder = new NullableBigIntHolder(); - nullableBigIntHolder.isSet = 1; - nullableBigIntHolder.value = Long.MAX_VALUE; - this.vector.setType(0, Types.MinorType.BIGINT); - this.vector.setSafe(0, nullableBigIntHolder); - - NullableFloat8Holder nullableFloat4Holder = new NullableFloat8Holder(); - nullableFloat4Holder.isSet = 1; - nullableFloat4Holder.value = Math.PI; - this.vector.setType(1, Types.MinorType.FLOAT8); - this.vector.setSafe(1, nullableFloat4Holder); - - NullableTimeStampMilliHolder nullableTimeStampMilliHolder = new NullableTimeStampMilliHolder(); - nullableTimeStampMilliHolder.isSet = 1; - nullableTimeStampMilliHolder.value = 1625702400000L; - this.vector.setType(2, Types.MinorType.TIMESTAMPMILLI); - this.vector.setSafe(2, nullableTimeStampMilliHolder); - - nullableBigIntHolder.isSet = 0; - this.vector.setType(3, Types.MinorType.BIGINT); - this.vector.setSafe(3, nullableBigIntHolder); - - this.vector.setValueCount(5); - } - - @AfterEach - public void tearDown() { - this.vector.close(); - } - - @Test - public void getObject() throws Exception { - List result = accessorIterator.toList(vector); - List expected = - Arrays.asList(Long.MAX_VALUE, Math.PI, new Timestamp(1625702400000L), null, null); - - assertThat(result, is(expected)); - } - - @Test - public void getObjectForNull() throws Exception { - vector.reset(); - vector.setValueCount(5); - - accessorIterator.assertAccessorGetter( - vector, AbstractArrowFlightJdbcUnionVectorAccessor::getObject, equalTo(null)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorTest.java 
b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorTest.java deleted file mode 100644 index c7b050d00021c..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorTest.java +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.numeric; - -import static org.hamcrest.CoreMatchers.equalTo; - -import java.util.function.Supplier; -import java.util.stream.Stream; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.extension.RegisterExtension; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -public class ArrowFlightJdbcBaseIntVectorAccessorTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private BaseIntVector vector; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> { - ArrowFlightJdbcAccessorFactory.WasNullConsumer noOpWasNullConsumer = - (boolean wasNull) -> {}; - if (vector instanceof UInt1Vector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (UInt1Vector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof UInt2Vector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (UInt2Vector) vector, getCurrentRow, noOpWasNullConsumer); - } else { - if (vector instanceof UInt4Vector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (UInt4Vector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector 
instanceof UInt8Vector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (UInt8Vector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof TinyIntVector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (TinyIntVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof SmallIntVector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (SmallIntVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof IntVector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (IntVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof BigIntVector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (BigIntVector) vector, getCurrentRow, noOpWasNullConsumer); - } - } - throw new UnsupportedOperationException(); - }; - - private final AccessorTestUtils.AccessorIterator - accessorIterator = new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - public static Stream data() { - return Stream.of( - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createIntVector(), - "IntVector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createSmallIntVector(), - "SmallIntVector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createTinyIntVector(), - "TinyIntVector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createBigIntVector(), - "BigIntVector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createUInt1Vector(), - "UInt1Vector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createUInt2Vector(), - "UInt2Vector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createUInt4Vector(), - "UInt4Vector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createUInt8Vector(), - "UInt8Vector")); - } - - public void setup(Supplier vectorSupplier) { - this.vector = vectorSupplier.get(); - } - - @AfterEach - public void tearDown() { - 
this.vector.close(); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldConvertToByteMethodFromBaseIntVector(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcBaseIntVectorAccessor::getByte, - (accessor, currentRow) -> equalTo((byte) accessor.getLong())); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldConvertToShortMethodFromBaseIntVector( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcBaseIntVectorAccessor::getShort, - (accessor, currentRow) -> equalTo((short) accessor.getLong())); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldConvertToIntegerMethodFromBaseIntVector( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcBaseIntVectorAccessor::getInt, - (accessor, currentRow) -> equalTo((int) accessor.getLong())); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldConvertToFloatMethodFromBaseIntVector( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcBaseIntVectorAccessor::getFloat, - (accessor, currentRow) -> equalTo((float) accessor.getLong())); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldConvertToDoubleMethodFromBaseIntVector( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcBaseIntVectorAccessor::getDouble, - (accessor, currentRow) -> equalTo((double) accessor.getLong())); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldConvertToBooleanMethodFromBaseIntVector( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - 
accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcBaseIntVectorAccessor::getBoolean, - (accessor, currentRow) -> equalTo(accessor.getLong() != 0L)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectClass(Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, ArrowFlightJdbcBaseIntVectorAccessor::getObjectClass, equalTo(Long.class)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorUnitTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorUnitTest.java deleted file mode 100644 index e629d2f6ecd1c..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBaseIntVectorAccessorUnitTest.java +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.numeric; - -import static org.hamcrest.CoreMatchers.equalTo; - -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.hamcrest.CoreMatchers; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.jupiter.api.extension.RegisterExtension; -import org.mockito.junit.jupiter.MockitoExtension; - -@ExtendWith(MockitoExtension.class) -public class ArrowFlightJdbcBaseIntVectorAccessorUnitTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private static UInt4Vector int4Vector; - private static UInt8Vector int8Vector; - private static IntVector intVectorWithNull; - private static TinyIntVector tinyIntVector; - private static SmallIntVector smallIntVector; - private static IntVector intVector; - private static BigIntVector bigIntVector; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> { - ArrowFlightJdbcAccessorFactory.WasNullConsumer noOpWasNullConsumer = - (boolean wasNull) -> {}; - if (vector instanceof UInt1Vector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (UInt1Vector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof UInt2Vector) { - 
return new ArrowFlightJdbcBaseIntVectorAccessor( - (UInt2Vector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof UInt4Vector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (UInt4Vector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof UInt8Vector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (UInt8Vector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof TinyIntVector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (TinyIntVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof SmallIntVector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (SmallIntVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof IntVector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (IntVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof BigIntVector) { - return new ArrowFlightJdbcBaseIntVectorAccessor( - (BigIntVector) vector, getCurrentRow, noOpWasNullConsumer); - } - return null; - }; - - private final AccessorTestUtils.AccessorIterator - accessorIterator = new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - @BeforeAll - public static void setup() { - int4Vector = new UInt4Vector("ID", rootAllocatorTestExtension.getRootAllocator()); - int4Vector.setSafe(0, 0x80000001); - int4Vector.setValueCount(1); - - int8Vector = new UInt8Vector("ID", rootAllocatorTestExtension.getRootAllocator()); - int8Vector.setSafe(0, 0xFFFFFFFFFFFFFFFFL); - int8Vector.setValueCount(1); - - intVectorWithNull = new IntVector("ID", rootAllocatorTestExtension.getRootAllocator()); - intVectorWithNull.setNull(0); - intVectorWithNull.setValueCount(1); - - tinyIntVector = new TinyIntVector("ID", rootAllocatorTestExtension.getRootAllocator()); - tinyIntVector.setSafe(0, 0xAA); - tinyIntVector.setValueCount(1); - - smallIntVector = new SmallIntVector("ID", 
rootAllocatorTestExtension.getRootAllocator()); - smallIntVector.setSafe(0, 0xAABB); - smallIntVector.setValueCount(1); - - intVector = new IntVector("ID", rootAllocatorTestExtension.getRootAllocator()); - intVector.setSafe(0, 0xAABBCCDD); - intVector.setValueCount(1); - - bigIntVector = new BigIntVector("ID", rootAllocatorTestExtension.getRootAllocator()); - bigIntVector.setSafe(0, 0xAABBCCDDEEFFAABBL); - bigIntVector.setValueCount(1); - } - - @AfterAll - public static void tearDown() throws Exception { - AutoCloseables.close( - bigIntVector, - intVector, - smallIntVector, - tinyIntVector, - int4Vector, - int8Vector, - intVectorWithNull, - rootAllocatorTestExtension); - } - - @Test - public void testShouldGetStringFromUnsignedValue() throws Exception { - accessorIterator.assertAccessorGetter( - int8Vector, - ArrowFlightJdbcBaseIntVectorAccessor::getString, - equalTo("18446744073709551615")); - } - - @Test - public void testShouldGetBytesFromIntVectorThrowsSqlException() throws Exception { - accessorIterator.assertAccessorGetterThrowingException( - intVector, ArrowFlightJdbcBaseIntVectorAccessor::getBytes); - } - - @Test - public void testShouldGetStringFromIntVectorWithNull() throws Exception { - accessorIterator.assertAccessorGetter( - intVectorWithNull, - ArrowFlightJdbcBaseIntVectorAccessor::getString, - CoreMatchers.nullValue()); - } - - @Test - public void testShouldGetObjectFromInt() throws Exception { - accessorIterator.assertAccessorGetter( - intVector, ArrowFlightJdbcBaseIntVectorAccessor::getObject, equalTo(0xAABBCCDD)); - } - - @Test - public void testShouldGetObjectFromTinyInt() throws Exception { - accessorIterator.assertAccessorGetter( - tinyIntVector, ArrowFlightJdbcBaseIntVectorAccessor::getObject, equalTo((byte) 0xAA)); - } - - @Test - public void testShouldGetObjectFromSmallInt() throws Exception { - accessorIterator.assertAccessorGetter( - smallIntVector, ArrowFlightJdbcBaseIntVectorAccessor::getObject, equalTo((short) 0xAABB)); - } - - @Test - 
public void testShouldGetObjectFromBigInt() throws Exception { - accessorIterator.assertAccessorGetter( - bigIntVector, - ArrowFlightJdbcBaseIntVectorAccessor::getObject, - equalTo(0xAABBCCDDEEFFAABBL)); - } - - @Test - public void testShouldGetObjectFromUnsignedInt() throws Exception { - accessorIterator.assertAccessorGetter( - int4Vector, ArrowFlightJdbcBaseIntVectorAccessor::getObject, equalTo(0x80000001)); - } - - @Test - public void testShouldGetObjectFromIntVectorWithNull() throws Exception { - accessorIterator.assertAccessorGetter( - intVectorWithNull, - ArrowFlightJdbcBaseIntVectorAccessor::getObject, - CoreMatchers.nullValue()); - } - - @Test - public void testShouldGetBigDecimalFromIntVectorWithNull() throws Exception { - accessorIterator.assertAccessorGetter( - intVectorWithNull, - ArrowFlightJdbcBaseIntVectorAccessor::getBigDecimal, - CoreMatchers.nullValue()); - } - - @Test - public void testShouldGetBigDecimalWithScaleFromIntVectorWithNull() throws Exception { - accessorIterator.assertAccessorGetter( - intVectorWithNull, accessor -> accessor.getBigDecimal(2), CoreMatchers.nullValue()); - } - - @Test - public void testShouldGetBytesFromSmallVectorThrowsSqlException() throws Exception { - accessorIterator.assertAccessorGetterThrowingException( - smallIntVector, ArrowFlightJdbcBaseIntVectorAccessor::getBytes); - } - - @Test - public void testShouldGetBytesFromTinyIntVectorThrowsSqlException() throws Exception { - accessorIterator.assertAccessorGetterThrowingException( - tinyIntVector, ArrowFlightJdbcBaseIntVectorAccessor::getBytes); - } - - @Test - public void testShouldGetBytesFromBigIntVectorThrowsSqlException() throws Exception { - accessorIterator.assertAccessorGetterThrowingException( - bigIntVector, ArrowFlightJdbcBaseIntVectorAccessor::getBytes); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessorTest.java 
b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessorTest.java deleted file mode 100644 index 20c086b26dbdc..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcBitVectorAccessorTest.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.numeric; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; - -import java.math.BigDecimal; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils.AccessorIterator; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils.CheckedFunction; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.BitVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class ArrowFlightJdbcBitVectorAccessorTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> - new ArrowFlightJdbcBitVectorAccessor( - (BitVector) vector, getCurrentRow, (boolean wasNull) -> {}); - private final AccessorIterator accessorIterator = - new AccessorIterator<>(accessorSupplier); - private BitVector vector; - private BitVector vectorWithNull; - private boolean[] arrayToAssert; - - @BeforeEach - public void setup() { - this.arrayToAssert = new boolean[] {false, true}; - this.vector = rootAllocatorTestExtension.createBitVector(); - this.vectorWithNull = rootAllocatorTestExtension.createBitVectorForNullTests(); - } - - @AfterEach - public void tearDown() { - this.vector.close(); - this.vectorWithNull.close(); - } - - private void iterate( - final CheckedFunction function, - final T result, - final T resultIfFalse, - final BitVector vector) - throws Exception { - accessorIterator.assertAccessorGetter( - vector, - function, - (accessor, currentRow) -> is(arrayToAssert[currentRow] ? 
result : resultIfFalse)); - } - - @Test - public void testShouldGetBooleanMethodFromBitVector() throws Exception { - iterate(ArrowFlightJdbcBitVectorAccessor::getBoolean, true, false, vector); - } - - @Test - public void testShouldGetByteMethodFromBitVector() throws Exception { - iterate(ArrowFlightJdbcBitVectorAccessor::getByte, (byte) 1, (byte) 0, vector); - } - - @Test - public void testShouldGetShortMethodFromBitVector() throws Exception { - iterate(ArrowFlightJdbcBitVectorAccessor::getShort, (short) 1, (short) 0, vector); - } - - @Test - public void testShouldGetIntMethodFromBitVector() throws Exception { - iterate(ArrowFlightJdbcBitVectorAccessor::getInt, 1, 0, vector); - } - - @Test - public void testShouldGetLongMethodFromBitVector() throws Exception { - iterate(ArrowFlightJdbcBitVectorAccessor::getLong, (long) 1, (long) 0, vector); - } - - @Test - public void testShouldGetFloatMethodFromBitVector() throws Exception { - iterate(ArrowFlightJdbcBitVectorAccessor::getFloat, (float) 1, (float) 0, vector); - } - - @Test - public void testShouldGetDoubleMethodFromBitVector() throws Exception { - iterate(ArrowFlightJdbcBitVectorAccessor::getDouble, (double) 1, (double) 0, vector); - } - - @Test - public void testShouldGetBigDecimalMethodFromBitVector() throws Exception { - iterate( - ArrowFlightJdbcBitVectorAccessor::getBigDecimal, BigDecimal.ONE, BigDecimal.ZERO, vector); - } - - @Test - public void testShouldGetBigDecimalMethodFromBitVectorFromNull() throws Exception { - iterate(ArrowFlightJdbcBitVectorAccessor::getBigDecimal, null, null, vectorWithNull); - } - - @Test - public void testShouldGetObjectMethodFromBitVector() throws Exception { - iterate(ArrowFlightJdbcBitVectorAccessor::getObject, true, false, vector); - } - - @Test - public void testShouldGetObjectMethodFromBitVectorFromNull() throws Exception { - iterate(ArrowFlightJdbcBitVectorAccessor::getObject, null, null, vectorWithNull); - } - - @Test - public void testShouldGetStringMethodFromBitVector() 
throws Exception { - iterate(ArrowFlightJdbcBitVectorAccessor::getString, "true", "false", vector); - } - - @Test - public void testShouldGetStringMethodFromBitVectorFromNull() throws Exception { - iterate(ArrowFlightJdbcBitVectorAccessor::getString, null, null, vectorWithNull); - } - - @Test - public void testShouldGetObjectClass() throws Exception { - accessorIterator.assertAccessorGetter( - vector, ArrowFlightJdbcBitVectorAccessor::getObjectClass, equalTo(Boolean.class)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessorTest.java deleted file mode 100644 index 668c50a843386..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcDecimalVectorAccessorTest.java +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.numeric; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; - -import java.math.BigDecimal; -import java.util.function.Supplier; -import java.util.stream.Stream; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessorFactory; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.ValueVector; -import org.hamcrest.CoreMatchers; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.extension.RegisterExtension; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -public class ArrowFlightJdbcDecimalVectorAccessorTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private ValueVector vector; - private ValueVector vectorWithNull; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> { - ArrowFlightJdbcAccessorFactory.WasNullConsumer noOpWasNullConsumer = - (boolean wasNull) -> {}; - if (vector instanceof DecimalVector) { - return new ArrowFlightJdbcDecimalVectorAccessor( - (DecimalVector) vector, getCurrentRow, noOpWasNullConsumer); - } else if (vector instanceof Decimal256Vector) { - return new ArrowFlightJdbcDecimalVectorAccessor( - (Decimal256Vector) vector, getCurrentRow, noOpWasNullConsumer); - } - return null; - }; - - private final AccessorTestUtils.AccessorIterator - accessorIterator = new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - public static Stream data() { - return Stream.of( - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createDecimalVector(), - 
"DecimalVector"), - Arguments.of( - (Supplier) () -> rootAllocatorTestExtension.createDecimal256Vector(), - "Decimal256Vector")); - } - - public void setup(Supplier vectorSupplier) { - this.vector = vectorSupplier.get(); - - this.vectorWithNull = vectorSupplier.get(); - this.vectorWithNull.clear(); - this.vectorWithNull.setValueCount(5); - } - - @AfterEach - public void tearDown() { - this.vector.close(); - this.vectorWithNull.close(); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetBigDecimalFromDecimalVector(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcDecimalVectorAccessor::getBigDecimal, - (accessor, currentRow) -> CoreMatchers.notNullValue()); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetDoubleMethodFromDecimalVector(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcDecimalVectorAccessor::getDouble, - (accessor, currentRow) -> equalTo(accessor.getBigDecimal().doubleValue())); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetFloatMethodFromDecimalVector(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcDecimalVectorAccessor::getFloat, - (accessor, currentRow) -> equalTo(accessor.getBigDecimal().floatValue())); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetLongMethodFromDecimalVector(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcDecimalVectorAccessor::getLong, - (accessor, currentRow) -> equalTo(accessor.getBigDecimal().longValue())); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetIntMethodFromDecimalVector(Supplier vectorSupplier) - throws Exception { 
- setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcDecimalVectorAccessor::getInt, - (accessor, currentRow) -> equalTo(accessor.getBigDecimal().intValue())); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetShortMethodFromDecimalVector(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcDecimalVectorAccessor::getShort, - (accessor, currentRow) -> equalTo(accessor.getBigDecimal().shortValue())); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetByteMethodFromDecimalVector(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcDecimalVectorAccessor::getByte, - (accessor, currentRow) -> equalTo(accessor.getBigDecimal().byteValue())); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetStringMethodFromDecimalVector(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcDecimalVectorAccessor::getString, - (accessor, currentRow) -> equalTo(accessor.getBigDecimal().toString())); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetBooleanMethodFromDecimalVector(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcDecimalVectorAccessor::getBoolean, - (accessor, currentRow) -> equalTo(!accessor.getBigDecimal().equals(BigDecimal.ZERO))); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectMethodFromDecimalVector(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcDecimalVectorAccessor::getObject, - (accessor, currentRow) -> equalTo(accessor.getBigDecimal())); - } - - 
@ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectClass(Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcDecimalVectorAccessor::getObjectClass, - (accessor, currentRow) -> equalTo(BigDecimal.class)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetBigDecimalMethodFromDecimalVectorWithNull( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vectorWithNull, - ArrowFlightJdbcDecimalVectorAccessor::getBigDecimal, - (accessor, currentRow) -> CoreMatchers.nullValue()); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetObjectMethodFromDecimalVectorWithNull( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vectorWithNull, - ArrowFlightJdbcDecimalVectorAccessor::getObject, - (accessor, currentRow) -> CoreMatchers.nullValue()); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetStringMethodFromDecimalVectorWithNull( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vectorWithNull, - ArrowFlightJdbcDecimalVectorAccessor::getString, - (accessor, currentRow) -> CoreMatchers.nullValue()); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetByteMethodFromDecimalVectorWithNull(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vectorWithNull, - ArrowFlightJdbcDecimalVectorAccessor::getByte, - (accessor, currentRow) -> is((byte) 0)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetShortMethodFromDecimalVectorWithNull( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vectorWithNull, - 
ArrowFlightJdbcDecimalVectorAccessor::getShort, - (accessor, currentRow) -> is((short) 0)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetIntMethodFromDecimalVectorWithNull(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vectorWithNull, - ArrowFlightJdbcDecimalVectorAccessor::getInt, - (accessor, currentRow) -> is(0)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetLongMethodFromDecimalVectorWithNull(Supplier vectorSupplier) - throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vectorWithNull, - ArrowFlightJdbcDecimalVectorAccessor::getLong, - (accessor, currentRow) -> is((long) 0)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetFloatMethodFromDecimalVectorWithNull( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vectorWithNull, - ArrowFlightJdbcDecimalVectorAccessor::getFloat, - (accessor, currentRow) -> is(0.0f)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetDoubleMethodFromDecimalVectorWithNull( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vectorWithNull, - ArrowFlightJdbcDecimalVectorAccessor::getDouble, - (accessor, currentRow) -> is(0.0D)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetBooleanMethodFromDecimalVectorWithNull( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - accessorIterator.assertAccessorGetter( - vectorWithNull, - ArrowFlightJdbcDecimalVectorAccessor::getBoolean, - (accessor, currentRow) -> is(false)); - } - - @ParameterizedTest - @MethodSource("data") - public void testShouldGetBigDecimalWithScaleMethodFromDecimalVectorWithNull( - Supplier vectorSupplier) throws Exception { - setup(vectorSupplier); - 
accessorIterator.assertAccessorGetter( - vectorWithNull, - accessor -> accessor.getBigDecimal(2), - (accessor, currentRow) -> CoreMatchers.nullValue()); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessorTest.java deleted file mode 100644 index 89ebc5336dae3..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat4VectorAccessorTest.java +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.numeric; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.sql.SQLException; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.Float4Vector; -import org.hamcrest.CoreMatchers; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class ArrowFlightJdbcFloat4VectorAccessorTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private Float4Vector vector; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> - new ArrowFlightJdbcFloat4VectorAccessor( - (Float4Vector) vector, getCurrentRow, (boolean wasNull) -> {}); - - private final AccessorTestUtils.AccessorIterator - accessorIterator = new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - @BeforeEach - public void setup() { - this.vector = rootAllocatorTestExtension.createFloat4Vector(); - } - - @AfterEach - public void tearDown() { - this.vector.close(); - } - - @Test - public void testShouldGetFloatMethodFromFloat4Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat4VectorAccessor::getFloat, - (accessor, currentRow) -> is(vector.get(currentRow))); - } - - @Test - public void testShouldGetObjectMethodFromFloat4Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat4VectorAccessor::getObject, - (accessor) -> is(accessor.getFloat())); - } - - 
@Test - public void testShouldGetStringMethodFromFloat4Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat4VectorAccessor::getString, - accessor -> is(Float.toString(accessor.getFloat()))); - } - - @Test - public void testShouldGetStringMethodFromFloat4VectorWithNull() throws Exception { - try (final Float4Vector float4Vector = - new Float4Vector("ID", rootAllocatorTestExtension.getRootAllocator())) { - float4Vector.setNull(0); - float4Vector.setValueCount(1); - - accessorIterator.assertAccessorGetter( - float4Vector, ArrowFlightJdbcFloat4VectorAccessor::getString, CoreMatchers.nullValue()); - } - } - - @Test - public void testShouldGetFloatMethodFromFloat4VectorWithNull() throws Exception { - try (final Float4Vector float4Vector = - new Float4Vector("ID", rootAllocatorTestExtension.getRootAllocator())) { - float4Vector.setNull(0); - float4Vector.setValueCount(1); - - accessorIterator.assertAccessorGetter( - float4Vector, ArrowFlightJdbcFloat4VectorAccessor::getFloat, is(0.0f)); - } - } - - @Test - public void testShouldGetBigDecimalMethodFromFloat4VectorWithNull() throws Exception { - try (final Float4Vector float4Vector = - new Float4Vector("ID", rootAllocatorTestExtension.getRootAllocator())) { - float4Vector.setNull(0); - float4Vector.setValueCount(1); - - accessorIterator.assertAccessorGetter( - float4Vector, - ArrowFlightJdbcFloat4VectorAccessor::getBigDecimal, - CoreMatchers.nullValue()); - } - } - - @Test - public void testShouldGetObjectMethodFromFloat4VectorWithNull() throws Exception { - try (final Float4Vector float4Vector = - new Float4Vector("ID", rootAllocatorTestExtension.getRootAllocator())) { - float4Vector.setNull(0); - float4Vector.setValueCount(1); - - accessorIterator.assertAccessorGetter( - float4Vector, ArrowFlightJdbcFloat4VectorAccessor::getObject, CoreMatchers.nullValue()); - } - } - - @Test - public void testShouldGetBooleanMethodFromFloat4Vector() throws Exception { - 
accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat4VectorAccessor::getBoolean, - accessor -> is(accessor.getFloat() != 0.0f)); - } - - @Test - public void testShouldGetByteMethodFromFloat4Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat4VectorAccessor::getByte, - accessor -> is((byte) accessor.getFloat())); - } - - @Test - public void testShouldGetShortMethodFromFloat4Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat4VectorAccessor::getShort, - accessor -> is((short) accessor.getFloat())); - } - - @Test - public void testShouldGetIntMethodFromFloat4Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat4VectorAccessor::getInt, - accessor -> is((int) accessor.getFloat())); - } - - @Test - public void testShouldGetLongMethodFromFloat4Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat4VectorAccessor::getLong, - accessor -> is((long) accessor.getFloat())); - } - - @Test - public void testShouldGetDoubleMethodFromFloat4Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat4VectorAccessor::getDouble, - accessor -> is((double) accessor.getFloat())); - } - - @Test - public void testShouldGetBigDecimalMethodFromFloat4Vector() throws Exception { - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - float value = accessor.getFloat(); - if (Float.isInfinite(value) || Float.isNaN(value)) { - assertThrows(SQLException.class, accessor::getBigDecimal); - } else { - assertThat(accessor.getBigDecimal(), is(BigDecimal.valueOf(value))); - } - }); - } - - @Test - public void testShouldGetBigDecimalWithScaleMethodFromFloat4Vector() throws Exception { - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - float value = accessor.getFloat(); - if (Float.isInfinite(value) || Float.isNaN(value)) 
{ - assertThrows(SQLException.class, () -> accessor.getBigDecimal(9)); - } else { - assertThat( - accessor.getBigDecimal(9), - is(BigDecimal.valueOf(value).setScale(9, RoundingMode.HALF_UP))); - } - }); - } - - @Test - public void testShouldGetObjectClass() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat4VectorAccessor::getObjectClass, - accessor -> equalTo(Float.class)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessorTest.java deleted file mode 100644 index 90870e95384ad..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/numeric/ArrowFlightJdbcFloat8VectorAccessorTest.java +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.numeric; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.sql.SQLException; -import org.apache.arrow.driver.jdbc.utils.AccessorTestUtils; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.vector.Float8Vector; -import org.hamcrest.CoreMatchers; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class ArrowFlightJdbcFloat8VectorAccessorTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private Float8Vector vector; - private Float8Vector vectorWithNull; - - private final AccessorTestUtils.AccessorSupplier - accessorSupplier = - (vector, getCurrentRow) -> - new ArrowFlightJdbcFloat8VectorAccessor( - (Float8Vector) vector, getCurrentRow, (boolean wasNull) -> {}); - - private final AccessorTestUtils.AccessorIterator - accessorIterator = new AccessorTestUtils.AccessorIterator<>(accessorSupplier); - - @BeforeEach - public void setup() { - this.vector = rootAllocatorTestExtension.createFloat8Vector(); - this.vectorWithNull = rootAllocatorTestExtension.createFloat8VectorForNullTests(); - } - - @AfterEach - public void tearDown() { - this.vector.close(); - this.vectorWithNull.close(); - } - - @Test - public void testShouldGetDoubleMethodFromFloat8Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat8VectorAccessor::getDouble, - (accessor, currentRow) -> is(vector.getValueAsDouble(currentRow))); - } - - @Test - public void testShouldGetObjectMethodFromFloat8Vector() 
throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat8VectorAccessor::getObject, - (accessor) -> is(accessor.getDouble())); - } - - @Test - public void testShouldGetStringMethodFromFloat8Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat8VectorAccessor::getString, - (accessor) -> is(Double.toString(accessor.getDouble()))); - } - - @Test - public void testShouldGetBooleanMethodFromFloat8Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat8VectorAccessor::getBoolean, - (accessor) -> is(accessor.getDouble() != 0.0)); - } - - @Test - public void testShouldGetByteMethodFromFloat8Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat8VectorAccessor::getByte, - (accessor) -> is((byte) accessor.getDouble())); - } - - @Test - public void testShouldGetShortMethodFromFloat8Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat8VectorAccessor::getShort, - (accessor) -> is((short) accessor.getDouble())); - } - - @Test - public void testShouldGetIntMethodFromFloat8Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat8VectorAccessor::getInt, - (accessor) -> is((int) accessor.getDouble())); - } - - @Test - public void testShouldGetLongMethodFromFloat8Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat8VectorAccessor::getLong, - (accessor) -> is((long) accessor.getDouble())); - } - - @Test - public void testShouldGetFloatMethodFromFloat8Vector() throws Exception { - accessorIterator.assertAccessorGetter( - vector, - ArrowFlightJdbcFloat8VectorAccessor::getFloat, - (accessor) -> is((float) accessor.getDouble())); - } - - @Test - public void testShouldGetBigDecimalMethodFromFloat8Vector() throws Exception { - accessorIterator.iterate( - vector, - (accessor, 
currentRow) -> { - double value = accessor.getDouble(); - if (Double.isInfinite(value) || Double.isNaN(value)) { - assertThrows(SQLException.class, accessor::getBigDecimal); - } else { - assertThat(accessor.getBigDecimal(), is(BigDecimal.valueOf(value))); - } - }); - } - - @Test - public void testShouldGetObjectClass() throws Exception { - accessorIterator.assertAccessorGetter( - vector, ArrowFlightJdbcFloat8VectorAccessor::getObjectClass, equalTo(Double.class)); - } - - @Test - public void testShouldGetStringMethodFromFloat8VectorWithNull() throws Exception { - accessorIterator.assertAccessorGetter( - vectorWithNull, ArrowFlightJdbcFloat8VectorAccessor::getString, CoreMatchers.nullValue()); - } - - @Test - public void testShouldGetFloatMethodFromFloat8VectorWithNull() throws Exception { - accessorIterator.assertAccessorGetter( - vectorWithNull, ArrowFlightJdbcFloat8VectorAccessor::getFloat, is(0.0f)); - } - - @Test - public void testShouldGetBigDecimalMethodFromFloat8VectorWithNull() throws Exception { - accessorIterator.assertAccessorGetter( - vectorWithNull, - ArrowFlightJdbcFloat8VectorAccessor::getBigDecimal, - CoreMatchers.nullValue()); - } - - @Test - public void testShouldGetBigDecimalWithScaleMethodFromFloat4Vector() throws Exception { - accessorIterator.iterate( - vector, - (accessor, currentRow) -> { - double value = accessor.getDouble(); - if (Double.isInfinite(value) || Double.isNaN(value)) { - assertThrows(SQLException.class, () -> accessor.getBigDecimal(9)); - } else { - assertThat( - accessor.getBigDecimal(9), - is(BigDecimal.valueOf(value).setScale(9, RoundingMode.HALF_UP))); - } - }); - } - - @Test - public void testShouldGetObjectMethodFromFloat8VectorWithNull() throws Exception { - accessorIterator.assertAccessorGetter( - vectorWithNull, ArrowFlightJdbcFloat8VectorAccessor::getObject, CoreMatchers.nullValue()); - } -} diff --git 
a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessorTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessorTest.java deleted file mode 100644 index a2f6fd586fa5b..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/accessor/impl/text/ArrowFlightJdbcVarCharVectorAccessorTest.java +++ /dev/null @@ -1,698 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.accessor.impl.text; - -import static java.nio.charset.StandardCharsets.US_ASCII; -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.commons.io.IOUtils.toByteArray; -import static org.apache.commons.io.IOUtils.toCharArray; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.instanceOf; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.mockito.Mockito.when; - -import java.io.InputStream; -import java.io.Reader; -import java.math.BigDecimal; -import java.sql.Date; -import java.sql.SQLException; -import java.sql.Time; -import java.sql.Timestamp; -import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.TimeZone; -import java.util.function.IntSupplier; -import org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcDateVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeStampVectorAccessor; -import org.apache.arrow.driver.jdbc.accessor.impl.calendar.ArrowFlightJdbcTimeVectorAccessor; -import org.apache.arrow.driver.jdbc.utils.RootAllocatorTestExtension; -import org.apache.arrow.driver.jdbc.utils.ThrowableAssertionUtils; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeStampVector; -import org.apache.arrow.vector.util.Text; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.jupiter.api.extension.RegisterExtension; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; - -@ExtendWith(MockitoExtension.class) -public class ArrowFlightJdbcVarCharVectorAccessorTest { - - private ArrowFlightJdbcVarCharVectorAccessor accessor; - private 
final SimpleDateFormat dateTimeFormat = - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"); - private final SimpleDateFormat timeFormat = new SimpleDateFormat("HH:mm:ss.SSSXXX"); - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - @Mock private ArrowFlightJdbcVarCharVectorAccessor.Getter getter; - - @BeforeEach - public void setUp() { - IntSupplier currentRowSupplier = () -> 0; - accessor = - new ArrowFlightJdbcVarCharVectorAccessor( - getter, currentRowSupplier, (boolean wasNull) -> {}); - } - - @Test - public void testShouldGetStringFromNullReturnNull() { - when(getter.get(0)).thenReturn(null); - final String result = accessor.getString(); - - assertThat(result, equalTo(null)); - } - - @Test - public void testShouldGetStringReturnValidString() { - Text value = new Text("Value for Test."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - final String result = accessor.getString(); - - assertThat(result, instanceOf(String.class)); - assertThat(result, equalTo(value.toString())); - } - - @Test - public void testShouldGetObjectReturnValidString() { - Text value = new Text("Value for Test."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - final String result = accessor.getObject(); - - assertThat(result, instanceOf(String.class)); - assertThat(result, equalTo(value.toString())); - } - - @Test - public void testShouldGetByteThrowsExceptionForNonNumericValue() { - Text value = new Text("Invalid value for byte."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getByte()); - } - - @Test - public void testShouldGetByteThrowsExceptionForOutOfRangePositiveValue() { - Text value = new Text("128"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getByte()); - } - - @Test - public void testShouldGetByteThrowsExceptionForOutOfRangeNegativeValue() { - Text value = 
new Text("-129"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getByte()); - } - - @Test - public void testShouldGetByteReturnValidPositiveByte() throws Exception { - Text value = new Text("127"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - byte result = accessor.getByte(); - - assertThat(result, instanceOf(Byte.class)); - assertThat(result, equalTo((byte) 127)); - } - - @Test - public void testShouldGetByteReturnValidNegativeByte() throws Exception { - Text value = new Text("-128"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - byte result = accessor.getByte(); - - assertThat(result, instanceOf(Byte.class)); - assertThat(result, equalTo((byte) -128)); - } - - @Test - public void testShouldGetShortThrowsExceptionForNonNumericValue() { - Text value = new Text("Invalid value for short."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getShort()); - } - - @Test - public void testShouldGetShortThrowsExceptionForOutOfRangePositiveValue() { - Text value = new Text("32768"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getShort()); - } - - @Test - public void testShouldGetShortThrowsExceptionForOutOfRangeNegativeValue() { - Text value = new Text("-32769"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getShort()); - } - - @Test - public void testShouldGetShortReturnValidPositiveShort() throws Exception { - Text value = new Text("32767"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - short result = accessor.getShort(); - - assertThat(result, instanceOf(Short.class)); - assertThat(result, equalTo((short) 32767)); - } - - @Test - public void testShouldGetShortReturnValidNegativeShort() throws Exception { - Text value = new Text("-32768"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - short result 
= accessor.getShort(); - - assertThat(result, instanceOf(Short.class)); - assertThat(result, equalTo((short) -32768)); - } - - @Test - public void testShouldGetIntThrowsExceptionForNonNumericValue() { - Text value = new Text("Invalid value for int."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getInt()); - } - - @Test - public void testShouldGetIntThrowsExceptionForOutOfRangePositiveValue() { - Text value = new Text("2147483648"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getInt()); - } - - @Test - public void testShouldGetIntThrowsExceptionForOutOfRangeNegativeValue() { - Text value = new Text("-2147483649"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getInt()); - } - - @Test - public void testShouldGetIntReturnValidPositiveInteger() throws Exception { - Text value = new Text("2147483647"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - int result = accessor.getInt(); - - assertThat(result, instanceOf(Integer.class)); - assertThat(result, equalTo(2147483647)); - } - - @Test - public void testShouldGetIntReturnValidNegativeInteger() throws Exception { - Text value = new Text("-2147483648"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - int result = accessor.getInt(); - - assertThat(result, instanceOf(Integer.class)); - assertThat(result, equalTo(-2147483648)); - } - - @Test - public void testShouldGetLongThrowsExceptionForNonNumericValue() { - Text value = new Text("Invalid value for long."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getLong()); - } - - @Test - public void testShouldGetLongThrowsExceptionForOutOfRangePositiveValue() { - Text value = new Text("9223372036854775808"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getLong()); - 
} - - @Test - public void testShouldGetLongThrowsExceptionForOutOfRangeNegativeValue() { - Text value = new Text("-9223372036854775809"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getLong()); - } - - @Test - public void testShouldGetLongReturnValidPositiveLong() throws Exception { - Text value = new Text("9223372036854775807"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - long result = accessor.getLong(); - - assertThat(result, instanceOf(Long.class)); - assertThat(result, equalTo(9223372036854775807L)); - } - - @Test - public void testShouldGetLongReturnValidNegativeLong() throws Exception { - Text value = new Text("-9223372036854775808"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - long result = accessor.getLong(); - - assertThat(result, instanceOf(Long.class)); - assertThat(result, equalTo(-9223372036854775808L)); - } - - @Test - public void testShouldBigDecimalWithParametersThrowsExceptionForNonNumericValue() { - Text value = new Text("Invalid value for BigDecimal."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getBigDecimal(1)); - } - - @Test - public void testShouldGetBigDecimalThrowsExceptionForNonNumericValue() { - Text value = new Text("Invalid value for BigDecimal."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getBigDecimal()); - } - - @Test - public void testShouldGetBigDecimalReturnValidPositiveBigDecimal() throws Exception { - Text value = new Text("9223372036854775807000.999"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - BigDecimal result = accessor.getBigDecimal(); - - assertThat(result, instanceOf(BigDecimal.class)); - assertThat(result, equalTo(new BigDecimal("9223372036854775807000.999"))); - } - - @Test - public void testShouldGetBigDecimalReturnValidNegativeBigDecimal() throws Exception { - Text value = new 
Text("-9223372036854775807000.999"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - BigDecimal result = accessor.getBigDecimal(); - - assertThat(result, instanceOf(BigDecimal.class)); - assertThat(result, equalTo(new BigDecimal("-9223372036854775807000.999"))); - } - - @Test - public void testShouldGetDoubleThrowsExceptionForNonNumericValue() { - Text value = new Text("Invalid value for double."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getDouble()); - } - - @Test - public void testShouldGetDoubleReturnValidPositiveDouble() throws Exception { - Text value = new Text("1.7976931348623157E308D"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - double result = accessor.getDouble(); - - assertThat(result, instanceOf(Double.class)); - assertThat(result, equalTo(1.7976931348623157E308D)); - } - - @Test - public void testShouldGetDoubleReturnValidNegativeDouble() throws Exception { - Text value = new Text("-1.7976931348623157E308D"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - double result = accessor.getDouble(); - - assertThat(result, instanceOf(Double.class)); - assertThat(result, equalTo(-1.7976931348623157E308D)); - } - - @Test - public void testShouldGetDoubleWorkWithPositiveInfinity() throws Exception { - Text value = new Text("Infinity"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - double result = accessor.getDouble(); - - assertThat(result, instanceOf(Double.class)); - assertThat(result, equalTo(Double.POSITIVE_INFINITY)); - } - - @Test - public void testShouldGetDoubleWorkWithNegativeInfinity() throws Exception { - Text value = new Text("-Infinity"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - double result = accessor.getDouble(); - - assertThat(result, instanceOf(Double.class)); - assertThat(result, equalTo(Double.NEGATIVE_INFINITY)); - } - - @Test - public void testShouldGetDoubleWorkWithNaN() throws Exception { - Text value = new 
Text("NaN"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - double result = accessor.getDouble(); - - assertThat(result, instanceOf(Double.class)); - assertThat(result, equalTo(Double.NaN)); - } - - @Test - public void testShouldGetFloatThrowsExceptionForNonNumericValue() { - Text value = new Text("Invalid value for float."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getFloat()); - } - - @Test - public void testShouldGetFloatReturnValidPositiveFloat() throws Exception { - Text value = new Text("3.4028235E38F"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - float result = accessor.getFloat(); - - assertThat(result, instanceOf(Float.class)); - assertThat(result, equalTo(3.4028235E38F)); - } - - @Test - public void testShouldGetFloatReturnValidNegativeFloat() throws Exception { - Text value = new Text("-3.4028235E38F"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - float result = accessor.getFloat(); - - assertThat(result, instanceOf(Float.class)); - assertThat(result, equalTo(-3.4028235E38F)); - } - - @Test - public void testShouldGetFloatWorkWithPositiveInfinity() throws Exception { - Text value = new Text("Infinity"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - float result = accessor.getFloat(); - - assertThat(result, instanceOf(Float.class)); - assertThat(result, equalTo(Float.POSITIVE_INFINITY)); - } - - @Test - public void testShouldGetFloatWorkWithNegativeInfinity() throws Exception { - Text value = new Text("-Infinity"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - float result = accessor.getFloat(); - - assertThat(result, instanceOf(Float.class)); - assertThat(result, equalTo(Float.NEGATIVE_INFINITY)); - } - - @Test - public void testShouldGetFloatWorkWithNaN() throws Exception { - Text value = new Text("NaN"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - float result = accessor.getFloat(); - - assertThat(result, 
instanceOf(Float.class)); - assertThat(result, equalTo(Float.NaN)); - } - - @Test - public void testShouldGetDateThrowsExceptionForNonDateValue() { - Text value = new Text("Invalid value for date."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getDate(null)); - } - - @Test - public void testShouldGetDateReturnValidDateWithoutCalendar() throws Exception { - Text value = new Text("2021-07-02"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - Date result = accessor.getDate(null); - - assertThat(result, instanceOf(Date.class)); - - Calendar calendar = Calendar.getInstance(); - calendar.setTime(result); - - assertThat(dateTimeFormat.format(calendar.getTime()), equalTo("2021-07-02T00:00:00.000Z")); - } - - @Test - public void testShouldGetDateReturnValidDateWithCalendar() throws Exception { - Text value = new Text("2021-07-02"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("America/Sao_Paulo")); - Date result = accessor.getDate(calendar); - - calendar = Calendar.getInstance(TimeZone.getTimeZone("Etc/UTC")); - calendar.setTime(result); - - assertThat(dateTimeFormat.format(calendar.getTime()), equalTo("2021-07-02T03:00:00.000Z")); - } - - @Test - public void testShouldGetTimeThrowsExceptionForNonTimeValue() { - Text value = new Text("Invalid value for time."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getTime(null)); - } - - @Test - public void testShouldGetTimeReturnValidDateWithoutCalendar() throws Exception { - Text value = new Text("02:30:00"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - Time result = accessor.getTime(null); - - Calendar calendar = Calendar.getInstance(); - calendar.setTime(result); - - assertThat(timeFormat.format(calendar.getTime()), equalTo("02:30:00.000Z")); - } - - @Test - public void testShouldGetTimeReturnValidDateWithCalendar() 
throws Exception { - Text value = new Text("02:30:00"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("America/Sao_Paulo")); - Time result = accessor.getTime(calendar); - - calendar = Calendar.getInstance(TimeZone.getTimeZone("Etc/UTC")); - calendar.setTime(result); - - assertThat(timeFormat.format(calendar.getTime()), equalTo("05:30:00.000Z")); - } - - @Test - public void testShouldGetTimestampThrowsExceptionForNonTimeValue() { - Text value = new Text("Invalid value for timestamp."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - assertThrows(SQLException.class, () -> accessor.getTimestamp(null)); - } - - @Test - public void testShouldGetTimestampReturnValidDateWithoutCalendar() throws Exception { - Text value = new Text("2021-07-02 02:30:00.000"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - Timestamp result = accessor.getTimestamp(null); - - Calendar calendar = Calendar.getInstance(); - calendar.setTime(result); - - assertThat(dateTimeFormat.format(calendar.getTime()), equalTo("2021-07-02T02:30:00.000Z")); - } - - @Test - public void testShouldGetTimestampReturnValidDateWithCalendar() throws Exception { - Text value = new Text("2021-07-02 02:30:00.000"); - when(getter.get(0)).thenReturn(value.copyBytes()); - - Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("America/Sao_Paulo")); - Timestamp result = accessor.getTimestamp(calendar); - - calendar = Calendar.getInstance(TimeZone.getTimeZone("Etc/UTC")); - calendar.setTime(result); - - assertThat(dateTimeFormat.format(calendar.getTime()), equalTo("2021-07-02T05:30:00.000Z")); - } - - private void assertGetBoolean(Text value, boolean expectedResult) throws SQLException { - when(getter.get(0)).thenReturn(value == null ? 
null : value.copyBytes()); - boolean result = accessor.getBoolean(); - assertThat(result, equalTo(expectedResult)); - } - - private void assertGetBooleanForSQLException(Text value) { - when(getter.get(0)).thenReturn(value == null ? null : value.copyBytes()); - ThrowableAssertionUtils.simpleAssertThrowableClass( - SQLException.class, () -> accessor.getBoolean()); - } - - @Test - public void testShouldGetBooleanThrowsSQLExceptionForInvalidValue() { - assertGetBooleanForSQLException(new Text("anything")); - } - - @Test - public void testShouldGetBooleanThrowsSQLExceptionForEmpty() { - assertGetBooleanForSQLException(new Text("")); - } - - @Test - public void testShouldGetBooleanReturnFalseFor0() throws Exception { - assertGetBoolean(new Text("0"), false); - } - - @Test - public void testShouldGetBooleanReturnFalseForFalseString() throws Exception { - assertGetBoolean(new Text("false"), false); - } - - @Test - public void testShouldGetBooleanReturnFalseForNull() throws Exception { - assertGetBoolean(null, false); - } - - @Test - public void testShouldGetBytesReturnValidByteArray() { - Text value = new Text("Value for Test."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - final byte[] result = accessor.getBytes(); - - assertThat(result, instanceOf(byte[].class)); - assertThat(result, equalTo(value.toString().getBytes(UTF_8))); - } - - @Test - public void testShouldGetUnicodeStreamReturnValidInputStream() throws Exception { - Text value = new Text("Value for Test."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - try (final InputStream result = accessor.getUnicodeStream()) { - byte[] resultBytes = toByteArray(result); - - assertThat(new String(resultBytes, UTF_8), equalTo(value.toString())); - } - } - - @Test - public void testShouldGetAsciiStreamReturnValidInputStream() throws Exception { - Text valueText = new Text("Value for Test."); - byte[] valueAscii = valueText.toString().getBytes(US_ASCII); - 
when(getter.get(0)).thenReturn(valueText.copyBytes()); - - try (final InputStream result = accessor.getAsciiStream()) { - byte[] resultBytes = toByteArray(result); - - assertArrayEquals(valueAscii, resultBytes); - } - } - - @Test - public void testShouldGetCharacterStreamReturnValidReader() throws Exception { - Text value = new Text("Value for Test."); - when(getter.get(0)).thenReturn(value.copyBytes()); - - try (Reader result = accessor.getCharacterStream()) { - char[] resultChars = toCharArray(result); - - assertThat(new String(resultChars), equalTo(value.toString())); - } - } - - @Test - public void testShouldGetTimeStampBeConsistentWithTimeStampAccessor() throws Exception { - try (TimeStampVector timeStampVector = - rootAllocatorTestExtension.createTimeStampMilliVector()) { - ArrowFlightJdbcTimeStampVectorAccessor timeStampVectorAccessor = - new ArrowFlightJdbcTimeStampVectorAccessor( - timeStampVector, () -> 0, (boolean wasNull) -> {}); - - Text value = new Text(timeStampVectorAccessor.getString()); - when(getter.get(0)).thenReturn(value.copyBytes()); - - Timestamp timestamp = accessor.getTimestamp(null); - assertThat(timestamp, equalTo(timeStampVectorAccessor.getTimestamp(null))); - } - } - - @Test - public void testShouldGetTimeBeConsistentWithTimeAccessor() throws Exception { - try (TimeMilliVector timeVector = rootAllocatorTestExtension.createTimeMilliVector()) { - ArrowFlightJdbcTimeVectorAccessor timeVectorAccessor = - new ArrowFlightJdbcTimeVectorAccessor(timeVector, () -> 0, (boolean wasNull) -> {}); - - Text value = new Text(timeVectorAccessor.getString()); - when(getter.get(0)).thenReturn(value.copyBytes()); - - Time time = accessor.getTime(null); - assertThat(time, equalTo(timeVectorAccessor.getTime(null))); - } - } - - @Test - public void testShouldGetDateBeConsistentWithDateAccessor() throws Exception { - try (DateMilliVector dateVector = rootAllocatorTestExtension.createDateMilliVector()) { - ArrowFlightJdbcDateVectorAccessor dateVectorAccessor = 
- new ArrowFlightJdbcDateVectorAccessor(dateVector, () -> 0, (boolean wasNull) -> {}); - - Text value = new Text(dateVectorAccessor.getString()); - when(getter.get(0)).thenReturn(value.copyBytes()); - - Date date = accessor.getDate(null); - assertThat(date, equalTo(dateVectorAccessor.getDate(null))); - } - } - - @Test - public void testShouldGetObjectClassReturnString() { - final Class clazz = accessor.getObjectClass(); - assertThat(clazz, equalTo(String.class)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/authentication/Authentication.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/authentication/Authentication.java deleted file mode 100644 index 7e2d8621785aa..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/authentication/Authentication.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.authentication; - -import java.util.Properties; -import org.apache.arrow.flight.auth2.CallHeaderAuthenticator; - -public interface Authentication { - /** - * Create a {@link CallHeaderAuthenticator} which is used to authenticate the connection. - * - * @return a CallHeaderAuthenticator. - */ - CallHeaderAuthenticator authenticate(); - - /** - * Uses the validCredentials variable and populate the Properties object. - * - * @param properties the Properties object that will be populated. - */ - void populateProperties(Properties properties); -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/authentication/TokenAuthentication.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/authentication/TokenAuthentication.java deleted file mode 100644 index 6d93ff705bd62..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/authentication/TokenAuthentication.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.authentication; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl; -import org.apache.arrow.flight.CallHeaders; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.auth2.CallHeaderAuthenticator; - -public class TokenAuthentication implements Authentication { - private final List validCredentials; - - public TokenAuthentication(List validCredentials) { - this.validCredentials = validCredentials; - } - - @Override - public CallHeaderAuthenticator authenticate() { - return new CallHeaderAuthenticator() { - @Override - public AuthResult authenticate(CallHeaders incomingHeaders) { - String authorization = incomingHeaders.get("authorization"); - if (!validCredentials.contains(authorization)) { - throw CallStatus.UNAUTHENTICATED - .withDescription("Invalid credentials.") - .toRuntimeException(); - } - return new AuthResult() { - @Override - public String getPeerIdentity() { - return authorization; - } - }; - } - }; - } - - @Override - public void populateProperties(Properties properties) { - this.validCredentials.forEach( - value -> - properties.put( - ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty.TOKEN.camelName(), - value)); - } - - public static final class Builder { - private final List tokenList = new ArrayList<>(); - - public TokenAuthentication.Builder token(String token) { - tokenList.add("Bearer " + token); - return this; - } - - public TokenAuthentication build() { - return new TokenAuthentication(tokenList); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/authentication/UserPasswordAuthentication.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/authentication/UserPasswordAuthentication.java deleted file mode 100644 index 2b74c880b3135..0000000000000 --- 
a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/authentication/UserPasswordAuthentication.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.authentication; - -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.auth2.BasicCallHeaderAuthenticator; -import org.apache.arrow.flight.auth2.CallHeaderAuthenticator; -import org.apache.arrow.flight.auth2.GeneratedBearerTokenAuthenticator; - -public class UserPasswordAuthentication implements Authentication { - - private final Map validCredentials; - - public UserPasswordAuthentication(Map validCredentials) { - this.validCredentials = validCredentials; - } - - private String getCredentials(String key) { - return validCredentials.getOrDefault(key, null); - } - - @Override - public CallHeaderAuthenticator authenticate() { - return new GeneratedBearerTokenAuthenticator( - new BasicCallHeaderAuthenticator( - (username, password) -> { - if (validCredentials.containsKey(username) - && 
getCredentials(username).equals(password)) { - return () -> username; - } - throw CallStatus.UNAUTHENTICATED - .withDescription("Invalid credentials.") - .toRuntimeException(); - })); - } - - @Override - public void populateProperties(Properties properties) { - validCredentials.forEach( - (key, value) -> { - properties.put( - ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty.USER.camelName(), key); - properties.put( - ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty.PASSWORD.camelName(), - value); - }); - } - - public static class Builder { - Map credentials = new HashMap<>(); - - public Builder user(String username, String password) { - credentials.put(username, password); - return this; - } - - public UserPasswordAuthentication build() { - return new UserPasswordAuthentication(credentials); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandlerBuilderTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandlerBuilderTest.java deleted file mode 100644 index 6beaba82360cc..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandlerBuilderTest.java +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.client; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotSame; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.Optional; -import org.apache.arrow.driver.jdbc.FlightServerTestExtension; -import org.apache.arrow.driver.jdbc.utils.CoreMockedSqlProducers; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -/** Test the behavior of ArrowFlightSqlClientHandler.Builder. 
*/ -public class ArrowFlightSqlClientHandlerBuilderTest { - - @RegisterExtension - public static final FlightServerTestExtension FLIGHT_SERVER_TEST_EXTENSION = - FlightServerTestExtension.createStandardTestExtension( - CoreMockedSqlProducers.getLegacyProducer()); - - private static BufferAllocator allocator; - - @BeforeAll - public static void setup() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterAll - public static void tearDown() { - allocator.close(); - } - - @Test - public void testRetainCookiesOnAuthOff() throws Exception { - // Arrange - final ArrowFlightSqlClientHandler.Builder rootBuilder = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withPort(FLIGHT_SERVER_TEST_EXTENSION.getPort()) - .withBufferAllocator(allocator) - .withUsername(FlightServerTestExtension.DEFAULT_USER) - .withPassword(FlightServerTestExtension.DEFAULT_PASSWORD) - .withEncryption(false) - .withRetainCookies(true) - .withRetainAuth(false); - - try (ArrowFlightSqlClientHandler rootHandler = rootBuilder.build()) { - // Act - final ArrowFlightSqlClientHandler.Builder testBuilder = - new ArrowFlightSqlClientHandler.Builder(rootBuilder); - - // Assert - assertSame(rootBuilder.cookieFactory, testBuilder.cookieFactory); - assertNotSame(rootBuilder.authFactory, testBuilder.authFactory); - } - } - - @Test - public void testRetainCookiesOffAuthOff() throws Exception { - // Arrange - final ArrowFlightSqlClientHandler.Builder rootBuilder = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withPort(FLIGHT_SERVER_TEST_EXTENSION.getPort()) - .withBufferAllocator(allocator) - .withUsername(FlightServerTestExtension.DEFAULT_USER) - .withPassword(FlightServerTestExtension.DEFAULT_PASSWORD) - .withEncryption(false) - .withRetainCookies(false) - .withRetainAuth(false); - - try (ArrowFlightSqlClientHandler rootHandler = rootBuilder.build()) { - // Act - final 
ArrowFlightSqlClientHandler.Builder testBuilder = - new ArrowFlightSqlClientHandler.Builder(rootBuilder); - - // Assert - assertNotSame(rootBuilder.cookieFactory, testBuilder.cookieFactory); - assertNotSame(rootBuilder.authFactory, testBuilder.authFactory); - } - } - - @Test - public void testRetainCookiesOnAuthOn() throws Exception { - // Arrange - final ArrowFlightSqlClientHandler.Builder rootBuilder = - new ArrowFlightSqlClientHandler.Builder() - .withHost(FLIGHT_SERVER_TEST_EXTENSION.getHost()) - .withPort(FLIGHT_SERVER_TEST_EXTENSION.getPort()) - .withBufferAllocator(allocator) - .withUsername(FlightServerTestExtension.DEFAULT_USER) - .withPassword(FlightServerTestExtension.DEFAULT_PASSWORD) - .withEncryption(false) - .withRetainCookies(true) - .withRetainAuth(true); - - try (ArrowFlightSqlClientHandler rootHandler = rootBuilder.build()) { - // Act - final ArrowFlightSqlClientHandler.Builder testBuilder = - new ArrowFlightSqlClientHandler.Builder(rootBuilder); - - // Assert - assertSame(rootBuilder.cookieFactory, testBuilder.cookieFactory); - assertSame(rootBuilder.authFactory, testBuilder.authFactory); - } - } - - @Test - public void testDefaults() { - final ArrowFlightSqlClientHandler.Builder builder = new ArrowFlightSqlClientHandler.Builder(); - - // Validate all non-mandatory fields against defaults in ArrowFlightConnectionProperty. 
- assertNull(builder.username); - assertNull(builder.password); - assertTrue(builder.useEncryption); - assertFalse(builder.disableCertificateVerification); - assertNull(builder.trustStorePath); - assertNull(builder.trustStorePassword); - assertTrue(builder.useSystemTrustStore); - assertNull(builder.token); - assertTrue(builder.retainAuth); - assertTrue(builder.retainCookies); - assertNull(builder.tlsRootCertificatesPath); - assertNull(builder.clientCertificatePath); - assertNull(builder.clientKeyPath); - assertEquals(Optional.empty(), builder.catalog); - } - - @Test - public void testCatalog() { - ArrowFlightSqlClientHandler.Builder rootBuilder = new ArrowFlightSqlClientHandler.Builder(); - - rootBuilder.withCatalog(null); - assertFalse(rootBuilder.catalog.isPresent()); - - rootBuilder.withCatalog(""); - assertTrue(rootBuilder.catalog.isPresent()); - - rootBuilder.withCatalog(" "); - assertTrue(rootBuilder.catalog.isPresent()); - - final String noSpaces = "noSpaces"; - rootBuilder.withCatalog(noSpaces); - assertTrue(rootBuilder.catalog.isPresent()); - assertEquals(noSpaces, rootBuilder.catalog.get()); - - final String nameWithSpaces = " spaces "; - rootBuilder.withCatalog(nameWithSpaces); - assertTrue(rootBuilder.catalog.isPresent()); - assertEquals(nameWithSpaces, rootBuilder.catalog.get()); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java deleted file mode 100644 index 0d1498abe4ae1..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.client.utils; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.mockito.Mockito.mock; - -import java.io.IOException; -import java.io.InputStream; -import java.lang.reflect.Method; -import java.security.KeyStore; -import java.security.KeyStoreException; -import java.security.NoSuchAlgorithmException; -import java.security.cert.Certificate; -import java.security.cert.CertificateException; -import java.util.Arrays; -import java.util.Collections; -import java.util.Enumeration; -import org.bouncycastle.openssl.jcajce.JcaPEMWriter; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.MockedStatic; -import org.mockito.Mockito; -import org.mockito.junit.jupiter.MockitoExtension; - -@ExtendWith(MockitoExtension.class) -public class ClientAuthenticationUtilsTest { - @Mock KeyStore keyStoreMock; - - @Test - public void testGetCertificatesInputStream() throws IOException, KeyStoreException { - JcaPEMWriter pemWriterMock = mock(JcaPEMWriter.class); - Certificate certificateMock = mock(Certificate.class); - Enumeration alias = Collections.enumeration(Arrays.asList("test1", "test2")); - - Mockito.when(keyStoreMock.aliases()).thenReturn(alias); 
- Mockito.when(keyStoreMock.isCertificateEntry("test1")).thenReturn(true); - Mockito.when(keyStoreMock.getCertificate("test1")).thenReturn(certificateMock); - - ClientAuthenticationUtils.getCertificatesInputStream(keyStoreMock, pemWriterMock); - Mockito.verify(pemWriterMock).writeObject(certificateMock); - Mockito.verify(pemWriterMock).flush(); - } - - @Test - public void testGetKeyStoreInstance() - throws IOException, KeyStoreException, CertificateException, NoSuchAlgorithmException { - try (MockedStatic keyStoreMockedStatic = Mockito.mockStatic(KeyStore.class)) { - keyStoreMockedStatic - .when(() -> ClientAuthenticationUtils.getKeyStoreInstance(Mockito.any())) - .thenReturn(keyStoreMock); - - KeyStore receiveKeyStore = ClientAuthenticationUtils.getKeyStoreInstance("test1"); - Mockito.verify(keyStoreMock).load(null, null); - - assertEquals(receiveKeyStore, keyStoreMock); - } - } - - @Test - public void testGetDefaultKeyStoreInstancePassword() - throws IOException, KeyStoreException, CertificateException, NoSuchAlgorithmException { - try (MockedStatic keyStoreMockedStatic = Mockito.mockStatic(KeyStore.class)) { - - keyStoreMockedStatic - .when(() -> ClientAuthenticationUtils.getDefaultKeyStoreInstance("changeit")) - .thenReturn(keyStoreMock); - KeyStore receiveKeyStore = ClientAuthenticationUtils.getDefaultKeyStoreInstance("changeit"); - assertEquals(receiveKeyStore, keyStoreMock); - } - } - - @Test - public void testGetDefaultKeyStoreInstanceNoPassword() - throws IOException, KeyStoreException, CertificateException, NoSuchAlgorithmException { - try (MockedStatic keyStoreMockedStatic = Mockito.mockStatic(KeyStore.class)) { - - keyStoreMockedStatic - .when(() -> ClientAuthenticationUtils.getDefaultKeyStoreInstance(null)) - .thenReturn(keyStoreMock); - KeyStore receiveKeyStore = ClientAuthenticationUtils.getDefaultKeyStoreInstance(null); - assertEquals(receiveKeyStore, keyStoreMock); - } - } - - @Test - public void testGetCertificateInputStreamFromMacSystem() - 
throws IOException, KeyStoreException, CertificateException, NoSuchAlgorithmException { - InputStream mock = mock(InputStream.class); - - try (MockedStatic keyStoreMockedStatic = createKeyStoreStaticMock(); - MockedStatic clientAuthenticationUtilsMockedStatic = - createClientAuthenticationUtilsStaticMock()) { - - setOperatingSystemMock(clientAuthenticationUtilsMockedStatic, false, true); - keyStoreMockedStatic - .when(() -> ClientAuthenticationUtils.getKeyStoreInstance("KeychainStore")) - .thenReturn(keyStoreMock); - keyStoreMockedStatic - .when(() -> ClientAuthenticationUtils.getDefaultKeyStoreInstance("changeit")) - .thenReturn(keyStoreMock); - clientAuthenticationUtilsMockedStatic - .when(ClientAuthenticationUtils::getKeystoreInputStream) - .thenCallRealMethod(); - keyStoreMockedStatic.when(KeyStore::getDefaultType).thenCallRealMethod(); - keyStoreMockedStatic - .when(() -> ClientAuthenticationUtils.getCertificatesInputStream(Mockito.any())) - .thenReturn(mock); - - InputStream inputStream = - ClientAuthenticationUtils.getCertificateInputStreamFromSystem("changeit"); - assertEquals(inputStream, mock); - } - } - - @Test - public void testGetCertificateInputStreamFromWindowsSystem() - throws IOException, KeyStoreException, CertificateException, NoSuchAlgorithmException { - InputStream mock = mock(InputStream.class); - - try (MockedStatic keyStoreMockedStatic = createKeyStoreStaticMock(); - MockedStatic clientAuthenticationUtilsMockedStatic = - createClientAuthenticationUtilsStaticMock()) { - - setOperatingSystemMock(clientAuthenticationUtilsMockedStatic, true, false); - keyStoreMockedStatic - .when(() -> ClientAuthenticationUtils.getKeyStoreInstance("Windows-ROOT")) - .thenReturn(keyStoreMock); - keyStoreMockedStatic - .when(() -> ClientAuthenticationUtils.getKeyStoreInstance("Windows-MY")) - .thenReturn(keyStoreMock); - keyStoreMockedStatic - .when(() -> ClientAuthenticationUtils.getCertificatesInputStream(Mockito.any())) - .thenReturn(mock); - - InputStream 
inputStream = - ClientAuthenticationUtils.getCertificateInputStreamFromSystem("test"); - assertEquals(inputStream, mock); - } - } - - @Test - public void testGetCertificateInputStreamFromLinuxSystem() - throws IOException, KeyStoreException, CertificateException, NoSuchAlgorithmException { - InputStream mock = mock(InputStream.class); - - try (MockedStatic keyStoreMockedStatic = createKeyStoreStaticMock(); - MockedStatic clientAuthenticationUtilsMockedStatic = - createClientAuthenticationUtilsStaticMock()) { - - setOperatingSystemMock(clientAuthenticationUtilsMockedStatic, false, false); - keyStoreMockedStatic - .when(() -> ClientAuthenticationUtils.getCertificatesInputStream(Mockito.any())) - .thenReturn(mock); - keyStoreMockedStatic - .when(() -> ClientAuthenticationUtils.getDefaultKeyStoreInstance(Mockito.any())) - .thenReturn(keyStoreMock); - clientAuthenticationUtilsMockedStatic - .when(ClientAuthenticationUtils::getKeystoreInputStream) - .thenCallRealMethod(); - keyStoreMockedStatic.when(KeyStore::getDefaultType).thenCallRealMethod(); - - InputStream inputStream = - ClientAuthenticationUtils.getCertificateInputStreamFromSystem("changeit"); - assertEquals(inputStream, mock); - inputStream = ClientAuthenticationUtils.getCertificateInputStreamFromSystem(null); - assertEquals(inputStream, mock); - } - } - - private MockedStatic createKeyStoreStaticMock() { - return Mockito.mockStatic( - KeyStore.class, - invocationOnMock -> { - Method method = invocationOnMock.getMethod(); - if (method.getName().equals("getInstance")) { - return invocationOnMock.callRealMethod(); - } - return method.invoke(invocationOnMock.getMock(), invocationOnMock.getArguments()); - }); - } - - private MockedStatic createClientAuthenticationUtilsStaticMock() { - return Mockito.mockStatic( - ClientAuthenticationUtils.class, - invocationOnMock -> { - Method method = invocationOnMock.getMethod(); - if (method.getName().equals("getCertificateInputStreamFromSystem")) { - return 
invocationOnMock.callRealMethod(); - } - return method.invoke(invocationOnMock.getMock(), invocationOnMock.getArguments()); - }); - } - - private void setOperatingSystemMock( - MockedStatic clientAuthenticationUtilsMockedStatic, - boolean isWindows, - boolean isMac) { - clientAuthenticationUtilsMockedStatic.when(ClientAuthenticationUtils::isMac).thenReturn(isMac); - assertEquals(ClientAuthenticationUtils.isMac(), isMac); - clientAuthenticationUtilsMockedStatic - .when(ClientAuthenticationUtils::isWindows) - .thenReturn(isWindows); - assertEquals(ClientAuthenticationUtils.isWindows(), isWindows); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/AccessorTestUtils.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/AccessorTestUtils.java deleted file mode 100644 index 6285cf5d25506..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/AccessorTestUtils.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.utils; - -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; - -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; -import java.util.function.Consumer; -import java.util.function.Function; -import java.util.function.IntSupplier; -import java.util.function.Supplier; -import org.apache.arrow.driver.jdbc.accessor.ArrowFlightJdbcAccessor; -import org.apache.arrow.vector.ValueVector; -import org.hamcrest.Matcher; - -public class AccessorTestUtils { - @FunctionalInterface - public interface CheckedFunction { - R apply(T t) throws SQLException; - } - - public interface AccessorSupplier { - T supply(ValueVector vector, IntSupplier getCurrentRow); - } - - public interface AccessorConsumer { - void accept(T accessor, int currentRow) throws Exception; - } - - public interface MatcherGetter { - Matcher get(T accessor, int currentRow); - } - - public static class Cursor { - int currentRow = 0; - int limit; - - public Cursor(int limit) { - this.limit = limit; - } - - public void next() { - currentRow++; - } - - boolean hasNext() { - return currentRow < limit; - } - - public int getCurrentRow() { - return currentRow; - } - } - - public static class AccessorIterator { - private final AccessorSupplier accessorSupplier; - - public AccessorIterator(AccessorSupplier accessorSupplier) { - this.accessorSupplier = accessorSupplier; - } - - public void iterate(ValueVector vector, AccessorConsumer accessorConsumer) throws Exception { - int valueCount = vector.getValueCount(); - if (valueCount == 0) { - throw new IllegalArgumentException("Vector is empty"); - } - - Cursor cursor = new Cursor(valueCount); - T accessor = accessorSupplier.supply(vector, cursor::getCurrentRow); - - while (cursor.hasNext()) { - accessorConsumer.accept(accessor, cursor.getCurrentRow()); - cursor.next(); - } - } - - public void iterate(ValueVector vector, Consumer accessorConsumer) throws 
Exception { - iterate(vector, (accessor, currentRow) -> accessorConsumer.accept(accessor)); - } - - public List toList(ValueVector vector) throws Exception { - List result = new ArrayList<>(); - iterate(vector, (accessor, currentRow) -> result.add(accessor.getObject())); - - return result; - } - - public void assertAccessorGetter( - ValueVector vector, CheckedFunction getter, MatcherGetter matcherGetter) - throws Exception { - iterate( - vector, - (accessor, currentRow) -> { - R object = getter.apply(accessor); - boolean wasNull = accessor.wasNull(); - - assertThat(object, matcherGetter.get(accessor, currentRow)); - assertThat(wasNull, is(accessor.getObject() == null)); - }); - } - - public void assertAccessorGetterThrowingException( - ValueVector vector, CheckedFunction getter) throws Exception { - iterate( - vector, - (accessor, currentRow) -> - ThrowableAssertionUtils.simpleAssertThrowableClass( - SQLException.class, () -> getter.apply(accessor))); - } - - public void assertAccessorGetter( - ValueVector vector, CheckedFunction getter, Function> matcherGetter) - throws Exception { - assertAccessorGetter(vector, getter, (accessor, currentRow) -> matcherGetter.apply(accessor)); - } - - public void assertAccessorGetter( - ValueVector vector, CheckedFunction getter, Supplier> matcherGetter) - throws Exception { - assertAccessorGetter(vector, getter, (accessor, currentRow) -> matcherGetter.get()); - } - - public void assertAccessorGetter( - ValueVector vector, CheckedFunction getter, Matcher matcher) throws Exception { - assertAccessorGetter(vector, getter, (accessor, currentRow) -> matcher); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImplTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImplTest.java deleted file mode 100644 index 4a46b5f5bedb5..0000000000000 --- 
a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionConfigImplTest.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import static java.lang.Runtime.getRuntime; -import static org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty.CATALOG; -import static org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty.HOST; -import static org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty.PASSWORD; -import static org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty.PORT; -import static org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty.THREAD_POOL_SIZE; -import static org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty.USER; -import static org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty.USE_ENCRYPTION; -import static org.hamcrest.CoreMatchers.is; -import static 
org.hamcrest.MatcherAssert.assertThat; - -import java.util.Properties; -import java.util.Random; -import java.util.function.Function; -import java.util.stream.Stream; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -public final class ArrowFlightConnectionConfigImplTest { - - private static final Random RANDOM = new Random(12L); - - private Properties properties; - private ArrowFlightConnectionConfigImpl arrowFlightConnectionConfig; - - public ArrowFlightConnectionProperty property; - public Object value; - public Function arrowFlightConnectionConfigFunction; - - @BeforeEach - public void setUp() { - properties = new Properties(); - arrowFlightConnectionConfig = new ArrowFlightConnectionConfigImpl(properties); - } - - @ParameterizedTest - @MethodSource("provideParameters") - public void testGetProperty( - ArrowFlightConnectionProperty property, - Object value, - Function configFunction) { - properties.put(property.camelName(), value); - arrowFlightConnectionConfigFunction = configFunction; - assertThat(configFunction.apply(arrowFlightConnectionConfig), is(value)); - assertThat(arrowFlightConnectionConfigFunction.apply(arrowFlightConnectionConfig), is(value)); - } - - public static Stream provideParameters() { - return Stream.of( - Arguments.of( - HOST, - "host", - (Function) - ArrowFlightConnectionConfigImpl::getHost), - Arguments.of( - PORT, - RANDOM.nextInt(Short.toUnsignedInt(Short.MAX_VALUE)), - (Function) - ArrowFlightConnectionConfigImpl::getPort), - Arguments.of( - USER, - "user", - (Function) - ArrowFlightConnectionConfigImpl::getUser), - Arguments.of( - PASSWORD, - "password", - (Function) - ArrowFlightConnectionConfigImpl::getPassword), - Arguments.of( - USE_ENCRYPTION, - RANDOM.nextBoolean(), - 
(Function) - ArrowFlightConnectionConfigImpl::useEncryption), - Arguments.of( - THREAD_POOL_SIZE, - RANDOM.nextInt(getRuntime().availableProcessors()), - (Function) - ArrowFlightConnectionConfigImpl::threadPoolSize), - Arguments.of( - CATALOG, - "catalog", - (Function) - ArrowFlightConnectionConfigImpl::getCatalog)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionPropertyTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionPropertyTest.java deleted file mode 100644 index 5929b9f94a5ef..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ArrowFlightConnectionPropertyTest.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.utils; - -import static org.apache.arrow.util.AutoCloseables.close; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assumptions.assumeTrue; -import static org.mockito.MockitoAnnotations.openMocks; - -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; -import org.apache.arrow.driver.jdbc.utils.ArrowFlightConnectionConfigImpl.ArrowFlightConnectionProperty; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; - -@ExtendWith(MockitoExtension.class) -public final class ArrowFlightConnectionPropertyTest { - - @Mock public Properties properties; - - private AutoCloseable mockitoResource; - - public ArrowFlightConnectionProperty arrowFlightConnectionProperty; - - @BeforeEach - public void setUp() { - mockitoResource = openMocks(this); - } - - @AfterEach - public void tearDown() throws Exception { - close(mockitoResource); - } - - @ParameterizedTest - @MethodSource("provideParameters") - public void testWrapIsUnsupported(ArrowFlightConnectionProperty property) { - this.arrowFlightConnectionProperty = property; - ThrowableAssertionUtils.simpleAssertThrowableClass( - UnsupportedOperationException.class, () -> arrowFlightConnectionProperty.wrap(properties)); - } - - @ParameterizedTest - @MethodSource("provideParameters") - public void testRequiredPropertyThrows(ArrowFlightConnectionProperty property) { - this.arrowFlightConnectionProperty = property; - assumeTrue(arrowFlightConnectionProperty.required()); - ThrowableAssertionUtils.simpleAssertThrowableClass( - IllegalStateException.class, () -> arrowFlightConnectionProperty.get(new 
Properties())); - } - - @ParameterizedTest - @MethodSource("provideParameters") - public void testOptionalPropertyReturnsDefault(ArrowFlightConnectionProperty property) { - this.arrowFlightConnectionProperty = property; - assumeTrue(!arrowFlightConnectionProperty.required()); - assertEquals( - arrowFlightConnectionProperty.defaultValue(), - arrowFlightConnectionProperty.get(new Properties())); - } - - public static List provideParameters() { - final ArrowFlightConnectionProperty[] arrowFlightConnectionProperties = - ArrowFlightConnectionProperty.values(); - final List parameters = new ArrayList<>(arrowFlightConnectionProperties.length); - for (final ArrowFlightConnectionProperty arrowFlightConnectionProperty : - arrowFlightConnectionProperties) { - parameters.add(Arguments.of(arrowFlightConnectionProperty)); - } - return parameters; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapperTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapperTest.java deleted file mode 100644 index b481ef43522ef..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ConnectionWrapperTest.java +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import static java.lang.String.format; -import static java.util.stream.IntStream.range; -import static org.hamcrest.CoreMatchers.allOf; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.CoreMatchers.nullValue; -import static org.hamcrest.CoreMatchers.sameInstance; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.SQLClientInfoException; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.Arrays; -import java.util.Random; -import org.apache.arrow.driver.jdbc.ArrowFlightConnection; -import org.apache.arrow.util.AutoCloseables; -import org.apache.calcite.avatica.AvaticaConnection; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; - -@ExtendWith(MockitoExtension.class) -public final class ConnectionWrapperTest { - - private static final String SCHEMA_NAME = "SCHEMA"; - private static final String PLACEHOLDER_QUERY = "SELECT * FROM DOES_NOT_MATTER"; - private static final int[] COLUMN_INDICES = range(0, 10).toArray(); - private static final String[] COLUMN_NAMES = - Arrays.stream(COLUMN_INDICES).mapToObj(i -> format("col%d", i)).toArray(String[]::new); - private static final String TYPE_NAME = "TYPE_NAME"; - private static final String SAVEPOINT_NAME = "SAVEPOINT"; - private static final String CLIENT_INFO = "CLIENT_INFO"; - private static final int 
RESULT_SET_TYPE = ResultSet.TYPE_FORWARD_ONLY; - private static final int RESULT_SET_CONCURRENCY = ResultSet.CONCUR_READ_ONLY; - private static final int RESULT_SET_HOLDABILITY = ResultSet.HOLD_CURSORS_OVER_COMMIT; - private static final int GENERATED_KEYS = Statement.NO_GENERATED_KEYS; - private static final Random RANDOM = new Random(Long.MAX_VALUE); - private static final int TIMEOUT = RANDOM.nextInt(Integer.MAX_VALUE); - - @Mock public AvaticaConnection underlyingConnection; - private ConnectionWrapper connectionWrapper; - - @BeforeEach - public void setUp() { - connectionWrapper = new ConnectionWrapper(underlyingConnection); - } - - @AfterEach - public void tearDown() throws Exception { - AutoCloseables.close(connectionWrapper, underlyingConnection); - } - - @Test - public void testUnwrappingUnderlyingConnectionShouldReturnUnderlyingConnection() { - assertThat( - assertDoesNotThrow(() -> connectionWrapper.unwrap(Object.class)), - is(sameInstance(underlyingConnection))); - assertThat( - assertDoesNotThrow(() -> connectionWrapper.unwrap(Connection.class)), - is(sameInstance(underlyingConnection))); - assertThat( - assertDoesNotThrow(() -> connectionWrapper.unwrap(AvaticaConnection.class)), - is(sameInstance(underlyingConnection))); - ThrowableAssertionUtils.simpleAssertThrowableClass( - ClassCastException.class, () -> connectionWrapper.unwrap(ArrowFlightConnection.class)); - ThrowableAssertionUtils.simpleAssertThrowableClass( - ClassCastException.class, () -> connectionWrapper.unwrap(ConnectionWrapper.class)); - } - - @Test - public void testCreateStatementShouldCreateStatementFromUnderlyingConnection() - throws SQLException { - assertThat( - connectionWrapper.createStatement(), - is(sameInstance(verify(underlyingConnection, times(1)).createStatement()))); - assertThat( - connectionWrapper.createStatement( - RESULT_SET_TYPE, RESULT_SET_CONCURRENCY, RESULT_SET_HOLDABILITY), - is( - verify(underlyingConnection, times(1)) - .createStatement(RESULT_SET_TYPE, 
RESULT_SET_CONCURRENCY, RESULT_SET_HOLDABILITY))); - assertThat( - connectionWrapper.createStatement(RESULT_SET_TYPE, RESULT_SET_CONCURRENCY), - is( - verify(underlyingConnection, times(1)) - .createStatement(RESULT_SET_TYPE, RESULT_SET_CONCURRENCY))); - } - - @Test - public void testPrepareStatementShouldPrepareStatementFromUnderlyingConnection() - throws SQLException { - assertThat( - connectionWrapper.prepareStatement(PLACEHOLDER_QUERY), - is( - sameInstance( - verify(underlyingConnection, times(1)).prepareStatement(PLACEHOLDER_QUERY)))); - assertThat( - connectionWrapper.prepareStatement(PLACEHOLDER_QUERY, COLUMN_INDICES), - is( - allOf( - sameInstance( - verify(underlyingConnection, times(1)) - .prepareStatement(PLACEHOLDER_QUERY, COLUMN_INDICES)), - nullValue()))); - assertThat( - connectionWrapper.prepareStatement(PLACEHOLDER_QUERY, COLUMN_NAMES), - is( - allOf( - sameInstance( - verify(underlyingConnection, times(1)) - .prepareStatement(PLACEHOLDER_QUERY, COLUMN_NAMES)), - nullValue()))); - assertThat( - connectionWrapper.prepareStatement( - PLACEHOLDER_QUERY, RESULT_SET_TYPE, RESULT_SET_CONCURRENCY), - is( - allOf( - sameInstance( - verify(underlyingConnection, times(1)) - .prepareStatement( - PLACEHOLDER_QUERY, RESULT_SET_TYPE, RESULT_SET_CONCURRENCY)), - nullValue()))); - assertThat( - connectionWrapper.prepareStatement(PLACEHOLDER_QUERY, GENERATED_KEYS), - is( - allOf( - sameInstance( - verify(underlyingConnection, times(1)) - .prepareStatement(PLACEHOLDER_QUERY, GENERATED_KEYS)), - nullValue()))); - } - - @Test - public void testPrepareCallShouldPrepareCallFromUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.prepareCall(PLACEHOLDER_QUERY), - is(sameInstance(verify(underlyingConnection, times(1)).prepareCall(PLACEHOLDER_QUERY)))); - assertThat( - connectionWrapper.prepareCall(PLACEHOLDER_QUERY, RESULT_SET_TYPE, RESULT_SET_CONCURRENCY), - is( - verify(underlyingConnection, times(1)) - .prepareCall(PLACEHOLDER_QUERY, 
RESULT_SET_TYPE, RESULT_SET_CONCURRENCY))); - } - - @Test - public void testNativeSqlShouldGetNativeSqlFromUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.nativeSQL(PLACEHOLDER_QUERY), - is(sameInstance(verify(underlyingConnection, times(1)).nativeSQL(PLACEHOLDER_QUERY)))); - } - - @Test - public void testSetAutoCommitShouldSetAutoCommitInUnderlyingConnection() throws SQLException { - connectionWrapper.setAutoCommit(true); - verify(underlyingConnection, times(1)).setAutoCommit(true); - connectionWrapper.setAutoCommit(false); - verify(underlyingConnection, times(1)).setAutoCommit(false); - } - - @Test - public void testGetAutoCommitShouldGetAutoCommitFromUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.getAutoCommit(), - is(verify(underlyingConnection, times(1)).getAutoCommit())); - } - - @Test - public void testCommitShouldCommitToUnderlyingConnection() throws SQLException { - connectionWrapper.commit(); - verify(underlyingConnection, times(1)).commit(); - } - - @Test - public void testRollbackShouldRollbackFromUnderlyingConnection() throws SQLException { - connectionWrapper.rollback(); - verify(underlyingConnection, times(1)).rollback(); - } - - @Test - public void testCloseShouldCloseUnderlyingConnection() throws SQLException { - connectionWrapper.close(); - verify(underlyingConnection, times(1)).close(); - } - - @Test - public void testIsClosedShouldGetStatusFromUnderlyingConnection() throws SQLException { - assertThat(connectionWrapper.isClosed(), is(verify(underlyingConnection, times(1)).isClosed())); - } - - @Test - public void testGetMetadataShouldGetMetadataFromUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.getMetaData(), is(verify(underlyingConnection, times(1)).getMetaData())); - } - - @Test - public void testSetReadOnlyShouldSetUnderlyingConnectionAsReadOnly() throws SQLException { - connectionWrapper.setReadOnly(false); - verify(underlyingConnection, 
times(1)).setReadOnly(false); - connectionWrapper.setReadOnly(true); - verify(underlyingConnection, times(1)).setReadOnly(true); - } - - @Test - public void testSetIsReadOnlyShouldGetStatusFromUnderlyingConnection() throws SQLException { - assertThat(connectionWrapper.isReadOnly(), is(verify(underlyingConnection).isReadOnly())); - } - - @Test - public void testSetCatalogShouldSetCatalogInUnderlyingConnection() throws SQLException { - final String catalog = "CATALOG"; - connectionWrapper.setCatalog(catalog); - verify(underlyingConnection, times(1)).setCatalog(catalog); - } - - @Test - public void testGetCatalogShouldGetCatalogFromUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.getCatalog(), - is(allOf(sameInstance(verify(underlyingConnection, times(1)).getCatalog()), nullValue()))); - } - - @Test - public void setTransactionIsolationShouldSetUnderlyingTransactionIsolation() throws SQLException { - final int transactionIsolation = Connection.TRANSACTION_NONE; - connectionWrapper.setTransactionIsolation(Connection.TRANSACTION_NONE); - verify(underlyingConnection, times(1)).setTransactionIsolation(transactionIsolation); - } - - @Test - public void getTransactionIsolationShouldGetUnderlyingConnectionIsolation() throws SQLException { - assertThat( - connectionWrapper.getTransactionIsolation(), - is(equalTo(verify(underlyingConnection, times(1)).getTransactionIsolation()))); - } - - @Test - public void getWarningShouldGetWarningsFromUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.getWarnings(), - is(allOf(sameInstance(verify(underlyingConnection, times(1)).getWarnings()), nullValue()))); - } - - @Test - public void testClearWarningShouldClearWarningsFromUnderlyingConnection() throws SQLException { - connectionWrapper.clearWarnings(); - verify(underlyingConnection, times(1)).clearWarnings(); - } - - @Test - public void getTypeMapShouldGetTypeMapFromUnderlyingConnection() throws SQLException { - 
when(underlyingConnection.getTypeMap()).thenReturn(null); - assertThat( - connectionWrapper.getTypeMap(), is(verify(underlyingConnection, times(1)).getTypeMap())); - } - - @Test - public void testSetTypeMapShouldSetTypeMapFromUnderlyingConnection() throws SQLException { - connectionWrapper.setTypeMap(null); - verify(underlyingConnection, times(1)).setTypeMap(null); - } - - @Test - public void testSetHoldabilityShouldSetUnderlyingConnection() throws SQLException { - connectionWrapper.setHoldability(RESULT_SET_HOLDABILITY); - verify(underlyingConnection, times(1)).setHoldability(RESULT_SET_HOLDABILITY); - } - - @Test - public void testGetHoldabilityShouldGetHoldabilityFromUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.getHoldability(), - is(equalTo(verify(underlyingConnection, times(1)).getHoldability()))); - } - - @Test - public void testSetSavepointShouldSetSavepointInUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.setSavepoint(), - is( - allOf( - sameInstance(verify(underlyingConnection, times(1)).setSavepoint()), nullValue()))); - assertThat( - connectionWrapper.setSavepoint(SAVEPOINT_NAME), - is(sameInstance(verify(underlyingConnection, times(1)).setSavepoint(SAVEPOINT_NAME)))); - } - - @Test - public void testRollbackShouldRollbackInUnderlyingConnection() throws SQLException { - connectionWrapper.rollback(null); - verify(underlyingConnection, times(1)).rollback(null); - } - - @Test - public void testReleaseSavepointShouldReleaseSavepointFromUnderlyingConnection() - throws SQLException { - connectionWrapper.releaseSavepoint(null); - verify(underlyingConnection, times(1)).releaseSavepoint(null); - } - - @Test - public void testCreateClobShouldCreateClobFromUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.createClob(), - is(allOf(sameInstance(verify(underlyingConnection, times(1)).createClob()), nullValue()))); - } - - @Test - public void 
testCreateBlobShouldCreateBlobFromUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.createBlob(), - is(allOf(sameInstance(verify(underlyingConnection, times(1)).createBlob()), nullValue()))); - } - - @Test - public void testCreateNClobShouldCreateNClobFromUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.createNClob(), - is(allOf(sameInstance(verify(underlyingConnection, times(1)).createNClob()), nullValue()))); - } - - @Test - public void testCreateSQLXMLShouldCreateSQLXMLFromUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.createSQLXML(), - is( - allOf( - sameInstance(verify(underlyingConnection, times(1)).createSQLXML()), nullValue()))); - } - - @Test - public void testIsValidShouldReturnWhetherUnderlyingConnectionIsValid() throws SQLException { - assertThat( - connectionWrapper.isValid(TIMEOUT), - is(verify(underlyingConnection, times(1)).isValid(TIMEOUT))); - } - - @Test - public void testSetClientInfoShouldSetClientInfoInUnderlyingConnection() - throws SQLClientInfoException { - connectionWrapper.setClientInfo(null); - verify(underlyingConnection, times(1)).setClientInfo(null); - } - - @Test - public void testGetClientInfoShouldGetClientInfoFromUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.getClientInfo(CLIENT_INFO), - is( - allOf( - sameInstance(verify(underlyingConnection, times(1)).getClientInfo(CLIENT_INFO)), - nullValue()))); - assertThat( - connectionWrapper.getClientInfo(), - is( - allOf( - sameInstance(verify(underlyingConnection, times(1)).getClientInfo()), - nullValue()))); - } - - @Test - public void testCreateArrayOfShouldCreateArrayFromUnderlyingConnection() throws SQLException { - final Object[] elements = range(0, 100).boxed().toArray(); - assertThat( - connectionWrapper.createArrayOf(TYPE_NAME, elements), - is( - allOf( - sameInstance( - verify(underlyingConnection, times(1)).createArrayOf(TYPE_NAME, elements)), - 
nullValue()))); - } - - @Test - public void testCreateStructShouldCreateStructFromUnderlyingConnection() throws SQLException { - final Object[] attributes = range(0, 120).boxed().toArray(); - assertThat( - connectionWrapper.createStruct(TYPE_NAME, attributes), - is( - allOf( - sameInstance( - verify(underlyingConnection, times(1)).createStruct(TYPE_NAME, attributes)), - nullValue()))); - } - - @Test - public void testSetSchemaShouldSetSchemaInUnderlyingConnection() throws SQLException { - connectionWrapper.setSchema(SCHEMA_NAME); - verify(underlyingConnection, times(1)).setSchema(SCHEMA_NAME); - } - - @Test - public void testGetSchemaShouldGetSchemaFromUnderlyingConnection() throws SQLException { - assertThat( - connectionWrapper.getSchema(), - is(allOf(sameInstance(verify(underlyingConnection, times(1)).getSchema()), nullValue()))); - } - - @Test - public void testAbortShouldAbortUnderlyingConnection() throws SQLException { - connectionWrapper.abort(null); - verify(underlyingConnection, times(1)).abort(null); - } - - @Test - public void testSetNetworkTimeoutShouldSetNetworkTimeoutInUnderlyingConnection() - throws SQLException { - connectionWrapper.setNetworkTimeout(null, TIMEOUT); - verify(underlyingConnection, times(1)).setNetworkTimeout(null, TIMEOUT); - } - - @Test - public void testGetNetworkTimeoutShouldGetNetworkTimeoutFromUnderlyingConnection() - throws SQLException { - assertThat( - connectionWrapper.getNetworkTimeout(), - is(equalTo(verify(underlyingConnection, times(1)).getNetworkTimeout()))); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ConvertUtilsTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ConvertUtilsTest.java deleted file mode 100644 index f6f549b5ed3d8..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ConvertUtilsTest.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache 
Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.MatcherAssert.assertThat; - -import com.google.common.collect.ImmutableList; -import java.util.List; -import org.apache.arrow.flight.sql.FlightSqlColumnMetadata; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.calcite.avatica.ColumnMetaData; -import org.apache.calcite.avatica.proto.Common; -import org.junit.jupiter.api.Test; - -public class ConvertUtilsTest { - - @Test - public void testShouldSetOnColumnMetaDataBuilder() { - - final Common.ColumnMetaData.Builder builder = Common.ColumnMetaData.newBuilder(); - final FlightSqlColumnMetadata expectedColumnMetaData = - new FlightSqlColumnMetadata.Builder() - .catalogName("catalog1") - .schemaName("schema1") - .tableName("table1") - .isAutoIncrement(true) - .isCaseSensitive(true) - .isReadOnly(true) - .isSearchable(true) - .precision(20) - .scale(10) - .build(); - ConvertUtils.setOnColumnMetaDataBuilder(builder, expectedColumnMetaData.getMetadataMap()); - assertBuilder(builder, expectedColumnMetaData); - 
} - - @Test - public void testShouldConvertArrowFieldsToColumnMetaDataList() { - - final List listField = - ImmutableList.of( - new Field( - "col1", - new FieldType( - true, - ArrowType.Utf8.INSTANCE, - null, - new FlightSqlColumnMetadata.Builder() - .catalogName("catalog1") - .schemaName("schema1") - .tableName("table1") - .build() - .getMetadataMap()), - null)); - - final List expectedColumnMetaData = - ImmutableList.of( - ColumnMetaData.fromProto( - Common.ColumnMetaData.newBuilder() - .setCatalogName("catalog1") - .setSchemaName("schema1") - .setTableName("table1") - .build())); - - final List actualColumnMetaData = - ConvertUtils.convertArrowFieldsToColumnMetaDataList(listField); - assertColumnMetaData(expectedColumnMetaData, actualColumnMetaData); - } - - private void assertColumnMetaData( - final List expected, final List actual) { - assertThat(expected.size(), equalTo(actual.size())); - int size = expected.size(); - for (int i = 0; i < size; i++) { - final ColumnMetaData expectedColumnMetaData = expected.get(i); - final ColumnMetaData actualColumnMetaData = actual.get(i); - assertThat(expectedColumnMetaData.catalogName, equalTo(actualColumnMetaData.catalogName)); - assertThat(expectedColumnMetaData.schemaName, equalTo(actualColumnMetaData.schemaName)); - assertThat(expectedColumnMetaData.tableName, equalTo(actualColumnMetaData.tableName)); - assertThat(expectedColumnMetaData.readOnly, equalTo(actualColumnMetaData.readOnly)); - assertThat(expectedColumnMetaData.autoIncrement, equalTo(actualColumnMetaData.autoIncrement)); - assertThat(expectedColumnMetaData.precision, equalTo(actualColumnMetaData.precision)); - assertThat(expectedColumnMetaData.scale, equalTo(actualColumnMetaData.scale)); - assertThat(expectedColumnMetaData.caseSensitive, equalTo(actualColumnMetaData.caseSensitive)); - assertThat(expectedColumnMetaData.searchable, equalTo(actualColumnMetaData.searchable)); - } - } - - private void assertBuilder( - final Common.ColumnMetaData.Builder builder, 
- final FlightSqlColumnMetadata flightSqlColumnMetaData) { - - final Integer precision = flightSqlColumnMetaData.getPrecision(); - final Integer scale = flightSqlColumnMetaData.getScale(); - - assertThat(flightSqlColumnMetaData.getCatalogName(), equalTo(builder.getCatalogName())); - assertThat(flightSqlColumnMetaData.getSchemaName(), equalTo(builder.getSchemaName())); - assertThat(flightSqlColumnMetaData.getTableName(), equalTo(builder.getTableName())); - assertThat(flightSqlColumnMetaData.isAutoIncrement(), equalTo(builder.getAutoIncrement())); - assertThat(flightSqlColumnMetaData.isCaseSensitive(), equalTo(builder.getCaseSensitive())); - assertThat(flightSqlColumnMetaData.isSearchable(), equalTo(builder.getSearchable())); - assertThat(flightSqlColumnMetaData.isReadOnly(), equalTo(builder.getReadOnly())); - assertThat(precision == null ? 0 : precision, equalTo(builder.getPrecision())); - assertThat(scale == null ? 0 : scale, equalTo(builder.getScale())); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/CoreMockedSqlProducers.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/CoreMockedSqlProducers.java deleted file mode 100644 index 8197d7d95f09a..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/CoreMockedSqlProducers.java +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import static java.lang.String.format; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertAll; - -import com.google.common.collect.ImmutableList; -import java.sql.Date; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.function.Consumer; -import java.util.stream.IntStream; -import org.apache.arrow.flight.FlightProducer.ServerStreamListener; -import org.apache.arrow.flight.sql.FlightSqlColumnMetadata; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import 
org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.Text; - -/** Standard {@link MockFlightSqlProducer} instances for tests. */ -// TODO Remove this once all tests are refactor to use only the queries they need. -public final class CoreMockedSqlProducers { - - public static final String LEGACY_REGULAR_SQL_CMD = "SELECT * FROM TEST"; - public static final String LEGACY_METADATA_SQL_CMD = "SELECT * FROM METADATA"; - public static final String LEGACY_CANCELLATION_SQL_CMD = "SELECT * FROM TAKES_FOREVER"; - public static final String LEGACY_REGULAR_WITH_EMPTY_SQL_CMD = "SELECT * FROM TEST_EMPTIES"; - - private CoreMockedSqlProducers() { - // Prevent instantiation. - } - - /** - * Gets the {@link MockFlightSqlProducer} for legacy tests and backward compatibility. - * - * @return a new producer. - */ - public static MockFlightSqlProducer getLegacyProducer() { - - final MockFlightSqlProducer producer = new MockFlightSqlProducer(); - addLegacyRegularSqlCmdSupport(producer); - addLegacyMetadataSqlCmdSupport(producer); - addLegacyCancellationSqlCmdSupport(producer); - addQueryWithEmbeddedEmptyRoot(producer); - return producer; - } - - private static void addQueryWithEmbeddedEmptyRoot(final MockFlightSqlProducer producer) { - final Schema querySchema = - new Schema( - ImmutableList.of( - new Field("ID", new FieldType(true, new ArrowType.Int(64, true), null), null))); - - final List> resultProducers = new ArrayList<>(); - Consumer dataRoot = - listener -> { - try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final VectorSchemaRoot root = VectorSchemaRoot.create(querySchema, allocator)) { - root.allocateNew(); - root.setRowCount(0); - listener.start(root); - listener.putNext(); // empty root - ((BigIntVector) root.getVector("ID")).setSafe(0, 100L); - root.setRowCount(1); - listener.putNext(); // data root - root.clear(); - root.setRowCount(0); - listener.putNext(); // empty root - ((BigIntVector) 
root.getVector("ID")).setSafe(0, 100L); - root.setRowCount(1); - listener.putNext(); // data root - } finally { - listener.completed(); - } - }; - resultProducers.add(dataRoot); - producer.addSelectQuery(LEGACY_REGULAR_WITH_EMPTY_SQL_CMD, querySchema, resultProducers); - } - - private static void addLegacyRegularSqlCmdSupport(final MockFlightSqlProducer producer) { - final Schema querySchema = - new Schema( - ImmutableList.of( - new Field("ID", new FieldType(true, new ArrowType.Int(64, true), null), null), - new Field("Name", new FieldType(true, new ArrowType.Utf8(), null), null), - new Field("Age", new FieldType(true, new ArrowType.Int(32, false), null), null), - new Field( - "Salary", - new FieldType( - true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null), - null), - new Field( - "Hire Date", new FieldType(true, new ArrowType.Date(DateUnit.DAY), null), null), - new Field( - "Last Sale", - new FieldType(true, new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), null), - null))); - final List> resultProducers = new ArrayList<>(); - IntStream.range(0, 10) - .forEach( - page -> { - resultProducers.add( - listener -> { - final int rowsPerPage = 5000; - try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final VectorSchemaRoot root = - VectorSchemaRoot.create(querySchema, allocator)) { - root.allocateNew(); - listener.start(root); - int batchSize = 500; - int indexOnBatch = 0; - int resultsOffset = page * rowsPerPage; - for (int i = 0; i < rowsPerPage; i++) { - ((BigIntVector) root.getVector("ID")) - .setSafe( - indexOnBatch, (long) Integer.MAX_VALUE + 1 + i + resultsOffset); - ((VarCharVector) root.getVector("Name")) - .setSafe(indexOnBatch, new Text("Test Name #" + (resultsOffset + i))); - ((UInt4Vector) root.getVector("Age")) - .setSafe(indexOnBatch, (int) Short.MAX_VALUE + 1 + i + resultsOffset); - ((Float8Vector) root.getVector("Salary")) - .setSafe( - indexOnBatch, - Math.scalb((double) (i + resultsOffset) / 2, i + 
resultsOffset)); - ((DateDayVector) root.getVector("Hire Date")) - .setSafe(indexOnBatch, i + resultsOffset); - ((TimeStampMilliVector) root.getVector("Last Sale")) - .setSafe(indexOnBatch, Long.MAX_VALUE - i - resultsOffset); - indexOnBatch++; - if (indexOnBatch == batchSize) { - root.setRowCount(indexOnBatch); - if (listener.isCancelled()) { - return; - } - listener.putNext(); - root.allocateNew(); - indexOnBatch = 0; - } - } - if (listener.isCancelled()) { - return; - } - root.setRowCount(indexOnBatch); - listener.putNext(); - } finally { - listener.completed(); - } - }); - }); - producer.addSelectQuery(LEGACY_REGULAR_SQL_CMD, querySchema, resultProducers); - } - - private static void addLegacyMetadataSqlCmdSupport(final MockFlightSqlProducer producer) { - final Schema metadataSchema = - new Schema( - ImmutableList.of( - new Field( - "integer0", - new FieldType( - true, - new ArrowType.Int(64, true), - null, - new FlightSqlColumnMetadata.Builder() - .catalogName("CATALOG_NAME_1") - .schemaName("SCHEMA_NAME_1") - .tableName("TABLE_NAME_1") - .typeName("TYPE_NAME_1") - .precision(10) - .scale(0) - .isAutoIncrement(true) - .isCaseSensitive(false) - .isReadOnly(true) - .isSearchable(true) - .build() - .getMetadataMap()), - null), - new Field( - "string1", - new FieldType( - true, - new ArrowType.Utf8(), - null, - new FlightSqlColumnMetadata.Builder() - .catalogName("CATALOG_NAME_2") - .schemaName("SCHEMA_NAME_2") - .tableName("TABLE_NAME_2") - .typeName("TYPE_NAME_2") - .precision(65535) - .scale(0) - .isAutoIncrement(false) - .isCaseSensitive(true) - .isReadOnly(false) - .isSearchable(true) - .build() - .getMetadataMap()), - null), - new Field( - "float2", - new FieldType( - true, - new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), - null, - new FlightSqlColumnMetadata.Builder() - .catalogName("CATALOG_NAME_3") - .schemaName("SCHEMA_NAME_3") - .tableName("TABLE_NAME_3") - .typeName("TYPE_NAME_3") - .precision(15) - .scale(20) - .isAutoIncrement(false) - 
.isCaseSensitive(false) - .isReadOnly(false) - .isSearchable(true) - .build() - .getMetadataMap()), - null))); - final Consumer formula = - listener -> { - try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final VectorSchemaRoot root = VectorSchemaRoot.create(metadataSchema, allocator)) { - root.allocateNew(); - ((BigIntVector) root.getVector("integer0")).setSafe(0, 1); - ((VarCharVector) root.getVector("string1")).setSafe(0, new Text("teste")); - ((Float4Vector) root.getVector("float2")).setSafe(0, (float) 4.1); - root.setRowCount(1); - listener.start(root); - listener.putNext(); - } finally { - listener.completed(); - } - }; - producer.addSelectQuery( - LEGACY_METADATA_SQL_CMD, metadataSchema, Collections.singletonList(formula)); - } - - private static void addLegacyCancellationSqlCmdSupport(final MockFlightSqlProducer producer) { - producer.addSelectQuery( - LEGACY_CANCELLATION_SQL_CMD, - new Schema( - Collections.singletonList( - new Field( - "integer0", new FieldType(true, new ArrowType.Int(64, true), null), null))), - Collections.singletonList( - listener -> { - // Should keep hanging until canceled. - })); - } - - /** - * Asserts that the values in the provided {@link ResultSet} are expected for the legacy {@link - * MockFlightSqlProducer}. - * - * @param resultSet the result set. - * @throws SQLException on error. 
- */ - public static void assertLegacyRegularSqlResultSet(final ResultSet resultSet) - throws SQLException { - final int expectedRowCount = 50_000; - - final long[] expectedIds = new long[expectedRowCount]; - final List expectedNames = new ArrayList<>(expectedRowCount); - final int[] expectedAges = new int[expectedRowCount]; - final double[] expectedSalaries = new double[expectedRowCount]; - final List expectedHireDates = new ArrayList<>(expectedRowCount); - final List expectedLastSales = new ArrayList<>(expectedRowCount); - - final long[] actualIds = new long[expectedRowCount]; - final List actualNames = new ArrayList<>(expectedRowCount); - final int[] actualAges = new int[expectedRowCount]; - final double[] actualSalaries = new double[expectedRowCount]; - final List actualHireDates = new ArrayList<>(expectedRowCount); - final List actualLastSales = new ArrayList<>(expectedRowCount); - - int actualRowCount = 0; - - for (; resultSet.next(); actualRowCount++) { - expectedIds[actualRowCount] = (long) Integer.MAX_VALUE + 1 + actualRowCount; - expectedNames.add(format("Test Name #%d", actualRowCount)); - expectedAges[actualRowCount] = (int) Short.MAX_VALUE + 1 + actualRowCount; - expectedSalaries[actualRowCount] = Math.scalb((double) actualRowCount / 2, actualRowCount); - expectedHireDates.add(new Date(86_400_000L * actualRowCount)); - expectedLastSales.add(new Timestamp(Long.MAX_VALUE - actualRowCount)); - - actualIds[actualRowCount] = (long) resultSet.getObject(1); - actualNames.add((String) resultSet.getObject(2)); - actualAges[actualRowCount] = (int) resultSet.getObject(3); - actualSalaries[actualRowCount] = (double) resultSet.getObject(4); - actualHireDates.add((Date) resultSet.getObject(5)); - actualLastSales.add((Timestamp) resultSet.getObject(6)); - } - - final int finalActualRowCount = actualRowCount; - assertAll( - "ResultSet values are as expected", - () -> assertThat(finalActualRowCount, is(equalTo(expectedRowCount))), - () -> assertThat(actualIds, 
is(expectedIds)), - () -> assertThat(actualNames, is(expectedNames)), - () -> assertThat(actualAges, is(expectedAges)), - () -> assertThat(actualSalaries, is(expectedSalaries)), - () -> assertThat(actualHireDates, is(expectedHireDates)), - () -> assertThat(actualLastSales, is(expectedLastSales))); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtilsTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtilsTest.java deleted file mode 100644 index 9c6635202349b..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/DateTimeUtilsTest.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.utils; - -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; - -import java.sql.Timestamp; -import java.time.Instant; -import java.util.Calendar; -import java.util.TimeZone; -import org.junit.jupiter.api.Test; - -public class DateTimeUtilsTest { - - private final TimeZone defaultTimezone = TimeZone.getTimeZone("UTC"); - private final TimeZone alternateTimezone = TimeZone.getTimeZone("America/Vancouver"); - private final long positiveEpochMilli = 959817600000L; // 2000-06-01 00:00:00 UTC - private final long negativeEpochMilli = -618105600000L; // 1950-06-01 00:00:00 UTC - - @Test - public void testShouldGetOffsetWithSameTimeZone() { - final TimeZone currentTimezone = TimeZone.getDefault(); - - final long epochMillis = positiveEpochMilli; - final long offset = defaultTimezone.getOffset(epochMillis); - - TimeZone.setDefault(defaultTimezone); - - try { // Trying to guarantee timezone returns to its original value - final long expected = epochMillis + offset; - final long actual = - DateTimeUtils.applyCalendarOffset(epochMillis, Calendar.getInstance(defaultTimezone)); - - assertThat(actual, is(expected)); - } finally { - // Reset Timezone - TimeZone.setDefault(currentTimezone); - } - } - - @Test - public void testShouldGetOffsetWithDifferentTimeZone() { - final TimeZone currentTimezone = TimeZone.getDefault(); - - final long epochMillis = negativeEpochMilli; - final long offset = alternateTimezone.getOffset(epochMillis); - - TimeZone.setDefault(alternateTimezone); - - try { // Trying to guarantee timezone returns to its original value - final long expectedEpochMillis = epochMillis + offset; - final long actualEpochMillis = - DateTimeUtils.applyCalendarOffset(epochMillis, Calendar.getInstance(defaultTimezone)); - - assertThat(actualEpochMillis, is(expectedEpochMillis)); - } finally { - // Reset Timezone - TimeZone.setDefault(currentTimezone); - } - } - - @Test - public void 
testShouldGetTimestampPositive() { - long epochMilli = positiveEpochMilli; - final Instant instant = Instant.ofEpochMilli(epochMilli); - - final Timestamp expected = Timestamp.from(instant); - final Timestamp actual = DateTimeUtils.getTimestampValue(epochMilli); - - assertThat(expected, is(actual)); - } - - @Test - public void testShouldGetTimestampNegative() { - final long epochMilli = negativeEpochMilli; - final Instant instant = Instant.ofEpochMilli(epochMilli); - - final Timestamp expected = Timestamp.from(instant); - final Timestamp actual = DateTimeUtils.getTimestampValue(epochMilli); - - assertThat(expected, is(actual)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/FallbackFlightSqlProducer.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/FallbackFlightSqlProducer.java deleted file mode 100644 index 9aa257172cc22..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/FallbackFlightSqlProducer.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.utils; - -import com.google.protobuf.Any; -import com.google.protobuf.ByteString; -import com.google.protobuf.Message; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.Result; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.flight.sql.BasicFlightSqlProducer; -import org.apache.arrow.flight.sql.impl.FlightSql; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; - -public class FallbackFlightSqlProducer extends BasicFlightSqlProducer { - private final VectorSchemaRoot data; - - public FallbackFlightSqlProducer(VectorSchemaRoot resultData) { - this.data = resultData; - } - - @Override - protected List determineEndpoints( - T request, FlightDescriptor flightDescriptor, Schema schema) { - return Collections.emptyList(); - } - - @Override - public void createPreparedStatement( - FlightSql.ActionCreatePreparedStatementRequest request, - CallContext context, - StreamListener listener) { - final FlightSql.ActionCreatePreparedStatementResult.Builder resultBuilder = - FlightSql.ActionCreatePreparedStatementResult.newBuilder() - .setPreparedStatementHandle(request.getQueryBytes()); - - final ByteString datasetSchemaBytes = - ByteString.copyFrom(data.getSchema().serializeAsMessage()); - - resultBuilder.setDatasetSchema(datasetSchemaBytes); - listener.onNext(new Result(Any.pack(resultBuilder.build()).toByteArray())); - listener.onCompleted(); - } - - @Override - public FlightInfo getFlightInfoStatement( - FlightSql.CommandStatementQuery command, CallContext context, FlightDescriptor descriptor) { - return getFlightInfo(descriptor, command.getQuery()); - } - - @Override - public FlightInfo 
getFlightInfoPreparedStatement( - FlightSql.CommandPreparedStatementQuery command, - CallContext context, - FlightDescriptor descriptor) { - return getFlightInfo(descriptor, command.getPreparedStatementHandle().toStringUtf8()); - } - - @Override - public void getStreamStatement( - FlightSql.TicketStatementQuery ticket, CallContext context, ServerStreamListener listener) { - listener.start(data); - listener.putNext(); - listener.completed(); - } - - @Override - public void closePreparedStatement( - FlightSql.ActionClosePreparedStatementRequest request, - CallContext context, - StreamListener listener) { - listener.onCompleted(); - } - - private FlightInfo getFlightInfo(FlightDescriptor descriptor, String query) { - final List endpoints; - final Ticket ticket = - new Ticket(Any.pack(FlightSql.TicketStatementQuery.getDefaultInstance()).toByteArray()); - if (query.equals("fallback")) { - endpoints = - Collections.singletonList( - FlightEndpoint.builder(ticket, Location.reuseConnection()).build()); - } else if (query.equals("fallback with error")) { - endpoints = - Collections.singletonList( - FlightEndpoint.builder( - ticket, - Location.forGrpcInsecure("localhost", 9999), - Location.reuseConnection()) - .build()); - } else { - throw CallStatus.UNIMPLEMENTED.withDescription(query).toRuntimeException(); - } - return FlightInfo.builder(data.getSchema(), descriptor, endpoints).build(); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightEndpointDataQueueTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightEndpointDataQueueTest.java deleted file mode 100644 index f40610a0320ab..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightEndpointDataQueueTest.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.CoreMatchers.nullValue; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.mockito.Mockito.mock; - -import java.util.concurrent.CompletionService; -import org.apache.arrow.driver.jdbc.client.CloseableEndpointStreamPair; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; - -/** Tests for {@link FlightEndpointDataQueue}. 
*/ -@ExtendWith(MockitoExtension.class) -public class FlightEndpointDataQueueTest { - - @Mock private CompletionService mockedService; - private FlightEndpointDataQueue queue; - - @BeforeEach - public void setUp() { - queue = new FlightEndpointDataQueue(mockedService); - } - - @Test - public void testNextShouldRetrieveNullIfEmpty() throws Exception { - assertThat(queue.next(), is(nullValue())); - } - - @Test - public void testNextShouldThrowExceptionUponClose() throws Exception { - queue.close(); - ThrowableAssertionUtils.simpleAssertThrowableClass( - IllegalStateException.class, () -> queue.next()); - } - - @Test - public void testEnqueueShouldThrowExceptionUponClose() throws Exception { - queue.close(); - ThrowableAssertionUtils.simpleAssertThrowableClass( - IllegalStateException.class, () -> queue.enqueue(mock(CloseableEndpointStreamPair.class))); - } - - @Test - public void testCheckOpen() throws Exception { - assertDoesNotThrow( - () -> { - queue.checkOpen(); - return true; - }); - queue.close(); - ThrowableAssertionUtils.simpleAssertThrowableClass( - IllegalStateException.class, () -> queue.checkOpen()); - } - - @Test - public void testShouldCloseQueue() throws Exception { - assertThat(queue.isClosed(), is(false)); - queue.close(); - assertThat(queue.isClosed(), is(true)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightSqlTestCertificates.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightSqlTestCertificates.java deleted file mode 100644 index b72dcef743329..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/FlightSqlTestCertificates.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import java.io.File; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Arrays; -import java.util.List; -import java.util.Objects; - -/** Utility class for unit tests that need to reference the certificate params. */ -public class FlightSqlTestCertificates { - - public static final String TEST_DATA_ENV_VAR = "ARROW_TEST_DATA"; - public static final String TEST_DATA_PROPERTY = "arrow.test.dataRoot"; - - static Path getTestDataRoot() { - String path = System.getenv(TEST_DATA_ENV_VAR); - if (path == null) { - path = System.getProperty(TEST_DATA_PROPERTY); - } - return Paths.get( - Objects.requireNonNull( - path, - String.format( - "Could not find test data path. Set the environment variable %s or the JVM property %s.", - TEST_DATA_ENV_VAR, TEST_DATA_PROPERTY))); - } - - /** - * Get the Path from the Files to be used in the encrypted test of Flight. - * - * @return the Path from the Files with certificates and keys. - */ - static Path getFlightTestDataRoot() { - return getTestDataRoot().resolve("flight"); - } - - /** - * Create File object with the CA certificate. - * - * @return A File containing the Root CA certificate. 
- */ - public static File exampleCACert() { - final Path root = getFlightTestDataRoot(); - return root.resolve("root-ca.pem").toFile(); - } - - /** - * Create CertKeyPair object with the certificates and keys. - * - * @return A list with CertKeyPair. - */ - public static List exampleTlsCerts() { - final Path root = getFlightTestDataRoot(); - return Arrays.asList( - new CertKeyPair(root.resolve("cert0.pem").toFile(), root.resolve("cert0.pkcs1").toFile()), - new CertKeyPair(root.resolve("cert1.pem").toFile(), root.resolve("cert1.pkcs1").toFile())); - } - - public static class CertKeyPair { - - public final File cert; - public final File key; - - public CertKeyPair(File cert, File key) { - this.cert = cert; - this.key = key; - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java deleted file mode 100644 index a8874c48693f1..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java +++ /dev/null @@ -1,689 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.utils; - -import static com.google.protobuf.Any.pack; -import static com.google.protobuf.ByteString.copyFrom; -import static java.lang.String.format; -import static java.util.UUID.randomUUID; -import static java.util.stream.Collectors.toList; - -import com.google.protobuf.Any; -import com.google.protobuf.ByteString; -import com.google.protobuf.Message; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.Channels; -import java.nio.charset.StandardCharsets; -import java.util.AbstractMap.SimpleImmutableEntry; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Objects; -import java.util.UUID; -import java.util.function.BiConsumer; -import java.util.function.Consumer; -import java.util.stream.IntStream; -import org.apache.arrow.flight.Action; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.Criteria; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.PutResult; -import org.apache.arrow.flight.Result; -import org.apache.arrow.flight.SchemaResult; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.flight.sql.FlightSqlProducer; -import org.apache.arrow.flight.sql.SqlInfoBuilder; -import org.apache.arrow.flight.sql.impl.FlightSql; -import org.apache.arrow.flight.sql.impl.FlightSql.ActionClosePreparedStatementRequest; -import org.apache.arrow.flight.sql.impl.FlightSql.ActionCreatePreparedStatementRequest; -import org.apache.arrow.flight.sql.impl.FlightSql.ActionCreatePreparedStatementResult; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetCatalogs; -import 
org.apache.arrow.flight.sql.impl.FlightSql.CommandGetCrossReference; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetDbSchemas; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetExportedKeys; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetImportedKeys; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetPrimaryKeys; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetSqlInfo; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetTableTypes; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetTables; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandPreparedStatementQuery; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandPreparedStatementUpdate; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementQuery; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementUpdate; -import org.apache.arrow.flight.sql.impl.FlightSql.DoPutUpdateResult; -import org.apache.arrow.flight.sql.impl.FlightSql.TicketStatementQuery; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.WriteChannel; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.apache.calcite.avatica.Meta.StatementType; - -/** An ad-hoc {@link FlightSqlProducer} for tests. 
*/ -public final class MockFlightSqlProducer implements FlightSqlProducer { - - private final Map>> queryResults = new HashMap<>(); - private final Map> selectResultProviders = new HashMap<>(); - private final Map preparedStatements = new HashMap<>(); - private final Map> catalogQueriesResults = - new HashMap<>(); - private final Map>> - updateResultProviders = new HashMap<>(); - private final SqlInfoBuilder sqlInfoBuilder = new SqlInfoBuilder(); - private final Map parameterSchemas = new HashMap<>(); - private final Map>> expectedParameterValues = new HashMap<>(); - - private final Map actionTypeCounter = new HashMap<>(); - - private static FlightInfo getFlightInfoExportedAndImportedKeys( - final Message message, final FlightDescriptor descriptor) { - return getFlightInfo(message, Schemas.GET_IMPORTED_KEYS_SCHEMA, descriptor); - } - - private static FlightInfo getFlightInfo( - final Message message, final Schema schema, final FlightDescriptor descriptor) { - return new FlightInfo( - schema, - descriptor, - Collections.singletonList(new FlightEndpoint(new Ticket(Any.pack(message).toByteArray()))), - -1, - -1); - } - - public static ByteBuffer serializeSchema(final Schema schema) { - final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - try { - MessageSerializer.serialize(new WriteChannel(Channels.newChannel(outputStream)), schema); - - return ByteBuffer.wrap(outputStream.toByteArray()); - } catch (final IOException e) { - throw new RuntimeException("Failed to serialize schema", e); - } - } - - /** - * Registers a new {@link StatementType#SELECT} SQL query. - * - * @param sqlCommand the SQL command under which to register the new query. - * @param schema the schema to use for the query result. - * @param resultProviders the result provider for this query. 
- */ - public void addSelectQuery( - final String sqlCommand, - final Schema schema, - final List> resultProviders) { - final int providers = resultProviders.size(); - final List uuids = - IntStream.range(0, providers) - .mapToObj(index -> new UUID(sqlCommand.hashCode(), Integer.hashCode(index))) - .collect(toList()); - queryResults.put(sqlCommand, new SimpleImmutableEntry<>(schema, uuids)); - IntStream.range(0, providers) - .forEach( - index -> this.selectResultProviders.put(uuids.get(index), resultProviders.get(index))); - } - - /** - * Registers a new {@link StatementType#UPDATE} SQL query. - * - * @param sqlCommand the SQL command. - * @param updatedRows the number of rows affected. - */ - public void addUpdateQuery(final String sqlCommand, final long updatedRows) { - addUpdateQuery( - sqlCommand, - (flightStream, putResultStreamListener) -> { - final DoPutUpdateResult result = - DoPutUpdateResult.newBuilder().setRecordCount(updatedRows).build(); - try (final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - final ArrowBuf buffer = allocator.buffer(result.getSerializedSize())) { - buffer.writeBytes(result.toByteArray()); - putResultStreamListener.onNext(PutResult.metadata(buffer)); - } catch (final Throwable throwable) { - putResultStreamListener.onError(throwable); - } finally { - putResultStreamListener.onCompleted(); - } - }); - } - - /** - * Adds a catalog query to the results. - * - * @param message the {@link Message} corresponding to the catalog query request type to register. - * @param resultsProvider the results provider. - */ - public void addCatalogQuery( - final Message message, final Consumer resultsProvider) { - catalogQueriesResults.put(message, resultsProvider); - } - - /** - * Registers a new {@link StatementType#UPDATE} SQL query. - * - * @param sqlCommand the SQL command. - * @param resultsProvider consumer for producing update results. 
- */ - void addUpdateQuery( - final String sqlCommand, - final BiConsumer> resultsProvider) { - Preconditions.checkState( - updateResultProviders.putIfAbsent(sqlCommand, resultsProvider) == null, - format("Attempted to overwrite preexisting query: <%s>.", sqlCommand)); - } - - /** Registers parameters expected to be provided with a prepared statement. */ - public void addExpectedParameters( - String query, Schema parameterSchema, List> expectedValues) { - parameterSchemas.put(query, parameterSchema); - expectedParameterValues.put(query, expectedValues); - } - - @Override - public void createPreparedStatement( - final ActionCreatePreparedStatementRequest request, - final CallContext callContext, - final StreamListener listener) { - try { - final ByteString preparedStatementHandle = - copyFrom(randomUUID().toString().getBytes(StandardCharsets.UTF_8)); - final String query = request.getQuery(); - - final ActionCreatePreparedStatementResult.Builder resultBuilder = - ActionCreatePreparedStatementResult.newBuilder() - .setPreparedStatementHandle(preparedStatementHandle); - - final Entry> entry = queryResults.get(query); - if (entry != null) { - preparedStatements.put(preparedStatementHandle, query); - - final Schema datasetSchema = entry.getKey(); - final ByteString datasetSchemaBytes = ByteString.copyFrom(serializeSchema(datasetSchema)); - - resultBuilder.setDatasetSchema(datasetSchemaBytes); - } else if (updateResultProviders.containsKey(query)) { - preparedStatements.put(preparedStatementHandle, query); - - } else { - listener.onError( - CallStatus.INVALID_ARGUMENT.withDescription("Query not found").toRuntimeException()); - return; - } - - final Schema parameterSchema = parameterSchemas.get(query); - if (parameterSchema != null) { - final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - MessageSerializer.serialize( - new WriteChannel(Channels.newChannel(outputStream)), parameterSchema); - 
resultBuilder.setParameterSchema(ByteString.copyFrom(outputStream.toByteArray())); - } - - listener.onNext(new Result(pack(resultBuilder.build()).toByteArray())); - } catch (final Throwable t) { - listener.onError(t); - } finally { - listener.onCompleted(); - } - } - - @Override - public void closePreparedStatement( - final ActionClosePreparedStatementRequest actionClosePreparedStatementRequest, - final CallContext callContext, - final StreamListener streamListener) { - // TODO Implement this method. - streamListener.onCompleted(); - } - - @Override - public FlightInfo getFlightInfoStatement( - final CommandStatementQuery commandStatementQuery, - final CallContext callContext, - final FlightDescriptor flightDescriptor) { - final String query = commandStatementQuery.getQuery(); - final Entry> queryInfo = - Preconditions.checkNotNull( - queryResults.get(query), format("Query not registered: <%s>.", query)); - final List endpoints = - queryInfo.getValue().stream() - .map(TicketConversionUtils::getTicketBytesFromUuid) - .map(TicketConversionUtils::getTicketStatementQueryFromHandle) - .map(TicketConversionUtils::getEndpointFromMessage) - .collect(toList()); - return FlightInfo.builder(queryInfo.getKey(), flightDescriptor, endpoints) - .setAppMetadata("foo".getBytes(StandardCharsets.UTF_8)) - .build(); - } - - @Override - public FlightInfo getFlightInfoPreparedStatement( - final CommandPreparedStatementQuery commandPreparedStatementQuery, - final CallContext callContext, - final FlightDescriptor flightDescriptor) { - final ByteString preparedStatementHandle = - commandPreparedStatementQuery.getPreparedStatementHandle(); - - final String query = - Preconditions.checkNotNull( - preparedStatements.get(preparedStatementHandle), - format("No query registered under handle: <%s>.", preparedStatementHandle)); - final Entry> queryInfo = - Preconditions.checkNotNull( - queryResults.get(query), format("Query not registered: <%s>.", query)); - final List endpoints = - 
queryInfo.getValue().stream() - .map(TicketConversionUtils::getTicketBytesFromUuid) - .map(TicketConversionUtils::getCommandPreparedStatementQueryFromHandle) - .map(TicketConversionUtils::getEndpointFromMessage) - .collect(toList()); - return FlightInfo.builder(queryInfo.getKey(), flightDescriptor, endpoints) - .setAppMetadata("foo".getBytes(StandardCharsets.UTF_8)) - .build(); - } - - @Override - public SchemaResult getSchemaStatement( - final CommandStatementQuery commandStatementQuery, - final CallContext callContext, - final FlightDescriptor flightDescriptor) { - final String query = commandStatementQuery.getQuery(); - final Entry> queryInfo = - Preconditions.checkNotNull( - queryResults.get(query), format("Query not registered: <%s>.", query)); - - return new SchemaResult(queryInfo.getKey()); - } - - @Override - public void getStreamStatement( - final TicketStatementQuery ticketStatementQuery, - final CallContext callContext, - final ServerStreamListener serverStreamListener) { - final UUID uuid = UUID.fromString(ticketStatementQuery.getStatementHandle().toStringUtf8()); - Preconditions.checkNotNull( - selectResultProviders.get(uuid), - "No consumer was registered for the specified UUID: <%s>.", - uuid) - .accept(serverStreamListener); - } - - @Override - public void getStreamPreparedStatement( - final CommandPreparedStatementQuery commandPreparedStatementQuery, - final CallContext callContext, - final ServerStreamListener serverStreamListener) { - final UUID uuid = - UUID.fromString(commandPreparedStatementQuery.getPreparedStatementHandle().toStringUtf8()); - Preconditions.checkNotNull( - selectResultProviders.get(uuid), - "No consumer was registered for the specified UUID: <%s>.", - uuid) - .accept(serverStreamListener); - } - - @Override - public Runnable acceptPutStatement( - final CommandStatementUpdate commandStatementUpdate, - final CallContext callContext, - final FlightStream flightStream, - final StreamListener streamListener) { - return () -> { - 
final String query = commandStatementUpdate.getQuery(); - final BiConsumer> resultProvider = - Preconditions.checkNotNull( - updateResultProviders.get(query), - format("No consumer found for query: <%s>.", query)); - resultProvider.accept(flightStream, streamListener); - }; - } - - private boolean validateParameters( - String query, FlightStream flightStream, StreamListener streamListener) { - final List> expectedValues = expectedParameterValues.get(query); - if (expectedValues != null) { - int index = 0; - while (flightStream.next()) { - final VectorSchemaRoot root = flightStream.getRoot(); - for (int i = 0; i < root.getRowCount(); i++) { - if (index >= expectedValues.size()) { - streamListener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription("More parameter rows provided than expected") - .toRuntimeException()); - return true; - } - List expectedRow = expectedValues.get(index++); - if (root.getFieldVectors().size() != expectedRow.size()) { - streamListener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription("Parameter count mismatch") - .toRuntimeException()); - return true; - } - - for (int paramIndex = 0; paramIndex < expectedRow.size(); paramIndex++) { - Object expected = expectedRow.get(paramIndex); - Object actual = root.getVector(paramIndex).getObject(i); - boolean matches; - if (expected.getClass().isArray()) { - matches = - Arrays.equals((Object[]) expected, ((JsonStringArrayList) actual).toArray()); - } else { - matches = Objects.equals(expected, actual); - } - if (!matches) { - streamListener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription( - String.format( - "Parameter mismatch. 
Expected: %s Actual: %s", expected, actual)) - .toRuntimeException()); - return true; - } - } - } - } - if (index < expectedValues.size()) { - streamListener.onError( - CallStatus.INVALID_ARGUMENT - .withDescription("Fewer parameter rows provided than expected") - .toRuntimeException()); - return true; - } - } - return false; - } - - @Override - public Runnable acceptPutPreparedStatementUpdate( - final CommandPreparedStatementUpdate commandPreparedStatementUpdate, - final CallContext callContext, - final FlightStream flightStream, - final StreamListener streamListener) { - final ByteString handle = commandPreparedStatementUpdate.getPreparedStatementHandle(); - final String query = - Preconditions.checkNotNull( - preparedStatements.get(handle), - format("No query registered under handle: <%s>.", handle)); - - if (validateParameters(query, flightStream, streamListener)) { - return () -> {}; - } - - return acceptPutStatement( - CommandStatementUpdate.newBuilder().setQuery(query).build(), - callContext, - flightStream, - streamListener); - } - - @Override - public Runnable acceptPutPreparedStatementQuery( - final CommandPreparedStatementQuery commandPreparedStatementQuery, - final CallContext callContext, - final FlightStream flightStream, - final StreamListener streamListener) { - final ByteString handle = commandPreparedStatementQuery.getPreparedStatementHandle(); - final String query = - Preconditions.checkNotNull( - preparedStatements.get(handle), - format("No query registered under handle: <%s>.", handle)); - - if (validateParameters(query, flightStream, streamListener)) { - return () -> {}; - } - - return streamListener::onCompleted; - } - - @Override - public FlightInfo getFlightInfoSqlInfo( - final CommandGetSqlInfo commandGetSqlInfo, - final CallContext callContext, - final FlightDescriptor flightDescriptor) { - return getFlightInfo(commandGetSqlInfo, Schemas.GET_SQL_INFO_SCHEMA, flightDescriptor); - } - - @Override - public void getStreamSqlInfo( - final 
CommandGetSqlInfo commandGetSqlInfo, - final CallContext callContext, - final ServerStreamListener serverStreamListener) { - sqlInfoBuilder.send(commandGetSqlInfo.getInfoList(), serverStreamListener); - } - - @Override - public FlightInfo getFlightInfoTypeInfo( - FlightSql.CommandGetXdbcTypeInfo request, CallContext context, FlightDescriptor descriptor) { - // TODO Implement this - return null; - } - - @Override - public void getStreamTypeInfo( - FlightSql.CommandGetXdbcTypeInfo request, - CallContext context, - ServerStreamListener listener) { - // TODO Implement this - } - - @Override - public FlightInfo getFlightInfoCatalogs( - final CommandGetCatalogs commandGetCatalogs, - final CallContext callContext, - final FlightDescriptor flightDescriptor) { - return getFlightInfo(commandGetCatalogs, Schemas.GET_CATALOGS_SCHEMA, flightDescriptor); - } - - @Override - public void getStreamCatalogs( - final CallContext callContext, final ServerStreamListener serverStreamListener) { - final CommandGetCatalogs command = CommandGetCatalogs.getDefaultInstance(); - getStreamCatalogFunctions(command, serverStreamListener); - } - - @Override - public FlightInfo getFlightInfoSchemas( - final CommandGetDbSchemas commandGetSchemas, - final CallContext callContext, - final FlightDescriptor flightDescriptor) { - return getFlightInfo(commandGetSchemas, Schemas.GET_SCHEMAS_SCHEMA, flightDescriptor); - } - - @Override - public void getStreamSchemas( - final CommandGetDbSchemas commandGetSchemas, - final CallContext callContext, - final ServerStreamListener serverStreamListener) { - getStreamCatalogFunctions(commandGetSchemas, serverStreamListener); - } - - @Override - public FlightInfo getFlightInfoTables( - final CommandGetTables commandGetTables, - final CallContext callContext, - final FlightDescriptor flightDescriptor) { - return getFlightInfo(commandGetTables, Schemas.GET_TABLES_SCHEMA_NO_SCHEMA, flightDescriptor); - } - - @Override - public void getStreamTables( - final 
CommandGetTables commandGetTables, - final CallContext callContext, - final ServerStreamListener serverStreamListener) { - getStreamCatalogFunctions(commandGetTables, serverStreamListener); - } - - @Override - public FlightInfo getFlightInfoTableTypes( - final CommandGetTableTypes commandGetTableTypes, - final CallContext callContext, - final FlightDescriptor flightDescriptor) { - return getFlightInfo(commandGetTableTypes, Schemas.GET_TABLE_TYPES_SCHEMA, flightDescriptor); - } - - @Override - public void getStreamTableTypes( - final CallContext callContext, final ServerStreamListener serverStreamListener) { - final CommandGetTableTypes command = CommandGetTableTypes.getDefaultInstance(); - getStreamCatalogFunctions(command, serverStreamListener); - } - - @Override - public FlightInfo getFlightInfoPrimaryKeys( - final CommandGetPrimaryKeys commandGetPrimaryKeys, - final CallContext callContext, - final FlightDescriptor flightDescriptor) { - return getFlightInfo(commandGetPrimaryKeys, Schemas.GET_PRIMARY_KEYS_SCHEMA, flightDescriptor); - } - - @Override - public void getStreamPrimaryKeys( - final CommandGetPrimaryKeys commandGetPrimaryKeys, - final CallContext callContext, - final ServerStreamListener serverStreamListener) { - getStreamCatalogFunctions(commandGetPrimaryKeys, serverStreamListener); - } - - @Override - public FlightInfo getFlightInfoExportedKeys( - final CommandGetExportedKeys commandGetExportedKeys, - final CallContext callContext, - final FlightDescriptor flightDescriptor) { - return getFlightInfoExportedAndImportedKeys(commandGetExportedKeys, flightDescriptor); - } - - @Override - public FlightInfo getFlightInfoImportedKeys( - final CommandGetImportedKeys commandGetImportedKeys, - final CallContext callContext, - final FlightDescriptor flightDescriptor) { - return getFlightInfoExportedAndImportedKeys(commandGetImportedKeys, flightDescriptor); - } - - @Override - public FlightInfo getFlightInfoCrossReference( - final CommandGetCrossReference 
commandGetCrossReference, - final CallContext callContext, - final FlightDescriptor flightDescriptor) { - return getFlightInfoExportedAndImportedKeys(commandGetCrossReference, flightDescriptor); - } - - @Override - public void getStreamExportedKeys( - final CommandGetExportedKeys commandGetExportedKeys, - final CallContext callContext, - final ServerStreamListener serverStreamListener) { - getStreamCatalogFunctions(commandGetExportedKeys, serverStreamListener); - } - - @Override - public void getStreamImportedKeys( - final CommandGetImportedKeys commandGetImportedKeys, - final CallContext callContext, - final ServerStreamListener serverStreamListener) { - getStreamCatalogFunctions(commandGetImportedKeys, serverStreamListener); - } - - @Override - public void getStreamCrossReference( - final CommandGetCrossReference commandGetCrossReference, - final CallContext callContext, - final ServerStreamListener serverStreamListener) { - getStreamCatalogFunctions(commandGetCrossReference, serverStreamListener); - } - - @Override - public void close() { - // TODO No-op. - } - - @Override - public void listFlights( - final CallContext callContext, - final Criteria criteria, - final StreamListener streamListener) { - // TODO Implement this method. - throw CallStatus.UNIMPLEMENTED.toRuntimeException(); - } - - @Override - public void doAction(CallContext context, Action action, StreamListener listener) { - FlightSqlProducer.super.doAction(context, action, listener); - actionTypeCounter.put( - action.getType(), actionTypeCounter.getOrDefault(action.getType(), 0) + 1); - } - - /** - * Clear the `actionTypeCounter` map and restore to its default state. Intended to be used in - * tests. 
- */ - public void clearActionTypeCounter() { - actionTypeCounter.clear(); - } - - public Map getActionTypeCounter() { - return actionTypeCounter; - } - - private void getStreamCatalogFunctions( - final Message ticket, final ServerStreamListener serverStreamListener) { - Preconditions.checkNotNull( - catalogQueriesResults.get(ticket), - format("Query not registered for ticket: <%s>", ticket)) - .accept(serverStreamListener); - } - - public SqlInfoBuilder getSqlInfoBuilder() { - return sqlInfoBuilder; - } - - private static final class TicketConversionUtils { - private TicketConversionUtils() { - // Prevent instantiation. - } - - private static ByteString getTicketBytesFromUuid(final UUID uuid) { - return ByteString.copyFromUtf8(uuid.toString()); - } - - private static TicketStatementQuery getTicketStatementQueryFromHandle(final ByteString handle) { - return TicketStatementQuery.newBuilder().setStatementHandle(handle).build(); - } - - private static CommandPreparedStatementQuery getCommandPreparedStatementQueryFromHandle( - final ByteString handle) { - return CommandPreparedStatementQuery.newBuilder().setPreparedStatementHandle(handle).build(); - } - - private static FlightEndpoint getEndpointFromMessage(final Message message) { - return new FlightEndpoint(new Ticket(Any.pack(message).toByteArray())); - } - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/PartitionedFlightSqlProducer.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/PartitionedFlightSqlProducer.java deleted file mode 100644 index d96fef2bbdd58..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/PartitionedFlightSqlProducer.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import static com.google.protobuf.Any.pack; - -import com.google.protobuf.ByteString; -import com.google.protobuf.Message; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.NoOpFlightProducer; -import org.apache.arrow.flight.Result; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.flight.sql.BasicFlightSqlProducer; -import org.apache.arrow.flight.sql.impl.FlightSql; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; - -public class PartitionedFlightSqlProducer extends BasicFlightSqlProducer { - - /** A minimal FlightProducer intended to just serve data when given the correct Ticket. 
*/ - public static class DataOnlyFlightSqlProducer extends NoOpFlightProducer { - private final Ticket ticket; - private final VectorSchemaRoot data; - - public DataOnlyFlightSqlProducer(Ticket ticket, VectorSchemaRoot data) { - this.ticket = ticket; - this.data = data; - } - - @Override - public void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - if (!Arrays.equals(ticket.getBytes(), this.ticket.getBytes())) { - listener.error( - CallStatus.INVALID_ARGUMENT.withDescription("Illegal ticket.").toRuntimeException()); - return; - } - - listener.start(data); - listener.putNext(); - listener.completed(); - } - } - - private final List endpoints; - - private final Schema schema; - - public PartitionedFlightSqlProducer(Schema schema, FlightEndpoint... endpoints) { - this.schema = schema; - this.endpoints = Arrays.asList(endpoints); - } - - @Override - protected List determineEndpoints( - T request, FlightDescriptor flightDescriptor, Schema schema) { - return endpoints; - } - - @Override - public void createPreparedStatement( - FlightSql.ActionCreatePreparedStatementRequest request, - CallContext context, - StreamListener listener) { - final FlightSql.ActionCreatePreparedStatementResult.Builder resultBuilder = - FlightSql.ActionCreatePreparedStatementResult.newBuilder() - .setPreparedStatementHandle(ByteString.EMPTY); - - final ByteString datasetSchemaBytes = ByteString.copyFrom(schema.serializeAsMessage()); - - resultBuilder.setDatasetSchema(datasetSchemaBytes); - listener.onNext(new Result(pack(resultBuilder.build()).toByteArray())); - listener.onCompleted(); - } - - @Override - public FlightInfo getFlightInfoStatement( - FlightSql.CommandStatementQuery command, CallContext context, FlightDescriptor descriptor) { - return FlightInfo.builder(schema, descriptor, endpoints).build(); - } - - @Override - public FlightInfo getFlightInfoPreparedStatement( - FlightSql.CommandPreparedStatementQuery command, - CallContext context, - 
FlightDescriptor descriptor) { - return FlightInfo.builder(schema, descriptor, endpoints).build(); - } - - @Override - public void closePreparedStatement( - FlightSql.ActionClosePreparedStatementRequest request, - CallContext context, - StreamListener listener) { - listener.onCompleted(); - } - - // Note -- getStream() is intentionally not implemented. -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ResultSetTestUtils.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ResultSetTestUtils.java deleted file mode 100644 index bcefbf68bc25b..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ResultSetTestUtils.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.utils; - -import static java.util.stream.IntStream.range; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.MatcherAssert.assertThat; - -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; -import java.util.function.Function; - -/** - * Utility class for testing that require asserting that the values in a {@link ResultSet} are - * expected. - */ -public final class ResultSetTestUtils { - - /** - * Checks that the values (rows and columns) in the provided {@link ResultSet} are expected. - * - * @param resultSet the {@code ResultSet} to assert. - * @param expectedResults the rows and columns representing the only values the {@code resultSet} - * is expected to have. - * @param the type to be found in the expected results for the {@code resultSet}. - * @throws SQLException if querying the {@code ResultSet} fails at some point unexpectedly. - */ - public static void testData(final ResultSet resultSet, final List> expectedResults) - throws SQLException { - testData( - resultSet, - range(1, resultSet.getMetaData().getColumnCount() + 1).toArray(), - expectedResults); - } - - /** - * Checks that the values (rows and columns) in the provided {@link ResultSet} are expected. - * - * @param resultSet the {@code ResultSet} to assert. - * @param columnNames the column names to fetch in the {@code ResultSet} for comparison. - * @param expectedResults the rows and columns representing the only values the {@code resultSet} - * is expected to have. - * @param the type to be found in the expected results for the {@code resultSet}. - * @throws SQLException if querying the {@code ResultSet} fails at some point unexpectedly. 
- */ - @SuppressWarnings("unchecked") - public static void testData( - final ResultSet resultSet, - final List columnNames, - final List> expectedResults) - throws SQLException { - testData( - resultSet, - data -> { - final List columns = new ArrayList<>(); - for (final String columnName : columnNames) { - try { - columns.add((T) resultSet.getObject(columnName)); - } catch (final SQLException e) { - throw new RuntimeException(e); - } - } - return columns; - }, - expectedResults); - } - - /** - * Checks that the values (rows and columns) in the provided {@link ResultSet} are expected. - * - * @param resultSet the {@code ResultSet} to assert. - * @param columnIndices the column indices to fetch in the {@code ResultSet} for comparison. - * @param expectedResults the rows and columns representing the only values the {@code resultSet} - * is expected to have. - * @param the type to be found in the expected results for the {@code resultSet}. - * @throws SQLException if querying the {@code ResultSet} fails at some point unexpectedly. - */ - @SuppressWarnings("unchecked") - public static void testData( - final ResultSet resultSet, final int[] columnIndices, final List> expectedResults) - throws SQLException { - testData( - resultSet, - data -> { - final List columns = new ArrayList<>(); - for (final int columnIndex : columnIndices) { - try { - columns.add((T) resultSet.getObject(columnIndex)); - } catch (final SQLException e) { - throw new RuntimeException(e); - } - } - return columns; - }, - expectedResults); - } - - /** - * Checks that the values (rows and columns) in the provided {@link ResultSet} are expected. - * - * @param resultSet the {@code ResultSet} to assert. - * @param dataConsumer the column indices to fetch in the {@code ResultSet} for comparison. - * @param expectedResults the rows and columns representing the only values the {@code resultSet} - * is expected to have. - * @param the type to be found in the expected results for the {@code resultSet}. 
- * @throws SQLException if querying the {@code ResultSet} fails at some point unexpectedly. - */ - public static void testData( - final ResultSet resultSet, - final Function> dataConsumer, - final List> expectedResults) - throws SQLException { - final List> actualResults = new ArrayList<>(); - while (resultSet.next()) { - actualResults.add(dataConsumer.apply(resultSet)); - } - assertThat(actualResults, is(expectedResults)); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/RootAllocatorTestExtension.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/RootAllocatorTestExtension.java deleted file mode 100644 index 347e92a16c4e5..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/RootAllocatorTestExtension.java +++ /dev/null @@ -1,814 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.utils; - -import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; -import java.util.Random; -import java.util.concurrent.TimeUnit; -import java.util.stream.IntStream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import 
org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter; -import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.junit.jupiter.api.extension.AfterAllCallback; -import org.junit.jupiter.api.extension.BeforeAllCallback; -import org.junit.jupiter.api.extension.ExtensionContext; - -public class RootAllocatorTestExtension - implements BeforeAllCallback, AfterAllCallback, AutoCloseable { - - public static final byte MAX_VALUE = Byte.MAX_VALUE; - private final BufferAllocator rootAllocator = new RootAllocator(); - - private final Random random = new Random(10); - - @Override - public void beforeAll(ExtensionContext context) { - // no-op - } - - @Override - public void afterAll(ExtensionContext context) { - try { - close(); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - public BufferAllocator getRootAllocator() { - return rootAllocator; - } - - @Override - public void close() throws Exception { - this.rootAllocator.getChildAllocators().forEach(BufferAllocator::close); - AutoCloseables.close(this.rootAllocator); - } - - /** - * Create a Float8Vector to be used in the accessor tests. 
- * - * @return Float8Vector - */ - public Float8Vector createFloat8Vector() { - double[] doubleVectorValues = - new double[] { - 0, - 1, - -1, - Byte.MIN_VALUE, - Byte.MAX_VALUE, - Short.MIN_VALUE, - Short.MAX_VALUE, - Integer.MIN_VALUE, - Integer.MAX_VALUE, - Long.MIN_VALUE, - Long.MAX_VALUE, - Float.MAX_VALUE, - -Float.MAX_VALUE, - Float.NEGATIVE_INFINITY, - Float.POSITIVE_INFINITY, - Float.MIN_VALUE, - -Float.MIN_VALUE, - Double.MAX_VALUE, - -Double.MAX_VALUE, - Double.NEGATIVE_INFINITY, - Double.POSITIVE_INFINITY, - Double.MIN_VALUE, - -Double.MIN_VALUE, - }; - - Float8Vector result = new Float8Vector("", this.getRootAllocator()); - result.setValueCount(MAX_VALUE); - for (int i = 0; i < MAX_VALUE; i++) { - if (i < doubleVectorValues.length) { - result.setSafe(i, doubleVectorValues[i]); - } else { - result.setSafe(i, random.nextDouble()); - } - } - - return result; - } - - public Float8Vector createFloat8VectorForNullTests() { - final Float8Vector float8Vector = new Float8Vector("ID", this.getRootAllocator()); - float8Vector.allocateNew(1); - float8Vector.setNull(0); - float8Vector.setValueCount(1); - - return float8Vector; - } - - /** - * Create a Float4Vector to be used in the accessor tests. 
- * - * @return Float4Vector - */ - public Float4Vector createFloat4Vector() { - - float[] floatVectorValues = - new float[] { - 0, - 1, - -1, - Byte.MIN_VALUE, - Byte.MAX_VALUE, - Short.MIN_VALUE, - Short.MAX_VALUE, - Integer.MIN_VALUE, - Integer.MAX_VALUE, - Long.MIN_VALUE, - Long.MAX_VALUE, - Float.MAX_VALUE, - -Float.MAX_VALUE, - Float.NEGATIVE_INFINITY, - Float.POSITIVE_INFINITY, - Float.MIN_VALUE, - -Float.MIN_VALUE, - }; - - Float4Vector result = new Float4Vector("", this.getRootAllocator()); - result.setValueCount(MAX_VALUE); - for (int i = 0; i < MAX_VALUE; i++) { - if (i < floatVectorValues.length) { - result.setSafe(i, floatVectorValues[i]); - } else { - result.setSafe(i, random.nextFloat()); - } - } - - return result; - } - - /** - * Create a IntVector to be used in the accessor tests. - * - * @return IntVector - */ - public IntVector createIntVector() { - - int[] intVectorValues = - new int[] { - 0, - 1, - -1, - Byte.MIN_VALUE, - Byte.MAX_VALUE, - Short.MIN_VALUE, - Short.MAX_VALUE, - Integer.MIN_VALUE, - Integer.MAX_VALUE, - }; - - IntVector result = new IntVector("", this.getRootAllocator()); - result.setValueCount(MAX_VALUE); - for (int i = 0; i < MAX_VALUE; i++) { - if (i < intVectorValues.length) { - result.setSafe(i, intVectorValues[i]); - } else { - result.setSafe(i, random.nextInt()); - } - } - - return result; - } - - /** - * Create a SmallIntVector to be used in the accessor tests. 
- * - * @return SmallIntVector - */ - public SmallIntVector createSmallIntVector() { - - short[] smallIntVectorValues = - new short[] { - 0, 1, -1, Byte.MIN_VALUE, Byte.MAX_VALUE, Short.MIN_VALUE, Short.MAX_VALUE, - }; - - SmallIntVector result = new SmallIntVector("", this.getRootAllocator()); - result.setValueCount(MAX_VALUE); - for (int i = 0; i < MAX_VALUE; i++) { - if (i < smallIntVectorValues.length) { - result.setSafe(i, smallIntVectorValues[i]); - } else { - result.setSafe(i, random.nextInt(Short.MAX_VALUE)); - } - } - - return result; - } - - /** - * Create a TinyIntVector to be used in the accessor tests. - * - * @return TinyIntVector - */ - public TinyIntVector createTinyIntVector() { - - byte[] byteVectorValues = - new byte[] { - 0, 1, -1, Byte.MIN_VALUE, Byte.MAX_VALUE, - }; - - TinyIntVector result = new TinyIntVector("", this.getRootAllocator()); - result.setValueCount(MAX_VALUE); - for (int i = 0; i < MAX_VALUE; i++) { - if (i < byteVectorValues.length) { - result.setSafe(i, byteVectorValues[i]); - } else { - result.setSafe(i, random.nextInt(Byte.MAX_VALUE)); - } - } - - return result; - } - - /** - * Create a BigIntVector to be used in the accessor tests. - * - * @return BigIntVector - */ - public BigIntVector createBigIntVector() { - - long[] longVectorValues = - new long[] { - 0, - 1, - -1, - Byte.MIN_VALUE, - Byte.MAX_VALUE, - Short.MIN_VALUE, - Short.MAX_VALUE, - Integer.MIN_VALUE, - Integer.MAX_VALUE, - Long.MIN_VALUE, - Long.MAX_VALUE, - }; - - BigIntVector result = new BigIntVector("", this.getRootAllocator()); - result.setValueCount(MAX_VALUE); - for (int i = 0; i < MAX_VALUE; i++) { - if (i < longVectorValues.length) { - result.setSafe(i, longVectorValues[i]); - } else { - result.setSafe(i, random.nextLong()); - } - } - - return result; - } - - /** - * Create a UInt1Vector to be used in the accessor tests. 
- * - * @return UInt1Vector - */ - public UInt1Vector createUInt1Vector() { - - short[] uInt1VectorValues = - new short[] { - 0, 1, -1, Byte.MIN_VALUE, Byte.MAX_VALUE, - }; - - UInt1Vector result = new UInt1Vector("", this.getRootAllocator()); - result.setValueCount(MAX_VALUE); - for (int i = 0; i < MAX_VALUE; i++) { - if (i < uInt1VectorValues.length) { - result.setSafe(i, uInt1VectorValues[i]); - } else { - result.setSafe(i, random.nextInt(0x100)); - } - } - - return result; - } - - /** - * Create a UInt2Vector to be used in the accessor tests. - * - * @return UInt2Vector - */ - public UInt2Vector createUInt2Vector() { - - int[] uInt2VectorValues = - new int[] { - 0, 1, -1, Byte.MIN_VALUE, Byte.MAX_VALUE, Short.MIN_VALUE, Short.MAX_VALUE, - }; - - UInt2Vector result = new UInt2Vector("", this.getRootAllocator()); - result.setValueCount(MAX_VALUE); - for (int i = 0; i < MAX_VALUE; i++) { - if (i < uInt2VectorValues.length) { - result.setSafe(i, uInt2VectorValues[i]); - } else { - result.setSafe(i, random.nextInt(0x10000)); - } - } - - return result; - } - - /** - * Create a UInt4Vector to be used in the accessor tests. - * - * @return UInt4Vector - */ - public UInt4Vector createUInt4Vector() { - - int[] uInt4VectorValues = - new int[] { - 0, - 1, - -1, - Byte.MIN_VALUE, - Byte.MAX_VALUE, - Short.MIN_VALUE, - Short.MAX_VALUE, - Integer.MIN_VALUE, - Integer.MAX_VALUE - }; - - UInt4Vector result = new UInt4Vector("", this.getRootAllocator()); - result.setValueCount(MAX_VALUE); - for (int i = 0; i < MAX_VALUE; i++) { - if (i < uInt4VectorValues.length) { - result.setSafe(i, uInt4VectorValues[i]); - } else { - result.setSafe(i, random.nextInt(Integer.MAX_VALUE)); - } - } - - return result; - } - - /** - * Create a UInt8Vector to be used in the accessor tests. 
- * - * @return UInt8Vector - */ - public UInt8Vector createUInt8Vector() { - - long[] uInt8VectorValues = - new long[] { - 0, - 1, - -1, - Byte.MIN_VALUE, - Byte.MAX_VALUE, - Short.MIN_VALUE, - Short.MAX_VALUE, - Integer.MIN_VALUE, - Integer.MAX_VALUE, - Long.MIN_VALUE, - Long.MAX_VALUE - }; - - UInt8Vector result = new UInt8Vector("", this.getRootAllocator()); - result.setValueCount(MAX_VALUE); - for (int i = 0; i < MAX_VALUE; i++) { - if (i < uInt8VectorValues.length) { - result.setSafe(i, uInt8VectorValues[i]); - } else { - result.setSafe(i, random.nextLong()); - } - } - - return result; - } - - /** - * Create a VarBinaryVector to be used in the accessor tests. - * - * @return VarBinaryVector - */ - public VarBinaryVector createVarBinaryVector() { - return createVarBinaryVector(""); - } - - /** - * Create a VarBinaryVector to be used in the accessor tests. - * - * @return VarBinaryVector - */ - public VarBinaryVector createVarBinaryVector(final String fieldName) { - VarBinaryVector valueVector = new VarBinaryVector(fieldName, this.getRootAllocator()); - valueVector.allocateNew(3); - valueVector.setSafe(0, (fieldName + "__BINARY_DATA_0001").getBytes(StandardCharsets.UTF_8)); - valueVector.setSafe(1, (fieldName + "__BINARY_DATA_0002").getBytes(StandardCharsets.UTF_8)); - valueVector.setSafe(2, (fieldName + "__BINARY_DATA_0003").getBytes(StandardCharsets.UTF_8)); - valueVector.setValueCount(3); - - return valueVector; - } - - /** - * Create a LargeVarBinaryVector to be used in the accessor tests. 
- * - * @return LargeVarBinaryVector - */ - public LargeVarBinaryVector createLargeVarBinaryVector() { - LargeVarBinaryVector valueVector = new LargeVarBinaryVector("", this.getRootAllocator()); - valueVector.allocateNew(3); - valueVector.setSafe(0, "BINARY_DATA_0001".getBytes(StandardCharsets.UTF_8)); - valueVector.setSafe(1, "BINARY_DATA_0002".getBytes(StandardCharsets.UTF_8)); - valueVector.setSafe(2, "BINARY_DATA_0003".getBytes(StandardCharsets.UTF_8)); - valueVector.setValueCount(3); - - return valueVector; - } - - /** - * Create a FixedSizeBinaryVector to be used in the accessor tests. - * - * @return FixedSizeBinaryVector - */ - public FixedSizeBinaryVector createFixedSizeBinaryVector() { - FixedSizeBinaryVector valueVector = new FixedSizeBinaryVector("", this.getRootAllocator(), 16); - valueVector.allocateNew(3); - valueVector.setSafe(0, "BINARY_DATA_0001".getBytes(StandardCharsets.UTF_8)); - valueVector.setSafe(1, "BINARY_DATA_0002".getBytes(StandardCharsets.UTF_8)); - valueVector.setSafe(2, "BINARY_DATA_0003".getBytes(StandardCharsets.UTF_8)); - valueVector.setValueCount(3); - - return valueVector; - } - - /** - * Create a UInt8Vector to be used in the accessor tests. 
- * - * @return UInt8Vector - */ - public DecimalVector createDecimalVector() { - - BigDecimal[] bigDecimalValues = - new BigDecimal[] { - new BigDecimal(0), - new BigDecimal(1), - new BigDecimal(-1), - new BigDecimal(Byte.MIN_VALUE), - new BigDecimal(Byte.MAX_VALUE), - new BigDecimal(-Short.MAX_VALUE), - new BigDecimal(Short.MIN_VALUE), - new BigDecimal(Integer.MIN_VALUE), - new BigDecimal(Integer.MAX_VALUE), - new BigDecimal(Long.MIN_VALUE), - new BigDecimal(-Long.MAX_VALUE), - new BigDecimal("170141183460469231731687303715884105727") - }; - - DecimalVector result = new DecimalVector("ID", this.getRootAllocator(), 39, 0); - result.setValueCount(MAX_VALUE); - for (int i = 0; i < MAX_VALUE; i++) { - if (i < bigDecimalValues.length) { - result.setSafe(i, bigDecimalValues[i]); - } else { - result.setSafe(i, random.nextLong()); - } - } - - return result; - } - - /** - * Create a UInt8Vector to be used in the accessor tests. - * - * @return UInt8Vector - */ - public Decimal256Vector createDecimal256Vector() { - - BigDecimal[] bigDecimalValues = - new BigDecimal[] { - new BigDecimal(0), - new BigDecimal(1), - new BigDecimal(-1), - new BigDecimal(Byte.MIN_VALUE), - new BigDecimal(Byte.MAX_VALUE), - new BigDecimal(-Short.MAX_VALUE), - new BigDecimal(Short.MIN_VALUE), - new BigDecimal(Integer.MIN_VALUE), - new BigDecimal(Integer.MAX_VALUE), - new BigDecimal(Long.MIN_VALUE), - new BigDecimal(-Long.MAX_VALUE), - new BigDecimal("170141183460469231731687303715884105727"), - new BigDecimal("17014118346046923173168234157303715884105727"), - new BigDecimal("1701411834604692317316823415265417303715884105727"), - new BigDecimal("-17014118346046923173168234152654115451237303715884105727"), - new BigDecimal("-17014118346046923173168234152654115451231545157303715884105727"), - new BigDecimal("1701411834604692315815656534152654115451231545157303715884105727"), - new BigDecimal("30560141183460469231581565634152654115451231545157303715884105727"), - new BigDecimal( - 
"57896044618658097711785492504343953926634992332820282019728792003956564819967"), - new BigDecimal( - "-56896044618658097711785492504343953926634992332820282019728792003956564819967") - }; - - Decimal256Vector result = new Decimal256Vector("ID", this.getRootAllocator(), 77, 0); - result.setValueCount(MAX_VALUE); - for (int i = 0; i < MAX_VALUE; i++) { - if (i < bigDecimalValues.length) { - result.setSafe(i, bigDecimalValues[i]); - } else { - result.setSafe(i, random.nextLong()); - } - } - - return result; - } - - public TimeStampNanoVector createTimeStampNanoVector() { - TimeStampNanoVector valueVector = new TimeStampNanoVector("", this.getRootAllocator()); - valueVector.allocateNew(2); - valueVector.setSafe(0, TimeUnit.MILLISECONDS.toNanos(1625702400000L)); - valueVector.setSafe(1, TimeUnit.MILLISECONDS.toNanos(1625788800000L)); - valueVector.setValueCount(2); - - return valueVector; - } - - public TimeStampNanoTZVector createTimeStampNanoTZVector(String timeZone) { - TimeStampNanoTZVector valueVector = - new TimeStampNanoTZVector("", this.getRootAllocator(), timeZone); - valueVector.allocateNew(2); - valueVector.setSafe(0, TimeUnit.MILLISECONDS.toNanos(1625702400000L)); - valueVector.setSafe(1, TimeUnit.MILLISECONDS.toNanos(1625788800000L)); - valueVector.setValueCount(2); - - return valueVector; - } - - public TimeStampMicroVector createTimeStampMicroVector() { - TimeStampMicroVector valueVector = new TimeStampMicroVector("", this.getRootAllocator()); - valueVector.allocateNew(2); - valueVector.setSafe(0, TimeUnit.MILLISECONDS.toMicros(1625702400000L)); - valueVector.setSafe(1, TimeUnit.MILLISECONDS.toMicros(1625788800000L)); - valueVector.setValueCount(2); - - return valueVector; - } - - public TimeStampMicroTZVector createTimeStampMicroTZVector(String timeZone) { - TimeStampMicroTZVector valueVector = - new TimeStampMicroTZVector("", this.getRootAllocator(), timeZone); - valueVector.allocateNew(2); - valueVector.setSafe(0, 
TimeUnit.MILLISECONDS.toMicros(1625702400000L)); - valueVector.setSafe(1, TimeUnit.MILLISECONDS.toMicros(1625788800000L)); - valueVector.setValueCount(2); - - return valueVector; - } - - public TimeStampMilliVector createTimeStampMilliVector() { - TimeStampMilliVector valueVector = new TimeStampMilliVector("", this.getRootAllocator()); - valueVector.allocateNew(2); - valueVector.setSafe(0, 1625702400000L); - valueVector.setSafe(1, 1625788800000L); - valueVector.setValueCount(2); - - return valueVector; - } - - public TimeStampMilliTZVector createTimeStampMilliTZVector(String timeZone) { - TimeStampMilliTZVector valueVector = - new TimeStampMilliTZVector("", this.getRootAllocator(), timeZone); - valueVector.allocateNew(2); - valueVector.setSafe(0, 1625702400000L); - valueVector.setSafe(1, 1625788800000L); - valueVector.setValueCount(2); - - return valueVector; - } - - public TimeStampSecVector createTimeStampSecVector() { - TimeStampSecVector valueVector = new TimeStampSecVector("", this.getRootAllocator()); - valueVector.allocateNew(2); - valueVector.setSafe(0, TimeUnit.MILLISECONDS.toSeconds(1625702400000L)); - valueVector.setSafe(1, TimeUnit.MILLISECONDS.toSeconds(1625788800000L)); - valueVector.setValueCount(2); - - return valueVector; - } - - public TimeStampSecTZVector createTimeStampSecTZVector(String timeZone) { - TimeStampSecTZVector valueVector = - new TimeStampSecTZVector("", this.getRootAllocator(), timeZone); - valueVector.allocateNew(2); - valueVector.setSafe(0, TimeUnit.MILLISECONDS.toSeconds(1625702400000L)); - valueVector.setSafe(1, TimeUnit.MILLISECONDS.toSeconds(1625788800000L)); - valueVector.setValueCount(2); - - return valueVector; - } - - public BitVector createBitVector() { - BitVector valueVector = new BitVector("Value", this.getRootAllocator()); - valueVector.allocateNew(2); - valueVector.setSafe(0, 0); - valueVector.setSafe(1, 1); - valueVector.setValueCount(2); - - return valueVector; - } - - public BitVector createBitVectorForNullTests() 
{ - final BitVector bitVector = new BitVector("ID", this.getRootAllocator()); - bitVector.allocateNew(2); - bitVector.setNull(0); - bitVector.setValueCount(1); - - return bitVector; - } - - public TimeNanoVector createTimeNanoVector() { - TimeNanoVector valueVector = new TimeNanoVector("", this.getRootAllocator()); - valueVector.allocateNew(5); - valueVector.setSafe(0, 0); - valueVector.setSafe(1, 1_000_000_000L); // 1 second - valueVector.setSafe(2, 60 * 1_000_000_000L); // 1 minute - valueVector.setSafe(3, 60 * 60 * 1_000_000_000L); // 1 hour - valueVector.setSafe(4, (24 * 60 * 60 - 1) * 1_000_000_000L); // 23:59:59 - valueVector.setValueCount(5); - - return valueVector; - } - - public TimeMicroVector createTimeMicroVector() { - TimeMicroVector valueVector = new TimeMicroVector("", this.getRootAllocator()); - valueVector.allocateNew(5); - valueVector.setSafe(0, 0); - valueVector.setSafe(1, 1_000_000L); // 1 second - valueVector.setSafe(2, 60 * 1_000_000L); // 1 minute - valueVector.setSafe(3, 60 * 60 * 1_000_000L); // 1 hour - valueVector.setSafe(4, (24 * 60 * 60 - 1) * 1_000_000L); // 23:59:59 - valueVector.setValueCount(5); - - return valueVector; - } - - public TimeMilliVector createTimeMilliVector() { - TimeMilliVector valueVector = new TimeMilliVector("", this.getRootAllocator()); - valueVector.allocateNew(5); - valueVector.setSafe(0, 0); - valueVector.setSafe(1, 1_000); // 1 second - valueVector.setSafe(2, 60 * 1_000); // 1 minute - valueVector.setSafe(3, 60 * 60 * 1_000); // 1 hour - valueVector.setSafe(4, (24 * 60 * 60 - 1) * 1_000); // 23:59:59 - valueVector.setValueCount(5); - - return valueVector; - } - - public TimeSecVector createTimeSecVector() { - TimeSecVector valueVector = new TimeSecVector("", this.getRootAllocator()); - valueVector.allocateNew(5); - valueVector.setSafe(0, 0); - valueVector.setSafe(1, 1); // 1 second - valueVector.setSafe(2, 60); // 1 minute - valueVector.setSafe(3, 60 * 60); // 1 hour - valueVector.setSafe(4, (24 * 60 * 60 - 
1)); // 23:59:59 - valueVector.setValueCount(5); - - return valueVector; - } - - public DateDayVector createDateDayVector() { - DateDayVector valueVector = new DateDayVector("", this.getRootAllocator()); - valueVector.allocateNew(2); - valueVector.setSafe(0, (int) TimeUnit.MILLISECONDS.toDays(1625702400000L)); - valueVector.setSafe(1, (int) TimeUnit.MILLISECONDS.toDays(1625788800000L)); - valueVector.setValueCount(2); - - return valueVector; - } - - public DateMilliVector createDateMilliVector() { - DateMilliVector valueVector = new DateMilliVector("", this.getRootAllocator()); - valueVector.allocateNew(2); - valueVector.setSafe(0, 1625702400000L); - valueVector.setSafe(1, 1625788800000L); - valueVector.setValueCount(2); - - return valueVector; - } - - public ListVector createListVector() { - return createListVector(""); - } - - public ListVector createListVector(String fieldName) { - ListVector valueVector = ListVector.empty(fieldName, this.getRootAllocator()); - valueVector.setInitialCapacity(MAX_VALUE); - - UnionListWriter writer = valueVector.getWriter(); - - IntStream range = IntStream.range(0, MAX_VALUE); - - range.forEach( - row -> { - writer.startList(); - writer.setPosition(row); - IntStream.range(0, 5).map(j -> j * row).forEach(writer::writeInt); - writer.setValueCount(5); - writer.endList(); - }); - - valueVector.setValueCount(MAX_VALUE); - - return valueVector; - } - - public LargeListVector createLargeListVector() { - LargeListVector valueVector = LargeListVector.empty("", this.getRootAllocator()); - valueVector.setInitialCapacity(MAX_VALUE); - - UnionLargeListWriter writer = valueVector.getWriter(); - - IntStream range = IntStream.range(0, MAX_VALUE); - - range.forEach( - row -> { - writer.startList(); - writer.setPosition(row); - IntStream.range(0, 5).map(j -> j * row).forEach(writer::writeInt); - writer.setValueCount(5); - writer.endList(); - }); - - valueVector.setValueCount(MAX_VALUE); - - return valueVector; - } - - public FixedSizeListVector 
createFixedSizeListVector() { - FixedSizeListVector valueVector = FixedSizeListVector.empty("", 5, this.getRootAllocator()); - valueVector.setInitialCapacity(MAX_VALUE); - - UnionFixedSizeListWriter writer = valueVector.getWriter(); - - IntStream range = IntStream.range(0, MAX_VALUE); - - range.forEach( - row -> { - writer.startList(); - writer.setPosition(row); - IntStream.range(0, 5).map(j -> j * row).forEach(writer::writeInt); - writer.setValueCount(5); - writer.endList(); - }); - - valueVector.setValueCount(MAX_VALUE); - - return valueVector; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/SqlTypesTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/SqlTypesTest.java deleted file mode 100644 index 00af3c96ba054..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/SqlTypesTest.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc.utils; - -import static org.apache.arrow.driver.jdbc.utils.SqlTypes.getSqlTypeIdFromArrowType; -import static org.apache.arrow.driver.jdbc.utils.SqlTypes.getSqlTypeNameFromArrowType; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.sql.Types; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.IntervalUnit; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.jupiter.api.Test; - -public class SqlTypesTest { - - @Test - public void testGetSqlTypeIdFromArrowType() { - assertEquals(Types.TINYINT, getSqlTypeIdFromArrowType(new ArrowType.Int(8, true))); - assertEquals(Types.SMALLINT, getSqlTypeIdFromArrowType(new ArrowType.Int(16, true))); - assertEquals(Types.INTEGER, getSqlTypeIdFromArrowType(new ArrowType.Int(32, true))); - assertEquals(Types.BIGINT, getSqlTypeIdFromArrowType(new ArrowType.Int(64, true))); - - assertEquals(Types.BINARY, getSqlTypeIdFromArrowType(new ArrowType.FixedSizeBinary(1024))); - assertEquals(Types.VARBINARY, getSqlTypeIdFromArrowType(new ArrowType.Binary())); - assertEquals(Types.LONGVARBINARY, getSqlTypeIdFromArrowType(new ArrowType.LargeBinary())); - - assertEquals(Types.VARCHAR, getSqlTypeIdFromArrowType(new ArrowType.Utf8())); - assertEquals(Types.LONGVARCHAR, getSqlTypeIdFromArrowType(new ArrowType.LargeUtf8())); - - assertEquals(Types.DATE, getSqlTypeIdFromArrowType(new ArrowType.Date(DateUnit.MILLISECOND))); - assertEquals( - Types.TIME, getSqlTypeIdFromArrowType(new ArrowType.Time(TimeUnit.MILLISECOND, 32))); - assertEquals( - Types.TIMESTAMP, - getSqlTypeIdFromArrowType(new ArrowType.Timestamp(TimeUnit.MILLISECOND, ""))); - - assertEquals(Types.BOOLEAN, getSqlTypeIdFromArrowType(new ArrowType.Bool())); - - assertEquals(Types.DECIMAL, 
getSqlTypeIdFromArrowType(new ArrowType.Decimal(0, 0, 64))); - assertEquals( - Types.DOUBLE, - getSqlTypeIdFromArrowType(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE))); - assertEquals( - Types.FLOAT, - getSqlTypeIdFromArrowType(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE))); - - assertEquals(Types.ARRAY, getSqlTypeIdFromArrowType(new ArrowType.List())); - assertEquals(Types.ARRAY, getSqlTypeIdFromArrowType(new ArrowType.LargeList())); - assertEquals(Types.ARRAY, getSqlTypeIdFromArrowType(new ArrowType.FixedSizeList(10))); - - assertEquals(Types.JAVA_OBJECT, getSqlTypeIdFromArrowType(new ArrowType.Struct())); - assertEquals( - Types.JAVA_OBJECT, getSqlTypeIdFromArrowType(new ArrowType.Duration(TimeUnit.MILLISECOND))); - assertEquals( - Types.JAVA_OBJECT, - getSqlTypeIdFromArrowType(new ArrowType.Interval(IntervalUnit.DAY_TIME))); - assertEquals( - Types.JAVA_OBJECT, getSqlTypeIdFromArrowType(new ArrowType.Union(UnionMode.Dense, null))); - assertEquals(Types.JAVA_OBJECT, getSqlTypeIdFromArrowType(new ArrowType.Map(true))); - - assertEquals(Types.NULL, getSqlTypeIdFromArrowType(new ArrowType.Null())); - } - - @Test - public void testGetSqlTypeNameFromArrowType() { - assertEquals("TINYINT", getSqlTypeNameFromArrowType(new ArrowType.Int(8, true))); - assertEquals("SMALLINT", getSqlTypeNameFromArrowType(new ArrowType.Int(16, true))); - assertEquals("INTEGER", getSqlTypeNameFromArrowType(new ArrowType.Int(32, true))); - assertEquals("BIGINT", getSqlTypeNameFromArrowType(new ArrowType.Int(64, true))); - - assertEquals("BINARY", getSqlTypeNameFromArrowType(new ArrowType.FixedSizeBinary(1024))); - assertEquals("VARBINARY", getSqlTypeNameFromArrowType(new ArrowType.Binary())); - assertEquals("LONGVARBINARY", getSqlTypeNameFromArrowType(new ArrowType.LargeBinary())); - - assertEquals("VARCHAR", getSqlTypeNameFromArrowType(new ArrowType.Utf8())); - assertEquals("LONGVARCHAR", getSqlTypeNameFromArrowType(new ArrowType.LargeUtf8())); - - 
assertEquals("DATE", getSqlTypeNameFromArrowType(new ArrowType.Date(DateUnit.MILLISECOND))); - assertEquals("TIME", getSqlTypeNameFromArrowType(new ArrowType.Time(TimeUnit.MILLISECOND, 32))); - assertEquals( - "TIMESTAMP", - getSqlTypeNameFromArrowType(new ArrowType.Timestamp(TimeUnit.MILLISECOND, ""))); - - assertEquals("BOOLEAN", getSqlTypeNameFromArrowType(new ArrowType.Bool())); - - assertEquals("DECIMAL", getSqlTypeNameFromArrowType(new ArrowType.Decimal(0, 0, 64))); - assertEquals( - "DOUBLE", - getSqlTypeNameFromArrowType(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE))); - assertEquals( - "FLOAT", - getSqlTypeNameFromArrowType(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE))); - - assertEquals("ARRAY", getSqlTypeNameFromArrowType(new ArrowType.List())); - assertEquals("ARRAY", getSqlTypeNameFromArrowType(new ArrowType.LargeList())); - assertEquals("ARRAY", getSqlTypeNameFromArrowType(new ArrowType.FixedSizeList(10))); - - assertEquals("JAVA_OBJECT", getSqlTypeNameFromArrowType(new ArrowType.Struct())); - - assertEquals( - "JAVA_OBJECT", getSqlTypeNameFromArrowType(new ArrowType.Duration(TimeUnit.MILLISECOND))); - assertEquals( - "JAVA_OBJECT", getSqlTypeNameFromArrowType(new ArrowType.Interval(IntervalUnit.DAY_TIME))); - assertEquals( - "JAVA_OBJECT", getSqlTypeNameFromArrowType(new ArrowType.Union(UnionMode.Dense, null))); - assertEquals("JAVA_OBJECT", getSqlTypeNameFromArrowType(new ArrowType.Map(true))); - - assertEquals("NULL", getSqlTypeNameFromArrowType(new ArrowType.Null())); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ThrowableAssertionUtils.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ThrowableAssertionUtils.java deleted file mode 100644 index 0280cbc910dba..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/ThrowableAssertionUtils.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -/** - * Utility class to avoid upgrading JUnit to version >= 4.13 and keep using code to assert a {@link - * Throwable}. This should be removed as soon as we can use the proper assertThrows/checkThrows. 
- */ -public class ThrowableAssertionUtils { - private ThrowableAssertionUtils() {} - - public static void simpleAssertThrowableClass( - final Class expectedThrowable, final ThrowingRunnable runnable) { - try { - runnable.run(); - } catch (Throwable actualThrown) { - if (expectedThrowable.isInstance(actualThrown)) { - return; - } else { - final String mismatchMessage = - String.format( - "unexpected exception type thrown;\nexpected: %s\nactual: %s", - formatClass(expectedThrowable), formatClass(actualThrown.getClass())); - - throw new AssertionError(mismatchMessage, actualThrown); - } - } - final String notThrownMessage = - String.format( - "expected %s to be thrown, but nothing was thrown", formatClass(expectedThrowable)); - throw new AssertionError(notThrownMessage); - } - - private static String formatClass(final Class value) { - // Fallback for anonymous inner classes - final String className = value.getCanonicalName(); - return className == null ? value.getName() : className; - } - - public interface ThrowingRunnable { - void run() throws Throwable; - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/UrlParserTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/UrlParserTest.java deleted file mode 100644 index 529a05e881374..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/UrlParserTest.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; - -import java.util.Map; -import org.junit.jupiter.api.Test; - -class UrlParserTest { - @Test - void parse() { - final Map parsed = UrlParser.parse("foo=bar&123=456", "&"); - assertEquals(parsed.get("foo"), "bar"); - assertEquals(parsed.get("123"), "456"); - } - - @Test - void parseEscaped() { - final Map parsed = UrlParser.parse("foo=bar%26&%26123=456", "&"); - assertEquals(parsed.get("foo"), "bar&"); - assertEquals(parsed.get("&123"), "456"); - } - - @Test - void parseEmpty() { - final Map parsed = UrlParser.parse("a=&b&foo=bar&123=456", "&"); - assertEquals(parsed.get("a"), ""); - assertNull(parsed.get("b")); - assertEquals(parsed.get("foo"), "bar"); - assertEquals(parsed.get("123"), "456"); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformerTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformerTest.java deleted file mode 100644 index a4499ab0aa1c1..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformerTest.java +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.driver.jdbc.utils; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; - -import com.google.common.collect.ImmutableList; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -public class VectorSchemaRootTransformerTest { - - @RegisterExtension - public static RootAllocatorTestExtension rootAllocatorTestExtension = - new RootAllocatorTestExtension(); - - private final BufferAllocator rootAllocator = rootAllocatorTestExtension.getRootAllocator(); - - @Test - public void testTransformerBuilderWorksCorrectly() throws Exception { - final VarBinaryVector field1 = 
rootAllocatorTestExtension.createVarBinaryVector("FIELD_1"); - final VarBinaryVector field2 = rootAllocatorTestExtension.createVarBinaryVector("FIELD_2"); - final VarBinaryVector field3 = rootAllocatorTestExtension.createVarBinaryVector("FIELD_3"); - - try (final VectorSchemaRoot originalRoot = VectorSchemaRoot.of(field1, field2, field3); - final VectorSchemaRoot clonedRoot = cloneVectorSchemaRoot(originalRoot)) { - - final VectorSchemaRootTransformer.Builder builder = - new VectorSchemaRootTransformer.Builder(originalRoot.getSchema(), rootAllocator); - - builder.renameFieldVector("FIELD_3", "FIELD_3_RENAMED"); - builder.addEmptyField("EMPTY_FIELD", new ArrowType.Bool()); - builder.renameFieldVector("FIELD_2", "FIELD_2_RENAMED"); - builder.renameFieldVector("FIELD_1", "FIELD_1_RENAMED"); - - final VectorSchemaRootTransformer transformer = builder.build(); - - final Schema transformedSchema = - new Schema( - ImmutableList.of( - Field.nullable("FIELD_3_RENAMED", new ArrowType.Binary()), - Field.nullable("EMPTY_FIELD", new ArrowType.Bool()), - Field.nullable("FIELD_2_RENAMED", new ArrowType.Binary()), - Field.nullable("FIELD_1_RENAMED", new ArrowType.Binary()))); - try (final VectorSchemaRoot transformedRoot = createVectorSchemaRoot(transformedSchema)) { - assertSame(transformedRoot, transformer.transform(clonedRoot, transformedRoot)); - assertEquals(transformedSchema, transformedRoot.getSchema()); - - final int rowCount = originalRoot.getRowCount(); - assertEquals(rowCount, transformedRoot.getRowCount()); - - final VarBinaryVector originalField1 = (VarBinaryVector) originalRoot.getVector("FIELD_1"); - final VarBinaryVector originalField2 = (VarBinaryVector) originalRoot.getVector("FIELD_2"); - final VarBinaryVector originalField3 = (VarBinaryVector) originalRoot.getVector("FIELD_3"); - - final VarBinaryVector transformedField1 = - (VarBinaryVector) transformedRoot.getVector("FIELD_1_RENAMED"); - final VarBinaryVector transformedField2 = - (VarBinaryVector) 
transformedRoot.getVector("FIELD_2_RENAMED"); - final VarBinaryVector transformedField3 = - (VarBinaryVector) transformedRoot.getVector("FIELD_3_RENAMED"); - final FieldVector emptyField = transformedRoot.getVector("EMPTY_FIELD"); - - for (int i = 0; i < rowCount; i++) { - assertArrayEquals(originalField1.getObject(i), transformedField1.getObject(i)); - assertArrayEquals(originalField2.getObject(i), transformedField2.getObject(i)); - assertArrayEquals(originalField3.getObject(i), transformedField3.getObject(i)); - assertNull(emptyField.getObject(i)); - } - } - } - } - - private VectorSchemaRoot cloneVectorSchemaRoot(final VectorSchemaRoot originalRoot) { - final VectorUnloader vectorUnloader = new VectorUnloader(originalRoot); - try (final ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch()) { - final VectorSchemaRoot clonedRoot = createVectorSchemaRoot(originalRoot.getSchema()); - final VectorLoader vectorLoader = new VectorLoader(clonedRoot); - vectorLoader.load(recordBatch); - return clonedRoot; - } - } - - private VectorSchemaRoot createVectorSchemaRoot(final Schema schema) { - final List fieldVectors = - schema.getFields().stream() - .map(field -> field.createVector(rootAllocator)) - .collect(Collectors.toList()); - return new VectorSchemaRoot(fieldVectors); - } -} diff --git a/java/flight/flight-sql-jdbc-core/src/test/resources/keys/keyStore.jks b/java/flight/flight-sql-jdbc-core/src/test/resources/keys/keyStore.jks deleted file mode 100644 index 32a9bedea500a..0000000000000 Binary files a/java/flight/flight-sql-jdbc-core/src/test/resources/keys/keyStore.jks and /dev/null differ diff --git a/java/flight/flight-sql-jdbc-core/src/test/resources/keys/noCertificate.jks b/java/flight/flight-sql-jdbc-core/src/test/resources/keys/noCertificate.jks deleted file mode 100644 index 071a1ebf97b3e..0000000000000 Binary files a/java/flight/flight-sql-jdbc-core/src/test/resources/keys/noCertificate.jks and /dev/null differ diff --git 
a/java/flight/flight-sql-jdbc-core/src/test/resources/logback.xml b/java/flight/flight-sql-jdbc-core/src/test/resources/logback.xml deleted file mode 100644 index ce66f8d82acda..0000000000000 --- a/java/flight/flight-sql-jdbc-core/src/test/resources/logback.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - - diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml deleted file mode 100644 index ae8c543fbfe3b..0000000000000 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ /dev/null @@ -1,178 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-flight - 19.0.0-SNAPSHOT - - - flight-sql-jdbc-driver - jar - Arrow Flight SQL JDBC Driver - A JDBC driver based on Arrow Flight SQL. - https://arrow.apache.org - - - - org.apache.arrow - flight-sql-jdbc-core - runtime - - - - org.slf4j - slf4j-jdk14 - runtime - - - - com.google.guava - guava - - runtime - - - - - - - org.apache.maven.plugins - maven-failsafe-plugin - - - default-it - - integration-test - verify - - - - - - org.apache.maven.plugins - maven-shade-plugin - - - - shade - - package - - false - true - false - - - *:* - - - - org.checkerframework:checker-qual - org.codehaus.mojo:animal-sniffer-annotations - javax.annotation:javax.annotation-api - com.google.android:annotations - com.google.errorprone:error_prone_annotations - com.google.code.findbugs:jsr305 - com.google.j2objc:j2objc-annotations - - - - - com. - org.apache.arrow.driver.jdbc.shaded.com. - - com.sun.** - - - - org. - org.apache.arrow.driver.jdbc.shaded.org. - - org.apache.arrow.driver.jdbc.** - - org.apache.arrow.flight.name - org.apache.arrow.flight.version - org.apache.arrow.flight.jdbc-driver.name - org.apache.arrow.flight.jdbc-driver.version - - - - io. - org.apache.arrow.driver.jdbc.shaded.io. - - - net. - org.apache.arrow.driver.jdbc.shaded.net. - - - mozilla. - org.apache.arrow.driver.jdbc.shaded.mozilla. 
- - - - META-INF.native.libnetty_ - META-INF.native.liborg_apache_arrow_driver_jdbc_shaded_netty_ - - - META-INF.native.netty_ - META-INF.native.org_apache_arrow_driver_jdbc_shaded_netty_ - - - - - - - - org.apache.arrow:arrow-vector - - codegen/** - - - - org.apache.calcite.avatica:* - - META-INF/services/java.sql.Driver - - - - *:* - - **/*.SF - **/*.RSA - **/*.DSA - - META-INF/native-image/ - META-INF/proguard/ - - META-INF/versions/ - **/*.proto - **/module-info.class - - - - - - - - - - diff --git a/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ITDriverJarValidation.java b/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ITDriverJarValidation.java deleted file mode 100644 index a0e108d6a067b..0000000000000 --- a/java/flight/flight-sql-jdbc-driver/src/test/java/org/apache/arrow/driver/jdbc/ITDriverJarValidation.java +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.driver.jdbc; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -import com.google.common.collect.ImmutableSet; -import java.io.File; -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.net.JarURLConnection; -import java.net.URISyntaxException; -import java.net.URL; -import java.net.URLClassLoader; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.function.Predicate; -import java.util.jar.JarEntry; -import java.util.jar.JarFile; -import java.util.stream.Stream; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.Timeout; -import org.junit.jupiter.api.function.Executable; - -/** - * Check the content of the JDBC driver jar - * - *

    After shading everything should be either under org.apache.arrow.driver.jdbc. package - */ -public class ITDriverJarValidation { - /** - * Use this property to provide path to the JDBC driver jar. Can be used to run the test from an - * IDE - */ - public static final String JDBC_DRIVER_PATH_OVERRIDE = - System.getProperty("arrow-flight-jdbc-driver.jar.override"); - - /** List of allowed prefixes a jar entry may match. */ - public static final Set ALLOWED_PREFIXES = - ImmutableSet.of( - "org/apache/arrow/driver/jdbc/", // Driver code - "META-INF/maven/", // Maven metadata (useful for security scanner - "META-INF/services/", // ServiceLoader implementations - "META-INF/license/", - "META-INF/licenses/", - // Prefixes for native libraries - "META-INF/native/liborg_apache_arrow_driver_jdbc_shaded_", - "META-INF/native/org_apache_arrow_driver_jdbc_shaded_"); - - /** List of allowed files a jar entry may match. */ - public static final Set ALLOWED_FILES = - ImmutableSet.of( - "arrow-git.properties", - "properties/flight.properties", - "META-INF/io.netty.versions.properties", - "META-INF/MANIFEST.MF", - "META-INF/DEPENDENCIES", - "META-INF/FastDoubleParser-LICENSE", - "META-INF/FastDoubleParser-NOTICE", - "META-INF/LICENSE", - "META-INF/LICENSE.txt", - "META-INF/NOTICE", - "META-INF/NOTICE.txt", - "META-INF/thirdparty-LICENSE", - "META-INF/bigint-LICENSE"); - - // This method is designed to work with Maven failsafe plugin and expects the - // JDBC driver jar to be present in the test classpath (instead of the individual classes) - private static File getJdbcJarFile() throws IOException { - // Check if an override has been set - if (JDBC_DRIVER_PATH_OVERRIDE != null) { - return new File(JDBC_DRIVER_PATH_OVERRIDE); - } - - // Check classpath to find the driver jar (without loading the class) - URL driverClassURL = - ITDriverJarValidation.class - .getClassLoader() - .getResource("org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriver.class"); - - 
assertNotNull(driverClassURL, "Driver class was not detected in the classpath"); - assertEquals( - "jar", driverClassURL.getProtocol(), "Driver class was not found inside a jar file"); - - // Return the enclosing jar file - JarURLConnection connection = (JarURLConnection) driverClassURL.openConnection(); - try { - return new File(connection.getJarFileURL().toURI()); - } catch (URISyntaxException e) { - throw new IOException(e); - } - } - - /** Validate the content of the jar to enforce all 3rd party dependencies have been shaded. */ - @Test - @Timeout(value = 2, unit = TimeUnit.MINUTES) - public void validateShadedJar() throws IOException { - - try (JarFile jar = new JarFile(getJdbcJarFile())) { - Stream executables = - jar.stream() - .filter(Predicate.not(JarEntry::isDirectory)) - .map( - entry -> { - return () -> checkEntryAllowed(entry.getName()); - }); - - Assertions.assertAll(executables); - } - } - - /** Check that relocated netty code can also load matching native library. */ - @Test - @Timeout(value = 2, unit = TimeUnit.MINUTES) - public void checkNettyOpenSslNativeLoader() throws Throwable { - try (URLClassLoader driverClassLoader = - new URLClassLoader(new URL[] {getJdbcJarFile().toURI().toURL()}, null)) { - Class openSslClass = - driverClassLoader.loadClass( - "org.apache.arrow.driver.jdbc.shaded.io.netty.handler.ssl.OpenSsl"); - Method method = openSslClass.getDeclaredMethod("ensureAvailability"); - try { - method.invoke(null); - } catch (InvocationTargetException e) { - throw e.getCause(); - } - } - } - - /** - * Check if a jar entry is allowed. - * - *

    A jar entry is allowed if either it is part of the allowed files or it matches one of the - * allowed prefixes - * - * @param name the jar entry name - * @throws AssertionError if the entry is not allowed - */ - private void checkEntryAllowed(String name) { - // Check if there's a matching file entry first - if (ALLOWED_FILES.contains(name)) { - return; - } - - for (String prefix : ALLOWED_PREFIXES) { - if (name.startsWith(prefix)) { - return; - } - } - - throw new AssertionError("'" + name + "' is not an allowed jar entry"); - } -} diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml deleted file mode 100644 index 9cbc8430fedb7..0000000000000 --- a/java/flight/flight-sql/pom.xml +++ /dev/null @@ -1,131 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-flight - 19.0.0-SNAPSHOT - - - flight-sql - jar - Arrow Flight SQL - Contains utility classes to expose Flight SQL semantics for clients and servers over Arrow Flight - - - 1 - - --add-reads=org.apache.arrow.flight.sql=org.slf4j --add-reads=org.apache.arrow.flight.core=ALL-UNNAMED --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - - - - org.apache.arrow - flight-core - - - io.netty - netty-transport-native-unix-common - - - io.netty - netty-transport-native-kqueue - - - io.netty - netty-transport-native-epoll - - - - - org.apache.arrow - arrow-memory-core - - - org.immutables - value-annotations - - - org.apache.arrow - arrow-jdbc - test - - - com.google.guava - guava - - - com.google.protobuf - protobuf-java - - - org.apache.arrow - arrow-vector - ${arrow.vector.classifier} - - - org.slf4j - slf4j-api - test - - - org.apache.derby - derby - 10.15.2.0 - test - - - org.apache.commons - commons-dbcp2 - 2.12.0 - test - - - commons-logging - commons-logging - - - - - org.apache.commons - commons-pool2 - 2.12.0 - test - - - org.apache.commons - commons-text - 1.12.0 - test - - - org.hamcrest - hamcrest - test - - - commons-cli - commons-cli - 1.9.0 - true - - - diff 
--git a/java/flight/flight-sql/src/main/java/module-info.java b/java/flight/flight-sql/src/main/java/module-info.java deleted file mode 100644 index 42be9ce6d92db..0000000000000 --- a/java/flight/flight-sql/src/main/java/module-info.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -module org.apache.arrow.flight.sql { - exports org.apache.arrow.flight.sql; - exports org.apache.arrow.flight.sql.example; - exports org.apache.arrow.flight.sql.util; - - requires com.google.common; - requires com.google.protobuf; - requires java.sql; - requires org.apache.arrow.flight.core; - requires org.apache.arrow.memory.core; - requires org.apache.arrow.vector; - requires org.apache.commons.cli; -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/BasicFlightSqlProducer.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/BasicFlightSqlProducer.java deleted file mode 100644 index 74bfd654497db..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/BasicFlightSqlProducer.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import com.google.protobuf.Message; -import java.util.List; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.sql.impl.FlightSql; -import org.apache.arrow.vector.types.pojo.Schema; - -/** A {@link FlightSqlProducer} that implements getting FlightInfo for each metadata request. 
*/ -public abstract class BasicFlightSqlProducer extends NoOpFlightSqlProducer { - - @Override - public FlightInfo getFlightInfoSqlInfo( - FlightSql.CommandGetSqlInfo request, CallContext context, FlightDescriptor descriptor) { - return generateFlightInfo(request, descriptor, Schemas.GET_SQL_INFO_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoTypeInfo( - FlightSql.CommandGetXdbcTypeInfo request, CallContext context, FlightDescriptor descriptor) { - return generateFlightInfo(request, descriptor, Schemas.GET_TYPE_INFO_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoCatalogs( - FlightSql.CommandGetCatalogs request, CallContext context, FlightDescriptor descriptor) { - return generateFlightInfo(request, descriptor, Schemas.GET_CATALOGS_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoSchemas( - FlightSql.CommandGetDbSchemas request, CallContext context, FlightDescriptor descriptor) { - return generateFlightInfo(request, descriptor, Schemas.GET_SCHEMAS_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoTables( - FlightSql.CommandGetTables request, CallContext context, FlightDescriptor descriptor) { - if (request.getIncludeSchema()) { - return generateFlightInfo(request, descriptor, Schemas.GET_TABLES_SCHEMA); - } - return generateFlightInfo(request, descriptor, Schemas.GET_TABLES_SCHEMA_NO_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoTableTypes( - FlightSql.CommandGetTableTypes request, CallContext context, FlightDescriptor descriptor) { - return generateFlightInfo(request, descriptor, Schemas.GET_TABLE_TYPES_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoPrimaryKeys( - FlightSql.CommandGetPrimaryKeys request, CallContext context, FlightDescriptor descriptor) { - return generateFlightInfo(request, descriptor, Schemas.GET_PRIMARY_KEYS_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoExportedKeys( - FlightSql.CommandGetExportedKeys request, CallContext context, FlightDescriptor descriptor) 
{ - return generateFlightInfo(request, descriptor, Schemas.GET_EXPORTED_KEYS_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoImportedKeys( - FlightSql.CommandGetImportedKeys request, CallContext context, FlightDescriptor descriptor) { - return generateFlightInfo(request, descriptor, Schemas.GET_IMPORTED_KEYS_SCHEMA); - } - - @Override - public FlightInfo getFlightInfoCrossReference( - FlightSql.CommandGetCrossReference request, - CallContext context, - FlightDescriptor descriptor) { - return generateFlightInfo(request, descriptor, Schemas.GET_CROSS_REFERENCE_SCHEMA); - } - - /** - * Return a list of FlightEndpoints for the given request and FlightDescriptor. This method should - * validate that the request is supported by this FlightSqlProducer. - */ - protected abstract List determineEndpoints( - T request, FlightDescriptor flightDescriptor, Schema schema); - - protected FlightInfo generateFlightInfo( - T request, FlightDescriptor descriptor, Schema schema) { - final List endpoints = determineEndpoints(request, descriptor, schema); - return new FlightInfo(schema, descriptor, endpoints, -1, -1); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/CancelListener.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/CancelListener.java deleted file mode 100644 index 869eda7379a5b..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/CancelListener.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import com.google.protobuf.Any; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.Result; -import org.apache.arrow.flight.sql.impl.FlightSql; - -/** Typed StreamListener for cancelQuery. */ -@SuppressWarnings("deprecation") -class CancelListener implements FlightProducer.StreamListener { - private final FlightProducer.StreamListener listener; - - CancelListener(FlightProducer.StreamListener listener) { - this.listener = listener; - } - - @Override - public void onNext(CancelResult val) { - FlightSql.ActionCancelQueryResult result = - FlightSql.ActionCancelQueryResult.newBuilder().setResult(val.toProtocol()).build(); - listener.onNext(new Result(Any.pack(result).toByteArray())); - } - - @Override - public void onError(Throwable t) { - listener.onError(t); - } - - @Override - public void onCompleted() { - listener.onCompleted(); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/CancelResult.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/CancelResult.java deleted file mode 100644 index 0802b8605aebc..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/CancelResult.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import org.apache.arrow.flight.sql.impl.FlightSql; - -/** - * The result of cancelling a query. - * - * @deprecated Prefer {@link org.apache.arrow.flight.CancelStatus}. - */ -@Deprecated -public enum CancelResult { - UNSPECIFIED, - CANCELLED, - CANCELLING, - NOT_CANCELLABLE, - ; - - FlightSql.ActionCancelQueryResult.CancelResult toProtocol() { - switch (this) { - default: - case UNSPECIFIED: - return FlightSql.ActionCancelQueryResult.CancelResult.CANCEL_RESULT_UNSPECIFIED; - case CANCELLED: - return FlightSql.ActionCancelQueryResult.CancelResult.CANCEL_RESULT_CANCELLED; - case CANCELLING: - return FlightSql.ActionCancelQueryResult.CancelResult.CANCEL_RESULT_CANCELLING; - case NOT_CANCELLABLE: - return FlightSql.ActionCancelQueryResult.CancelResult.CANCEL_RESULT_NOT_CANCELLABLE; - } - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/CancelStatusListener.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/CancelStatusListener.java deleted file mode 100644 index e2b78750a7dec..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/CancelStatusListener.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import org.apache.arrow.flight.CancelFlightInfoResult; -import org.apache.arrow.flight.CancelStatus; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.Result; - -/** Typed StreamListener for cancelFlightInfo. */ -class CancelStatusListener implements FlightProducer.StreamListener { - private final FlightProducer.StreamListener listener; - - CancelStatusListener(FlightProducer.StreamListener listener) { - this.listener = listener; - } - - @Override - public void onNext(CancelStatus val) { - listener.onNext(new Result(new CancelFlightInfoResult(val).serialize().array())); - } - - @Override - public void onError(Throwable t) { - listener.onError(t); - } - - @Override - public void onCompleted() { - listener.onCompleted(); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/CloseSessionResultListener.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/CloseSessionResultListener.java deleted file mode 100644 index 4114ac0444ead..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/CloseSessionResultListener.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * 
contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import org.apache.arrow.flight.CloseSessionResult; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.Result; - -/** Typed StreamListener for closeSession. */ -public class CloseSessionResultListener - implements FlightProducer.StreamListener { - private final FlightProducer.StreamListener listener; - - CloseSessionResultListener(FlightProducer.StreamListener listener) { - this.listener = listener; - } - - @Override - public void onNext(CloseSessionResult val) { - listener.onNext(new Result(val.serialize().array())); - } - - @Override - public void onError(Throwable t) { - listener.onError(t); - } - - @Override - public void onCompleted() { - listener.onCompleted(); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightEndpointListener.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightEndpointListener.java deleted file mode 100644 index 43a430b81078a..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightEndpointListener.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.Result; - -/** Typed StreamListener for renewFlightEndpoint. */ -public class FlightEndpointListener implements FlightProducer.StreamListener { - private final FlightProducer.StreamListener listener; - - FlightEndpointListener(FlightProducer.StreamListener listener) { - this.listener = listener; - } - - @Override - public void onNext(FlightEndpoint val) { - listener.onNext(new Result(val.serialize().array())); - } - - @Override - public void onError(Throwable t) { - listener.onError(t); - } - - @Override - public void onCompleted() { - listener.onCompleted(); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java deleted file mode 100644 index 9a6ffdfdca847..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java +++ /dev/null @@ -1,1540 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import static java.util.Objects.isNull; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionBeginSavepointRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionBeginSavepointResult; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionBeginTransactionRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionBeginTransactionResult; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionCancelQueryRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionCancelQueryResult; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionClosePreparedStatementRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionCreatePreparedStatementRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionCreatePreparedSubstraitPlanRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionEndSavepointRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionEndTransactionRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetCatalogs; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetCrossReference; -import static 
org.apache.arrow.flight.sql.impl.FlightSql.CommandGetDbSchemas; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetExportedKeys; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetImportedKeys; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetPrimaryKeys; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetSqlInfo; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetTableTypes; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetTables; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetXdbcTypeInfo; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandPreparedStatementUpdate; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementQuery; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementSubstraitPlan; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementUpdate; -import static org.apache.arrow.flight.sql.impl.FlightSql.DoPutUpdateResult; -import static org.apache.arrow.flight.sql.impl.FlightSql.SqlInfo; - -import com.google.protobuf.Any; -import com.google.protobuf.ByteString; -import com.google.protobuf.InvalidProtocolBufferException; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.concurrent.ExecutionException; -import java.util.function.Consumer; -import java.util.stream.Collectors; -import org.apache.arrow.flight.Action; -import org.apache.arrow.flight.CallOption; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.CancelFlightInfoRequest; -import org.apache.arrow.flight.CancelFlightInfoResult; -import org.apache.arrow.flight.CloseSessionRequest; -import org.apache.arrow.flight.CloseSessionResult; -import 
org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.GetSessionOptionsRequest; -import org.apache.arrow.flight.GetSessionOptionsResult; -import org.apache.arrow.flight.PutResult; -import org.apache.arrow.flight.RenewFlightEndpointRequest; -import org.apache.arrow.flight.Result; -import org.apache.arrow.flight.SchemaResult; -import org.apache.arrow.flight.SetSessionOptionsRequest; -import org.apache.arrow.flight.SetSessionOptionsResult; -import org.apache.arrow.flight.SyncPutListener; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.flight.sql.impl.FlightSql; -import org.apache.arrow.flight.sql.impl.FlightSql.ActionCreatePreparedStatementResult; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandPreparedStatementQuery; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementIngest; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementIngest.TableDefinitionOptions; -import org.apache.arrow.flight.sql.util.TableRef; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowStreamReader; -import org.apache.arrow.vector.ipc.ReadChannel; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.pojo.Schema; - -/** Flight client with Flight SQL semantics. */ -public class FlightSqlClient implements AutoCloseable { - private final FlightClient client; - - public FlightSqlClient(final FlightClient client) { - this.client = Objects.requireNonNull(client, "Client cannot be null!"); - } - - /** - * Execute a query on the server. - * - * @param query The query to execute. - * @param options RPC-layer hints for this call. 
- * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo execute(final String query, final CallOption... options) { - return execute(query, /*transaction*/ null, options); - } - - /** - * Execute a query on the server. - * - * @param query The query to execute. - * @param transaction The transaction that this query is part of. - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo execute( - final String query, Transaction transaction, final CallOption... options) { - final CommandStatementQuery.Builder builder = - CommandStatementQuery.newBuilder().setQuery(query); - if (transaction != null) { - builder.setTransactionId(ByteString.copyFrom(transaction.getTransactionId())); - } - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getInfo(descriptor, options); - } - - /** - * Execute a Substrait plan on the server. - * - * @param plan The Substrait plan to execute. - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo executeSubstrait(SubstraitPlan plan, CallOption... options) { - return executeSubstrait(plan, /*transaction*/ null, options); - } - - /** - * Execute a Substrait plan on the server. - * - * @param plan The Substrait plan to execute. - * @param transaction The transaction that this query is part of. - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo executeSubstrait( - SubstraitPlan plan, Transaction transaction, CallOption... 
options) { - final CommandStatementSubstraitPlan.Builder builder = - CommandStatementSubstraitPlan.newBuilder(); - builder - .getPlanBuilder() - .setPlan(ByteString.copyFrom(plan.getPlan())) - .setVersion(plan.getVersion()); - if (transaction != null) { - builder.setTransactionId(ByteString.copyFrom(transaction.getTransactionId())); - } - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getInfo(descriptor, options); - } - - /** Get the schema of the result set of a query. */ - public SchemaResult getExecuteSchema( - String query, Transaction transaction, CallOption... options) { - final CommandStatementQuery.Builder builder = CommandStatementQuery.newBuilder(); - builder.setQuery(query); - if (transaction != null) { - builder.setTransactionId(ByteString.copyFrom(transaction.getTransactionId())); - } - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getSchema(descriptor, options); - } - - /** Get the schema of the result set of a query. */ - public SchemaResult getExecuteSchema(String query, CallOption... options) { - return getExecuteSchema(query, /*transaction*/ null, options); - } - - /** Get the schema of the result set of a Substrait plan. */ - public SchemaResult getExecuteSubstraitSchema( - SubstraitPlan plan, Transaction transaction, final CallOption... options) { - final CommandStatementSubstraitPlan.Builder builder = - CommandStatementSubstraitPlan.newBuilder(); - builder - .getPlanBuilder() - .setPlan(ByteString.copyFrom(plan.getPlan())) - .setVersion(plan.getVersion()); - if (transaction != null) { - builder.setTransactionId(ByteString.copyFrom(transaction.getTransactionId())); - } - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getSchema(descriptor, options); - } - - /** Get the schema of the result set of a Substrait plan. 
*/ - public SchemaResult getExecuteSubstraitSchema( - SubstraitPlan substraitPlan, final CallOption... options) { - return getExecuteSubstraitSchema(substraitPlan, /*transaction*/ null, options); - } - - /** - * Execute a bulk ingest on the server. - * - * @param data data to be ingested - * @param ingestOptions options for the ingest request. - * @param options RPC-layer hints for this call. - * @return the number of rows affected. - */ - public long executeIngest( - final VectorSchemaRoot data, - final ExecuteIngestOptions ingestOptions, - final CallOption... options) { - return executeIngest(data, ingestOptions, /*transaction*/ null, options); - } - - /** - * Execute a bulk ingest on the server. - * - * @param dataReader data stream to be ingested - * @param ingestOptions options for the ingest request. - * @param options RPC-layer hints for this call. - * @return the number of rows affected. - */ - public long executeIngest( - final ArrowStreamReader dataReader, - final ExecuteIngestOptions ingestOptions, - final CallOption... options) { - return executeIngest(dataReader, ingestOptions, /*transaction*/ null, options); - } - - /** - * Execute a bulk ingest on the server. - * - * @param data data to be ingested - * @param ingestOptions options for the ingest request. - * @param transaction The transaction that this ingest request is part of. - * @param options RPC-layer hints for this call. - * @return the number of rows affected. - */ - public long executeIngest( - final VectorSchemaRoot data, - final ExecuteIngestOptions ingestOptions, - Transaction transaction, - final CallOption... options) { - return executeIngest( - data, ingestOptions, transaction, FlightClient.ClientStreamListener::putNext, options); - } - - /** - * Execute a bulk ingest on the server. - * - * @param dataReader data stream to be ingested - * @param ingestOptions options for the ingest request. - * @param transaction The transaction that this ingest request is part of. 
- * @param options RPC-layer hints for this call. - * @return the number of rows affected. - */ - public long executeIngest( - final ArrowStreamReader dataReader, - final ExecuteIngestOptions ingestOptions, - Transaction transaction, - final CallOption... options) { - - try { - return executeIngest( - dataReader.getVectorSchemaRoot(), - ingestOptions, - transaction, - listener -> { - while (true) { - try { - if (!dataReader.loadNextBatch()) { - break; - } - } catch (IOException e) { - throw CallStatus.UNKNOWN.withCause(e).toRuntimeException(); - } - listener.putNext(); - } - }, - options); - } catch (IOException e) { - throw CallStatus.UNKNOWN.withCause(e).toRuntimeException(); - } - } - - private long executeIngest( - final VectorSchemaRoot data, - final ExecuteIngestOptions ingestOptions, - final Transaction transaction, - final Consumer dataPutter, - final CallOption... options) { - try { - final CommandStatementIngest.Builder builder = CommandStatementIngest.newBuilder(); - if (transaction != null) { - builder.setTransactionId(ByteString.copyFrom(transaction.getTransactionId())); - } - ingestOptions.updateCommandBuilder(builder); - - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - try (final SyncPutListener putListener = new SyncPutListener()) { - - final FlightClient.ClientStreamListener listener = - client.startPut(descriptor, data, putListener, options); - dataPutter.accept(listener); - listener.completed(); - listener.getResult(); - - try (final PutResult result = putListener.read()) { - final DoPutUpdateResult doPutUpdateResult = - DoPutUpdateResult.parseFrom(result.getApplicationMetadata().nioBuffer()); - return doPutUpdateResult.getRecordCount(); - } - } - } catch (final InterruptedException e) { - throw CallStatus.CANCELLED.withCause(e).toRuntimeException(); - } catch (final ExecutionException e) { - throw CallStatus.CANCELLED.withCause(e.getCause()).toRuntimeException(); - } catch (final 
InvalidProtocolBufferException e) { - throw CallStatus.INTERNAL.withCause(e).toRuntimeException(); - } - } - - /** - * Execute an update query on the server. - * - * @param query The query to execute. - * @param options RPC-layer hints for this call. - * @return the number of rows affected. - */ - public long executeUpdate(final String query, final CallOption... options) { - return executeUpdate(query, /*transaction*/ null, options); - } - - /** - * Execute an update query on the server. - * - * @param query The query to execute. - * @param transaction The transaction that this query is part of. - * @param options RPC-layer hints for this call. - * @return the number of rows affected. - */ - public long executeUpdate( - final String query, Transaction transaction, final CallOption... options) { - final CommandStatementUpdate.Builder builder = - CommandStatementUpdate.newBuilder().setQuery(query); - if (transaction != null) { - builder.setTransactionId(ByteString.copyFrom(transaction.getTransactionId())); - } - - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - try (final SyncPutListener putListener = new SyncPutListener()) { - final FlightClient.ClientStreamListener listener = - client.startPut(descriptor, VectorSchemaRoot.of(), putListener, options); - try (final PutResult result = putListener.read()) { - final DoPutUpdateResult doPutUpdateResult = - DoPutUpdateResult.parseFrom(result.getApplicationMetadata().nioBuffer()); - return doPutUpdateResult.getRecordCount(); - } finally { - listener.getResult(); - } - } catch (final InterruptedException e) { - throw CallStatus.CANCELLED.withCause(e).toRuntimeException(); - } catch (final ExecutionException e) { - throw CallStatus.CANCELLED.withCause(e.getCause()).toRuntimeException(); - } catch (final InvalidProtocolBufferException e) { - throw CallStatus.INTERNAL.withCause(e).toRuntimeException(); - } - } - - /** - * Execute an update query on the server. 
- * - * @param plan The Substrait plan to execute. - * @param options RPC-layer hints for this call. - * @return the number of rows affected. - */ - public long executeSubstraitUpdate(SubstraitPlan plan, CallOption... options) { - return executeSubstraitUpdate(plan, /*transaction*/ null, options); - } - - /** - * Execute an update query on the server. - * - * @param plan The Substrait plan to execute. - * @param transaction The transaction that this query is part of. - * @param options RPC-layer hints for this call. - * @return the number of rows affected. - */ - public long executeSubstraitUpdate( - SubstraitPlan plan, Transaction transaction, CallOption... options) { - final CommandStatementSubstraitPlan.Builder builder = - CommandStatementSubstraitPlan.newBuilder(); - builder - .getPlanBuilder() - .setPlan(ByteString.copyFrom(plan.getPlan())) - .setVersion(plan.getVersion()); - if (transaction != null) { - builder.setTransactionId(ByteString.copyFrom(transaction.getTransactionId())); - } - - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - try (final SyncPutListener putListener = new SyncPutListener()) { - final FlightClient.ClientStreamListener listener = - client.startPut(descriptor, VectorSchemaRoot.of(), putListener, options); - try (final PutResult result = putListener.read()) { - final DoPutUpdateResult doPutUpdateResult = - DoPutUpdateResult.parseFrom(result.getApplicationMetadata().nioBuffer()); - return doPutUpdateResult.getRecordCount(); - } finally { - listener.getResult(); - } - } catch (final InterruptedException e) { - throw CallStatus.CANCELLED.withCause(e).toRuntimeException(); - } catch (final ExecutionException e) { - throw CallStatus.CANCELLED.withCause(e.getCause()).toRuntimeException(); - } catch (final InvalidProtocolBufferException e) { - throw CallStatus.INTERNAL.withCause(e).toRuntimeException(); - } - } - - /** - * Request a list of catalogs. 
- * - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getCatalogs(final CallOption... options) { - final CommandGetCatalogs.Builder builder = CommandGetCatalogs.newBuilder(); - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getInfo(descriptor, options); - } - - /** - * Get the schema of {@link #getCatalogs(CallOption...)} from the server. - * - *

    Should be identical to {@link FlightSqlProducer.Schemas#GET_CATALOGS_SCHEMA}. - */ - public SchemaResult getCatalogsSchema(final CallOption... options) { - final CommandGetCatalogs command = CommandGetCatalogs.getDefaultInstance(); - final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); - return client.getSchema(descriptor, options); - } - - /** - * Request a list of schemas. - * - * @param catalog The catalog. - * @param dbSchemaFilterPattern The schema filter pattern. - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getSchemas( - final String catalog, final String dbSchemaFilterPattern, final CallOption... options) { - final CommandGetDbSchemas.Builder builder = CommandGetDbSchemas.newBuilder(); - - if (catalog != null) { - builder.setCatalog(catalog); - } - - if (dbSchemaFilterPattern != null) { - builder.setDbSchemaFilterPattern(dbSchemaFilterPattern); - } - - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getInfo(descriptor, options); - } - - /** - * Get the schema of {@link #getSchemas(String, String, CallOption...)} from the server. - * - *

    Should be identical to {@link FlightSqlProducer.Schemas#GET_SCHEMAS_SCHEMA}. - */ - public SchemaResult getSchemasSchema(final CallOption... options) { - final CommandGetDbSchemas command = CommandGetDbSchemas.getDefaultInstance(); - final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); - return client.getSchema(descriptor, options); - } - - /** - * Get schema for a stream. - * - * @param descriptor The descriptor for the stream. - * @param options RPC-layer hints for this call. - */ - public SchemaResult getSchema(FlightDescriptor descriptor, CallOption... options) { - return client.getSchema(descriptor, options); - } - - /** - * Retrieve a stream from the server. - * - * @param ticket The ticket granting access to the data stream. - * @param options RPC-layer hints for this call. - */ - public FlightStream getStream(Ticket ticket, CallOption... options) { - return client.getStream(ticket, options); - } - - /** - * Request a set of Flight SQL metadata. - * - * @param info The set of metadata to retrieve. None to retrieve all metadata. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getSqlInfo(final SqlInfo... info) { - return getSqlInfo(info, new CallOption[0]); - } - - /** - * Request a set of Flight SQL metadata. - * - * @param info The set of metadata to retrieve. None to retrieve all metadata. - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getSqlInfo(final SqlInfo[] info, final CallOption... options) { - final int[] infoNumbers = Arrays.stream(info).mapToInt(SqlInfo::getNumber).toArray(); - return getSqlInfo(infoNumbers, options); - } - - /** - * Request a set of Flight SQL metadata. Use this method if you would like to retrieve custom - * metadata, where the custom metadata key values start from 10_000. - * - * @param info The set of metadata to retrieve. 
None to retrieve all metadata. - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getSqlInfo(final int[] info, final CallOption... options) { - return getSqlInfo(Arrays.stream(info).boxed().collect(Collectors.toList()), options); - } - - /** - * Request a set of Flight SQL metadata. Use this method if you would like to retrieve custom - * metadata, where the custom metadata key values start from 10_000. - * - * @param info The set of metadata to retrieve. None to retrieve all metadata. - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getSqlInfo(final Iterable info, final CallOption... options) { - final CommandGetSqlInfo.Builder builder = CommandGetSqlInfo.newBuilder(); - builder.addAllInfo(info); - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getInfo(descriptor, options); - } - - /** - * Get the schema of {@link #getSqlInfo(SqlInfo...)} from the server. - * - *

    Should be identical to {@link FlightSqlProducer.Schemas#GET_SQL_INFO_SCHEMA}. - */ - public SchemaResult getSqlInfoSchema(final CallOption... options) { - final CommandGetSqlInfo command = CommandGetSqlInfo.getDefaultInstance(); - final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); - return client.getSchema(descriptor, options); - } - - /** - * Request the information about the data types supported related to a filter data type. - * - * @param dataType the data type to be used as filter. - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getXdbcTypeInfo(final int dataType, final CallOption... options) { - final CommandGetXdbcTypeInfo.Builder builder = CommandGetXdbcTypeInfo.newBuilder(); - - builder.setDataType(dataType); - - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getInfo(descriptor, options); - } - - /** - * Request the information about all the data types supported. - * - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getXdbcTypeInfo(final CallOption... options) { - final CommandGetXdbcTypeInfo.Builder builder = CommandGetXdbcTypeInfo.newBuilder(); - - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getInfo(descriptor, options); - } - - /** - * Get the schema of {@link #getXdbcTypeInfo(CallOption...)} from the server. - * - *

    Should be identical to {@link FlightSqlProducer.Schemas#GET_TYPE_INFO_SCHEMA}. - */ - public SchemaResult getXdbcTypeInfoSchema(final CallOption... options) { - final CommandGetXdbcTypeInfo command = CommandGetXdbcTypeInfo.getDefaultInstance(); - final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); - return client.getSchema(descriptor, options); - } - - /** - * Request a list of tables. - * - * @param catalog The catalog. - * @param dbSchemaFilterPattern The schema filter pattern. - * @param tableFilterPattern The table filter pattern. - * @param tableTypes The table types to include. - * @param includeSchema True to include the schema upon return, false to not include the schema. - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getTables( - final String catalog, - final String dbSchemaFilterPattern, - final String tableFilterPattern, - final List tableTypes, - final boolean includeSchema, - final CallOption... options) { - final CommandGetTables.Builder builder = CommandGetTables.newBuilder(); - - if (catalog != null) { - builder.setCatalog(catalog); - } - - if (dbSchemaFilterPattern != null) { - builder.setDbSchemaFilterPattern(dbSchemaFilterPattern); - } - - if (tableFilterPattern != null) { - builder.setTableNameFilterPattern(tableFilterPattern); - } - - if (tableTypes != null) { - builder.addAllTableTypes(tableTypes); - } - builder.setIncludeSchema(includeSchema); - - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getInfo(descriptor, options); - } - - /** - * Get the schema of {@link #getTables(String, String, String, List, boolean, CallOption...)} from - * the server. - * - *

    Should be identical to {@link FlightSqlProducer.Schemas#GET_TABLES_SCHEMA} or {@link - * FlightSqlProducer.Schemas#GET_TABLES_SCHEMA_NO_SCHEMA}. - */ - public SchemaResult getTablesSchema(boolean includeSchema, final CallOption... options) { - final CommandGetTables command = - CommandGetTables.newBuilder().setIncludeSchema(includeSchema).build(); - final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); - return client.getSchema(descriptor, options); - } - - /** - * Request the primary keys for a table. - * - * @param tableRef An object which hold info about catalog, dbSchema and table. - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getPrimaryKeys(final TableRef tableRef, final CallOption... options) { - final CommandGetPrimaryKeys.Builder builder = CommandGetPrimaryKeys.newBuilder(); - - if (tableRef.getCatalog() != null) { - builder.setCatalog(tableRef.getCatalog()); - } - - if (tableRef.getDbSchema() != null) { - builder.setDbSchema(tableRef.getDbSchema()); - } - - Objects.requireNonNull(tableRef.getTable()); - builder.setTable(tableRef.getTable()); - - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getInfo(descriptor, options); - } - - /** - * Get the schema of {@link #getPrimaryKeys(TableRef, CallOption...)} from the server. - * - *

    Should be identical to {@link FlightSqlProducer.Schemas#GET_PRIMARY_KEYS_SCHEMA}. - */ - public SchemaResult getPrimaryKeysSchema(final CallOption... options) { - final CommandGetPrimaryKeys command = CommandGetPrimaryKeys.getDefaultInstance(); - final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); - return client.getSchema(descriptor, options); - } - - /** - * Retrieves a description about the foreign key columns that reference the primary key columns of - * the given table. - * - * @param tableRef An object which hold info about catalog, dbSchema and table. - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getExportedKeys(final TableRef tableRef, final CallOption... options) { - Objects.requireNonNull(tableRef.getTable(), "Table cannot be null."); - - final CommandGetExportedKeys.Builder builder = CommandGetExportedKeys.newBuilder(); - - if (tableRef.getCatalog() != null) { - builder.setCatalog(tableRef.getCatalog()); - } - - if (tableRef.getDbSchema() != null) { - builder.setDbSchema(tableRef.getDbSchema()); - } - - Objects.requireNonNull(tableRef.getTable()); - builder.setTable(tableRef.getTable()); - - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getInfo(descriptor, options); - } - - /** - * Get the schema of {@link #getExportedKeys(TableRef, CallOption...)} from the server. - * - *

    Should be identical to {@link FlightSqlProducer.Schemas#GET_EXPORTED_KEYS_SCHEMA}. - */ - public SchemaResult getExportedKeysSchema(final CallOption... options) { - final CommandGetExportedKeys command = CommandGetExportedKeys.getDefaultInstance(); - final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); - return client.getSchema(descriptor, options); - } - - /** - * Retrieves the foreign key columns for the given table. - * - * @param tableRef An object which hold info about catalog, dbSchema and table. - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getImportedKeys(final TableRef tableRef, final CallOption... options) { - Objects.requireNonNull(tableRef.getTable(), "Table cannot be null."); - - final CommandGetImportedKeys.Builder builder = CommandGetImportedKeys.newBuilder(); - - if (tableRef.getCatalog() != null) { - builder.setCatalog(tableRef.getCatalog()); - } - - if (tableRef.getDbSchema() != null) { - builder.setDbSchema(tableRef.getDbSchema()); - } - - Objects.requireNonNull(tableRef.getTable()); - builder.setTable(tableRef.getTable()); - - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getInfo(descriptor, options); - } - - /** - * Get the schema of {@link #getImportedKeys(TableRef, CallOption...)} from the server. - * - *

    Should be identical to {@link FlightSqlProducer.Schemas#GET_IMPORTED_KEYS_SCHEMA}. - */ - public SchemaResult getImportedKeysSchema(final CallOption... options) { - final CommandGetImportedKeys command = CommandGetImportedKeys.getDefaultInstance(); - final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); - return client.getSchema(descriptor, options); - } - - /** - * Retrieves a description of the foreign key columns that reference the given table's primary key - * columns (the foreign keys exported by a table). - * - * @param pkTableRef An object which hold info about catalog, dbSchema and table from a primary - * table. - * @param fkTableRef An object which hold info about catalog, dbSchema and table from a foreign - * table. - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getCrossReference( - final TableRef pkTableRef, final TableRef fkTableRef, final CallOption... 
options) { - Objects.requireNonNull(pkTableRef.getTable(), "Parent Table cannot be null."); - Objects.requireNonNull(fkTableRef.getTable(), "Foreign Table cannot be null."); - - final CommandGetCrossReference.Builder builder = CommandGetCrossReference.newBuilder(); - - if (pkTableRef.getCatalog() != null) { - builder.setPkCatalog(pkTableRef.getCatalog()); - } - - if (pkTableRef.getDbSchema() != null) { - builder.setPkDbSchema(pkTableRef.getDbSchema()); - } - - if (fkTableRef.getCatalog() != null) { - builder.setFkCatalog(fkTableRef.getCatalog()); - } - - if (fkTableRef.getDbSchema() != null) { - builder.setFkDbSchema(fkTableRef.getDbSchema()); - } - - builder.setPkTable(pkTableRef.getTable()); - builder.setFkTable(fkTableRef.getTable()); - - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getInfo(descriptor, options); - } - - /** - * Get the schema of {@link #getCrossReference(TableRef, TableRef, CallOption...)} from the - * server. - * - *

    Should be identical to {@link FlightSqlProducer.Schemas#GET_CROSS_REFERENCE_SCHEMA}. - */ - public SchemaResult getCrossReferenceSchema(final CallOption... options) { - final CommandGetCrossReference command = CommandGetCrossReference.getDefaultInstance(); - final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); - return client.getSchema(descriptor, options); - } - - /** - * Request a list of table types. - * - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo getTableTypes(final CallOption... options) { - final CommandGetTableTypes.Builder builder = CommandGetTableTypes.newBuilder(); - final FlightDescriptor descriptor = - FlightDescriptor.command(Any.pack(builder.build()).toByteArray()); - return client.getInfo(descriptor, options); - } - - /** - * Get the schema of {@link #getTableTypes(CallOption...)} from the server. - * - *

    Should be identical to {@link FlightSqlProducer.Schemas#GET_TABLE_TYPES_SCHEMA}. - */ - public SchemaResult getTableTypesSchema(final CallOption... options) { - final CommandGetTableTypes command = CommandGetTableTypes.getDefaultInstance(); - final FlightDescriptor descriptor = FlightDescriptor.command(Any.pack(command).toByteArray()); - return client.getSchema(descriptor, options); - } - - /** - * Create a prepared statement for a SQL query on the server. - * - * @param query The query to prepare. - * @param options RPC-layer hints for this call. - * @return The representation of the prepared statement which exists on the server. - */ - public PreparedStatement prepare(String query, CallOption... options) { - return prepare(query, /*transaction*/ null, options); - } - - /** - * Create a prepared statement for a SQL query on the server. - * - * @param query The query to prepare. - * @param transaction The transaction that this query is part of. - * @param options RPC-layer hints for this call. - * @return The representation of the prepared statement which exists on the server. - */ - public PreparedStatement prepare(String query, Transaction transaction, CallOption... options) { - ActionCreatePreparedStatementRequest.Builder builder = - ActionCreatePreparedStatementRequest.newBuilder().setQuery(query); - if (transaction != null) { - builder.setTransactionId(ByteString.copyFrom(transaction.getTransactionId())); - } - return new PreparedStatement( - client, - new Action( - FlightSqlUtils.FLIGHT_SQL_CREATE_PREPARED_STATEMENT.getType(), - Any.pack(builder.build()).toByteArray()), - options); - } - - /** - * Create a prepared statement for a Substrait plan on the server. - * - * @param plan The query to prepare. - * @param options RPC-layer hints for this call. - * @return The representation of the prepared statement which exists on the server. - */ - public PreparedStatement prepare(SubstraitPlan plan, CallOption... 
options) { - return prepare(plan, /*transaction*/ null, options); - } - - /** - * Create a prepared statement for a Substrait plan on the server. - * - * @param plan The query to prepare. - * @param transaction The transaction that this query is part of. - * @param options RPC-layer hints for this call. - * @return The representation of the prepared statement which exists on the server. - */ - public PreparedStatement prepare( - SubstraitPlan plan, Transaction transaction, CallOption... options) { - ActionCreatePreparedSubstraitPlanRequest.Builder builder = - ActionCreatePreparedSubstraitPlanRequest.newBuilder(); - builder - .getPlanBuilder() - .setPlan(ByteString.copyFrom(plan.getPlan())) - .setVersion(plan.getVersion()); - if (transaction != null) { - builder.setTransactionId(ByteString.copyFrom(transaction.getTransactionId())); - } - return new PreparedStatement( - client, - new Action( - FlightSqlUtils.FLIGHT_SQL_CREATE_PREPARED_SUBSTRAIT_PLAN.getType(), - Any.pack(builder.build()).toByteArray()), - options); - } - - /** Begin a transaction. */ - public Transaction beginTransaction(CallOption... options) { - final Action action = - new Action( - FlightSqlUtils.FLIGHT_SQL_BEGIN_TRANSACTION.getType(), - Any.pack(ActionBeginTransactionRequest.getDefaultInstance()).toByteArray()); - final Iterator preparedStatementResults = client.doAction(action, options); - final ActionBeginTransactionResult result = - FlightSqlUtils.unpackAndParseOrThrow( - preparedStatementResults.next().getBody(), ActionBeginTransactionResult.class); - preparedStatementResults.forEachRemaining((ignored) -> {}); - if (result.getTransactionId().isEmpty()) { - throw CallStatus.INTERNAL - .withDescription("Server returned an empty transaction ID") - .toRuntimeException(); - } - return new Transaction(result.getTransactionId().toByteArray()); - } - - /** Create a savepoint within a transaction. */ - public Savepoint beginSavepoint(Transaction transaction, String name, CallOption... 
options) { - Preconditions.checkArgument( - transaction.getTransactionId().length != 0, "Transaction must be initialized"); - ActionBeginSavepointRequest request = - ActionBeginSavepointRequest.newBuilder() - .setTransactionId(ByteString.copyFrom(transaction.getTransactionId())) - .setName(name) - .build(); - final Action action = - new Action( - FlightSqlUtils.FLIGHT_SQL_BEGIN_SAVEPOINT.getType(), Any.pack(request).toByteArray()); - final Iterator preparedStatementResults = client.doAction(action, options); - final ActionBeginSavepointResult result = - FlightSqlUtils.unpackAndParseOrThrow( - preparedStatementResults.next().getBody(), ActionBeginSavepointResult.class); - preparedStatementResults.forEachRemaining((ignored) -> {}); - if (result.getSavepointId().isEmpty()) { - throw CallStatus.INTERNAL - .withDescription("Server returned an empty transaction ID") - .toRuntimeException(); - } - return new Savepoint(result.getSavepointId().toByteArray()); - } - - /** Commit a transaction. */ - public void commit(Transaction transaction, CallOption... options) { - Preconditions.checkArgument( - transaction.getTransactionId().length != 0, "Transaction must be initialized"); - ActionEndTransactionRequest request = - ActionEndTransactionRequest.newBuilder() - .setTransactionId(ByteString.copyFrom(transaction.getTransactionId())) - .setActionValue( - ActionEndTransactionRequest.EndTransaction.END_TRANSACTION_COMMIT.getNumber()) - .build(); - final Action action = - new Action( - FlightSqlUtils.FLIGHT_SQL_END_TRANSACTION.getType(), Any.pack(request).toByteArray()); - final Iterator preparedStatementResults = client.doAction(action, options); - preparedStatementResults.forEachRemaining((ignored) -> {}); - } - - /** Release a savepoint. */ - public void release(Savepoint savepoint, CallOption... 
options) { - Preconditions.checkArgument( - savepoint.getSavepointId().length != 0, "Savepoint must be initialized"); - ActionEndSavepointRequest request = - ActionEndSavepointRequest.newBuilder() - .setSavepointId(ByteString.copyFrom(savepoint.getSavepointId())) - .setActionValue( - ActionEndSavepointRequest.EndSavepoint.END_SAVEPOINT_RELEASE.getNumber()) - .build(); - final Action action = - new Action( - FlightSqlUtils.FLIGHT_SQL_END_SAVEPOINT.getType(), Any.pack(request).toByteArray()); - final Iterator preparedStatementResults = client.doAction(action, options); - preparedStatementResults.forEachRemaining((ignored) -> {}); - } - - /** Rollback a transaction. */ - public void rollback(Transaction transaction, CallOption... options) { - Preconditions.checkArgument( - transaction.getTransactionId().length != 0, "Transaction must be initialized"); - ActionEndTransactionRequest request = - ActionEndTransactionRequest.newBuilder() - .setTransactionId(ByteString.copyFrom(transaction.getTransactionId())) - .setActionValue( - ActionEndTransactionRequest.EndTransaction.END_TRANSACTION_ROLLBACK.getNumber()) - .build(); - final Action action = - new Action( - FlightSqlUtils.FLIGHT_SQL_END_TRANSACTION.getType(), Any.pack(request).toByteArray()); - final Iterator preparedStatementResults = client.doAction(action, options); - preparedStatementResults.forEachRemaining((ignored) -> {}); - } - - /** Rollback to a savepoint. */ - public void rollback(Savepoint savepoint, CallOption... 
options) { - Preconditions.checkArgument( - savepoint.getSavepointId().length != 0, "Savepoint must be initialized"); - ActionEndSavepointRequest request = - ActionEndSavepointRequest.newBuilder() - .setSavepointId(ByteString.copyFrom(savepoint.getSavepointId())) - .setActionValue( - ActionEndSavepointRequest.EndSavepoint.END_SAVEPOINT_RELEASE.getNumber()) - .build(); - final Action action = - new Action( - FlightSqlUtils.FLIGHT_SQL_END_SAVEPOINT.getType(), Any.pack(request).toByteArray()); - final Iterator preparedStatementResults = client.doAction(action, options); - preparedStatementResults.forEachRemaining((ignored) -> {}); - } - - /** - * Cancel execution of a distributed query. - * - * @param request The query to cancel. - * @param options Call options. - * @return The server response. - */ - public CancelFlightInfoResult cancelFlightInfo( - CancelFlightInfoRequest request, CallOption... options) { - return client.cancelFlightInfo(request, options); - } - - /** - * Explicitly cancel a running query. - * - *

    This lets a single client explicitly cancel work, no matter how many clients are - * involved/whether the query is distributed or not, given server support. The - * transaction/statement is not rolled back; it is the application's job to commit or rollback as - * appropriate. This only indicates the client no longer wishes to read the remainder of the query - * results or continue submitting data. - * - * @deprecated Prefer {@link #cancelFlightInfo}. - */ - @Deprecated - public CancelResult cancelQuery(FlightInfo info, CallOption... options) { - ActionCancelQueryRequest request = - ActionCancelQueryRequest.newBuilder() - .setInfo(ByteString.copyFrom(info.serialize())) - .build(); - final Action action = - new Action( - FlightSqlUtils.FLIGHT_SQL_CANCEL_QUERY.getType(), Any.pack(request).toByteArray()); - final Iterator preparedStatementResults = client.doAction(action, options); - final ActionCancelQueryResult result = - FlightSqlUtils.unpackAndParseOrThrow( - preparedStatementResults.next().getBody(), ActionCancelQueryResult.class); - preparedStatementResults.forEachRemaining((ignored) -> {}); - switch (result.getResult()) { - case CANCEL_RESULT_UNSPECIFIED: - return CancelResult.UNSPECIFIED; - case CANCEL_RESULT_CANCELLED: - return CancelResult.CANCELLED; - case CANCEL_RESULT_CANCELLING: - return CancelResult.CANCELLING; - case CANCEL_RESULT_NOT_CANCELLABLE: - return CancelResult.NOT_CANCELLABLE; - case UNRECOGNIZED: - default: - throw CallStatus.INTERNAL - .withDescription("Unknown result: " + result.getResult()) - .toRuntimeException(); - } - } - - /** - * Request the server to extend the lifetime of a query result set. - * - * @param request The result set partition. - * @param options Call options. - * @return The new endpoint with an updated expiration time. - */ - public FlightEndpoint renewFlightEndpoint( - RenewFlightEndpointRequest request, CallOption... 
options) { - return client.renewFlightEndpoint(request, options); - } - - public SetSessionOptionsResult setSessionOptions( - SetSessionOptionsRequest request, CallOption... options) { - return client.setSessionOptions(request, options); - } - - public GetSessionOptionsResult getSessionOptions( - GetSessionOptionsRequest request, CallOption... options) { - return client.getSessionOptions(request, options); - } - - public CloseSessionResult closeSession(CloseSessionRequest request, CallOption... options) { - return client.closeSession(request, options); - } - - @Override - public void close() throws Exception { - AutoCloseables.close(client); - } - - /** Class to encapsulate Flight SQL bulk ingest request options. * */ - public static class ExecuteIngestOptions { - private final String table; - private final TableDefinitionOptions tableDefinitionOptions; - private final boolean useTemporaryTable; - private final String catalog; - private final String schema; - private final Map options; - - /** - * Constructor. - * - * @param table The table to load data into. - * @param tableDefinitionOptions The behavior for handling the table definition. - * @param catalog The catalog of the destination table to load data into. If null, a - * backend-specific default may be used. - * @param schema The schema of the destination table to load data into. If null, a - * backend-specific default may be used. - * @param options Backend-specific options. Can be null if there are no options to be set. - */ - public ExecuteIngestOptions( - String table, - TableDefinitionOptions tableDefinitionOptions, - String catalog, - String schema, - Map options) { - this(table, tableDefinitionOptions, false, catalog, schema, options); - } - - /** - * Constructor. - * - * @param table The table to load data into. - * @param tableDefinitionOptions The behavior for handling the table definition. - * @param useTemporaryTable Use a temporary table for bulk ingestion. 
Temporary table may get - * placed in a backend-specific schema and/or catalog and gets dropped at the end of the - * session. If backend does not support ingesting using a temporary table or an explicit - * choice of schema or catalog is incompatible with the server's namespacing decision, an - * error is returned as part of {@link #executeIngest} request. - * @param catalog The catalog of the destination table to load data into. If null, a - * backend-specific default may be used. - * @param schema The schema of the destination table to load data into. If null, a - * backend-specific default may be used. - * @param options Backend-specific options. Can be null if there are no options to be set. - */ - public ExecuteIngestOptions( - String table, - TableDefinitionOptions tableDefinitionOptions, - boolean useTemporaryTable, - String catalog, - String schema, - Map options) { - this.table = table; - this.tableDefinitionOptions = tableDefinitionOptions; - this.useTemporaryTable = useTemporaryTable; - this.catalog = catalog; - this.schema = schema; - this.options = options; - } - - protected void updateCommandBuilder(CommandStatementIngest.Builder builder) { - builder.setTable(table); - builder.setTableDefinitionOptions(tableDefinitionOptions); - builder.setTemporary(useTemporaryTable); - if (!isNull(catalog)) { - builder.setCatalog(catalog); - } - if (!isNull(schema)) { - builder.setSchema(schema); - } - if (!isNull(options)) { - builder.putAllOptions(options); - } - } - } - - /** Helper class to encapsulate Flight SQL prepared statement logic. */ - public static class PreparedStatement implements AutoCloseable { - private final FlightClient client; - private final ActionCreatePreparedStatementResult preparedStatementResult; - private VectorSchemaRoot parameterBindingRoot; - private boolean isClosed; - private Schema resultSetSchema; - private Schema parameterSchema; - - PreparedStatement(FlightClient client, Action action, CallOption... 
options) { - this.client = client; - - final Iterator preparedStatementResults = client.doAction(action, options); - preparedStatementResult = - FlightSqlUtils.unpackAndParseOrThrow( - preparedStatementResults.next().getBody(), ActionCreatePreparedStatementResult.class); - isClosed = false; - } - - /** - * Set the {@link #parameterBindingRoot} containing the parameter binding from a {@link - * PreparedStatement} operation. - * - * @param parameterBindingRoot a {@code VectorSchemaRoot} object containing the values to be - * used in the {@code PreparedStatement} setters. - */ - public void setParameters(final VectorSchemaRoot parameterBindingRoot) { - if (parameterBindingRoot == this.parameterBindingRoot) { - // Nothing to do if we're attempting to set the same parameters again. - return; - } - clearParameters(); - this.parameterBindingRoot = parameterBindingRoot; - } - - /** - * Closes the {@link #parameterBindingRoot}, which contains the parameter binding from a {@link - * PreparedStatement} operation, releasing its resources. - */ - public void clearParameters() { - if (parameterBindingRoot != null) { - parameterBindingRoot.close(); - } - } - - /** - * Returns the Schema of the resultset. - * - * @return the Schema of the resultset. - */ - public Schema getResultSetSchema() { - if (resultSetSchema == null) { - final ByteString bytes = preparedStatementResult.getDatasetSchema(); - resultSetSchema = deserializeSchema(bytes); - } - return resultSetSchema; - } - - /** - * Returns the Schema of the parameters. - * - * @return the Schema of the parameters. - */ - public Schema getParameterSchema() { - if (parameterSchema == null) { - final ByteString bytes = preparedStatementResult.getParameterSchema(); - parameterSchema = deserializeSchema(bytes); - } - return parameterSchema; - } - - /** Get the schema of the result set (should be identical to {@link #getResultSetSchema()}). */ - public SchemaResult fetchSchema(CallOption... 
options) { - checkOpen(); - - final FlightDescriptor descriptor = - FlightDescriptor.command( - Any.pack( - CommandPreparedStatementQuery.newBuilder() - .setPreparedStatementHandle( - preparedStatementResult.getPreparedStatementHandle()) - .build()) - .toByteArray()); - return client.getSchema(descriptor, options); - } - - private Schema deserializeSchema(final ByteString bytes) { - try { - return bytes.isEmpty() - ? new Schema(Collections.emptyList()) - : MessageSerializer.deserializeSchema( - new ReadChannel( - Channels.newChannel(new ByteArrayInputStream(bytes.toByteArray())))); - } catch (final IOException e) { - throw new RuntimeException("Failed to deserialize schema", e); - } - } - - /** - * Executes the prepared statement query on the server. - * - * @param options RPC-layer hints for this call. - * @return a FlightInfo object representing the stream(s) to fetch. - */ - public FlightInfo execute(final CallOption... options) { - checkOpen(); - - FlightDescriptor descriptor = - FlightDescriptor.command( - Any.pack( - CommandPreparedStatementQuery.newBuilder() - .setPreparedStatementHandle( - preparedStatementResult.getPreparedStatementHandle()) - .build()) - .toByteArray()); - - if (parameterBindingRoot != null && parameterBindingRoot.getRowCount() > 0) { - try (final SyncPutListener putListener = putParameters(descriptor, options)) { - if (getParameterSchema().getFields().size() > 0 - && parameterBindingRoot != null - && parameterBindingRoot.getRowCount() > 0) { - final PutResult read = putListener.read(); - if (read != null) { - try (final ArrowBuf metadata = read.getApplicationMetadata()) { - final FlightSql.DoPutPreparedStatementResult doPutPreparedStatementResult = - FlightSql.DoPutPreparedStatementResult.parseFrom(metadata.nioBuffer()); - descriptor = - FlightDescriptor.command( - Any.pack( - CommandPreparedStatementQuery.newBuilder() - .setPreparedStatementHandle( - doPutPreparedStatementResult.getPreparedStatementHandle()) - .build()) - 
.toByteArray()); - } - } - } - } catch (final InterruptedException e) { - throw CallStatus.CANCELLED.withCause(e).toRuntimeException(); - } catch (final ExecutionException e) { - throw CallStatus.CANCELLED.withCause(e.getCause()).toRuntimeException(); - } catch (final InvalidProtocolBufferException e) { - throw CallStatus.INTERNAL.withCause(e).toRuntimeException(); - } - } - - return client.getInfo(descriptor, options); - } - - private SyncPutListener putParameters(FlightDescriptor descriptor, CallOption... options) { - final SyncPutListener putListener = new SyncPutListener(); - - FlightClient.ClientStreamListener listener = - client.startPut(descriptor, parameterBindingRoot, putListener, options); - - listener.putNext(); - listener.completed(); - listener.getResult(); - - return putListener; - } - - /** - * Checks whether this client is open. - * - * @throws IllegalStateException if client is closed. - */ - protected final void checkOpen() { - Preconditions.checkState(!isClosed, "Statement closed"); - } - - /** - * Executes the prepared statement update on the server. - * - * @param options RPC-layer hints for this call. - * @return the count of updated records - */ - public long executeUpdate(final CallOption... options) { - checkOpen(); - final FlightDescriptor descriptor = - FlightDescriptor.command( - Any.pack( - CommandPreparedStatementUpdate.newBuilder() - .setPreparedStatementHandle( - preparedStatementResult.getPreparedStatementHandle()) - .build()) - .toByteArray()); - setParameters(parameterBindingRoot == null ? 
VectorSchemaRoot.of() : parameterBindingRoot); - SyncPutListener putListener = putParameters(descriptor, options); - - try { - final PutResult read = putListener.read(); - try (final ArrowBuf metadata = read.getApplicationMetadata()) { - final DoPutUpdateResult doPutUpdateResult = - DoPutUpdateResult.parseFrom(metadata.nioBuffer()); - return doPutUpdateResult.getRecordCount(); - } - } catch (final InterruptedException e) { - throw CallStatus.CANCELLED.withCause(e).toRuntimeException(); - } catch (final ExecutionException e) { - throw CallStatus.CANCELLED.withCause(e.getCause()).toRuntimeException(); - } catch (final InvalidProtocolBufferException e) { - throw CallStatus.INTERNAL.withCause(e).toRuntimeException(); - } - } - - /** - * Closes the client. - * - * @param options RPC-layer hints for this call. - */ - public void close(final CallOption... options) { - if (isClosed) { - return; - } - isClosed = true; - final Action action = - new Action( - FlightSqlUtils.FLIGHT_SQL_CLOSE_PREPARED_STATEMENT.getType(), - Any.pack( - ActionClosePreparedStatementRequest.newBuilder() - .setPreparedStatementHandle( - preparedStatementResult.getPreparedStatementHandle()) - .build()) - .toByteArray()); - final Iterator closePreparedStatementResults = client.doAction(action, options); - closePreparedStatementResults.forEachRemaining(result -> {}); - clearParameters(); - } - - @Override - public void close() { - close(new CallOption[0]); - } - - /** - * Returns if the prepared statement is already closed. - * - * @return true if the prepared statement is already closed. - */ - public boolean isClosed() { - return isClosed; - } - } - - /** A handle for an active savepoint. */ - public static class Savepoint { - private final byte[] transactionId; - - public Savepoint(byte[] transactionId) { - this.transactionId = transactionId; - } - - public byte[] getSavepointId() { - return transactionId; - } - } - - /** A handle for an active transaction. 
*/ - public static class Transaction { - private final byte[] transactionId; - - public Transaction(byte[] transactionId) { - this.transactionId = transactionId; - } - - public byte[] getTransactionId() { - return transactionId; - } - } - - /** A wrapper around a Substrait plan and a Substrait version. */ - public static final class SubstraitPlan { - private final byte[] plan; - private final String version; - - public SubstraitPlan(byte[] plan, String version) { - this.plan = Preconditions.checkNotNull(plan); - this.version = Preconditions.checkNotNull(version); - } - - public byte[] getPlan() { - return plan; - } - - public String getVersion() { - return version; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - SubstraitPlan that = (SubstraitPlan) o; - - if (!Arrays.equals(getPlan(), that.getPlan())) { - return false; - } - return getVersion().equals(that.getVersion()); - } - - @Override - public int hashCode() { - int result = Arrays.hashCode(getPlan()); - result = 31 * result + getVersion().hashCode(); - return result; - } - - @Override - public String toString() { - return "SubstraitPlan{" - + "plan=" - + Arrays.toString(plan) - + ", version='" - + version - + '\'' - + '}'; - } - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlColumnMetadata.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlColumnMetadata.java deleted file mode 100644 index 1bcc55a6605b5..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlColumnMetadata.java +++ /dev/null @@ -1,332 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - -/** - * Metadata for a column in a Flight SQL query. - * - *

    This can be used with FlightSqlClient to access column's metadata contained in schemas - * returned by GetTables and query execution as follows: - * - *

    - *   FlightSqlColumnMetadata metadata = new FlightSqlColumnMetadata(field.getMetadata());
    - *   Integer precision = metadata.getPrecision();
    - * 
    - * - * FlightSqlProducer can use this to set metadata on a column in a schema as follows: - * - *
    - *   FlightSqlColumnMetadata metadata = new FlightSqlColumnMetadata.Builder()
    - *         .precision(10)
    - *         .scale(5)
    - *         .build();
    - *   Field field = new Field("column", new FieldType(..., metadata.getMetadataMap()), null);
    - * 
    - */ -public class FlightSqlColumnMetadata { - - private static final String CATALOG_NAME = "ARROW:FLIGHT:SQL:CATALOG_NAME"; - private static final String SCHEMA_NAME = "ARROW:FLIGHT:SQL:SCHEMA_NAME"; - private static final String TABLE_NAME = "ARROW:FLIGHT:SQL:TABLE_NAME"; - private static final String TYPE_NAME = "ARROW:FLIGHT:SQL:TYPE_NAME"; - private static final String PRECISION = "ARROW:FLIGHT:SQL:PRECISION"; - private static final String SCALE = "ARROW:FLIGHT:SQL:SCALE"; - private static final String IS_AUTO_INCREMENT = "ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT"; - private static final String IS_CASE_SENSITIVE = "ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE"; - private static final String IS_READ_ONLY = "ARROW:FLIGHT:SQL:IS_READ_ONLY"; - private static final String IS_SEARCHABLE = "ARROW:FLIGHT:SQL:IS_SEARCHABLE"; - - private static final String BOOLEAN_TRUE_STR = "1"; - private static final String BOOLEAN_FALSE_STR = "0"; - - private final Map metadataMap; - - /** Creates a new instance of FlightSqlColumnMetadata. */ - public FlightSqlColumnMetadata(Map metadataMap) { - this.metadataMap = new HashMap<>(metadataMap); - } - - /** - * Returns the metadata map. - * - * @return The metadata map. - */ - public Map getMetadataMap() { - return Collections.unmodifiableMap(metadataMap); - } - - /** - * Returns the catalog name. - * - * @return The catalog name. - */ - public String getCatalogName() { - return metadataMap.get(CATALOG_NAME); - } - - /** - * Returns the schema name. - * - * @return The schema name. - */ - public String getSchemaName() { - return metadataMap.get(SCHEMA_NAME); - } - - /** - * Returns the table name. - * - * @return The table name. - */ - public String getTableName() { - return metadataMap.get(TABLE_NAME); - } - - /** - * Returns the type name. - * - * @return The type name. - */ - public String getTypeName() { - return metadataMap.get(TYPE_NAME); - } - - /** - * Returns the precision / column size. - * - * @return The precision / column size. 
- */ - public Integer getPrecision() { - String value = metadataMap.get(PRECISION); - if (value == null) { - return null; - } - - return Integer.valueOf(value); - } - - /** - * Returns the scale / decimal digits. - * - * @return The scale / decimal digits. - */ - public Integer getScale() { - String value = metadataMap.get(SCALE); - if (value == null) { - return null; - } - - return Integer.valueOf(value); - } - - /** - * Returns if the column is auto incremented. - * - * @return True if the column is auto incremented, false otherwise. - */ - public Boolean isAutoIncrement() { - String value = metadataMap.get(IS_AUTO_INCREMENT); - if (value == null) { - return null; - } - - return stringToBoolean(value); - } - - /** - * Returns if the column is case-sensitive. - * - * @return True if the column is case-sensitive, false otherwise. - */ - public Boolean isCaseSensitive() { - String value = metadataMap.get(IS_CASE_SENSITIVE); - if (value == null) { - return null; - } - - return stringToBoolean(value); - } - - /** - * Returns if the column is read only. - * - * @return True if the column is read only, false otherwise. - */ - public Boolean isReadOnly() { - String value = metadataMap.get(IS_READ_ONLY); - if (value == null) { - return null; - } - - return stringToBoolean(value); - } - - /** - * Returns if the column is searchable. - * - * @return True if the column is searchable, false otherwise. - */ - public Boolean isSearchable() { - String value = metadataMap.get(IS_SEARCHABLE); - if (value == null) { - return null; - } - - return stringToBoolean(value); - } - - /** Builder of FlightSqlColumnMetadata, used on FlightSqlProducer implementations. */ - public static class Builder { - private final Map metadataMap; - - /** Creates a new instance of FlightSqlColumnMetadata.Builder. */ - public Builder() { - this.metadataMap = new HashMap<>(); - } - - /** - * Sets the catalog name. - * - * @param catalogName the catalog name. - * @return This builder. 
- */ - public Builder catalogName(String catalogName) { - metadataMap.put(CATALOG_NAME, catalogName); - return this; - } - - /** - * Sets the schema name. - * - * @param schemaName The schema name. - * @return This builder. - */ - public Builder schemaName(String schemaName) { - metadataMap.put(SCHEMA_NAME, schemaName); - return this; - } - - /** - * Sets the table name. - * - * @param tableName The table name. - * @return This builder. - */ - public Builder tableName(String tableName) { - metadataMap.put(TABLE_NAME, tableName); - return this; - } - - /** - * Sets the type name. - * - * @param typeName The type name. - * @return This builder. - */ - public Builder typeName(String typeName) { - metadataMap.put(TYPE_NAME, typeName); - return this; - } - - /** - * Sets the precision / column size. - * - * @param precision The precision / column size. - * @return This builder. - */ - public Builder precision(int precision) { - metadataMap.put(PRECISION, Integer.toString(precision)); - return this; - } - - /** - * Sets the scale / decimal digits. - * - * @param scale The scale / decimal digits. - * @return This builder. - */ - public Builder scale(int scale) { - metadataMap.put(SCALE, Integer.toString(scale)); - return this; - } - - /** - * Sets if the column is auto incremented. - * - * @param isAutoIncrement True if the column is auto incremented. - * @return This builder. - */ - public Builder isAutoIncrement(boolean isAutoIncrement) { - metadataMap.put(IS_AUTO_INCREMENT, booleanToString(isAutoIncrement)); - return this; - } - - /** - * Sets if the column is case-sensitive. - * - * @param isCaseSensitive If the column is case-sensitive. - * @return This builder. - */ - public Builder isCaseSensitive(boolean isCaseSensitive) { - metadataMap.put(IS_CASE_SENSITIVE, booleanToString(isCaseSensitive)); - return this; - } - - /** - * Sets if the column is read only. - * - * @param isReadOnly If the column is read only. - * @return This builder. 
- */ - public Builder isReadOnly(boolean isReadOnly) { - metadataMap.put(IS_READ_ONLY, booleanToString(isReadOnly)); - return this; - } - - /** - * Sets if the column is searchable. - * - * @param isSearchable If the column is searchable. - * @return This builder. - */ - public Builder isSearchable(boolean isSearchable) { - metadataMap.put(IS_SEARCHABLE, booleanToString(isSearchable)); - return this; - } - - /** - * Builds a new instance of FlightSqlColumnMetadata. - * - * @return A new instance of FlightSqlColumnMetadata. - */ - public FlightSqlColumnMetadata build() { - return new FlightSqlColumnMetadata(metadataMap); - } - } - - private static String booleanToString(boolean boolValue) { - return boolValue ? BOOLEAN_TRUE_STR : BOOLEAN_FALSE_STR; - } - - private static boolean stringToBoolean(String value) { - return value.equals(BOOLEAN_TRUE_STR); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlProducer.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlProducer.java deleted file mode 100644 index 9465e5ff88053..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlProducer.java +++ /dev/null @@ -1,1207 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import static java.util.Arrays.asList; -import static java.util.Collections.singletonList; -import static java.util.stream.IntStream.range; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionBeginSavepointRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionBeginSavepointResult; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionBeginTransactionRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionBeginTransactionResult; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionCancelQueryRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionCreatePreparedStatementResult; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionCreatePreparedSubstraitPlanRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionEndSavepointRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.ActionEndTransactionRequest; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetCrossReference; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetDbSchemas; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetExportedKeys; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetImportedKeys; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetXdbcTypeInfo; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementSubstraitPlan; -import static org.apache.arrow.vector.complex.MapVector.DATA_VECTOR_NAME; -import static org.apache.arrow.vector.complex.MapVector.KEY_NAME; -import static org.apache.arrow.vector.complex.MapVector.VALUE_NAME; -import static org.apache.arrow.vector.types.Types.MinorType.BIGINT; -import static org.apache.arrow.vector.types.Types.MinorType.BIT; -import static 
org.apache.arrow.vector.types.Types.MinorType.INT; -import static org.apache.arrow.vector.types.Types.MinorType.LIST; -import static org.apache.arrow.vector.types.Types.MinorType.STRUCT; -import static org.apache.arrow.vector.types.Types.MinorType.UINT4; -import static org.apache.arrow.vector.types.Types.MinorType.VARCHAR; - -import com.google.common.collect.ImmutableList; -import com.google.protobuf.Any; -import com.google.protobuf.InvalidProtocolBufferException; -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.ByteBuffer; -import java.util.List; -import org.apache.arrow.flight.Action; -import org.apache.arrow.flight.ActionType; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.CancelFlightInfoRequest; -import org.apache.arrow.flight.CancelStatus; -import org.apache.arrow.flight.CloseSessionRequest; -import org.apache.arrow.flight.CloseSessionResult; -import org.apache.arrow.flight.FlightConstants; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.GetSessionOptionsRequest; -import org.apache.arrow.flight.GetSessionOptionsResult; -import org.apache.arrow.flight.PutResult; -import org.apache.arrow.flight.RenewFlightEndpointRequest; -import org.apache.arrow.flight.Result; -import org.apache.arrow.flight.SchemaResult; -import org.apache.arrow.flight.SetSessionOptionsRequest; -import org.apache.arrow.flight.SetSessionOptionsResult; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.flight.sql.impl.FlightSql.ActionClosePreparedStatementRequest; -import org.apache.arrow.flight.sql.impl.FlightSql.ActionCreatePreparedStatementRequest; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetCatalogs; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetPrimaryKeys; -import 
org.apache.arrow.flight.sql.impl.FlightSql.CommandGetSqlInfo; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetTableTypes; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetTables; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandPreparedStatementQuery; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandPreparedStatementUpdate; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementIngest; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementQuery; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementUpdate; -import org.apache.arrow.flight.sql.impl.FlightSql.DoPutUpdateResult; -import org.apache.arrow.flight.sql.impl.FlightSql.TicketStatementQuery; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.Union; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; - -/** API to Implement an Arrow Flight SQL producer. */ -public interface FlightSqlProducer extends FlightProducer, AutoCloseable { - /** - * Depending on the provided command, method either: 1. Return information about a SQL query, or - * 2. Return information about a prepared statement. In this case, parameters binding is allowed. - * - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return information about the given SQL query, or the given prepared statement. 
- */ - @Override - default FlightInfo getFlightInfo(CallContext context, FlightDescriptor descriptor) { - final Any command = FlightSqlUtils.parseOrThrow(descriptor.getCommand()); - - if (command.is(CommandStatementQuery.class)) { - return getFlightInfoStatement( - FlightSqlUtils.unpackOrThrow(command, CommandStatementQuery.class), context, descriptor); - } else if (command.is(CommandStatementSubstraitPlan.class)) { - return getFlightInfoSubstraitPlan( - FlightSqlUtils.unpackOrThrow(command, CommandStatementSubstraitPlan.class), - context, - descriptor); - } else if (command.is(CommandPreparedStatementQuery.class)) { - return getFlightInfoPreparedStatement( - FlightSqlUtils.unpackOrThrow(command, CommandPreparedStatementQuery.class), - context, - descriptor); - } else if (command.is(CommandGetCatalogs.class)) { - return getFlightInfoCatalogs( - FlightSqlUtils.unpackOrThrow(command, CommandGetCatalogs.class), context, descriptor); - } else if (command.is(CommandGetDbSchemas.class)) { - return getFlightInfoSchemas( - FlightSqlUtils.unpackOrThrow(command, CommandGetDbSchemas.class), context, descriptor); - } else if (command.is(CommandGetTables.class)) { - return getFlightInfoTables( - FlightSqlUtils.unpackOrThrow(command, CommandGetTables.class), context, descriptor); - } else if (command.is(CommandGetTableTypes.class)) { - return getFlightInfoTableTypes( - FlightSqlUtils.unpackOrThrow(command, CommandGetTableTypes.class), context, descriptor); - } else if (command.is(CommandGetSqlInfo.class)) { - return getFlightInfoSqlInfo( - FlightSqlUtils.unpackOrThrow(command, CommandGetSqlInfo.class), context, descriptor); - } else if (command.is(CommandGetPrimaryKeys.class)) { - return getFlightInfoPrimaryKeys( - FlightSqlUtils.unpackOrThrow(command, CommandGetPrimaryKeys.class), context, descriptor); - } else if (command.is(CommandGetExportedKeys.class)) { - return getFlightInfoExportedKeys( - FlightSqlUtils.unpackOrThrow(command, CommandGetExportedKeys.class), context, 
descriptor); - } else if (command.is(CommandGetImportedKeys.class)) { - return getFlightInfoImportedKeys( - FlightSqlUtils.unpackOrThrow(command, CommandGetImportedKeys.class), context, descriptor); - } else if (command.is(CommandGetCrossReference.class)) { - return getFlightInfoCrossReference( - FlightSqlUtils.unpackOrThrow(command, CommandGetCrossReference.class), - context, - descriptor); - } else if (command.is(CommandGetXdbcTypeInfo.class)) { - return getFlightInfoTypeInfo( - FlightSqlUtils.unpackOrThrow(command, CommandGetXdbcTypeInfo.class), context, descriptor); - } - - throw CallStatus.INVALID_ARGUMENT - .withDescription("Unrecognized request: " + command.getTypeUrl()) - .toRuntimeException(); - } - - /** - * Returns the schema of the result produced by the SQL query. - * - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return the result set schema. - */ - @Override - default SchemaResult getSchema(CallContext context, FlightDescriptor descriptor) { - final Any command = FlightSqlUtils.parseOrThrow(descriptor.getCommand()); - - if (command.is(CommandStatementQuery.class)) { - return getSchemaStatement( - FlightSqlUtils.unpackOrThrow(command, CommandStatementQuery.class), context, descriptor); - } else if (command.is(CommandPreparedStatementQuery.class)) { - return getSchemaPreparedStatement( - FlightSqlUtils.unpackOrThrow(command, CommandPreparedStatementQuery.class), - context, - descriptor); - } else if (command.is(CommandStatementSubstraitPlan.class)) { - return getSchemaSubstraitPlan( - FlightSqlUtils.unpackOrThrow(command, CommandStatementSubstraitPlan.class), - context, - descriptor); - } else if (command.is(CommandGetCatalogs.class)) { - return new SchemaResult(Schemas.GET_CATALOGS_SCHEMA); - } else if (command.is(CommandGetCrossReference.class)) { - return new SchemaResult(Schemas.GET_CROSS_REFERENCE_SCHEMA); - } else if (command.is(CommandGetDbSchemas.class)) { - return new 
SchemaResult(Schemas.GET_SCHEMAS_SCHEMA); - } else if (command.is(CommandGetExportedKeys.class)) { - return new SchemaResult(Schemas.GET_EXPORTED_KEYS_SCHEMA); - } else if (command.is(CommandGetImportedKeys.class)) { - return new SchemaResult(Schemas.GET_IMPORTED_KEYS_SCHEMA); - } else if (command.is(CommandGetPrimaryKeys.class)) { - return new SchemaResult(Schemas.GET_PRIMARY_KEYS_SCHEMA); - } else if (command.is(CommandGetTables.class)) { - if (FlightSqlUtils.unpackOrThrow(command, CommandGetTables.class).getIncludeSchema()) { - return new SchemaResult(Schemas.GET_TABLES_SCHEMA); - } - return new SchemaResult(Schemas.GET_TABLES_SCHEMA_NO_SCHEMA); - } else if (command.is(CommandGetTableTypes.class)) { - return new SchemaResult(Schemas.GET_TABLE_TYPES_SCHEMA); - } else if (command.is(CommandGetSqlInfo.class)) { - return new SchemaResult(Schemas.GET_SQL_INFO_SCHEMA); - } else if (command.is(CommandGetXdbcTypeInfo.class)) { - return new SchemaResult(Schemas.GET_TYPE_INFO_SCHEMA); - } - - throw CallStatus.INVALID_ARGUMENT - .withDescription("Unrecognized request: " + command.getTypeUrl()) - .toRuntimeException(); - } - - /** - * Depending on the provided command, method either: 1. Return data for a stream produced by - * executing the provided SQL query, or 2. Return data for a prepared statement. In this case, - * parameters binding is allowed. - * - * @param context Per-call context. - * @param ticket The application-defined ticket identifying this stream. - * @param listener An interface for sending data back to the client. 
- */ - @Override - default void getStream(CallContext context, Ticket ticket, ServerStreamListener listener) { - final Any command; - - try { - command = Any.parseFrom(ticket.getBytes()); - } catch (InvalidProtocolBufferException e) { - listener.error(e); - return; - } - - if (command.is(TicketStatementQuery.class)) { - getStreamStatement( - FlightSqlUtils.unpackOrThrow(command, TicketStatementQuery.class), context, listener); - } else if (command.is(CommandPreparedStatementQuery.class)) { - getStreamPreparedStatement( - FlightSqlUtils.unpackOrThrow(command, CommandPreparedStatementQuery.class), - context, - listener); - } else if (command.is(CommandGetCatalogs.class)) { - getStreamCatalogs(context, listener); - } else if (command.is(CommandGetDbSchemas.class)) { - getStreamSchemas( - FlightSqlUtils.unpackOrThrow(command, CommandGetDbSchemas.class), context, listener); - } else if (command.is(CommandGetTables.class)) { - getStreamTables( - FlightSqlUtils.unpackOrThrow(command, CommandGetTables.class), context, listener); - } else if (command.is(CommandGetTableTypes.class)) { - getStreamTableTypes(context, listener); - } else if (command.is(CommandGetSqlInfo.class)) { - getStreamSqlInfo( - FlightSqlUtils.unpackOrThrow(command, CommandGetSqlInfo.class), context, listener); - } else if (command.is(CommandGetPrimaryKeys.class)) { - getStreamPrimaryKeys( - FlightSqlUtils.unpackOrThrow(command, CommandGetPrimaryKeys.class), context, listener); - } else if (command.is(CommandGetExportedKeys.class)) { - getStreamExportedKeys( - FlightSqlUtils.unpackOrThrow(command, CommandGetExportedKeys.class), context, listener); - } else if (command.is(CommandGetImportedKeys.class)) { - getStreamImportedKeys( - FlightSqlUtils.unpackOrThrow(command, CommandGetImportedKeys.class), context, listener); - } else if (command.is(CommandGetCrossReference.class)) { - getStreamCrossReference( - FlightSqlUtils.unpackOrThrow(command, CommandGetCrossReference.class), context, listener); - } else if 
(command.is(CommandGetXdbcTypeInfo.class)) { - getStreamTypeInfo( - FlightSqlUtils.unpackOrThrow(command, CommandGetXdbcTypeInfo.class), context, listener); - } else { - throw CallStatus.INVALID_ARGUMENT - .withDescription("The defined request is invalid.") - .toRuntimeException(); - } - } - - /** - * Depending on the provided command, method either: 1. Execute provided SQL query as an update - * statement, or 2. Execute provided update SQL query prepared statement. In this case, parameters - * binding is allowed, or 3. Binds parameters to the provided prepared statement, or 4. Bulk - * ingests data provided through the flightStream. - * - * @param context Per-call context. - * @param flightStream The data stream being uploaded. - * @param ackStream The data stream listener for update result acknowledgement. - * @return a Runnable to process the stream. - */ - @Override - default Runnable acceptPut( - CallContext context, FlightStream flightStream, StreamListener ackStream) { - final Any command = FlightSqlUtils.parseOrThrow(flightStream.getDescriptor().getCommand()); - - if (command.is(CommandStatementUpdate.class)) { - return acceptPutStatement( - FlightSqlUtils.unpackOrThrow(command, CommandStatementUpdate.class), - context, - flightStream, - ackStream); - } else if (command.is(CommandStatementIngest.class)) { - return acceptPutStatementBulkIngest( - FlightSqlUtils.unpackOrThrow(command, CommandStatementIngest.class), - context, - flightStream, - ackStream); - } else if (command.is(CommandStatementSubstraitPlan.class)) { - return acceptPutSubstraitPlan( - FlightSqlUtils.unpackOrThrow(command, CommandStatementSubstraitPlan.class), - context, - flightStream, - ackStream); - } else if (command.is(CommandPreparedStatementUpdate.class)) { - return acceptPutPreparedStatementUpdate( - FlightSqlUtils.unpackOrThrow(command, CommandPreparedStatementUpdate.class), - context, - flightStream, - ackStream); - } else if (command.is(CommandPreparedStatementQuery.class)) { - 
return acceptPutPreparedStatementQuery( - FlightSqlUtils.unpackOrThrow(command, CommandPreparedStatementQuery.class), - context, - flightStream, - ackStream); - } - - throw CallStatus.INVALID_ARGUMENT - .withDescription("The defined request is invalid.") - .toRuntimeException(); - } - - /** - * Lists all available Flight SQL actions. - * - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - @Override - default void listActions(CallContext context, StreamListener listener) { - FlightSqlUtils.FLIGHT_SQL_ACTIONS.forEach(listener::onNext); - listener.onCompleted(); - } - - /** - * Performs the requested Flight SQL action. - * - * @param context Per-call context. - * @param action Client-supplied parameters. - * @param listener A stream of responses. - */ - @Override - default void doAction(CallContext context, Action action, StreamListener listener) { - final String actionType = action.getType(); - - if (actionType.equals(FlightSqlUtils.FLIGHT_SQL_BEGIN_SAVEPOINT.getType())) { - final ActionBeginSavepointRequest request = - FlightSqlUtils.unpackAndParseOrThrow(action.getBody(), ActionBeginSavepointRequest.class); - beginSavepoint(request, context, new ProtoListener<>(listener)); - } else if (actionType.equals(FlightSqlUtils.FLIGHT_SQL_BEGIN_TRANSACTION.getType())) { - final ActionBeginTransactionRequest request = - FlightSqlUtils.unpackAndParseOrThrow( - action.getBody(), ActionBeginTransactionRequest.class); - beginTransaction(request, context, new ProtoListener<>(listener)); - } else if (actionType.equals(FlightSqlUtils.FLIGHT_SQL_CANCEL_QUERY.getType())) { - //noinspection deprecation - final ActionCancelQueryRequest request = - FlightSqlUtils.unpackAndParseOrThrow(action.getBody(), ActionCancelQueryRequest.class); - final FlightInfo info; - try { - info = FlightInfo.deserialize(request.getInfo().asReadOnlyByteBuffer()); - } catch (IOException | URISyntaxException e) { - listener.onError( - 
CallStatus.INTERNAL - .withDescription("Could not unpack FlightInfo: " + e) - .withCause(e) - .toRuntimeException()); - return; - } - cancelQuery(info, context, new CancelListener(listener)); - } else if (actionType.equals(FlightSqlUtils.FLIGHT_SQL_CREATE_PREPARED_STATEMENT.getType())) { - final ActionCreatePreparedStatementRequest request = - FlightSqlUtils.unpackAndParseOrThrow( - action.getBody(), ActionCreatePreparedStatementRequest.class); - createPreparedStatement(request, context, listener); - } else if (actionType.equals( - FlightSqlUtils.FLIGHT_SQL_CREATE_PREPARED_SUBSTRAIT_PLAN.getType())) { - final ActionCreatePreparedSubstraitPlanRequest request = - FlightSqlUtils.unpackAndParseOrThrow( - action.getBody(), ActionCreatePreparedSubstraitPlanRequest.class); - createPreparedSubstraitPlan(request, context, new ProtoListener<>(listener)); - } else if (actionType.equals(FlightSqlUtils.FLIGHT_SQL_CLOSE_PREPARED_STATEMENT.getType())) { - final ActionClosePreparedStatementRequest request = - FlightSqlUtils.unpackAndParseOrThrow( - action.getBody(), ActionClosePreparedStatementRequest.class); - closePreparedStatement(request, context, new NoResultListener(listener)); - } else if (actionType.equals(FlightSqlUtils.FLIGHT_SQL_END_SAVEPOINT.getType())) { - ActionEndSavepointRequest request = - FlightSqlUtils.unpackAndParseOrThrow(action.getBody(), ActionEndSavepointRequest.class); - endSavepoint(request, context, new NoResultListener(listener)); - } else if (actionType.equals(FlightSqlUtils.FLIGHT_SQL_END_TRANSACTION.getType())) { - ActionEndTransactionRequest request = - FlightSqlUtils.unpackAndParseOrThrow(action.getBody(), ActionEndTransactionRequest.class); - endTransaction(request, context, new NoResultListener(listener)); - } else if (actionType.equals(FlightConstants.CANCEL_FLIGHT_INFO.getType())) { - final CancelFlightInfoRequest request; - try { - request = CancelFlightInfoRequest.deserialize(ByteBuffer.wrap(action.getBody())); - } catch (IOException | 
URISyntaxException e) { - listener.onError( - CallStatus.INTERNAL - .withDescription("Could not unpack FlightInfo: " + e) - .withCause(e) - .toRuntimeException()); - return; - } - cancelFlightInfo(request, context, new CancelStatusListener(listener)); - } else if (actionType.equals(FlightConstants.RENEW_FLIGHT_ENDPOINT.getType())) { - final RenewFlightEndpointRequest request; - try { - request = RenewFlightEndpointRequest.deserialize(ByteBuffer.wrap(action.getBody())); - } catch (IOException | URISyntaxException e) { - listener.onError( - CallStatus.INTERNAL - .withDescription("Could not unpack FlightInfo: " + e) - .withCause(e) - .toRuntimeException()); - return; - } - renewFlightEndpoint(request, context, new FlightEndpointListener(listener)); - } else if (actionType.equals(FlightConstants.SET_SESSION_OPTIONS.getType())) { - final SetSessionOptionsRequest request; - try { - request = SetSessionOptionsRequest.deserialize(ByteBuffer.wrap(action.getBody())); - } catch (IOException e) { - listener.onError( - CallStatus.INTERNAL - .withDescription("Could not unpack SetSessionOptionsRequest: " + e) - .withCause(e) - .toRuntimeException()); - return; - } - setSessionOptions(request, context, new SetSessionOptionsResultListener(listener)); - } else if (actionType.equals(FlightConstants.GET_SESSION_OPTIONS.getType())) { - final GetSessionOptionsRequest request; - try { - request = GetSessionOptionsRequest.deserialize(ByteBuffer.wrap(action.getBody())); - } catch (IOException e) { - listener.onError( - CallStatus.INTERNAL - .withDescription("Could not unpack GetSessionOptionsRequest: " + e) - .withCause(e) - .toRuntimeException()); - return; - } - getSessionOptions(request, context, new GetSessionOptionsResultListener(listener)); - } else if (actionType.equals(FlightConstants.CLOSE_SESSION.getType())) { - final CloseSessionRequest request; - try { - request = CloseSessionRequest.deserialize(ByteBuffer.wrap(action.getBody())); - } catch (IOException e) { - listener.onError( 
- CallStatus.INTERNAL - .withDescription("Could not unpack CloseSessionRequest: " + e) - .withCause(e) - .toRuntimeException()); - return; - } - closeSession(request, context, new CloseSessionResultListener(listener)); - } else { - throw CallStatus.INVALID_ARGUMENT - .withDescription("Unrecognized request: " + action.getType()) - .toRuntimeException(); - } - } - - /** - * Create a savepoint within a transaction. - * - * @param request The savepoint request. - * @param context Per-call context. - * @param listener The newly created savepoint ID. - */ - default void beginSavepoint( - ActionBeginSavepointRequest request, - CallContext context, - StreamListener listener) { - listener.onError(CallStatus.UNIMPLEMENTED.toRuntimeException()); - } - - /** - * Begin a transaction. - * - * @param request The transaction request. - * @param context Per-call context. - * @param listener The newly created transaction ID. - */ - default void beginTransaction( - ActionBeginTransactionRequest request, - CallContext context, - StreamListener listener) { - listener.onError(CallStatus.UNIMPLEMENTED.toRuntimeException()); - } - - /** - * Explicitly cancel a query. - * - * @param request The CancelFlightInfoRequest for the query to cancel. - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - default void cancelFlightInfo( - CancelFlightInfoRequest request, CallContext context, StreamListener listener) { - listener.onError(CallStatus.UNIMPLEMENTED.toRuntimeException()); - } - - /** - * Explicitly cancel a query. - * - * @param info The FlightInfo of the query to cancel. - * @param context Per-call context. - * @param listener Whether cancellation succeeded. - * @deprecated Prefer {@link #cancelFlightInfo(CancelFlightInfoRequest, CallContext, - * StreamListener)}. 
- */ - @Deprecated - default void cancelQuery( - FlightInfo info, CallContext context, StreamListener listener) { - CancelFlightInfoRequest request = new CancelFlightInfoRequest(info); - cancelFlightInfo( - request, - context, - new StreamListener() { - @Override - public void onNext(CancelStatus val) { - switch (val) { - case UNSPECIFIED: - listener.onNext(CancelResult.UNSPECIFIED); - break; - case CANCELLED: - listener.onNext(CancelResult.CANCELLED); - break; - case CANCELLING: - listener.onNext(CancelResult.CANCELLING); - break; - case NOT_CANCELLABLE: - listener.onNext(CancelResult.NOT_CANCELLABLE); - break; - default: - // XXX: CheckStyle requires a default clause which arguably makes the code worse. - throw new AssertionError("Unknown enum variant " + val); - } - } - - @Override - public void onError(Throwable t) { - listener.onError(t); - } - - @Override - public void onCompleted() { - listener.onCompleted(); - } - }); - } - - /** - * Set server session options(s). - * - * @param request The session options to set. For *DBC driver compatibility, servers should - * support converting values from strings. - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - default void setSessionOptions( - SetSessionOptionsRequest request, - CallContext context, - StreamListener listener) { - listener.onError(CallStatus.UNIMPLEMENTED.toRuntimeException()); - } - - /** - * Get server session option(s). - * - * @param request The (empty) GetSessionOptionsRequest. - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - default void getSessionOptions( - GetSessionOptionsRequest request, - CallContext context, - StreamListener listener) { - listener.onError(CallStatus.UNIMPLEMENTED.toRuntimeException()); - } - - /** - * Close/invalidate the session. - * - * @param request The (empty) CloseSessionRequest. - * @param context Per-call context. 
- * @param listener An interface for sending data back to the client. - */ - default void closeSession( - CloseSessionRequest request, - CallContext context, - StreamListener listener) { - listener.onError(CallStatus.UNIMPLEMENTED.toRuntimeException()); - } - - /** - * Creates a prepared statement on the server and returns a handle and metadata for in a {@link - * ActionCreatePreparedStatementResult} object in a {@link Result} object. - * - * @param request The sql command to generate the prepared statement. - * @param context Per-call context. - * @param listener A stream of responses. - */ - void createPreparedStatement( - ActionCreatePreparedStatementRequest request, - CallContext context, - StreamListener listener); - - /** - * Pre-compile a Substrait plan. - * - * @param request The plan. - * @param context Per-call context. - * @param listener The resulting prepared statement. - */ - default void createPreparedSubstraitPlan( - ActionCreatePreparedSubstraitPlanRequest request, - CallContext context, - StreamListener listener) { - listener.onError(CallStatus.UNIMPLEMENTED.toRuntimeException()); - } - - /** - * Closes a prepared statement on the server. No result is expected. - * - * @param request The sql command to generate the prepared statement. - * @param context Per-call context. - * @param listener A stream of responses. - */ - void closePreparedStatement( - ActionClosePreparedStatementRequest request, - CallContext context, - StreamListener listener); - - /** - * Release or roll back to a savepoint. - * - * @param request The savepoint, and whether to release/rollback. - * @param context Per-call context. - * @param listener Call {@link StreamListener#onCompleted()} or {@link - * StreamListener#onError(Throwable)} when done; do not send a result. 
- */ - default void endSavepoint( - ActionEndSavepointRequest request, CallContext context, StreamListener listener) { - listener.onError(CallStatus.UNIMPLEMENTED.toRuntimeException()); - } - - /** - * Commit or roll back to a transaction. - * - * @param request The transaction, and whether to release/rollback. - * @param context Per-call context. - * @param listener Call {@link StreamListener#onCompleted()} or {@link - * StreamListener#onError(Throwable)} when done; do not send a result. - */ - default void endTransaction( - ActionEndTransactionRequest request, CallContext context, StreamListener listener) { - listener.onError(CallStatus.UNIMPLEMENTED.toRuntimeException()); - } - - /** - * Evaluate a SQL query. - * - * @param command The SQL query. - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about the stream. - */ - FlightInfo getFlightInfoStatement( - CommandStatementQuery command, CallContext context, FlightDescriptor descriptor); - - /** - * Evaluate a Substrait plan. - * - * @param command The Substrait plan. - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about the stream. - */ - default FlightInfo getFlightInfoSubstraitPlan( - CommandStatementSubstraitPlan command, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.toRuntimeException(); - } - - /** - * Gets information about a particular prepared statement data stream. - * - * @param command The prepared statement to generate the data stream. - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about the stream. - */ - FlightInfo getFlightInfoPreparedStatement( - CommandPreparedStatementQuery command, CallContext context, FlightDescriptor descriptor); - - /** - * Get the result schema for a SQL query. - * - * @param command The SQL query. 
- * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return the schema of the result set. - */ - SchemaResult getSchemaStatement( - CommandStatementQuery command, CallContext context, FlightDescriptor descriptor); - - /** - * Get the schema of the result set of a prepared statement. - * - * @param command The prepared statement handle. - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return the schema of the result set. - */ - default SchemaResult getSchemaPreparedStatement( - CommandPreparedStatementQuery command, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED - .withDescription("GetSchema with CommandPreparedStatementQuery is not implemented") - .toRuntimeException(); - } - - /** - * Get the result schema for a Substrait plan. - * - * @param command The Substrait plan. - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Schema for the stream. - */ - default SchemaResult getSchemaSubstraitPlan( - CommandStatementSubstraitPlan command, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.toRuntimeException(); - } - - /** - * Returns data for a SQL query based data stream. - * - * @param ticket Ticket message containing the statement handle. - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - void getStreamStatement( - TicketStatementQuery ticket, CallContext context, ServerStreamListener listener); - - /** - * Returns data for a particular prepared statement query instance. - * - * @param command The prepared statement to generate the data stream. - * @param context Per-call context. - * @param listener An interface for sending data back to the client. 
- */ - void getStreamPreparedStatement( - CommandPreparedStatementQuery command, CallContext context, ServerStreamListener listener); - - /** - * Accepts uploaded data for a particular SQL query based data stream. - * - *

    `PutResult`s must be in the form of a {@link DoPutUpdateResult}. - * - * @param command The sql command to generate the data stream. - * @param context Per-call context. - * @param flightStream The data stream being uploaded. - * @param ackStream The result data stream. - * @return A runnable to process the stream. - */ - Runnable acceptPutStatement( - CommandStatementUpdate command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream); - - /** - * Accepts uploaded data for a particular bulk ingest data stream. - * - *

    `PutResult`s must be in the form of a {@link DoPutUpdateResult}. - * - * @param command The bulk ingestion request. - * @param context Per-call context. - * @param flightStream The data stream being uploaded. - * @param ackStream The result data stream. - * @return A runnable to process the stream. - */ - default Runnable acceptPutStatementBulkIngest( - CommandStatementIngest command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - return () -> { - ackStream.onError(CallStatus.UNIMPLEMENTED.toRuntimeException()); - }; - } - - /** - * Handle a Substrait plan with uploaded data. - * - * @param command The Substrait plan to evaluate. - * @param context Per-call context. - * @param flightStream The data stream being uploaded. - * @param ackStream The result data stream. - * @return A runnable to process the stream. - */ - default Runnable acceptPutSubstraitPlan( - CommandStatementSubstraitPlan command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - return () -> { - ackStream.onError(CallStatus.UNIMPLEMENTED.toRuntimeException()); - }; - } - - /** - * Accepts uploaded data for a particular prepared statement data stream. - * - *

    `PutResult`s must be in the form of a {@link DoPutUpdateResult}. - * - * @param command The prepared statement to generate the data stream. - * @param context Per-call context. - * @param flightStream The data stream being uploaded. - * @param ackStream The result data stream. - * @return A runnable to process the stream. - */ - Runnable acceptPutPreparedStatementUpdate( - CommandPreparedStatementUpdate command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream); - - /** - * Accepts uploaded parameter values for a particular prepared statement query. - * - * @param command The prepared statement the parameter values will bind to. - * @param context Per-call context. - * @param flightStream The data stream being uploaded. - * @param ackStream The result data stream. - * @return A runnable to process the stream. - */ - Runnable acceptPutPreparedStatementQuery( - CommandPreparedStatementQuery command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream); - - /** - * Returns the SQL Info of the server by returning a {@link CommandGetSqlInfo} in a {@link - * Result}. - * - * @param request request filter parameters. - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about the stream. - */ - FlightInfo getFlightInfoSqlInfo( - CommandGetSqlInfo request, CallContext context, FlightDescriptor descriptor); - - /** - * Returns data for SQL info based data stream. - * - * @param command The command to generate the data stream. - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - void getStreamSqlInfo( - CommandGetSqlInfo command, CallContext context, ServerStreamListener listener); - - /** - * Returns a description of all the data types supported by source. - * - * @param request request filter parameters. - * @param descriptor The descriptor identifying the data stream. 
- * @return Metadata about the stream. - */ - FlightInfo getFlightInfoTypeInfo( - CommandGetXdbcTypeInfo request, CallContext context, FlightDescriptor descriptor); - - /** - * Returns data for type info based data stream. - * - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - void getStreamTypeInfo( - CommandGetXdbcTypeInfo request, CallContext context, ServerStreamListener listener); - - /** - * Returns the available catalogs by returning a stream of {@link CommandGetCatalogs} objects in - * {@link Result} objects. - * - * @param request request filter parameters. - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about the stream. - */ - FlightInfo getFlightInfoCatalogs( - CommandGetCatalogs request, CallContext context, FlightDescriptor descriptor); - - /** - * Returns data for catalogs based data stream. - * - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - void getStreamCatalogs(CallContext context, ServerStreamListener listener); - - /** - * Returns the available schemas by returning a stream of {@link CommandGetDbSchemas} objects in - * {@link Result} objects. - * - * @param request request filter parameters. - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about the stream. - */ - FlightInfo getFlightInfoSchemas( - CommandGetDbSchemas request, CallContext context, FlightDescriptor descriptor); - - /** - * Returns data for schemas based data stream. - * - * @param command The command to generate the data stream. - * @param context Per-call context. - * @param listener An interface for sending data back to the client. 
- */ - void getStreamSchemas( - CommandGetDbSchemas command, CallContext context, ServerStreamListener listener); - - /** - * Returns the available tables by returning a stream of {@link CommandGetTables} objects in - * {@link Result} objects. - * - * @param request request filter parameters. - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about the stream. - */ - FlightInfo getFlightInfoTables( - CommandGetTables request, CallContext context, FlightDescriptor descriptor); - - /** - * Returns data for tables based data stream. - * - * @param command The command to generate the data stream. - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - void getStreamTables( - CommandGetTables command, CallContext context, ServerStreamListener listener); - - /** - * Returns the available table types by returning a stream of {@link CommandGetTableTypes} objects - * in {@link Result} objects. - * - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about the stream. - */ - FlightInfo getFlightInfoTableTypes( - CommandGetTableTypes request, CallContext context, FlightDescriptor descriptor); - - /** - * Returns data for table types based data stream. - * - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - void getStreamTableTypes(CallContext context, ServerStreamListener listener); - - /** - * Returns the available primary keys by returning a stream of {@link CommandGetPrimaryKeys} - * objects in {@link Result} objects. - * - * @param request request filter parameters. - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about the stream. 
- */ - FlightInfo getFlightInfoPrimaryKeys( - CommandGetPrimaryKeys request, CallContext context, FlightDescriptor descriptor); - - /** - * Returns data for primary keys based data stream. - * - * @param command The command to generate the data stream. - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - void getStreamPrimaryKeys( - CommandGetPrimaryKeys command, CallContext context, ServerStreamListener listener); - - /** - * Retrieves a description of the foreign key columns that reference the given table's primary key - * columns {@link CommandGetExportedKeys} objects in {@link Result} objects. - * - * @param request request filter parameters. - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about the stream. - */ - FlightInfo getFlightInfoExportedKeys( - CommandGetExportedKeys request, CallContext context, FlightDescriptor descriptor); - - /** - * Retrieves a description of the primary key columns that are referenced by given table's foreign - * key columns {@link CommandGetImportedKeys} objects in {@link Result} objects. - * - * @param request request filter parameters. - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about the stream. - */ - FlightInfo getFlightInfoImportedKeys( - CommandGetImportedKeys request, CallContext context, FlightDescriptor descriptor); - - /** - * Retrieve a description of the foreign key columns that reference the given table's primary key - * columns {@link CommandGetCrossReference} objects in {@link Result} objects. - * - * @param request request filter parameters. - * @param context Per-call context. - * @param descriptor The descriptor identifying the data stream. - * @return Metadata about the stream. 
- */ - FlightInfo getFlightInfoCrossReference( - CommandGetCrossReference request, CallContext context, FlightDescriptor descriptor); - - /** - * Returns data for foreign keys based data stream. - * - * @param command The command to generate the data stream. - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - void getStreamExportedKeys( - CommandGetExportedKeys command, CallContext context, ServerStreamListener listener); - - /** - * Returns data for foreign keys based data stream. - * - * @param command The command to generate the data stream. - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - void getStreamImportedKeys( - CommandGetImportedKeys command, CallContext context, ServerStreamListener listener); - - /** - * Returns data for cross reference based data stream. - * - * @param command The command to generate the data stream. - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - void getStreamCrossReference( - CommandGetCrossReference command, CallContext context, ServerStreamListener listener); - - /** - * Renew the duration of the given endpoint. - * - * @param request The endpoint to renew. - * @param context Per-call context. - * @param listener An interface for sending data back to the client. - */ - default void renewFlightEndpoint( - RenewFlightEndpointRequest request, - CallContext context, - StreamListener listener) { - listener.onError(CallStatus.UNIMPLEMENTED.toRuntimeException()); - } - - /** Default schema templates for the {@link FlightSqlProducer}. 
*/ - final class Schemas { - public static final Schema GET_TABLES_SCHEMA = - new Schema( - asList( - Field.nullable("catalog_name", VARCHAR.getType()), - Field.nullable("db_schema_name", VARCHAR.getType()), - Field.notNullable("table_name", VARCHAR.getType()), - Field.notNullable("table_type", VARCHAR.getType()), - Field.notNullable("table_schema", MinorType.VARBINARY.getType()))); - public static final Schema GET_TABLES_SCHEMA_NO_SCHEMA = - new Schema( - asList( - Field.nullable("catalog_name", VARCHAR.getType()), - Field.nullable("db_schema_name", VARCHAR.getType()), - Field.notNullable("table_name", VARCHAR.getType()), - Field.notNullable("table_type", VARCHAR.getType()))); - public static final Schema GET_CATALOGS_SCHEMA = - new Schema(singletonList(Field.notNullable("catalog_name", VARCHAR.getType()))); - public static final Schema GET_TABLE_TYPES_SCHEMA = - new Schema(singletonList(Field.notNullable("table_type", VARCHAR.getType()))); - public static final Schema GET_SCHEMAS_SCHEMA = - new Schema( - asList( - Field.nullable("catalog_name", VARCHAR.getType()), - Field.notNullable("db_schema_name", VARCHAR.getType()))); - private static final Schema GET_IMPORTED_EXPORTED_AND_CROSS_REFERENCE_KEYS_SCHEMA = - new Schema( - asList( - Field.nullable("pk_catalog_name", VARCHAR.getType()), - Field.nullable("pk_db_schema_name", VARCHAR.getType()), - Field.notNullable("pk_table_name", VARCHAR.getType()), - Field.notNullable("pk_column_name", VARCHAR.getType()), - Field.nullable("fk_catalog_name", VARCHAR.getType()), - Field.nullable("fk_db_schema_name", VARCHAR.getType()), - Field.notNullable("fk_table_name", VARCHAR.getType()), - Field.notNullable("fk_column_name", VARCHAR.getType()), - Field.notNullable("key_sequence", INT.getType()), - Field.nullable("fk_key_name", VARCHAR.getType()), - Field.nullable("pk_key_name", VARCHAR.getType()), - Field.notNullable("update_rule", MinorType.UINT1.getType()), - Field.notNullable("delete_rule", MinorType.UINT1.getType()))); - 
public static final Schema GET_IMPORTED_KEYS_SCHEMA = - GET_IMPORTED_EXPORTED_AND_CROSS_REFERENCE_KEYS_SCHEMA; - public static final Schema GET_EXPORTED_KEYS_SCHEMA = - GET_IMPORTED_EXPORTED_AND_CROSS_REFERENCE_KEYS_SCHEMA; - public static final Schema GET_CROSS_REFERENCE_SCHEMA = - GET_IMPORTED_EXPORTED_AND_CROSS_REFERENCE_KEYS_SCHEMA; - private static final List GET_SQL_INFO_DENSE_UNION_SCHEMA_FIELDS = - asList( - Field.notNullable("string_value", VARCHAR.getType()), - Field.notNullable("bool_value", BIT.getType()), - Field.notNullable("bigint_value", BIGINT.getType()), - Field.notNullable("int32_bitmask", INT.getType()), - new Field( - "string_list", - FieldType.notNullable(LIST.getType()), - singletonList(Field.nullable("item", VARCHAR.getType()))), - new Field( - "int32_to_int32_list_map", - FieldType.notNullable(new ArrowType.Map(false)), - singletonList( - new Field( - DATA_VECTOR_NAME, - new FieldType(false, STRUCT.getType(), null), - ImmutableList.of( - Field.notNullable(KEY_NAME, INT.getType()), - new Field( - VALUE_NAME, - FieldType.nullable(LIST.getType()), - singletonList(Field.nullable("item", INT.getType())))))))); - public static final Schema GET_SQL_INFO_SCHEMA = - new Schema( - asList( - Field.notNullable("info_name", UINT4.getType()), - new Field( - "value", - FieldType.notNullable( - new Union( - UnionMode.Dense, - range(0, GET_SQL_INFO_DENSE_UNION_SCHEMA_FIELDS.size()).toArray())), - GET_SQL_INFO_DENSE_UNION_SCHEMA_FIELDS))); - public static final Schema GET_TYPE_INFO_SCHEMA = - new Schema( - asList( - Field.notNullable("type_name", VARCHAR.getType()), - Field.notNullable("data_type", INT.getType()), - Field.nullable("column_size", INT.getType()), - Field.nullable("literal_prefix", VARCHAR.getType()), - Field.nullable("literal_suffix", VARCHAR.getType()), - new Field( - "create_params", - FieldType.nullable(LIST.getType()), - singletonList(Field.notNullable("item", VARCHAR.getType()))), - Field.notNullable("nullable", INT.getType()), - 
Field.notNullable("case_sensitive", BIT.getType()), - Field.notNullable("searchable", INT.getType()), - Field.nullable("unsigned_attribute", BIT.getType()), - Field.notNullable("fixed_prec_scale", BIT.getType()), - Field.nullable("auto_increment", BIT.getType()), - Field.nullable("local_type_name", VARCHAR.getType()), - Field.nullable("minimum_scale", INT.getType()), - Field.nullable("maximum_scale", INT.getType()), - Field.notNullable("sql_data_type", INT.getType()), - Field.nullable("datetime_subcode", INT.getType()), - Field.nullable("num_prec_radix", INT.getType()), - Field.nullable("interval_precision", INT.getType()))); - public static final Schema GET_PRIMARY_KEYS_SCHEMA = - new Schema( - asList( - Field.nullable("catalog_name", VARCHAR.getType()), - Field.nullable("db_schema_name", VARCHAR.getType()), - Field.notNullable("table_name", VARCHAR.getType()), - Field.notNullable("column_name", VARCHAR.getType()), - Field.notNullable("key_sequence", INT.getType()), - Field.nullable("key_name", VARCHAR.getType()))); - - private Schemas() { - // Prevent instantiation. - } - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlUtils.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlUtils.java deleted file mode 100644 index 9e13e57d66c65..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlUtils.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import com.google.common.collect.ImmutableList; -import com.google.protobuf.Any; -import com.google.protobuf.InvalidProtocolBufferException; -import com.google.protobuf.Message; -import java.util.List; -import org.apache.arrow.flight.ActionType; -import org.apache.arrow.flight.CallStatus; - -/** Utilities to work with Flight SQL semantics. */ -public final class FlightSqlUtils { - - public static final ActionType FLIGHT_SQL_BEGIN_SAVEPOINT = - new ActionType( - "BeginSavepoint", - "Create a new savepoint.\n" - + "Request Message: ActionBeginSavepointRequest\n" - + "Response Message: ActionBeginSavepointResult"); - - public static final ActionType FLIGHT_SQL_BEGIN_TRANSACTION = - new ActionType( - "BeginTransaction", - "Start a new transaction.\n" - + "Request Message: ActionBeginTransactionRequest\n" - + "Response Message: ActionBeginTransactionResult"); - public static final ActionType FLIGHT_SQL_CREATE_PREPARED_STATEMENT = - new ActionType( - "CreatePreparedStatement", - "Creates a reusable prepared statement resource on the server. \n" - + "Request Message: ActionCreatePreparedStatementRequest\n" - + "Response Message: ActionCreatePreparedStatementResult"); - - public static final ActionType FLIGHT_SQL_CLOSE_PREPARED_STATEMENT = - new ActionType( - "ClosePreparedStatement", - "Closes a reusable prepared statement resource on the server. 
\n" - + "Request Message: ActionClosePreparedStatementRequest\n" - + "Response Message: N/A"); - - public static final ActionType FLIGHT_SQL_CREATE_PREPARED_SUBSTRAIT_PLAN = - new ActionType( - "CreatePreparedSubstraitPlan", - "Creates a reusable prepared statement resource on the server.\n" - + "Request Message: ActionCreatePreparedSubstraitPlanRequest\n" - + "Response Message: ActionCreatePreparedStatementResult"); - - public static final ActionType FLIGHT_SQL_CANCEL_QUERY = - new ActionType( - "CancelQuery", - "Explicitly cancel a running query.\n" - + "Request Message: ActionCancelQueryRequest\n" - + "Response Message: ActionCancelQueryResult"); - - public static final ActionType FLIGHT_SQL_END_SAVEPOINT = - new ActionType( - "EndSavepoint", - "End a savepoint.\n" - + "Request Message: ActionEndSavepointRequest\n" - + "Response Message: N/A"); - public static final ActionType FLIGHT_SQL_END_TRANSACTION = - new ActionType( - "EndTransaction", - "End a transaction.\n" - + "Request Message: ActionEndTransactionRequest\n" - + "Response Message: N/A"); - - public static final List FLIGHT_SQL_ACTIONS = - ImmutableList.of( - FLIGHT_SQL_BEGIN_SAVEPOINT, - FLIGHT_SQL_BEGIN_TRANSACTION, - FLIGHT_SQL_CREATE_PREPARED_STATEMENT, - FLIGHT_SQL_CLOSE_PREPARED_STATEMENT, - FLIGHT_SQL_CREATE_PREPARED_SUBSTRAIT_PLAN, - FLIGHT_SQL_CANCEL_QUERY, - FLIGHT_SQL_END_SAVEPOINT, - FLIGHT_SQL_END_TRANSACTION); - - /** - * Helper to parse {@link com.google.protobuf.Any} objects to the specific protobuf object. - * - * @param source the raw bytes source value. - * @return the materialized protobuf object. 
- */ - public static Any parseOrThrow(byte[] source) { - try { - return Any.parseFrom(source); - } catch (final InvalidProtocolBufferException e) { - throw CallStatus.INVALID_ARGUMENT - .withDescription("Received invalid message from remote.") - .withCause(e) - .toRuntimeException(); - } - } - - /** - * Helper to unpack {@link com.google.protobuf.Any} objects to the specific protobuf object. - * - * @param source the parsed Source value. - * @param as the class to unpack as. - * @param the class to unpack as. - * @return the materialized protobuf object. - */ - public static T unpackOrThrow(Any source, Class as) { - try { - return source.unpack(as); - } catch (final InvalidProtocolBufferException e) { - throw CallStatus.INVALID_ARGUMENT - .withDescription("Provided message cannot be unpacked as " + as.getName() + ": " + e) - .withCause(e) - .toRuntimeException(); - } - } - - /** - * Helper to parse and unpack {@link com.google.protobuf.Any} objects to the specific protobuf - * object. - * - * @param source the raw bytes source value. - * @param as the class to unpack as. - * @param the class to unpack as. - * @return the materialized protobuf object. - */ - public static T unpackAndParseOrThrow(byte[] source, Class as) { - return unpackOrThrow(parseOrThrow(source), as); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/GetSessionOptionsResultListener.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/GetSessionOptionsResultListener.java deleted file mode 100644 index 8590eda35b48c..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/GetSessionOptionsResultListener.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.GetSessionOptionsResult; -import org.apache.arrow.flight.Result; - -/** Typed StreamListener for getSessionOptions. */ -public class GetSessionOptionsResultListener - implements FlightProducer.StreamListener { - private final FlightProducer.StreamListener listener; - - GetSessionOptionsResultListener(FlightProducer.StreamListener listener) { - this.listener = listener; - } - - @Override - public void onNext(GetSessionOptionsResult val) { - listener.onNext(new Result(val.serialize().array())); - } - - @Override - public void onError(Throwable t) { - listener.onError(t); - } - - @Override - public void onCompleted() { - listener.onCompleted(); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/NoOpFlightSqlProducer.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/NoOpFlightSqlProducer.java deleted file mode 100644 index 72fcae8c18003..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/NoOpFlightSqlProducer.java +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.Criteria; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.PutResult; -import org.apache.arrow.flight.Result; -import org.apache.arrow.flight.SchemaResult; -import org.apache.arrow.flight.sql.impl.FlightSql; - -/** A {@link FlightSqlProducer} that throws on all FlightSql-specific operations. 
*/ -public class NoOpFlightSqlProducer implements FlightSqlProducer { - @Override - public void createPreparedStatement( - FlightSql.ActionCreatePreparedStatementRequest request, - CallContext context, - StreamListener listener) { - listener.onError( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public void closePreparedStatement( - FlightSql.ActionClosePreparedStatementRequest request, - CallContext context, - StreamListener listener) { - listener.onError( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public FlightInfo getFlightInfoStatement( - FlightSql.CommandStatementQuery command, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public FlightInfo getFlightInfoPreparedStatement( - FlightSql.CommandPreparedStatementQuery command, - CallContext context, - FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public SchemaResult getSchemaStatement( - FlightSql.CommandStatementQuery command, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public void getStreamStatement( - FlightSql.TicketStatementQuery ticket, CallContext context, ServerStreamListener listener) { - listener.error( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public void getStreamPreparedStatement( - FlightSql.CommandPreparedStatementQuery command, - CallContext context, - ServerStreamListener listener) { - listener.error( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public Runnable acceptPutStatement( - FlightSql.CommandStatementUpdate command, - 
CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public Runnable acceptPutStatementBulkIngest( - FlightSql.CommandStatementIngest command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - return () -> { - ackStream.onError( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - }; - } - - @Override - public Runnable acceptPutPreparedStatementUpdate( - FlightSql.CommandPreparedStatementUpdate command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public Runnable acceptPutPreparedStatementQuery( - FlightSql.CommandPreparedStatementQuery command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public FlightInfo getFlightInfoSqlInfo( - FlightSql.CommandGetSqlInfo request, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public void getStreamSqlInfo( - FlightSql.CommandGetSqlInfo command, CallContext context, ServerStreamListener listener) { - listener.error( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public FlightInfo getFlightInfoTypeInfo( - FlightSql.CommandGetXdbcTypeInfo request, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public void getStreamTypeInfo( - FlightSql.CommandGetXdbcTypeInfo request, - CallContext context, - ServerStreamListener listener) { - listener.error( - 
CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public FlightInfo getFlightInfoCatalogs( - FlightSql.CommandGetCatalogs request, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public void getStreamCatalogs(CallContext context, ServerStreamListener listener) { - listener.error( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public FlightInfo getFlightInfoSchemas( - FlightSql.CommandGetDbSchemas request, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public void getStreamSchemas( - FlightSql.CommandGetDbSchemas command, CallContext context, ServerStreamListener listener) { - listener.error( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public FlightInfo getFlightInfoTables( - FlightSql.CommandGetTables request, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public void getStreamTables( - FlightSql.CommandGetTables command, CallContext context, ServerStreamListener listener) { - listener.error( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public FlightInfo getFlightInfoTableTypes( - FlightSql.CommandGetTableTypes request, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public void getStreamTableTypes(CallContext context, ServerStreamListener listener) { - listener.error( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public 
FlightInfo getFlightInfoPrimaryKeys( - FlightSql.CommandGetPrimaryKeys request, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public void getStreamPrimaryKeys( - FlightSql.CommandGetPrimaryKeys command, CallContext context, ServerStreamListener listener) { - listener.error( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public FlightInfo getFlightInfoExportedKeys( - FlightSql.CommandGetExportedKeys request, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public FlightInfo getFlightInfoImportedKeys( - FlightSql.CommandGetImportedKeys request, CallContext context, FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public FlightInfo getFlightInfoCrossReference( - FlightSql.CommandGetCrossReference request, - CallContext context, - FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public void getStreamExportedKeys( - FlightSql.CommandGetExportedKeys command, - CallContext context, - ServerStreamListener listener) { - listener.error( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public void getStreamImportedKeys( - FlightSql.CommandGetImportedKeys command, - CallContext context, - ServerStreamListener listener) { - listener.error( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - @Override - public void getStreamCrossReference( - FlightSql.CommandGetCrossReference command, - CallContext context, - ServerStreamListener listener) { - listener.error( - CallStatus.UNIMPLEMENTED.withDescription("Not 
implemented.").toRuntimeException()); - } - - @Override - public void close() throws Exception {} - - @Override - public void listFlights( - CallContext context, Criteria criteria, StreamListener listener) { - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/NoResultListener.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/NoResultListener.java deleted file mode 100644 index cda9589db010d..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/NoResultListener.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.Result; - -/** A StreamListener for actions that do not return results. 
*/ -class NoResultListener implements FlightProducer.StreamListener { - private final FlightProducer.StreamListener listener; - - NoResultListener(FlightProducer.StreamListener listener) { - this.listener = listener; - } - - @Override - public void onNext(Result val) { - throw new UnsupportedOperationException("Do not call onNext on this listener."); - } - - @Override - public void onError(Throwable t) { - listener.onError(t); - } - - @Override - public void onCompleted() { - listener.onCompleted(); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/ProtoListener.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/ProtoListener.java deleted file mode 100644 index e1d9a57335b04..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/ProtoListener.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import com.google.protobuf.Any; -import com.google.protobuf.Message; -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.Result; - -/** - * A StreamListener that accepts a particular type. - * - * @param The message type to accept. 
- */ -class ProtoListener implements FlightProducer.StreamListener { - private final FlightProducer.StreamListener listener; - - ProtoListener(FlightProducer.StreamListener listener) { - this.listener = listener; - } - - @Override - public void onNext(T val) { - listener.onNext(new Result(Any.pack(val).toByteArray())); - } - - @Override - public void onError(Throwable t) { - listener.onError(t); - } - - @Override - public void onCompleted() { - listener.onCompleted(); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/SetSessionOptionsResultListener.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/SetSessionOptionsResultListener.java deleted file mode 100644 index f7c4539795bf2..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/SetSessionOptionsResultListener.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql; - -import org.apache.arrow.flight.FlightProducer; -import org.apache.arrow.flight.Result; -import org.apache.arrow.flight.SetSessionOptionsResult; - -/** Typed StreamListener for setSessionOptions. 
*/ -public class SetSessionOptionsResultListener - implements FlightProducer.StreamListener { - private final FlightProducer.StreamListener listener; - - SetSessionOptionsResultListener(FlightProducer.StreamListener listener) { - this.listener = listener; - } - - @Override - public void onNext(SetSessionOptionsResult val) { - listener.onNext(new Result(val.serialize().array())); - } - - @Override - public void onError(Throwable t) { - listener.onError(t); - } - - @Override - public void onCompleted() { - listener.onCompleted(); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/SqlInfoBuilder.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/SqlInfoBuilder.java deleted file mode 100644 index cbe4989d14744..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/SqlInfoBuilder.java +++ /dev/null @@ -1,1119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.sql; - -import static java.util.stream.IntStream.range; -import static org.apache.arrow.flight.FlightProducer.ServerStreamListener; -import static org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedTransaction; -import static org.apache.arrow.flight.sql.util.SqlInfoOptionsUtils.createBitmaskFromEnums; - -import com.google.protobuf.ProtocolMessageEnum; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.Consumer; -import java.util.function.ObjIntConsumer; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlInfo; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlNullOrdering; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlOuterJoinsSupportLevel; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedCaseSensitivity; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedElementActions; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedGroupBy; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedPositionedCommands; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedResultSetType; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedSubqueries; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedUnions; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlTransactionIsolationLevel; -import org.apache.arrow.flight.sql.impl.FlightSql.SupportedAnsi92SqlGrammarLevel; -import org.apache.arrow.flight.sql.impl.FlightSql.SupportedSqlGrammar; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; 
-import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.complex.impl.UnionMapWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter; -import org.apache.arrow.vector.holders.NullableBigIntHolder; -import org.apache.arrow.vector.holders.NullableBitHolder; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.holders.NullableVarCharHolder; - -/** - * Auxiliary class meant to facilitate the implementation of {@link - * FlightSqlProducer#getStreamSqlInfo}. - * - *

    Usage requires the user to add the required SqlInfo values using the {@code with*} methods - * like {@link SqlInfoBuilder#withFlightSqlServerName(String)}, and request it back through the - * {@link SqlInfoBuilder#send(List, ServerStreamListener)} method. - */ -@SuppressWarnings({"unused"}) -public class SqlInfoBuilder { - private final Map> providers = new HashMap<>(); - - /** - * Gets a {@link NullableVarCharHolder} from the provided {@code string} using the provided {@code - * buf}. - * - * @param string the {@link StandardCharsets#UTF_8}-encoded text input to store onto the holder. - * @param buf the {@link ArrowBuf} from which to create the new holder. - * @return a new {@link NullableVarCharHolder} with the provided input data {@code string}. - */ - public static NullableVarCharHolder getHolderForUtf8(final String string, final ArrowBuf buf) { - final byte[] bytes = string.getBytes(StandardCharsets.UTF_8); - buf.setBytes(0, bytes); - final NullableVarCharHolder holder = new NullableVarCharHolder(); - holder.buffer = buf; - holder.end = bytes.length; - holder.isSet = 1; - return holder; - } - - /** - * Sets a value for {@link SqlInfo#FLIGHT_SQL_SERVER_NAME} in the builder. - * - * @param value the value for {@link SqlInfo#FLIGHT_SQL_SERVER_NAME} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withFlightSqlServerName(final String value) { - return withStringProvider(SqlInfo.FLIGHT_SQL_SERVER_NAME_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#FLIGHT_SQL_SERVER_VERSION} in the builder. - * - * @param value the value for {@link SqlInfo#FLIGHT_SQL_SERVER_VERSION} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withFlightSqlServerVersion(final String value) { - return withStringProvider(SqlInfo.FLIGHT_SQL_SERVER_VERSION_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#FLIGHT_SQL_SERVER_ARROW_VERSION} in the builder. 
- * - * @param value the value for {@link SqlInfo#FLIGHT_SQL_SERVER_ARROW_VERSION} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withFlightSqlServerArrowVersion(final String value) { - return withStringProvider(SqlInfo.FLIGHT_SQL_SERVER_ARROW_VERSION_VALUE, value); - } - - /** Set a value for SQL support. */ - public SqlInfoBuilder withFlightSqlServerSql(boolean value) { - return withBooleanProvider(SqlInfo.FLIGHT_SQL_SERVER_SQL_VALUE, value); - } - - /** Set a value for Substrait support. */ - public SqlInfoBuilder withFlightSqlServerSubstrait(boolean value) { - return withBooleanProvider(SqlInfo.FLIGHT_SQL_SERVER_SUBSTRAIT_VALUE, value); - } - - /** Set a value for Substrait minimum version support. */ - public SqlInfoBuilder withFlightSqlServerSubstraitMinVersion(String value) { - return withStringProvider(SqlInfo.FLIGHT_SQL_SERVER_SUBSTRAIT_MIN_VERSION_VALUE, value); - } - - /** Set a value for Substrait maximum version support. */ - public SqlInfoBuilder withFlightSqlServerSubstraitMaxVersion(String value) { - return withStringProvider(SqlInfo.FLIGHT_SQL_SERVER_SUBSTRAIT_MAX_VERSION_VALUE, value); - } - - /** Set a value for transaction support. */ - public SqlInfoBuilder withFlightSqlServerTransaction(SqlSupportedTransaction value) { - return withIntProvider(SqlInfo.FLIGHT_SQL_SERVER_TRANSACTION_VALUE, value.getNumber()); - } - - /** Set a value for query cancellation support. */ - public SqlInfoBuilder withFlightSqlServerCancel(boolean value) { - return withBooleanProvider(SqlInfo.FLIGHT_SQL_SERVER_CANCEL_VALUE, value); - } - - /** Set a value for bulk ingestion support. */ - public SqlInfoBuilder withFlightSqlServerBulkIngestion(boolean value) { - return withBooleanProvider(SqlInfo.FLIGHT_SQL_SERVER_BULK_INGESTION_VALUE, value); - } - - /** Set a value for transaction support for bulk ingestion. 
*/ - public SqlInfoBuilder withFlightSqlServerBulkIngestionTransaction(boolean value) { - return withBooleanProvider( - SqlInfo.FLIGHT_SQL_SERVER_INGEST_TRANSACTIONS_SUPPORTED_VALUE, value); - } - - /** Set a value for statement timeouts. */ - public SqlInfoBuilder withFlightSqlServerStatementTimeout(int value) { - return withIntProvider(SqlInfo.FLIGHT_SQL_SERVER_STATEMENT_TIMEOUT_VALUE, value); - } - - /** Set a value for transaction timeouts. */ - public SqlInfoBuilder withFlightSqlServerTransactionTimeout(int value) { - return withIntProvider(SqlInfo.FLIGHT_SQL_SERVER_TRANSACTION_TIMEOUT_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_IDENTIFIER_QUOTE_CHAR} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_IDENTIFIER_QUOTE_CHAR} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlIdentifierQuoteChar(final String value) { - return withStringProvider(SqlInfo.SQL_IDENTIFIER_QUOTE_CHAR_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SEARCH_STRING_ESCAPE} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_SEARCH_STRING_ESCAPE} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSearchStringEscape(final String value) { - return withStringProvider(SqlInfo.SQL_SEARCH_STRING_ESCAPE_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_EXTRA_NAME_CHARACTERS} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_EXTRA_NAME_CHARACTERS} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlExtraNameCharacters(final String value) { - return withStringProvider(SqlInfo.SQL_EXTRA_NAME_CHARACTERS_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SCHEMA_TERM} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_SCHEMA_TERM} to be set. - * @return the SqlInfoBuilder itself. 
- */ - public SqlInfoBuilder withSqlSchemaTerm(final String value) { - return withStringProvider(SqlInfo.SQL_SCHEMA_TERM_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_CATALOG_TERM} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_CATALOG_TERM} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlCatalogTerm(final String value) { - return withStringProvider(SqlInfo.SQL_CATALOG_TERM_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_PROCEDURE_TERM} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_PROCEDURE_TERM} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlProcedureTerm(final String value) { - return withStringProvider(SqlInfo.SQL_PROCEDURE_TERM_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_DDL_CATALOG} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_DDL_CATALOG} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlDdlCatalog(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_DDL_CATALOG_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_DDL_SCHEMA} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_DDL_SCHEMA} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlDdlSchema(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_DDL_SCHEMA_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_DDL_TABLE} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_DDL_TABLE} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlDdlTable(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_DDL_TABLE_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#FLIGHT_SQL_SERVER_READ_ONLY} in the builder. - * - * @param value the value for {@link SqlInfo#FLIGHT_SQL_SERVER_READ_ONLY} to be set. 
- * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withFlightSqlServerReadOnly(final boolean value) { - return withBooleanProvider(SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTS_COLUMN_ALIASING} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_SUPPORTS_COLUMN_ALIASING} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSupportsColumnAliasing(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_SUPPORTS_COLUMN_ALIASING_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_NULL_PLUS_NULL_IS_NULL} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_NULL_PLUS_NULL_IS_NULL} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlNullPlusNullIsNull(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_NULL_PLUS_NULL_IS_NULL_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTS_TABLE_CORRELATION_NAMES} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_SUPPORTS_TABLE_CORRELATION_NAMES} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSupportsTableCorrelationNames(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_SUPPORTS_TABLE_CORRELATION_NAMES_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES} to be - * set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSupportsDifferentTableCorrelationNames(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_SUPPORTS_DIFFERENT_TABLE_CORRELATION_NAMES_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTS_EXPRESSIONS_IN_ORDER_BY} in the builder. 
- * - * @param value the value for {@link SqlInfo#SQL_SUPPORTS_EXPRESSIONS_IN_ORDER_BY} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSupportsExpressionsInOrderBy(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_SUPPORTS_EXPRESSIONS_IN_ORDER_BY_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTS_ORDER_BY_UNRELATED} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_SUPPORTS_ORDER_BY_UNRELATED} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSupportsOrderByUnrelated(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_SUPPORTS_ORDER_BY_UNRELATED_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTS_LIKE_ESCAPE_CLAUSE} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_SUPPORTS_LIKE_ESCAPE_CLAUSE} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSupportsLikeEscapeClause(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_SUPPORTS_LIKE_ESCAPE_CLAUSE_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTS_NON_NULLABLE_COLUMNS} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_SUPPORTS_NON_NULLABLE_COLUMNS} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSupportsNonNullableColumns(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_SUPPORTS_NON_NULLABLE_COLUMNS_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY} to be - * set. - * @return the SqlInfoBuilder itself. 
- */ - public SqlInfoBuilder withSqlSupportsIntegrityEnhancementFacility(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_SUPPORTS_INTEGRITY_ENHANCEMENT_FACILITY_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_CATALOG_AT_START} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_CATALOG_AT_START} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlCatalogAtStart(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_CATALOG_AT_START_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SELECT_FOR_UPDATE_SUPPORTED} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_SELECT_FOR_UPDATE_SUPPORTED} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSelectForUpdateSupported(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_SELECT_FOR_UPDATE_SUPPORTED_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_STORED_PROCEDURES_SUPPORTED} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_STORED_PROCEDURES_SUPPORTED} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlStoredProceduresSupported(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_STORED_PROCEDURES_SUPPORTED_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_CORRELATED_SUBQUERIES_SUPPORTED} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_CORRELATED_SUBQUERIES_SUPPORTED} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlCorrelatedSubqueriesSupported(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_CORRELATED_SUBQUERIES_SUPPORTED_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_ROW_SIZE_INCLUDES_BLOBS} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_ROW_SIZE_INCLUDES_BLOBS} to be set. - * @return the SqlInfoBuilder itself. 
- */ - public SqlInfoBuilder withSqlMaxRowSizeIncludesBlobs(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_MAX_ROW_SIZE_INCLUDES_BLOBS_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_TRANSACTIONS_SUPPORTED} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_TRANSACTIONS_SUPPORTED} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlTransactionsSupported(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_TRANSACTIONS_SUPPORTED_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT} to be - * set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlDataDefinitionCausesTransactionCommit(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_DATA_DEFINITION_CAUSES_TRANSACTION_COMMIT_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED} to be - * set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlDataDefinitionsInTransactionsIgnored(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_DATA_DEFINITIONS_IN_TRANSACTIONS_IGNORED_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_BATCH_UPDATES_SUPPORTED} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_BATCH_UPDATES_SUPPORTED} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlBatchUpdatesSupported(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_BATCH_UPDATES_SUPPORTED_VALUE, value); - } - - /** - * Sets a value for { @link SqlInfo#SQL_SAVEPOINTS_SUPPORTED} in the builder. 
- * - * @param value the value for {@link SqlInfo#SQL_SAVEPOINTS_SUPPORTED} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSavepointsSupported(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_SAVEPOINTS_SUPPORTED_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_NAMED_PARAMETERS_SUPPORTED} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_NAMED_PARAMETERS_SUPPORTED} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlNamedParametersSupported(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_NAMED_PARAMETERS_SUPPORTED_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_LOCATORS_UPDATE_COPY} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_LOCATORS_UPDATE_COPY} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlLocatorsUpdateCopy(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_LOCATORS_UPDATE_COPY_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_STORED_FUNCTIONS_USING_CALL_SYNTAX_SUPPORTED} in the - * builder. - * - * @param value the value for {@link SqlInfo#SQL_STORED_FUNCTIONS_USING_CALL_SYNTAX_SUPPORTED} to - * be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlStoredFunctionsUsingCallSyntaxSupported(final boolean value) { - return withBooleanProvider( - SqlInfo.SQL_STORED_FUNCTIONS_USING_CALL_SYNTAX_SUPPORTED_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_IDENTIFIER_CASE} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_IDENTIFIER_CASE} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlIdentifierCase(final SqlSupportedCaseSensitivity value) { - return withBitIntProvider(SqlInfo.SQL_IDENTIFIER_CASE_VALUE, value.getNumber()); - } - - /** - * Sets a value for {@link SqlInfo#SQL_QUOTED_IDENTIFIER_CASE} in the builder. 
- * - * @param value the value for {@link SqlInfo#SQL_QUOTED_IDENTIFIER_CASE} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlQuotedIdentifierCase(final SqlSupportedCaseSensitivity value) { - return withBitIntProvider(SqlInfo.SQL_QUOTED_IDENTIFIER_CASE_VALUE, value.getNumber()); - } - - /** - * Sets a value for {@link SqlInfo#SQL_ALL_TABLES_ARE_SELECTABLE} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_ALL_TABLES_ARE_SELECTABLE} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlAllTablesAreSelectable(final boolean value) { - return withBooleanProvider(SqlInfo.SQL_ALL_TABLES_ARE_SELECTABLE_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_NULL_ORDERING} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_NULL_ORDERING} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlNullOrdering(final SqlNullOrdering value) { - return withBitIntProvider(SqlInfo.SQL_NULL_ORDERING_VALUE, value.getNumber()); - } - - /** - * Sets a value SqlInf @link SqlInfo#SQL_MAX_BINARY_LITERAL_LENGTH} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_BINARY_LITERAL_LENGTH} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxBinaryLiteralLength(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_BINARY_LITERAL_LENGTH_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_CHAR_LITERAL_LENGTH} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_CHAR_LITERAL_LENGTH} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxCharLiteralLength(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_CHAR_LITERAL_LENGTH_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_COLUMN_NAME_LENGTH} in the builder. 
- * - * @param value the value for {@link SqlInfo#SQL_MAX_COLUMN_NAME_LENGTH} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxColumnNameLength(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_COLUMN_NAME_LENGTH_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_COLUMNS_IN_GROUP_BY} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_COLUMNS_IN_GROUP_BY} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxColumnsInGroupBy(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_COLUMNS_IN_GROUP_BY_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_COLUMNS_IN_INDEX} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_COLUMNS_IN_INDEX} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxColumnsInIndex(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_COLUMNS_IN_INDEX_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_COLUMNS_IN_ORDER_BY} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_COLUMNS_IN_ORDER_BY} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxColumnsInOrderBy(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_COLUMNS_IN_ORDER_BY_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_COLUMNS_IN_SELECT} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_COLUMNS_IN_SELECT} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxColumnsInSelect(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_COLUMNS_IN_SELECT_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_COLUMNS_IN_TABLE} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_COLUMNS_IN_TABLE} to be set. - * @return the SqlInfoBuilder itself. 
- */ - public SqlInfoBuilder withSqlMaxColumnsInTable(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_COLUMNS_IN_TABLE_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_CONNECTIONS} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_CONNECTIONS} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxConnections(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_CONNECTIONS_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_CURSOR_NAME_LENGTH} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_CURSOR_NAME_LENGTH} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxCursorNameLength(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_CURSOR_NAME_LENGTH_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_INDEX_LENGTH} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_INDEX_LENGTH} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxIndexLength(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_INDEX_LENGTH_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_DB_SCHEMA_NAME_LENGTH} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_DB_SCHEMA_NAME_LENGTH} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlDbSchemaNameLength(final long value) { - return withBitIntProvider(SqlInfo.SQL_DB_SCHEMA_NAME_LENGTH_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_PROCEDURE_NAME_LENGTH} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_PROCEDURE_NAME_LENGTH} to be set. - * @return the SqlInfoBuilder itself. 
- */ - public SqlInfoBuilder withSqlMaxProcedureNameLength(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_PROCEDURE_NAME_LENGTH_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_CATALOG_NAME_LENGTH} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_CATALOG_NAME_LENGTH} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxCatalogNameLength(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_CATALOG_NAME_LENGTH_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_ROW_SIZE} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_ROW_SIZE} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxRowSize(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_ROW_SIZE_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_STATEMENT_LENGTH} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_STATEMENT_LENGTH} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxStatementLength(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_STATEMENT_LENGTH_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_STATEMENTS} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_STATEMENTS} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxStatements(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_STATEMENTS_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_TABLE_NAME_LENGTH} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_TABLE_NAME_LENGTH} to be set. - * @return the SqlInfoBuilder itself. 
- */ - public SqlInfoBuilder withSqlMaxTableNameLength(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_TABLE_NAME_LENGTH_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_TABLES_IN_SELECT} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_TABLES_IN_SELECT} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxTablesInSelect(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_TABLES_IN_SELECT_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_MAX_USERNAME_LENGTH} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_MAX_USERNAME_LENGTH} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlMaxUsernameLength(final long value) { - return withBitIntProvider(SqlInfo.SQL_MAX_USERNAME_LENGTH_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_DEFAULT_TRANSACTION_ISOLATION} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_DEFAULT_TRANSACTION_ISOLATION} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlDefaultTransactionIsolation(final long value) { - return withBitIntProvider(SqlInfo.SQL_DEFAULT_TRANSACTION_ISOLATION_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTED_GROUP_BY} in the builder. - * - * @param values the value for {@link SqlInfo#SQL_SUPPORTED_GROUP_BY} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSupportedGroupBy(final SqlSupportedGroupBy... values) { - return withEnumProvider(SqlInfo.SQL_SUPPORTED_GROUP_BY_VALUE, values); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTED_GRAMMAR} in the builder. - * - * @param values the value for {@link SqlInfo#SQL_SUPPORTED_GRAMMAR} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSupportedGrammar(final SupportedSqlGrammar... 
values) { - return withEnumProvider(SqlInfo.SQL_SUPPORTED_GRAMMAR_VALUE, values); - } - - /** - * Sets a value for {@link SqlInfo#SQL_ANSI92_SUPPORTED_LEVEL} in the builder. - * - * @param values the value for {@link SqlInfo#SQL_ANSI92_SUPPORTED_LEVEL} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlAnsi92SupportedLevel( - final SupportedAnsi92SqlGrammarLevel... values) { - return withEnumProvider(SqlInfo.SQL_ANSI92_SUPPORTED_LEVEL_VALUE, values); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SCHEMAS_SUPPORTED_ACTIONS} in the builder. - * - * @param values the value for {@link SqlInfo#SQL_SCHEMAS_SUPPORTED_ACTIONS} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSchemasSupportedActions(final SqlSupportedElementActions... values) { - return withEnumProvider(SqlInfo.SQL_SCHEMAS_SUPPORTED_ACTIONS_VALUE, values); - } - - /** - * Sets a value for {@link SqlInfo#SQL_CATALOGS_SUPPORTED_ACTIONS} in the builder. - * - * @param values the value for {@link SqlInfo#SQL_CATALOGS_SUPPORTED_ACTIONS} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlCatalogsSupportedActions( - final SqlSupportedElementActions... values) { - return withEnumProvider(SqlInfo.SQL_CATALOGS_SUPPORTED_ACTIONS_VALUE, values); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTED_POSITIONED_COMMANDS} in the builder. - * - * @param values the value for {@link SqlInfo#SQL_SUPPORTED_POSITIONED_COMMANDS} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSupportedPositionedCommands( - final SqlSupportedPositionedCommands... values) { - return withEnumProvider(SqlInfo.SQL_SUPPORTED_POSITIONED_COMMANDS_VALUE, values); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTED_SUBQUERIES} in the builder. - * - * @param values the value for {@link SqlInfo#SQL_SUPPORTED_SUBQUERIES} to be set. - * @return the SqlInfoBuilder itself. 
- */ - public SqlInfoBuilder withSqlSubQueriesSupported(final SqlSupportedSubqueries... values) { - return withEnumProvider(SqlInfo.SQL_SUPPORTED_SUBQUERIES_VALUE, values); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTED_UNIONS} in the builder. - * - * @param values the values for {@link SqlInfo#SQL_SUPPORTED_UNIONS} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSupportedUnions(final SqlSupportedUnions... values) { - return withEnumProvider(SqlInfo.SQL_SUPPORTED_UNIONS_VALUE, values); - } - - /** - * Sets a value for {@link SqlInfo#SQL_OUTER_JOINS_SUPPORT_LEVEL} in the builder. - * - * @param value the value for {@link SqlInfo#SQL_OUTER_JOINS_SUPPORT_LEVEL} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlOuterJoinSupportLevel(final SqlOuterJoinsSupportLevel... value) { - return withEnumProvider(SqlInfo.SQL_OUTER_JOINS_SUPPORT_LEVEL_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTED_TRANSACTIONS_ISOLATION_LEVELS} in the builder. - * - * @param values the values for {@link SqlInfo#SQL_SUPPORTED_TRANSACTIONS_ISOLATION_LEVELS} to be - * set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSupportedTransactionsIsolationLevels( - final SqlTransactionIsolationLevel... values) { - return withEnumProvider(SqlInfo.SQL_SUPPORTED_TRANSACTIONS_ISOLATION_LEVELS_VALUE, values); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTED_RESULT_SET_TYPES} in the builder. - * - * @param values the values for {@link SqlInfo#SQL_SUPPORTED_RESULT_SET_TYPES} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSupportedResultSetTypes(final SqlSupportedResultSetType... values) { - return withEnumProvider(SqlInfo.SQL_SUPPORTED_RESULT_SET_TYPES_VALUE, values); - } - - /** - * Sets a value for {@link SqlInfo#SQL_KEYWORDS} in the builder. - * - * @param value the values for {@link SqlInfo#SQL_KEYWORDS} to be set. 
- * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlKeywords(final String[] value) { - return withStringArrayProvider(SqlInfo.SQL_KEYWORDS_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_NUMERIC_FUNCTIONS} in the builder. - * - * @param value the values for {@link SqlInfo#SQL_NUMERIC_FUNCTIONS} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlNumericFunctions(final String[] value) { - return withStringArrayProvider(SqlInfo.SQL_NUMERIC_FUNCTIONS_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_STRING_FUNCTIONS} in the builder. - * - * @param value the values for {@link SqlInfo#SQL_STRING_FUNCTIONS} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlStringFunctions(final String[] value) { - return withStringArrayProvider(SqlInfo.SQL_STRING_FUNCTIONS_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SYSTEM_FUNCTIONS} in the builder. - * - * @param value the values for {@link SqlInfo#SQL_SYSTEM_FUNCTIONS} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlSystemFunctions(final String[] value) { - return withStringArrayProvider(SqlInfo.SQL_SYSTEM_FUNCTIONS_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_DATETIME_FUNCTIONS} in the builder. - * - * @param value the values for {@link SqlInfo#SQL_DATETIME_FUNCTIONS} to be set. - * @return the SqlInfoBuilder itself. - */ - public SqlInfoBuilder withSqlDatetimeFunctions(final String[] value) { - return withStringArrayProvider(SqlInfo.SQL_DATETIME_FUNCTIONS_VALUE, value); - } - - /** - * Sets a value for {@link SqlInfo#SQL_SUPPORTS_CONVERT} in the builder. - * - * @param value the values for {@link SqlInfo#SQL_SUPPORTS_CONVERT} to be set. - * @return the SqlInfoBuilder itself. 
- */ - public SqlInfoBuilder withSqlSupportsConvert(final Map> value) { - return withIntToIntListMapProvider(SqlInfo.SQL_SUPPORTS_CONVERT_VALUE, value); - } - - private void addProvider(final int sqlInfo, final ObjIntConsumer provider) { - providers.put(sqlInfo, provider); - } - - private SqlInfoBuilder withEnumProvider(final int sqlInfo, final ProtocolMessageEnum[] values) { - return withIntProvider(sqlInfo, (int) createBitmaskFromEnums(values)); - } - - private SqlInfoBuilder withIntProvider(final int sqlInfo, final int value) { - addProvider(sqlInfo, (root, index) -> setDataForIntField(root, index, sqlInfo, value)); - return this; - } - - private SqlInfoBuilder withBitIntProvider(final int sqlInfo, final long value) { - addProvider(sqlInfo, (root, index) -> setDataForBigIntField(root, index, sqlInfo, value)); - return this; - } - - private SqlInfoBuilder withBooleanProvider(final int sqlInfo, final boolean value) { - addProvider(sqlInfo, (root, index) -> setDataForBooleanField(root, index, sqlInfo, value)); - return this; - } - - private SqlInfoBuilder withStringProvider(final int sqlInfo, final String value) { - addProvider(sqlInfo, (root, index) -> setDataForUtf8Field(root, index, sqlInfo, value)); - return this; - } - - private SqlInfoBuilder withStringArrayProvider(final int sqlInfo, final String[] value) { - addProvider(sqlInfo, (root, index) -> setDataVarCharListField(root, index, sqlInfo, value)); - return this; - } - - private SqlInfoBuilder withIntToIntListMapProvider( - final int sqlInfo, final Map> value) { - addProvider(sqlInfo, (root, index) -> setIntToIntListMapField(root, index, sqlInfo, value)); - return this; - } - - /** - * Send the requested information to given ServerStreamListener. - * - * @param infos List of SqlInfo to be sent. - * @param listener ServerStreamListener to send data to. 
- */ - public void send(List infos, final ServerStreamListener listener) { - if (infos == null || infos.isEmpty()) { - infos = new ArrayList<>(providers.keySet()); - } - try (final BufferAllocator allocator = new RootAllocator(); - final VectorSchemaRoot root = - VectorSchemaRoot.create(FlightSqlProducer.Schemas.GET_SQL_INFO_SCHEMA, allocator)) { - final int rows = infos.size(); - for (int i = 0; i < rows; i++) { - providers.get(infos.get(i)).accept(root, i); - } - root.setRowCount(rows); - listener.start(root); - listener.putNext(); - } catch (final Throwable throwable) { - listener.error(throwable); - } finally { - listener.completed(); - } - } - - private void setInfoName(final VectorSchemaRoot root, final int index, final int info) { - final UInt4Vector infoName = (UInt4Vector) root.getVector("info_name"); - infoName.setSafe(index, info); - } - - private void setValues( - final VectorSchemaRoot root, - final int index, - final byte typeId, - final Consumer dataSetter) { - final DenseUnionVector values = (DenseUnionVector) root.getVector("value"); - values.setTypeId(index, typeId); - dataSetter.accept(values); - } - - /** - * Executes the given action on an ad-hoc, newly created instance of {@link ArrowBuf}. - * - * @param executor the action to take. 
- */ - private void onCreateArrowBuf(final Consumer executor) { - try (final BufferAllocator allocator = new RootAllocator(); - final ArrowBuf buf = allocator.buffer(1024)) { - executor.accept(buf); - } - } - - private void setDataForUtf8Field( - final VectorSchemaRoot root, final int index, final int sqlInfo, final String value) { - setInfoName(root, index, sqlInfo); - onCreateArrowBuf( - buf -> { - final Consumer producer = - values -> values.setSafe(index, getHolderForUtf8(value, buf)); - setValues(root, index, (byte) 0, producer); - }); - } - - private void setDataForIntField( - final VectorSchemaRoot root, final int index, final int sqlInfo, final int value) { - setInfoName(root, index, sqlInfo); - final NullableIntHolder dataHolder = new NullableIntHolder(); - dataHolder.isSet = 1; - dataHolder.value = value; - setValues(root, index, (byte) 3, values -> values.setSafe(index, dataHolder)); - } - - private void setDataForBigIntField( - final VectorSchemaRoot root, final int index, final int sqlInfo, final long value) { - setInfoName(root, index, sqlInfo); - final NullableBigIntHolder dataHolder = new NullableBigIntHolder(); - dataHolder.isSet = 1; - dataHolder.value = value; - setValues(root, index, (byte) 2, values -> values.setSafe(index, dataHolder)); - } - - private void setDataForBooleanField( - final VectorSchemaRoot root, final int index, final int sqlInfo, final boolean value) { - setInfoName(root, index, sqlInfo); - final NullableBitHolder dataHolder = new NullableBitHolder(); - dataHolder.isSet = 1; - dataHolder.value = value ? 
1 : 0; - setValues(root, index, (byte) 1, values -> values.setSafe(index, dataHolder)); - } - - private void setDataVarCharListField( - final VectorSchemaRoot root, final int index, final int sqlInfo, final String[] values) { - final DenseUnionVector denseUnion = (DenseUnionVector) root.getVector("value"); - final ListVector listVector = denseUnion.getList((byte) 4); - final int listIndex = listVector.getValueCount(); - final int denseUnionValueCount = index + 1; - final int listVectorValueCount = listIndex + 1; - denseUnion.setValueCount(denseUnionValueCount); - listVector.setValueCount(listVectorValueCount); - - final UnionListWriter writer = listVector.getWriter(); - writer.setPosition(listIndex); - writer.startList(); - final int length = values.length; - range(0, length) - .forEach( - i -> - onCreateArrowBuf( - buf -> { - final byte[] bytes = values[i].getBytes(StandardCharsets.UTF_8); - buf.setBytes(0, bytes); - writer.writeVarChar(0, bytes.length, buf); - })); - writer.endList(); - writer.setValueCount(listVectorValueCount); - - denseUnion.setTypeId(index, (byte) 4); - denseUnion.getOffsetBuffer().setInt(index * 4L, listIndex); - setInfoName(root, index, sqlInfo); - } - - private void setIntToIntListMapField( - final VectorSchemaRoot root, - final int index, - final int sqlInfo, - final Map> values) { - final DenseUnionVector denseUnion = (DenseUnionVector) root.getVector("value"); - final MapVector mapVector = denseUnion.getMap((byte) 5); - final int mapIndex = mapVector.getValueCount(); - denseUnion.setValueCount(index + 1); - mapVector.setValueCount(mapIndex + 1); - - final UnionMapWriter mapWriter = mapVector.getWriter(); - mapWriter.setPosition(mapIndex); - mapWriter.startMap(); - values.forEach( - (key, value) -> { - mapWriter.startEntry(); - mapWriter.key().integer().writeInt(key); - final BaseWriter.ListWriter listWriter = mapWriter.value().list(); - listWriter.startList(); - for (final int v : value) { - listWriter.integer().writeInt(v); - } - 
listWriter.endList(); - mapWriter.endEntry(); - }); - mapWriter.endMap(); - mapWriter.setValueCount(mapIndex + 1); - - denseUnion.setTypeId(index, (byte) 5); - denseUnion.getOffsetBuffer().setInt(index * 4L, mapIndex); - setInfoName(root, index, sqlInfo); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/example/FlightSqlClientDemoApp.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/example/FlightSqlClientDemoApp.java deleted file mode 100644 index 7dbbe4c39583a..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/example/FlightSqlClientDemoApp.java +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.sql.example; - -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.flight.CallOption; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.sql.FlightSqlClient; -import org.apache.arrow.flight.sql.util.TableRef; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.DefaultParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; - -/** Flight SQL Client Demo CLI Application. */ -public class FlightSqlClientDemoApp implements AutoCloseable { - public final List callOptions = new ArrayList<>(); - public final BufferAllocator allocator; - public FlightSqlClient flightSqlClient; - - public FlightSqlClientDemoApp(final BufferAllocator bufferAllocator) { - allocator = bufferAllocator; - } - - public static void main(final String[] args) throws Exception { - final Options options = new Options(); - - options.addRequiredOption("host", "host", true, "Host to connect to"); - options.addRequiredOption("port", "port", true, "Port to connect to"); - options.addRequiredOption("command", "command", true, "Method to run"); - - options.addOption("query", "query", true, "Query"); - options.addOption("catalog", "catalog", true, "Catalog"); - options.addOption("schema", "schema", true, "Schema"); - options.addOption("table", "table", true, "Table"); - - CommandLineParser parser = new DefaultParser(); - HelpFormatter formatter = new HelpFormatter(); - CommandLine cmd; - - try { - cmd = parser.parse(options, args); - try (final FlightSqlClientDemoApp thisApp = - new 
FlightSqlClientDemoApp(new RootAllocator(Integer.MAX_VALUE))) { - thisApp.executeApp(cmd); - } - - } catch (final ParseException e) { - System.out.println(e.getMessage()); - formatter.printHelp("FlightSqlClientDemoApp -host localhost -port 32010 ...", options); - throw e; - } - } - - /** - * Gets the current {@link CallOption} as an array; usually used as an argument in {@link - * FlightSqlClient} methods. - * - * @return current {@link CallOption} array. - */ - public CallOption[] getCallOptions() { - return callOptions.toArray(new CallOption[0]); - } - - /** - * Calls {@link FlightSqlClientDemoApp#createFlightSqlClient(String, int)} in order to create a - * {@link FlightSqlClient} to be used in future calls, and then calls {@link - * FlightSqlClientDemoApp#executeCommand(CommandLine)} to execute the command parsed at execution. - * - * @param cmd parsed {@link CommandLine}; often the result of {@link DefaultParser#parse(Options, - * String[])}. - */ - public void executeApp(final CommandLine cmd) throws Exception { - final String host = cmd.getOptionValue("host").trim(); - final int port = Integer.parseInt(cmd.getOptionValue("port").trim()); - - createFlightSqlClient(host, port); - executeCommand(cmd); - } - - /** - * Parses the "{@code command}" CLI argument and redirects to the appropriate method. - * - * @param cmd parsed {@link CommandLine}; often the result of {@link DefaultParser#parse(Options, - * String[])}. 
- */ - public void executeCommand(CommandLine cmd) throws Exception { - switch (cmd.getOptionValue("command").trim()) { - case "Execute": - exampleExecute(cmd.getOptionValue("query")); - break; - case "ExecuteUpdate": - exampleExecuteUpdate(cmd.getOptionValue("query")); - break; - case "GetCatalogs": - exampleGetCatalogs(); - break; - case "GetSchemas": - exampleGetSchemas(cmd.getOptionValue("catalog"), cmd.getOptionValue("schema")); - break; - case "GetTableTypes": - exampleGetTableTypes(); - break; - case "GetTables": - exampleGetTables( - cmd.getOptionValue("catalog"), - cmd.getOptionValue("schema"), - cmd.getOptionValue("table")); - break; - case "GetExportedKeys": - exampleGetExportedKeys( - cmd.getOptionValue("catalog"), - cmd.getOptionValue("schema"), - cmd.getOptionValue("table")); - break; - case "GetImportedKeys": - exampleGetImportedKeys( - cmd.getOptionValue("catalog"), - cmd.getOptionValue("schema"), - cmd.getOptionValue("table")); - break; - case "GetPrimaryKeys": - exampleGetPrimaryKeys( - cmd.getOptionValue("catalog"), - cmd.getOptionValue("schema"), - cmd.getOptionValue("table")); - break; - default: - System.out.println( - "Command used is not valid! Please use one of: \n" - + "[\"ExecuteUpdate\",\n" - + "\"Execute\",\n" - + "\"GetCatalogs\",\n" - + "\"GetSchemas\",\n" - + "\"GetTableTypes\",\n" - + "\"GetTables\",\n" - + "\"GetExportedKeys\",\n" - + "\"GetImportedKeys\",\n" - + "\"GetPrimaryKeys\"]"); - } - } - - /** - * Creates a {@link FlightSqlClient} to be used with the example methods. - * - * @param host client's hostname. - * @param port client's port. 
- */ - public void createFlightSqlClient(final String host, final int port) { - final Location clientLocation = Location.forGrpcInsecure(host, port); - flightSqlClient = new FlightSqlClient(FlightClient.builder(allocator, clientLocation).build()); - } - - private void exampleExecute(final String query) throws Exception { - printFlightInfoResults(flightSqlClient.execute(query, getCallOptions())); - } - - private void exampleExecuteUpdate(final String query) { - System.out.println( - "Updated: " + flightSqlClient.executeUpdate(query, getCallOptions()) + "rows."); - } - - private void exampleGetCatalogs() throws Exception { - printFlightInfoResults(flightSqlClient.getCatalogs(getCallOptions())); - } - - private void exampleGetSchemas(final String catalog, final String schema) throws Exception { - printFlightInfoResults(flightSqlClient.getSchemas(catalog, schema, getCallOptions())); - } - - private void exampleGetTableTypes() throws Exception { - printFlightInfoResults(flightSqlClient.getTableTypes(getCallOptions())); - } - - private void exampleGetTables(final String catalog, final String schema, final String table) - throws Exception { - // For now, this won't filter by table types. 
- printFlightInfoResults( - flightSqlClient.getTables(catalog, schema, table, null, false, getCallOptions())); - } - - private void exampleGetExportedKeys(final String catalog, final String schema, final String table) - throws Exception { - printFlightInfoResults( - flightSqlClient.getExportedKeys(TableRef.of(catalog, schema, table), getCallOptions())); - } - - private void exampleGetImportedKeys(final String catalog, final String schema, final String table) - throws Exception { - printFlightInfoResults( - flightSqlClient.getImportedKeys(TableRef.of(catalog, schema, table), getCallOptions())); - } - - private void exampleGetPrimaryKeys(final String catalog, final String schema, final String table) - throws Exception { - printFlightInfoResults( - flightSqlClient.getPrimaryKeys(TableRef.of(catalog, schema, table), getCallOptions())); - } - - private void printFlightInfoResults(final FlightInfo flightInfo) throws Exception { - final FlightStream stream = - flightSqlClient.getStream(flightInfo.getEndpoints().get(0).getTicket(), getCallOptions()); - while (stream.next()) { - try (final VectorSchemaRoot root = stream.getRoot()) { - System.out.println(root.contentToTSVString()); - } - } - stream.close(); - } - - @Override - public void close() throws Exception { - flightSqlClient.close(); - allocator.close(); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/util/SqlInfoOptionsUtils.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/util/SqlInfoOptionsUtils.java deleted file mode 100644 index 3d3d3921022a9..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/util/SqlInfoOptionsUtils.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql.util; - -import com.google.protobuf.ProtocolMessageEnum; -import java.util.Arrays; -import java.util.Collection; -import org.apache.arrow.flight.sql.FlightSqlClient; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlInfo; - -/** Utility class for {@link SqlInfo} and {@link FlightSqlClient#getSqlInfo} option parsing. */ -public final class SqlInfoOptionsUtils { - private SqlInfoOptionsUtils() { - // Prevent instantiation. - } - - /** - * Returns whether the provided {@code bitmask} points to the provided {@link ProtocolMessageEnum} - * by comparing {@link ProtocolMessageEnum#getNumber} with the respective bit index of the {@code - * bitmask}. - * - * @param enumInstance the protobuf message enum to use. - * @param bitmask the bitmask response from {@link FlightSqlClient#getSqlInfo}. - * @return whether the provided {@code bitmask} points to the specified {@code enumInstance}. - */ - public static boolean doesBitmaskTranslateToEnum( - final ProtocolMessageEnum enumInstance, final long bitmask) { - return ((bitmask >> enumInstance.getNumber()) & 1) == 1; - } - - /** - * Creates a bitmask that translates to the specified {@code enums}. - * - * @param enums the {@link ProtocolMessageEnum} instances to represent as bitmask. - * @return the bitmask. - */ - public static long createBitmaskFromEnums(final ProtocolMessageEnum... 
enums) { - return createBitmaskFromEnums(Arrays.asList(enums)); - } - - /** - * Creates a bitmask that translates to the specified {@code enums}. - * - * @param enums the {@link ProtocolMessageEnum} instances to represent as bitmask. - * @return the bitmask. - */ - public static long createBitmaskFromEnums(final Collection enums) { - return enums.stream() - .mapToInt(ProtocolMessageEnum::getNumber) - .map(bitIndexToSet -> 1 << bitIndexToSet) - .reduce((firstBitmask, secondBitmask) -> firstBitmask | secondBitmask) - .orElse(0); - } -} diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/util/TableRef.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/util/TableRef.java deleted file mode 100644 index 15167a8f00d53..0000000000000 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/util/TableRef.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql.util; - -/** A helper class to reference a table to be passed to the flight sql client. 
*/ -public class TableRef { - private final String catalog; - private final String dbSchema; - private final String table; - - /** - * The complete constructor for the TableRef class. - * - * @param catalog the catalog from a table. - * @param dbSchema the database schema from a table. - * @param table the table name from a table. - */ - public TableRef(String catalog, String dbSchema, String table) { - this.catalog = catalog; - this.dbSchema = dbSchema; - this.table = table; - } - - /** - * A static initializer of the TableRef with all the arguments. - * - * @param catalog the catalog from a table. - * @param dbSchema the database schema from a table. - * @param table the table name from a table. - * @return A TableRef object. - */ - public static TableRef of(String catalog, String dbSchema, String table) { - return new TableRef(catalog, dbSchema, table); - } - - /** - * Retrieve the catalog from the object. - * - * @return the catalog. - */ - public String getCatalog() { - return catalog; - } - - /** - * Retrieves the db schema from the object. - * - * @return the dbSchema - */ - public String getDbSchema() { - return dbSchema; - } - - /** - * Retrieves the table from the object. - * - * @return the table. - */ - public String getTable() { - return table; - } -} diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/DoPutPreparedStatementResultPOJO.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/DoPutPreparedStatementResultPOJO.java deleted file mode 100644 index f6bfbcd2e5457..0000000000000 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/DoPutPreparedStatementResultPOJO.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql.example; - -import java.io.Serializable; - -public class DoPutPreparedStatementResultPOJO implements Serializable { - private String query; - private byte[] parameters; - - public DoPutPreparedStatementResultPOJO(String query, byte[] parameters) { - this.query = query; - this.parameters = parameters.clone(); - } - - public String getQuery() { - return query; - } - - public byte[] getParameters() { - return parameters; - } -} diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java deleted file mode 100644 index f9d0551a3aa22..0000000000000 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java +++ /dev/null @@ -1,1639 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql.example; - -import static com.google.common.base.Strings.emptyToNull; -import static com.google.protobuf.Any.pack; -import static com.google.protobuf.ByteString.copyFrom; -import static java.lang.String.format; -import static java.util.Collections.singletonList; -import static java.util.Objects.isNull; -import static java.util.UUID.randomUUID; -import static java.util.stream.IntStream.range; -import static org.apache.arrow.adapter.jdbc.JdbcToArrow.sqlToArrowVectorIterator; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowUtils.jdbcToArrowSchema; -import static org.apache.arrow.flight.sql.impl.FlightSql.*; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetCrossReference; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetDbSchemas; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetExportedKeys; -import static org.apache.arrow.flight.sql.impl.FlightSql.CommandGetImportedKeys; -import static org.apache.arrow.flight.sql.impl.FlightSql.DoPutUpdateResult; -import static org.apache.arrow.flight.sql.impl.FlightSql.TicketStatementQuery; -import static org.apache.arrow.util.Preconditions.checkState; -import static org.slf4j.LoggerFactory.getLogger; - -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.RemovalListener; -import com.google.common.cache.RemovalNotification; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import 
com.google.protobuf.ByteString; -import com.google.protobuf.Message; -import com.google.protobuf.ProtocolStringList; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.Channels; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.NoSuchFileException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.StandardOpenOption; -import java.sql.Connection; -import java.sql.DatabaseMetaData; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.SQLSyntaxErrorException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Calendar; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Objects; -import java.util.Properties; -import java.util.Set; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.function.BiConsumer; -import java.util.function.Consumer; -import java.util.function.Predicate; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.arrow.adapter.jdbc.ArrowVectorIterator; -import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; -import org.apache.arrow.adapter.jdbc.JdbcParameterBinder; -import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.Criteria; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightStream; -import 
org.apache.arrow.flight.Location; -import org.apache.arrow.flight.PutResult; -import org.apache.arrow.flight.Result; -import org.apache.arrow.flight.SchemaResult; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.flight.sql.FlightSqlColumnMetadata; -import org.apache.arrow.flight.sql.FlightSqlProducer; -import org.apache.arrow.flight.sql.SqlInfoBuilder; -import org.apache.arrow.flight.sql.impl.FlightSql.ActionClosePreparedStatementRequest; -import org.apache.arrow.flight.sql.impl.FlightSql.ActionCreatePreparedStatementRequest; -import org.apache.arrow.flight.sql.impl.FlightSql.ActionCreatePreparedStatementResult; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetCatalogs; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetPrimaryKeys; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetSqlInfo; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetTableTypes; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandGetTables; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandPreparedStatementQuery; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandPreparedStatementUpdate; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementIngest; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementIngest.TableDefinitionOptions; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementIngest.TableDefinitionOptions.TableExistsOption; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementIngest.TableDefinitionOptions.TableNotExistOption; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementQuery; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementUpdate; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedCaseSensitivity; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import 
org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.ipc.WriteChannel; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.Text; -import org.apache.commons.dbcp2.ConnectionFactory; -import org.apache.commons.dbcp2.DriverManagerConnectionFactory; -import org.apache.commons.dbcp2.PoolableConnection; -import org.apache.commons.dbcp2.PoolableConnectionFactory; -import org.apache.commons.dbcp2.PoolingDataSource; -import org.apache.commons.pool2.ObjectPool; -import org.apache.commons.pool2.impl.GenericObjectPool; -import org.apache.commons.text.StringEscapeUtils; -import org.slf4j.Logger; - -/** - * Example {@link FlightSqlProducer} implementation showing an Apache Derby backed Flight SQL server - * that generally supports all current features of Flight SQL. 
- */ -public class FlightSqlExample implements FlightSqlProducer, AutoCloseable { - private static final Logger LOGGER = getLogger(FlightSqlExample.class); - protected static final Calendar DEFAULT_CALENDAR = JdbcToArrowUtils.getUtcCalendar(); - public static final String DB_NAME = "derbyDB"; - private final String databaseUri; - // ARROW-15315: Use ExecutorService to simulate an async scenario - private final ExecutorService executorService = Executors.newFixedThreadPool(10); - private final Location location; - protected final PoolingDataSource dataSource; - protected final BufferAllocator rootAllocator = new RootAllocator(); - private final Cache> - preparedStatementLoadingCache; - private final Cache> statementLoadingCache; - private final SqlInfoBuilder sqlInfoBuilder; - - public static void main(String[] args) throws Exception { - Location location = Location.forGrpcInsecure("localhost", 55555); - final FlightSqlExample example = new FlightSqlExample(location, DB_NAME); - Location listenLocation = Location.forGrpcInsecure("0.0.0.0", 55555); - try (final BufferAllocator allocator = new RootAllocator(); - final FlightServer server = - FlightServer.builder(allocator, listenLocation, example).build()) { - server.start(); - server.awaitTermination(); - } - } - - public FlightSqlExample(final Location location, final String dbName) { - // TODO Constructor should not be doing work. 
- checkState(removeDerbyDatabaseIfExists(dbName), "Failed to clear Derby database!"); - checkState(populateDerbyDatabase(dbName), "Failed to populate Derby database!"); - databaseUri = "jdbc:derby:target/" + dbName; - final ConnectionFactory connectionFactory = - new DriverManagerConnectionFactory(databaseUri, new Properties()); - final PoolableConnectionFactory poolableConnectionFactory = - new PoolableConnectionFactory(connectionFactory, null); - final ObjectPool connectionPool = - new GenericObjectPool<>(poolableConnectionFactory); - - poolableConnectionFactory.setPool(connectionPool); - // PoolingDataSource takes ownership of `connectionPool` - dataSource = new PoolingDataSource<>(connectionPool); - - preparedStatementLoadingCache = - CacheBuilder.newBuilder() - .maximumSize(100) - .expireAfterWrite(10, TimeUnit.MINUTES) - .removalListener(new StatementRemovalListener()) - .build(); - - statementLoadingCache = - CacheBuilder.newBuilder() - .maximumSize(100) - .expireAfterWrite(10, TimeUnit.MINUTES) - .removalListener(new StatementRemovalListener<>()) - .build(); - - this.location = location; - - sqlInfoBuilder = new SqlInfoBuilder(); - try (final Connection connection = dataSource.getConnection()) { - final DatabaseMetaData metaData = connection.getMetaData(); - - sqlInfoBuilder - .withFlightSqlServerName(metaData.getDatabaseProductName()) - .withFlightSqlServerVersion(metaData.getDatabaseProductVersion()) - .withFlightSqlServerArrowVersion(metaData.getDriverVersion()) - .withFlightSqlServerReadOnly(metaData.isReadOnly()) - .withFlightSqlServerSql(true) - .withFlightSqlServerSubstrait(false) - .withFlightSqlServerTransaction(SqlSupportedTransaction.SQL_SUPPORTED_TRANSACTION_NONE) - .withSqlIdentifierQuoteChar(metaData.getIdentifierQuoteString()) - .withSqlDdlCatalog(metaData.supportsCatalogsInDataManipulation()) - .withSqlDdlSchema(metaData.supportsSchemasInDataManipulation()) - .withSqlDdlTable(metaData.allTablesAreSelectable()) - .withSqlIdentifierCase( - 
metaData.storesMixedCaseIdentifiers() - ? SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_CASE_INSENSITIVE - : metaData.storesUpperCaseIdentifiers() - ? SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_UPPERCASE - : metaData.storesLowerCaseIdentifiers() - ? SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_LOWERCASE - : SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_UNKNOWN) - .withSqlQuotedIdentifierCase( - metaData.storesMixedCaseQuotedIdentifiers() - ? SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_CASE_INSENSITIVE - : metaData.storesUpperCaseQuotedIdentifiers() - ? SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_UPPERCASE - : metaData.storesLowerCaseQuotedIdentifiers() - ? SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_LOWERCASE - : SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_UNKNOWN) - .withSqlAllTablesAreSelectable(true) - .withSqlNullOrdering(SqlNullOrdering.SQL_NULLS_SORTED_AT_END) - .withSqlMaxColumnsInTable(42) - .withFlightSqlServerBulkIngestion(true) - .withFlightSqlServerBulkIngestionTransaction(false); - } catch (SQLException e) { - throw new RuntimeException(e); - } - } - - public static boolean removeDerbyDatabaseIfExists(final String dbName) { - final Path path = Paths.get("target" + File.separator + dbName); - - try (final Stream walk = Files.walk(path)) { - /* - * Iterate over all paths to delete, mapping each path to the outcome of its own - * deletion as a boolean representing whether each individual operation was - * successful; then reduce all booleans into a single answer. - * If for whatever reason the resulting `Stream` is empty, throw an `IOException`; - * this not expected. 
- */ - boolean unused = - walk.sorted(Comparator.reverseOrder()) - .map(Path::toFile) - .map(File::delete) - .reduce(Boolean::logicalAnd) - .orElseThrow(IOException::new); - } catch (NoSuchFileException e) { - /* - * The only acceptable scenario for an `IOException` to be thrown here is if - * an attempt to delete an non-existing file takes place -- which should be - * alright, since they would be deleted anyway. - */ - LOGGER.error(format("No existing Derby database to delete.: <%s>", e.getMessage()), e); - return true; - } catch (Exception e) { - LOGGER.error(format("Failed attempt to clear DerbyDB.: <%s>", e.getMessage()), e); - return false; - } - return true; - } - - private static boolean populateDerbyDatabase(final String dbName) { - try (final Connection connection = - DriverManager.getConnection("jdbc:derby:target/" + dbName + ";create=true"); - Statement statement = connection.createStatement()) { - - dropTable(statement, "intTable"); - dropTable(statement, "foreignTable"); - statement.execute( - "CREATE TABLE foreignTable (" - + "id INT not null primary key GENERATED ALWAYS AS IDENTITY (START WITH 1, INCREMENT BY 1), " - + "foreignName varchar(100), " - + "value int)"); - statement.execute( - "CREATE TABLE intTable (" - + "id INT not null primary key GENERATED ALWAYS AS IDENTITY (START WITH 1, INCREMENT BY 1), " - + "keyName varchar(100), " - + "value int, " - + "foreignId int references foreignTable(id))"); - statement.execute("INSERT INTO foreignTable (foreignName, value) VALUES ('keyOne', 1)"); - statement.execute("INSERT INTO foreignTable (foreignName, value) VALUES ('keyTwo', 0)"); - statement.execute("INSERT INTO foreignTable (foreignName, value) VALUES ('keyThree', -1)"); - statement.execute("INSERT INTO intTable (keyName, value, foreignId) VALUES ('one', 1, 1)"); - statement.execute("INSERT INTO intTable (keyName, value, foreignId) VALUES ('zero', 0, 1)"); - statement.execute( - "INSERT INTO intTable (keyName, value, foreignId) VALUES ('negative 
one', -1, 1)"); - } catch (final SQLException e) { - LOGGER.error(format("Failed attempt to populate DerbyDB: <%s>", e.getMessage()), e); - return false; - } - return true; - } - - private static void dropTable(final Statement statement, final String tableName) - throws SQLException { - try { - statement.execute("DROP TABLE " + tableName); - } catch (SQLException e) { - // sql error code for "object does not exist"; which is fine, we're trying to delete the table - // see https://db.apache.org/derby/docs/10.17/ref/rrefexcept71493.html - if (!"42Y55".equals(e.getSQLState())) { - throw e; - } - } - } - - private static ArrowType getArrowTypeFromJdbcType( - final int jdbcDataType, final int precision, final int scale) { - try { - return JdbcToArrowUtils.getArrowTypeFromJdbcType( - new JdbcFieldInfo(jdbcDataType, precision, scale), DEFAULT_CALENDAR); - } catch (UnsupportedOperationException ignored) { - return ArrowType.Utf8.INSTANCE; - } - } - - private static void saveToVector(final Byte data, final UInt1Vector vector, final int index) { - vectorConsumer( - data, - vector, - fieldVector -> fieldVector.setNull(index), - (theData, fieldVector) -> fieldVector.setSafe(index, theData)); - } - - private static void saveToVector(final Byte data, final BitVector vector, final int index) { - vectorConsumer( - data, - vector, - fieldVector -> fieldVector.setNull(index), - (theData, fieldVector) -> fieldVector.setSafe(index, theData)); - } - - private static void saveToVector(final String data, final VarCharVector vector, final int index) { - preconditionCheckSaveToVector(vector, index); - vectorConsumer( - data, - vector, - fieldVector -> fieldVector.setNull(index), - (theData, fieldVector) -> fieldVector.setSafe(index, new Text(theData))); - } - - private static void saveToVector(final Integer data, final IntVector vector, final int index) { - preconditionCheckSaveToVector(vector, index); - vectorConsumer( - data, - vector, - fieldVector -> fieldVector.setNull(index), - 
(theData, fieldVector) -> fieldVector.setSafe(index, theData)); - } - - private static void saveToVector( - final byte[] data, final VarBinaryVector vector, final int index) { - preconditionCheckSaveToVector(vector, index); - vectorConsumer( - data, - vector, - fieldVector -> fieldVector.setNull(index), - (theData, fieldVector) -> fieldVector.setSafe(index, theData)); - } - - private static void preconditionCheckSaveToVector(final FieldVector vector, final int index) { - Objects.requireNonNull(vector, "vector cannot be null."); - checkState(index >= 0, "Index must be a positive number!"); - } - - private static void vectorConsumer( - final T data, - final V vector, - final Consumer consumerIfNullable, - final BiConsumer defaultConsumer) { - if (isNull(data)) { - consumerIfNullable.accept(vector); - return; - } - defaultConsumer.accept(data, vector); - } - - private static VectorSchemaRoot getSchemasRoot( - final ResultSet data, final BufferAllocator allocator) throws SQLException { - final VarCharVector catalogs = new VarCharVector("catalog_name", allocator); - final VarCharVector schemas = - new VarCharVector( - "db_schema_name", FieldType.notNullable(MinorType.VARCHAR.getType()), allocator); - final List vectors = ImmutableList.of(catalogs, schemas); - vectors.forEach(FieldVector::allocateNew); - final Map vectorToColumnName = - ImmutableMap.of( - catalogs, "TABLE_CATALOG", - schemas, "TABLE_SCHEM"); - saveToVectors(vectorToColumnName, data); - final int rows = - vectors.stream() - .map(FieldVector::getValueCount) - .findAny() - .orElseThrow(IllegalStateException::new); - vectors.forEach(vector -> vector.setValueCount(rows)); - return new VectorSchemaRoot(vectors); - } - - private static int saveToVectors( - final Map vectorToColumnName, final ResultSet data, boolean emptyToNull) - throws SQLException { - Predicate alwaysTrue = (resultSet) -> true; - return saveToVectors(vectorToColumnName, data, emptyToNull, alwaysTrue); - } - - 
@SuppressWarnings("StringSplitter") - private static int saveToVectors( - final Map vectorToColumnName, - final ResultSet data, - boolean emptyToNull, - Predicate resultSetPredicate) - throws SQLException { - Objects.requireNonNull(vectorToColumnName, "vectorToColumnName cannot be null."); - Objects.requireNonNull(data, "data cannot be null."); - final Set> entrySet = vectorToColumnName.entrySet(); - int rows = 0; - - while (data.next()) { - if (!resultSetPredicate.test(data)) { - continue; - } - for (final Entry vectorToColumn : entrySet) { - final T vector = vectorToColumn.getKey(); - final String columnName = vectorToColumn.getValue(); - if (vector instanceof VarCharVector) { - String thisData = data.getString(columnName); - saveToVector( - emptyToNull ? emptyToNull(thisData) : thisData, (VarCharVector) vector, rows); - } else if (vector instanceof IntVector) { - final int intValue = data.getInt(columnName); - saveToVector(data.wasNull() ? null : intValue, (IntVector) vector, rows); - } else if (vector instanceof UInt1Vector) { - final byte byteValue = data.getByte(columnName); - saveToVector(data.wasNull() ? null : byteValue, (UInt1Vector) vector, rows); - } else if (vector instanceof BitVector) { - final byte byteValue = data.getByte(columnName); - saveToVector(data.wasNull() ? 
null : byteValue, (BitVector) vector, rows); - } else if (vector instanceof ListVector) { - String createParamsValues = data.getString(columnName); - - UnionListWriter writer = ((ListVector) vector).getWriter(); - - BufferAllocator allocator = vector.getAllocator(); - final ArrowBuf buf = allocator.buffer(1024); - - writer.setPosition(rows); - writer.startList(); - - if (createParamsValues != null) { - String[] split = createParamsValues.split(","); - - range(0, split.length) - .forEach( - i -> { - byte[] bytes = split[i].getBytes(StandardCharsets.UTF_8); - Preconditions.checkState( - bytes.length < 1024, - "The amount of bytes is greater than what the ArrowBuf supports"); - buf.setBytes(0, bytes); - writer.varChar().writeVarChar(0, bytes.length, buf); - }); - } - buf.close(); - writer.endList(); - } else { - throw CallStatus.INVALID_ARGUMENT - .withDescription("Provided vector not supported") - .toRuntimeException(); - } - } - rows++; - } - for (final Entry vectorToColumn : entrySet) { - vectorToColumn.getKey().setValueCount(rows); - } - - return rows; - } - - private static void saveToVectors( - final Map vectorToColumnName, final ResultSet data) throws SQLException { - saveToVectors(vectorToColumnName, data, false); - } - - private static VectorSchemaRoot getTableTypesRoot( - final ResultSet data, final BufferAllocator allocator) throws SQLException { - return getRoot(data, allocator, "table_type", "TABLE_TYPE"); - } - - private static VectorSchemaRoot getCatalogsRoot( - final ResultSet data, final BufferAllocator allocator) throws SQLException { - return getRoot(data, allocator, "catalog_name", "TABLE_CATALOG"); - } - - private static VectorSchemaRoot getRoot( - final ResultSet data, - final BufferAllocator allocator, - final String fieldVectorName, - final String columnName) - throws SQLException { - final VarCharVector dataVector = - new VarCharVector( - fieldVectorName, FieldType.notNullable(MinorType.VARCHAR.getType()), allocator); - 
saveToVectors(ImmutableMap.of(dataVector, columnName), data); - final int rows = dataVector.getValueCount(); - dataVector.setValueCount(rows); - return new VectorSchemaRoot(singletonList(dataVector)); - } - - private static VectorSchemaRoot getTypeInfoRoot( - CommandGetXdbcTypeInfo request, ResultSet typeInfo, final BufferAllocator allocator) - throws SQLException { - Preconditions.checkNotNull(allocator, "BufferAllocator cannot be null."); - - VectorSchemaRoot root = VectorSchemaRoot.create(Schemas.GET_TYPE_INFO_SCHEMA, allocator); - - Map mapper = new HashMap<>(); - mapper.put(root.getVector("type_name"), "TYPE_NAME"); - mapper.put(root.getVector("data_type"), "DATA_TYPE"); - mapper.put(root.getVector("column_size"), "PRECISION"); - mapper.put(root.getVector("literal_prefix"), "LITERAL_PREFIX"); - mapper.put(root.getVector("literal_suffix"), "LITERAL_SUFFIX"); - mapper.put(root.getVector("create_params"), "CREATE_PARAMS"); - mapper.put(root.getVector("nullable"), "NULLABLE"); - mapper.put(root.getVector("case_sensitive"), "CASE_SENSITIVE"); - mapper.put(root.getVector("searchable"), "SEARCHABLE"); - mapper.put(root.getVector("unsigned_attribute"), "UNSIGNED_ATTRIBUTE"); - mapper.put(root.getVector("fixed_prec_scale"), "FIXED_PREC_SCALE"); - mapper.put(root.getVector("auto_increment"), "AUTO_INCREMENT"); - mapper.put(root.getVector("local_type_name"), "LOCAL_TYPE_NAME"); - mapper.put(root.getVector("minimum_scale"), "MINIMUM_SCALE"); - mapper.put(root.getVector("maximum_scale"), "MAXIMUM_SCALE"); - mapper.put(root.getVector("sql_data_type"), "SQL_DATA_TYPE"); - mapper.put(root.getVector("datetime_subcode"), "SQL_DATETIME_SUB"); - mapper.put(root.getVector("num_prec_radix"), "NUM_PREC_RADIX"); - - Predicate predicate; - if (request.hasDataType()) { - predicate = - (resultSet) -> { - try { - return resultSet.getInt("DATA_TYPE") == request.getDataType(); - } catch (SQLException e) { - throw new RuntimeException(e); - } - }; - } else { - predicate = resultSet -> true; 
- } - - int rows = saveToVectors(mapper, typeInfo, true, predicate); - - root.setRowCount(rows); - return root; - } - - private static VectorSchemaRoot getTablesRoot( - final DatabaseMetaData databaseMetaData, - final BufferAllocator allocator, - final boolean includeSchema, - final String catalog, - final String schemaFilterPattern, - final String tableFilterPattern, - final String... tableTypes) - throws SQLException, IOException { - /* - * TODO Fix DerbyDB inconsistency if possible. - * During the early development of this prototype, an inconsistency has been found in the database - * used for this demonstration; as DerbyDB does not operate with the concept of catalogs, fetching - * the catalog name for a given table from `DatabaseMetadata#getColumns` and `DatabaseMetadata#getSchemas` - * returns null, as expected. However, the inconsistency lies in the fact that accessing the same - * information -- that is, the catalog name for a given table -- from `DatabaseMetadata#getSchemas` - * returns an empty String.The temporary workaround for this was making sure we convert the empty Strings - * to null using `com.google.common.base.Strings#emptyToNull`. 
- */ - Objects.requireNonNull(allocator, "BufferAllocator cannot be null."); - final VarCharVector catalogNameVector = new VarCharVector("catalog_name", allocator); - final VarCharVector schemaNameVector = new VarCharVector("db_schema_name", allocator); - final VarCharVector tableNameVector = - new VarCharVector( - "table_name", FieldType.notNullable(MinorType.VARCHAR.getType()), allocator); - final VarCharVector tableTypeVector = - new VarCharVector( - "table_type", FieldType.notNullable(MinorType.VARCHAR.getType()), allocator); - - final List vectors = new ArrayList<>(4); - vectors.add(catalogNameVector); - vectors.add(schemaNameVector); - vectors.add(tableNameVector); - vectors.add(tableTypeVector); - - vectors.forEach(FieldVector::allocateNew); - - final Map vectorToColumnName = - ImmutableMap.of( - catalogNameVector, "TABLE_CAT", - schemaNameVector, "TABLE_SCHEM", - tableNameVector, "TABLE_NAME", - tableTypeVector, "TABLE_TYPE"); - - try (final ResultSet data = - Objects.requireNonNull( - databaseMetaData, - format("%s cannot be null.", databaseMetaData.getClass().getName())) - .getTables(catalog, schemaFilterPattern, tableFilterPattern, tableTypes)) { - - saveToVectors(vectorToColumnName, data, true); - final int rows = - vectors.stream() - .map(FieldVector::getValueCount) - .findAny() - .orElseThrow(IllegalStateException::new); - vectors.forEach(vector -> vector.setValueCount(rows)); - - if (includeSchema) { - final VarBinaryVector tableSchemaVector = - new VarBinaryVector( - "table_schema", FieldType.notNullable(MinorType.VARBINARY.getType()), allocator); - tableSchemaVector.allocateNew(rows); - - try (final ResultSet columnsData = - databaseMetaData.getColumns(catalog, schemaFilterPattern, tableFilterPattern, null)) { - final Map> tableToFields = new HashMap<>(); - - while (columnsData.next()) { - final String catalogName = columnsData.getString("TABLE_CAT"); - final String schemaName = columnsData.getString("TABLE_SCHEM"); - final String tableName = 
columnsData.getString("TABLE_NAME"); - final String typeName = columnsData.getString("TYPE_NAME"); - final String fieldName = columnsData.getString("COLUMN_NAME"); - final int dataType = columnsData.getInt("DATA_TYPE"); - final boolean isNullable = - columnsData.getInt("NULLABLE") != DatabaseMetaData.columnNoNulls; - final int precision = columnsData.getInt("COLUMN_SIZE"); - final int scale = columnsData.getInt("DECIMAL_DIGITS"); - boolean isAutoIncrement = - Objects.equals(columnsData.getString("IS_AUTOINCREMENT"), "YES"); - - final List fields = - tableToFields.computeIfAbsent(tableName, tableName_ -> new ArrayList<>()); - - final FlightSqlColumnMetadata columnMetadata = - new FlightSqlColumnMetadata.Builder() - .catalogName(catalogName) - .schemaName(schemaName) - .tableName(tableName) - .typeName(typeName) - .precision(precision) - .scale(scale) - .isAutoIncrement(isAutoIncrement) - .build(); - - final Field field = - new Field( - fieldName, - new FieldType( - isNullable, - getArrowTypeFromJdbcType(dataType, precision, scale), - null, - columnMetadata.getMetadataMap()), - null); - fields.add(field); - } - - for (int index = 0; index < rows; index++) { - final String tableName = tableNameVector.getObject(index).toString(); - final Schema schema = new Schema(tableToFields.get(tableName)); - saveToVector( - copyFrom(serializeMetadata(schema)).toByteArray(), tableSchemaVector, index); - } - } - - tableSchemaVector.setValueCount(rows); - vectors.add(tableSchemaVector); - } - } - - return new VectorSchemaRoot(vectors); - } - - private static ByteBuffer serializeMetadata(final Schema schema) { - final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - try { - MessageSerializer.serialize(new WriteChannel(Channels.newChannel(outputStream)), schema); - - return ByteBuffer.wrap(outputStream.toByteArray()); - } catch (final IOException e) { - throw new RuntimeException("Failed to serialize schema", e); - } - } - - private static String 
getRootAsCSVNoHeader(final VectorSchemaRoot root) { - StringBuilder sb = new StringBuilder(); - Schema schema = root.getSchema(); - int rowCount = root.getRowCount(); - List fieldVectors = root.getFieldVectors(); - - List row = new ArrayList<>(schema.getFields().size()); - for (int i = 0; i < rowCount; i++) { - if (i > 0) { - sb.append("\n"); - } - row.clear(); - for (FieldVector v : fieldVectors) { - row.add(v.getObject(i)); - } - printRowAsCSV(sb, row); - } - return sb.toString(); - } - - private static void printRowAsCSV(StringBuilder sb, List values) { - sb.append( - values.stream() - .map(v -> isNull(v) ? "" : v.toString()) - .map(StringEscapeUtils::escapeCsv) - .collect(Collectors.joining(","))); - } - - @Override - public void getStreamPreparedStatement( - final CommandPreparedStatementQuery command, - final CallContext context, - final ServerStreamListener listener) { - final ByteString handle = command.getPreparedStatementHandle(); - StatementContext statementContext = - preparedStatementLoadingCache.getIfPresent(handle); - Objects.requireNonNull(statementContext); - final PreparedStatement statement = statementContext.getStatement(); - try (final ResultSet resultSet = statement.executeQuery()) { - final Schema schema = jdbcToArrowSchema(resultSet.getMetaData(), DEFAULT_CALENDAR); - try (final VectorSchemaRoot vectorSchemaRoot = - VectorSchemaRoot.create(schema, rootAllocator)) { - final VectorLoader loader = new VectorLoader(vectorSchemaRoot); - listener.start(vectorSchemaRoot); - - final ArrowVectorIterator iterator = sqlToArrowVectorIterator(resultSet, rootAllocator); - while (iterator.hasNext()) { - final VectorSchemaRoot batch = iterator.next(); - if (batch.getRowCount() == 0) { - break; - } - final VectorUnloader unloader = new VectorUnloader(batch); - loader.load(unloader.getRecordBatch()); - listener.putNext(); - vectorSchemaRoot.clear(); - } - - listener.putNext(); - } - } catch (final SQLException | IOException e) { - LOGGER.error(format("Failed 
to getStreamPreparedStatement: <%s>.", e.getMessage()), e); - listener.error( - CallStatus.INTERNAL - .withDescription("Failed to prepare statement: " + e) - .toRuntimeException()); - } finally { - listener.completed(); - } - } - - @Override - public void closePreparedStatement( - final ActionClosePreparedStatementRequest request, - final CallContext context, - final StreamListener listener) { - // Running on another thread - Future unused = - executorService.submit( - () -> { - try { - preparedStatementLoadingCache.invalidate(request.getPreparedStatementHandle()); - } catch (final Exception e) { - listener.onError(e); - return; - } - listener.onCompleted(); - }); - } - - @Override - public FlightInfo getFlightInfoStatement( - final CommandStatementQuery request, - final CallContext context, - final FlightDescriptor descriptor) { - ByteString handle = copyFrom(randomUUID().toString().getBytes(StandardCharsets.UTF_8)); - - try { - // Ownership of the connection will be passed to the context. Do NOT close! 
- final Connection connection = dataSource.getConnection(); - final Statement statement = - connection.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY); - final String query = request.getQuery(); - final StatementContext statementContext = new StatementContext<>(statement, query); - - statementLoadingCache.put(handle, statementContext); - final ResultSet resultSet = statement.executeQuery(query); - - TicketStatementQuery ticket = - TicketStatementQuery.newBuilder().setStatementHandle(handle).build(); - return getFlightInfoForSchema( - ticket, descriptor, jdbcToArrowSchema(resultSet.getMetaData(), DEFAULT_CALENDAR)); - } catch (final SQLException e) { - LOGGER.error( - format("There was a problem executing the prepared statement: <%s>.", e.getMessage()), e); - throw CallStatus.INTERNAL.withCause(e).toRuntimeException(); - } - } - - @Override - public FlightInfo getFlightInfoPreparedStatement( - final CommandPreparedStatementQuery command, - final CallContext context, - final FlightDescriptor descriptor) { - final ByteString preparedStatementHandle = command.getPreparedStatementHandle(); - StatementContext statementContext = - preparedStatementLoadingCache.getIfPresent(preparedStatementHandle); - try { - assert statementContext != null; - PreparedStatement statement = statementContext.getStatement(); - - ResultSetMetaData metaData = statement.getMetaData(); - return getFlightInfoForSchema( - command, descriptor, jdbcToArrowSchema(metaData, DEFAULT_CALENDAR)); - } catch (final SQLException e) { - LOGGER.error( - format("There was a problem executing the prepared statement: <%s>.", e.getMessage()), e); - throw CallStatus.INTERNAL.withCause(e).toRuntimeException(); - } - } - - @Override - public SchemaResult getSchemaStatement( - final CommandStatementQuery command, - final CallContext context, - final FlightDescriptor descriptor) { - throw CallStatus.UNIMPLEMENTED.toRuntimeException(); - } - - @Override - public void close() throws 
Exception { - try { - preparedStatementLoadingCache.cleanUp(); - } catch (Throwable t) { - LOGGER.error(format("Failed to close resources: <%s>", t.getMessage()), t); - } - - AutoCloseables.close(dataSource, rootAllocator); - } - - @Override - public void listFlights( - CallContext context, Criteria criteria, StreamListener listener) { - // TODO - build example implementation - throw CallStatus.UNIMPLEMENTED.toRuntimeException(); - } - - @Override - public void createPreparedStatement( - final ActionCreatePreparedStatementRequest request, - final CallContext context, - final StreamListener listener) { - // Running on another thread - Future unused = - executorService.submit( - () -> { - try { - final ByteString preparedStatementHandle = - copyFrom(request.getQuery().getBytes(StandardCharsets.UTF_8)); - // Ownership of the connection will be passed to the context. Do NOT close! - final Connection connection = dataSource.getConnection(); - final PreparedStatement preparedStatement = - connection.prepareStatement( - request.getQuery(), - ResultSet.TYPE_SCROLL_INSENSITIVE, - ResultSet.CONCUR_READ_ONLY); - final StatementContext preparedStatementContext = - new StatementContext<>(preparedStatement, request.getQuery()); - - preparedStatementLoadingCache.put( - preparedStatementHandle, preparedStatementContext); - - final Schema parameterSchema = - jdbcToArrowSchema(preparedStatement.getParameterMetaData(), DEFAULT_CALENDAR); - - final ResultSetMetaData metaData = preparedStatement.getMetaData(); - final ByteString bytes = - isNull(metaData) - ? 
ByteString.EMPTY - : ByteString.copyFrom( - serializeMetadata(jdbcToArrowSchema(metaData, DEFAULT_CALENDAR))); - final ActionCreatePreparedStatementResult result = - ActionCreatePreparedStatementResult.newBuilder() - .setDatasetSchema(bytes) - .setParameterSchema(copyFrom(serializeMetadata(parameterSchema))) - .setPreparedStatementHandle(preparedStatementHandle) - .build(); - listener.onNext(new Result(pack(result).toByteArray())); - } catch (final SQLException e) { - listener.onError( - CallStatus.INTERNAL - .withDescription("Failed to create prepared statement: " + e) - .toRuntimeException()); - return; - } catch (final Throwable t) { - listener.onError( - CallStatus.INTERNAL - .withDescription("Unknown error: " + t) - .toRuntimeException()); - return; - } - listener.onCompleted(); - }); - } - - @Override - public void doExchange(CallContext context, FlightStream reader, ServerStreamListener writer) { - // TODO - build example implementation - throw CallStatus.UNIMPLEMENTED.toRuntimeException(); - } - - @Override - public Runnable acceptPutStatement( - CommandStatementUpdate command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - final String query = command.getQuery(); - - return () -> { - try (final Connection connection = dataSource.getConnection(); - final Statement statement = connection.createStatement()) { - final int result = statement.executeUpdate(query); - - final DoPutUpdateResult build = - DoPutUpdateResult.newBuilder().setRecordCount(result).build(); - - try (final ArrowBuf buffer = rootAllocator.buffer(build.getSerializedSize())) { - buffer.writeBytes(build.toByteArray()); - ackStream.onNext(PutResult.metadata(buffer)); - ackStream.onCompleted(); - } - } catch (SQLSyntaxErrorException e) { - ackStream.onError( - CallStatus.INVALID_ARGUMENT - .withDescription("Failed to execute statement (invalid syntax): " + e) - .toRuntimeException()); - } catch (SQLException e) { - ackStream.onError( - CallStatus.INTERNAL - 
.withDescription("Failed to execute statement: " + e) - .toRuntimeException()); - } - }; - } - - @Override - public Runnable acceptPutStatementBulkIngest( - CommandStatementIngest command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - - final String schema = command.hasSchema() ? command.getSchema() : null; - final String table = command.getTable(); - final boolean temporary = command.getTemporary(); - final boolean transactionId = command.hasTransactionId(); - final TableDefinitionOptions tableDefinitionOptions = - command.hasTableDefinitionOptions() ? command.getTableDefinitionOptions() : null; - - return () -> { - TableExistsOption ifExists = TableExistsOption.TABLE_EXISTS_OPTION_APPEND; - if (temporary) { - ackStream.onError( - CallStatus.UNIMPLEMENTED - .withDescription("Bulk ingestion using temporary tables is not supported") - .toRuntimeException()); - } else if (transactionId) { - ackStream.onError( - CallStatus.UNIMPLEMENTED - .withDescription( - "Bulk ingestion automatically happens in a transaction. 
Specifying explicit transaction is not supported.") - .toRuntimeException()); - } else if (isNull(tableDefinitionOptions)) { - ackStream.onError( - CallStatus.INVALID_ARGUMENT - .withDescription("TableDefinitionOptions not provided.") - .toRuntimeException()); - } else { - TableNotExistOption ifNotExist = tableDefinitionOptions.getIfNotExist(); - ifExists = tableDefinitionOptions.getIfExists(); - - if (!TableNotExistOption.TABLE_NOT_EXIST_OPTION_FAIL.equals(ifNotExist)) { - ackStream.onError( - CallStatus.UNIMPLEMENTED - .withDescription( - "Only supported option is TABLE_NOT_EXIST_OPTION_FAIL for TableNotExistsOption.") - .toRuntimeException()); - } else if (TableExistsOption.TABLE_EXISTS_OPTION_UNSPECIFIED.equals(ifExists)) { - ackStream.onError( - CallStatus.INVALID_ARGUMENT - .withDescription("TableExistsOption must be specified") - .toRuntimeException()); - } else if (TableExistsOption.TABLE_EXISTS_OPTION_FAIL.equals(ifExists)) { - ackStream.onError( - CallStatus.UNIMPLEMENTED - .withDescription("TABLE_EXISTS_OPTION_FAIL is not supported.") - .toRuntimeException()); - } - } - - Path tempFile = null; - try { - tempFile = Files.createTempFile(null, null); - - VectorSchemaRoot root = null; - int counter = 0; - while (flightStream.next()) { - if (counter > 0) { - Files.writeString(tempFile, "\n", StandardCharsets.UTF_8, StandardOpenOption.APPEND); - } - counter += 1; - root = flightStream.getRoot(); - Files.writeString( - tempFile, - getRootAsCSVNoHeader(root), - StandardCharsets.UTF_8, - StandardOpenOption.APPEND); - } - - if (counter > 0) { - Files.writeString(tempFile, "\n", StandardCharsets.UTF_8, StandardOpenOption.APPEND); - } - - if (!isNull(root)) { - String header = - root.getSchema().getFields().stream() - .map(Field::getName) - .collect(Collectors.joining(",")); - - try (final Connection connection = dataSource.getConnection(); - final PreparedStatement preparedStatement = - connection.prepareStatement( - "CALL SYSCS_UTIL.SYSCS_IMPORT_DATA 
(?,?,?,null,?,?,?,?,?)")) { - - preparedStatement.setString(1, schema); - preparedStatement.setString(2, table); - preparedStatement.setString(3, header); - preparedStatement.setString(4, tempFile.toString()); - preparedStatement.setString(5, ","); - preparedStatement.setString(6, "\""); - preparedStatement.setString(7, "UTF-8"); - preparedStatement.setInt( - 8, TableExistsOption.TABLE_EXISTS_OPTION_REPLACE.equals(ifExists) ? 1 : 0); - preparedStatement.execute(); - - final DoPutUpdateResult build = - DoPutUpdateResult.newBuilder().setRecordCount(-1).build(); - - try (final ArrowBuf buffer = rootAllocator.buffer(build.getSerializedSize())) { - buffer.writeBytes(build.toByteArray()); - ackStream.onNext(PutResult.metadata(buffer)); - ackStream.onCompleted(); - } - } catch (SQLException e) { - ackStream.onError( - CallStatus.INTERNAL - .withDescription("Failed to execute bulk ingest: " + e) - .toRuntimeException()); - } - } - } catch (IOException e) { - ackStream.onError( - CallStatus.INTERNAL - .withDescription("Failed to create temp file for bulk loading: " + e) - .toRuntimeException()); - } finally { - if (!isNull(tempFile)) { - try { - Files.delete(tempFile); - } catch (IOException e) { - // - } - } - } - }; - } - - @Override - public Runnable acceptPutPreparedStatementUpdate( - CommandPreparedStatementUpdate command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - final StatementContext statement = - preparedStatementLoadingCache.getIfPresent(command.getPreparedStatementHandle()); - - return () -> { - if (statement == null) { - ackStream.onError( - CallStatus.NOT_FOUND - .withDescription("Prepared statement does not exist") - .toRuntimeException()); - return; - } - try { - final PreparedStatement preparedStatement = statement.getStatement(); - - while (flightStream.next()) { - final VectorSchemaRoot root = flightStream.getRoot(); - - final int rowCount = root.getRowCount(); - final int recordCount; - - if (rowCount == 0) { - 
preparedStatement.execute(); - recordCount = preparedStatement.getUpdateCount(); - } else { - final JdbcParameterBinder binder = - JdbcParameterBinder.builder(preparedStatement, root).bindAll().build(); - while (binder.next()) { - preparedStatement.addBatch(); - } - final int[] recordCounts = preparedStatement.executeBatch(); - recordCount = Arrays.stream(recordCounts).sum(); - } - - final DoPutUpdateResult build = - DoPutUpdateResult.newBuilder().setRecordCount(recordCount).build(); - - try (final ArrowBuf buffer = rootAllocator.buffer(build.getSerializedSize())) { - buffer.writeBytes(build.toByteArray()); - ackStream.onNext(PutResult.metadata(buffer)); - } - } - } catch (SQLException e) { - ackStream.onError( - CallStatus.INTERNAL - .withDescription("Failed to execute update: " + e) - .toRuntimeException()); - return; - } - ackStream.onCompleted(); - }; - } - - @Override - public Runnable acceptPutPreparedStatementQuery( - CommandPreparedStatementQuery command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - final StatementContext statementContext = - preparedStatementLoadingCache.getIfPresent(command.getPreparedStatementHandle()); - - return () -> { - assert statementContext != null; - PreparedStatement preparedStatement = statementContext.getStatement(); - - try { - while (flightStream.next()) { - final VectorSchemaRoot root = flightStream.getRoot(); - final JdbcParameterBinder binder = - JdbcParameterBinder.builder(preparedStatement, root).bindAll().build(); - while (binder.next()) { - // Do not execute() - will be done in a getStream call - } - } - - } catch (SQLException e) { - ackStream.onError( - CallStatus.INTERNAL - .withDescription("Failed to bind parameters: " + e.getMessage()) - .withCause(e) - .toRuntimeException()); - return; - } - - ackStream.onCompleted(); - }; - } - - @Override - public FlightInfo getFlightInfoSqlInfo( - final CommandGetSqlInfo request, - final CallContext context, - final FlightDescriptor 
descriptor) { - return getFlightInfoForSchema(request, descriptor, Schemas.GET_SQL_INFO_SCHEMA); - } - - @Override - public void getStreamSqlInfo( - final CommandGetSqlInfo command, - final CallContext context, - final ServerStreamListener listener) { - this.sqlInfoBuilder.send(command.getInfoList(), listener); - } - - @Override - public FlightInfo getFlightInfoTypeInfo( - CommandGetXdbcTypeInfo request, CallContext context, FlightDescriptor descriptor) { - return getFlightInfoForSchema(request, descriptor, Schemas.GET_TYPE_INFO_SCHEMA); - } - - @Override - public void getStreamTypeInfo( - CommandGetXdbcTypeInfo request, CallContext context, ServerStreamListener listener) { - try (final Connection connection = dataSource.getConnection(); - final ResultSet typeInfo = connection.getMetaData().getTypeInfo(); - final VectorSchemaRoot vectorSchemaRoot = - getTypeInfoRoot(request, typeInfo, rootAllocator)) { - listener.start(vectorSchemaRoot); - listener.putNext(); - } catch (SQLException e) { - LOGGER.error(format("Failed to getStreamCatalogs: <%s>.", e.getMessage()), e); - listener.error(e); - } finally { - listener.completed(); - } - } - - @Override - public FlightInfo getFlightInfoCatalogs( - final CommandGetCatalogs request, - final CallContext context, - final FlightDescriptor descriptor) { - return getFlightInfoForSchema(request, descriptor, Schemas.GET_CATALOGS_SCHEMA); - } - - @Override - public void getStreamCatalogs(final CallContext context, final ServerStreamListener listener) { - try (final Connection connection = dataSource.getConnection(); - final ResultSet catalogs = connection.getMetaData().getCatalogs(); - final VectorSchemaRoot vectorSchemaRoot = getCatalogsRoot(catalogs, rootAllocator)) { - listener.start(vectorSchemaRoot); - listener.putNext(); - } catch (SQLException e) { - LOGGER.error(format("Failed to getStreamCatalogs: <%s>.", e.getMessage()), e); - listener.error(e); - } finally { - listener.completed(); - } - } - - @Override - public 
FlightInfo getFlightInfoSchemas( - final CommandGetDbSchemas request, - final CallContext context, - final FlightDescriptor descriptor) { - return getFlightInfoForSchema(request, descriptor, Schemas.GET_SCHEMAS_SCHEMA); - } - - @Override - public void getStreamSchemas( - final CommandGetDbSchemas command, - final CallContext context, - final ServerStreamListener listener) { - final String catalog = command.hasCatalog() ? command.getCatalog() : null; - final String schemaFilterPattern = - command.hasDbSchemaFilterPattern() ? command.getDbSchemaFilterPattern() : null; - try (final Connection connection = dataSource.getConnection(); - final ResultSet schemas = - connection.getMetaData().getSchemas(catalog, schemaFilterPattern); - final VectorSchemaRoot vectorSchemaRoot = getSchemasRoot(schemas, rootAllocator)) { - listener.start(vectorSchemaRoot); - listener.putNext(); - } catch (SQLException e) { - LOGGER.error(format("Failed to getStreamSchemas: <%s>.", e.getMessage()), e); - listener.error(e); - } finally { - listener.completed(); - } - } - - @Override - public FlightInfo getFlightInfoTables( - final CommandGetTables request, - final CallContext context, - final FlightDescriptor descriptor) { - Schema schemaToUse = Schemas.GET_TABLES_SCHEMA; - if (!request.getIncludeSchema()) { - schemaToUse = Schemas.GET_TABLES_SCHEMA_NO_SCHEMA; - } - return getFlightInfoForSchema(request, descriptor, schemaToUse); - } - - @Override - public void getStreamTables( - final CommandGetTables command, - final CallContext context, - final ServerStreamListener listener) { - final String catalog = command.hasCatalog() ? command.getCatalog() : null; - final String schemaFilterPattern = - command.hasDbSchemaFilterPattern() ? command.getDbSchemaFilterPattern() : null; - final String tableFilterPattern = - command.hasTableNameFilterPattern() ? 
command.getTableNameFilterPattern() : null; - - final ProtocolStringList protocolStringList = command.getTableTypesList(); - final int protocolSize = protocolStringList.size(); - final String[] tableTypes = - protocolSize == 0 ? null : protocolStringList.toArray(new String[protocolSize]); - - try (final Connection connection = DriverManager.getConnection(databaseUri); - final VectorSchemaRoot vectorSchemaRoot = - getTablesRoot( - connection.getMetaData(), - rootAllocator, - command.getIncludeSchema(), - catalog, - schemaFilterPattern, - tableFilterPattern, - tableTypes)) { - listener.start(vectorSchemaRoot); - listener.putNext(); - } catch (SQLException | IOException e) { - LOGGER.error(format("Failed to getStreamTables: <%s>.", e.getMessage()), e); - listener.error(e); - } finally { - listener.completed(); - } - } - - @Override - public FlightInfo getFlightInfoTableTypes( - final CommandGetTableTypes request, - final CallContext context, - final FlightDescriptor descriptor) { - return getFlightInfoForSchema(request, descriptor, Schemas.GET_TABLE_TYPES_SCHEMA); - } - - @Override - public void getStreamTableTypes(final CallContext context, final ServerStreamListener listener) { - try (final Connection connection = dataSource.getConnection(); - final ResultSet tableTypes = connection.getMetaData().getTableTypes(); - final VectorSchemaRoot vectorSchemaRoot = getTableTypesRoot(tableTypes, rootAllocator)) { - listener.start(vectorSchemaRoot); - listener.putNext(); - } catch (SQLException e) { - LOGGER.error(format("Failed to getStreamTableTypes: <%s>.", e.getMessage()), e); - listener.error(e); - } finally { - listener.completed(); - } - } - - @Override - public FlightInfo getFlightInfoPrimaryKeys( - final CommandGetPrimaryKeys request, - final CallContext context, - final FlightDescriptor descriptor) { - return getFlightInfoForSchema(request, descriptor, Schemas.GET_PRIMARY_KEYS_SCHEMA); - } - - @Override - public void getStreamPrimaryKeys( - final 
CommandGetPrimaryKeys command, - final CallContext context, - final ServerStreamListener listener) { - - final String catalog = command.hasCatalog() ? command.getCatalog() : null; - final String schema = command.hasDbSchema() ? command.getDbSchema() : null; - final String table = command.getTable(); - - try (Connection connection = DriverManager.getConnection(databaseUri)) { - final ResultSet primaryKeys = connection.getMetaData().getPrimaryKeys(catalog, schema, table); - - final VarCharVector catalogNameVector = new VarCharVector("catalog_name", rootAllocator); - final VarCharVector schemaNameVector = new VarCharVector("db_schema_name", rootAllocator); - final VarCharVector tableNameVector = new VarCharVector("table_name", rootAllocator); - final VarCharVector columnNameVector = new VarCharVector("column_name", rootAllocator); - final IntVector keySequenceVector = new IntVector("key_sequence", rootAllocator); - final VarCharVector keyNameVector = new VarCharVector("key_name", rootAllocator); - - final List vectors = - new ArrayList<>( - ImmutableList.of( - catalogNameVector, - schemaNameVector, - tableNameVector, - columnNameVector, - keySequenceVector, - keyNameVector)); - vectors.forEach(FieldVector::allocateNew); - - int rows = 0; - for (; primaryKeys.next(); rows++) { - saveToVector(primaryKeys.getString("TABLE_CAT"), catalogNameVector, rows); - saveToVector(primaryKeys.getString("TABLE_SCHEM"), schemaNameVector, rows); - saveToVector(primaryKeys.getString("TABLE_NAME"), tableNameVector, rows); - saveToVector(primaryKeys.getString("COLUMN_NAME"), columnNameVector, rows); - final int key_seq = primaryKeys.getInt("KEY_SEQ"); - saveToVector(primaryKeys.wasNull() ? 
null : key_seq, keySequenceVector, rows); - saveToVector(primaryKeys.getString("PK_NAME"), keyNameVector, rows); - } - - try (final VectorSchemaRoot vectorSchemaRoot = new VectorSchemaRoot(vectors)) { - vectorSchemaRoot.setRowCount(rows); - - listener.start(vectorSchemaRoot); - listener.putNext(); - } - } catch (SQLException e) { - listener.error(e); - } finally { - listener.completed(); - } - } - - @Override - public FlightInfo getFlightInfoExportedKeys( - final CommandGetExportedKeys request, - final CallContext context, - final FlightDescriptor descriptor) { - return getFlightInfoForSchema(request, descriptor, Schemas.GET_EXPORTED_KEYS_SCHEMA); - } - - @Override - public void getStreamExportedKeys( - final CommandGetExportedKeys command, - final CallContext context, - final ServerStreamListener listener) { - String catalog = command.hasCatalog() ? command.getCatalog() : null; - String schema = command.hasDbSchema() ? command.getDbSchema() : null; - String table = command.getTable(); - - try (Connection connection = DriverManager.getConnection(databaseUri); - ResultSet keys = connection.getMetaData().getExportedKeys(catalog, schema, table); - VectorSchemaRoot vectorSchemaRoot = createVectors(keys)) { - listener.start(vectorSchemaRoot); - listener.putNext(); - } catch (SQLException e) { - listener.error(e); - } finally { - listener.completed(); - } - } - - @Override - public FlightInfo getFlightInfoImportedKeys( - final CommandGetImportedKeys request, - final CallContext context, - final FlightDescriptor descriptor) { - return getFlightInfoForSchema(request, descriptor, Schemas.GET_IMPORTED_KEYS_SCHEMA); - } - - @Override - public void getStreamImportedKeys( - final CommandGetImportedKeys command, - final CallContext context, - final ServerStreamListener listener) { - String catalog = command.hasCatalog() ? command.getCatalog() : null; - String schema = command.hasDbSchema() ? 
command.getDbSchema() : null; - String table = command.getTable(); - - try (Connection connection = DriverManager.getConnection(databaseUri); - ResultSet keys = connection.getMetaData().getImportedKeys(catalog, schema, table); - VectorSchemaRoot vectorSchemaRoot = createVectors(keys)) { - listener.start(vectorSchemaRoot); - listener.putNext(); - } catch (final SQLException e) { - listener.error(e); - } finally { - listener.completed(); - } - } - - @Override - public FlightInfo getFlightInfoCrossReference( - CommandGetCrossReference request, CallContext context, FlightDescriptor descriptor) { - return getFlightInfoForSchema(request, descriptor, Schemas.GET_CROSS_REFERENCE_SCHEMA); - } - - @Override - public void getStreamCrossReference( - CommandGetCrossReference command, CallContext context, ServerStreamListener listener) { - final String pkCatalog = command.hasPkCatalog() ? command.getPkCatalog() : null; - final String pkSchema = command.hasPkDbSchema() ? command.getPkDbSchema() : null; - final String fkCatalog = command.hasFkCatalog() ? command.getFkCatalog() : null; - final String fkSchema = command.hasFkDbSchema() ? 
command.getFkDbSchema() : null; - final String pkTable = command.getPkTable(); - final String fkTable = command.getFkTable(); - - try (Connection connection = DriverManager.getConnection(databaseUri); - ResultSet keys = - connection - .getMetaData() - .getCrossReference(pkCatalog, pkSchema, pkTable, fkCatalog, fkSchema, fkTable); - VectorSchemaRoot vectorSchemaRoot = createVectors(keys)) { - listener.start(vectorSchemaRoot); - listener.putNext(); - } catch (final SQLException e) { - listener.error(e); - } finally { - listener.completed(); - } - } - - private VectorSchemaRoot createVectors(ResultSet keys) throws SQLException { - final VarCharVector pkCatalogNameVector = new VarCharVector("pk_catalog_name", rootAllocator); - final VarCharVector pkSchemaNameVector = new VarCharVector("pk_db_schema_name", rootAllocator); - final VarCharVector pkTableNameVector = new VarCharVector("pk_table_name", rootAllocator); - final VarCharVector pkColumnNameVector = new VarCharVector("pk_column_name", rootAllocator); - final VarCharVector fkCatalogNameVector = new VarCharVector("fk_catalog_name", rootAllocator); - final VarCharVector fkSchemaNameVector = new VarCharVector("fk_db_schema_name", rootAllocator); - final VarCharVector fkTableNameVector = new VarCharVector("fk_table_name", rootAllocator); - final VarCharVector fkColumnNameVector = new VarCharVector("fk_column_name", rootAllocator); - final IntVector keySequenceVector = new IntVector("key_sequence", rootAllocator); - final VarCharVector fkKeyNameVector = new VarCharVector("fk_key_name", rootAllocator); - final VarCharVector pkKeyNameVector = new VarCharVector("pk_key_name", rootAllocator); - final UInt1Vector updateRuleVector = new UInt1Vector("update_rule", rootAllocator); - final UInt1Vector deleteRuleVector = new UInt1Vector("delete_rule", rootAllocator); - - Map vectorToColumnName = new HashMap<>(); - vectorToColumnName.put(pkCatalogNameVector, "PKTABLE_CAT"); - vectorToColumnName.put(pkSchemaNameVector, 
"PKTABLE_SCHEM"); - vectorToColumnName.put(pkTableNameVector, "PKTABLE_NAME"); - vectorToColumnName.put(pkColumnNameVector, "PKCOLUMN_NAME"); - vectorToColumnName.put(fkCatalogNameVector, "FKTABLE_CAT"); - vectorToColumnName.put(fkSchemaNameVector, "FKTABLE_SCHEM"); - vectorToColumnName.put(fkTableNameVector, "FKTABLE_NAME"); - vectorToColumnName.put(fkColumnNameVector, "FKCOLUMN_NAME"); - vectorToColumnName.put(keySequenceVector, "KEY_SEQ"); - vectorToColumnName.put(updateRuleVector, "UPDATE_RULE"); - vectorToColumnName.put(deleteRuleVector, "DELETE_RULE"); - vectorToColumnName.put(fkKeyNameVector, "FK_NAME"); - vectorToColumnName.put(pkKeyNameVector, "PK_NAME"); - - final VectorSchemaRoot vectorSchemaRoot = - VectorSchemaRoot.of( - pkCatalogNameVector, - pkSchemaNameVector, - pkTableNameVector, - pkColumnNameVector, - fkCatalogNameVector, - fkSchemaNameVector, - fkTableNameVector, - fkColumnNameVector, - keySequenceVector, - fkKeyNameVector, - pkKeyNameVector, - updateRuleVector, - deleteRuleVector); - - vectorSchemaRoot.allocateNew(); - final int rowCount = saveToVectors(vectorToColumnName, keys, true); - - vectorSchemaRoot.setRowCount(rowCount); - - return vectorSchemaRoot; - } - - @Override - public void getStreamStatement( - final TicketStatementQuery ticketStatementQuery, - final CallContext context, - final ServerStreamListener listener) { - final ByteString handle = ticketStatementQuery.getStatementHandle(); - final StatementContext statementContext = - Objects.requireNonNull(statementLoadingCache.getIfPresent(handle)); - try (final ResultSet resultSet = statementContext.getStatement().getResultSet()) { - final Schema schema = jdbcToArrowSchema(resultSet.getMetaData(), DEFAULT_CALENDAR); - try (VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.create(schema, rootAllocator)) { - final VectorLoader loader = new VectorLoader(vectorSchemaRoot); - listener.start(vectorSchemaRoot); - - final ArrowVectorIterator iterator = sqlToArrowVectorIterator(resultSet, 
rootAllocator); - while (iterator.hasNext()) { - final VectorUnloader unloader = new VectorUnloader(iterator.next()); - loader.load(unloader.getRecordBatch()); - listener.putNext(); - vectorSchemaRoot.clear(); - } - - listener.putNext(); - } - } catch (SQLException | IOException e) { - LOGGER.error(format("Failed to getStreamPreparedStatement: <%s>.", e.getMessage()), e); - listener.error(e); - } finally { - listener.completed(); - statementLoadingCache.invalidate(handle); - } - } - - protected FlightInfo getFlightInfoForSchema( - final T request, final FlightDescriptor descriptor, final Schema schema) { - final Ticket ticket = new Ticket(pack(request).toByteArray()); - // TODO Support multiple endpoints. - final List endpoints = singletonList(new FlightEndpoint(ticket, location)); - - return new FlightInfo(schema, descriptor, endpoints, -1, -1); - } - - private static class StatementRemovalListener - implements RemovalListener> { - @Override - public void onRemoval(final RemovalNotification> notification) { - try { - AutoCloseables.close(notification.getValue()); - } catch (final Exception e) { - // swallow - } - } - } -} diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlStatelessExample.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlStatelessExample.java deleted file mode 100644 index b281d8e52685b..0000000000000 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlStatelessExample.java +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql.example; - -import static java.lang.String.format; -import static org.apache.arrow.adapter.jdbc.JdbcToArrow.sqlToArrowVectorIterator; -import static org.apache.arrow.adapter.jdbc.JdbcToArrowUtils.jdbcToArrowSchema; -import static org.apache.arrow.flight.sql.impl.FlightSql.*; -import static org.slf4j.LoggerFactory.getLogger; - -import com.google.protobuf.ByteString; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.io.StreamCorruptedException; -import java.nio.ByteBuffer; -import java.nio.channels.Channels; -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import org.apache.arrow.adapter.jdbc.ArrowVectorIterator; -import org.apache.arrow.adapter.jdbc.JdbcParameterBinder; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.PutResult; -import org.apache.arrow.flight.sql.FlightSqlProducer; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.ipc.ArrowFileReader; -import 
org.apache.arrow.vector.ipc.ArrowFileWriter; -import org.apache.arrow.vector.ipc.SeekableReadChannel; -import org.apache.arrow.vector.ipc.message.ArrowBlock; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -import org.slf4j.Logger; - -/** - * Example {@link FlightSqlProducer} implementation showing an Apache Derby backed Flight SQL server - * that generally supports all current features of Flight SQL. - */ -public class FlightSqlStatelessExample extends FlightSqlExample { - private static final Logger LOGGER = getLogger(FlightSqlStatelessExample.class); - public static final String DB_NAME = "derbyStatelessDB"; - - public FlightSqlStatelessExample(final Location location, final String dbName) { - super(location, dbName); - } - - @Override - public Runnable acceptPutPreparedStatementQuery( - CommandPreparedStatementQuery command, - CallContext context, - FlightStream flightStream, - StreamListener ackStream) { - - return () -> { - final String query = new String(command.getPreparedStatementHandle().toStringUtf8()); - try (Connection connection = dataSource.getConnection(); - PreparedStatement preparedStatement = createPreparedStatement(connection, query)) { - while (flightStream.next()) { - final VectorSchemaRoot root = flightStream.getRoot(); - final JdbcParameterBinder binder = - JdbcParameterBinder.builder(preparedStatement, root).bindAll().build(); - while (binder.next()) { - // Do not execute() - will be done in a getStream call - } - - final ByteArrayOutputStream parametersStream = new ByteArrayOutputStream(); - try (ArrowFileWriter writer = - new ArrowFileWriter(root, null, Channels.newChannel(parametersStream))) { - writer.start(); - writer.writeBatch(); - } - - if (parametersStream.size() > 0) { - final DoPutPreparedStatementResultPOJO doPutPreparedStatementResultPOJO = - new DoPutPreparedStatementResultPOJO(query, parametersStream.toByteArray()); - - final byte[] 
doPutPreparedStatementResultPOJOArr = - serializePOJO(doPutPreparedStatementResultPOJO); - final DoPutPreparedStatementResult doPutPreparedStatementResult = - DoPutPreparedStatementResult.newBuilder() - .setPreparedStatementHandle( - ByteString.copyFrom(ByteBuffer.wrap(doPutPreparedStatementResultPOJOArr))) - .build(); - - try (final ArrowBuf buffer = - rootAllocator.buffer(doPutPreparedStatementResult.getSerializedSize())) { - buffer.writeBytes(doPutPreparedStatementResult.toByteArray()); - ackStream.onNext(PutResult.metadata(buffer)); - } - } - } - - } catch (SQLException | IOException e) { - ackStream.onError( - CallStatus.INTERNAL - .withDescription("Failed to bind parameters: " + e.getMessage()) - .withCause(e) - .toRuntimeException()); - return; - } - - ackStream.onCompleted(); - }; - } - - @Override - public void getStreamPreparedStatement( - final CommandPreparedStatementQuery command, - final CallContext context, - final ServerStreamListener listener) { - final byte[] handle = command.getPreparedStatementHandle().toByteArray(); - try { - // Case where there are parameters - try { - final DoPutPreparedStatementResultPOJO doPutPreparedStatementResultPOJO = - deserializePOJO(handle); - final String query = doPutPreparedStatementResultPOJO.getQuery(); - - try (Connection connection = dataSource.getConnection(); - PreparedStatement statement = createPreparedStatement(connection, query); - ArrowFileReader reader = - new ArrowFileReader( - new SeekableReadChannel( - new ByteArrayReadableSeekableByteChannel( - doPutPreparedStatementResultPOJO.getParameters())), - rootAllocator)) { - - for (ArrowBlock arrowBlock : reader.getRecordBlocks()) { - reader.loadRecordBatch(arrowBlock); - VectorSchemaRoot vectorSchemaRootRecover = reader.getVectorSchemaRoot(); - JdbcParameterBinder binder = - JdbcParameterBinder.builder(statement, vectorSchemaRootRecover).bindAll().build(); - - while (binder.next()) { - executeQuery(statement, listener); - } - } - } - } catch 
(StreamCorruptedException e) { - // Case where there are no parameters - final String query = new String(command.getPreparedStatementHandle().toStringUtf8()); - try (Connection connection = dataSource.getConnection(); - PreparedStatement preparedStatement = createPreparedStatement(connection, query)) { - executeQuery(preparedStatement, listener); - } - } - } catch (final SQLException | IOException | ClassNotFoundException e) { - LOGGER.error(format("Failed to getStreamPreparedStatement: <%s>.", e.getMessage()), e); - listener.error( - CallStatus.INTERNAL - .withDescription("Failed to prepare statement: " + e) - .toRuntimeException()); - } finally { - listener.completed(); - } - } - - private void executeQuery(PreparedStatement statement, final ServerStreamListener listener) - throws IOException, SQLException { - try (final ResultSet resultSet = statement.executeQuery()) { - final Schema schema = jdbcToArrowSchema(resultSet.getMetaData(), DEFAULT_CALENDAR); - try (final VectorSchemaRoot vectorSchemaRoot = - VectorSchemaRoot.create(schema, rootAllocator)) { - final VectorLoader loader = new VectorLoader(vectorSchemaRoot); - listener.start(vectorSchemaRoot); - - final ArrowVectorIterator iterator = sqlToArrowVectorIterator(resultSet, rootAllocator); - while (iterator.hasNext()) { - final VectorSchemaRoot batch = iterator.next(); - if (batch.getRowCount() == 0) { - break; - } - final VectorUnloader unloader = new VectorUnloader(batch); - loader.load(unloader.getRecordBatch()); - listener.putNext(); - vectorSchemaRoot.clear(); - } - listener.putNext(); - } - } - } - - @Override - public FlightInfo getFlightInfoPreparedStatement( - final CommandPreparedStatementQuery command, - final CallContext context, - final FlightDescriptor descriptor) { - final byte[] handle = command.getPreparedStatementHandle().toByteArray(); - try { - String query; - try { - query = deserializePOJO(handle).getQuery(); - } catch (StreamCorruptedException e) { - query = new 
String(command.getPreparedStatementHandle().toStringUtf8()); - } - try (Connection connection = dataSource.getConnection(); - PreparedStatement statement = createPreparedStatement(connection, query)) { - ResultSetMetaData metaData = statement.getMetaData(); - return getFlightInfoForSchema( - command, descriptor, jdbcToArrowSchema(metaData, DEFAULT_CALENDAR)); - } - } catch (final SQLException | IOException | ClassNotFoundException e) { - LOGGER.error( - format("There was a problem executing the prepared statement: <%s>.", e.getMessage()), e); - throw CallStatus.INTERNAL.withCause(e).toRuntimeException(); - } - } - - private DoPutPreparedStatementResultPOJO deserializePOJO(byte[] handle) - throws IOException, ClassNotFoundException { - try (ByteArrayInputStream bis = new ByteArrayInputStream(handle); - ObjectInputStream ois = new ObjectInputStream(bis)) { - return (DoPutPreparedStatementResultPOJO) ois.readObject(); - } - } - - private byte[] serializePOJO(DoPutPreparedStatementResultPOJO doPutPreparedStatementResultPOJO) - throws IOException { - try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); - ObjectOutputStream oos = new ObjectOutputStream(bos)) { - oos.writeObject(doPutPreparedStatementResultPOJO); - return bos.toByteArray(); - } - } - - private PreparedStatement createPreparedStatement(Connection connection, String query) - throws SQLException { - return connection.prepareStatement( - query, ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY); - } -} diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/StatementContext.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/StatementContext.java deleted file mode 100644 index d6e21a341e52d..0000000000000 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/StatementContext.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license 
agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql.example; - -import java.sql.Connection; -import java.sql.Statement; -import java.util.Objects; -import org.apache.arrow.flight.sql.FlightSqlProducer; -import org.apache.arrow.util.AutoCloseables; - -/** - * Context for {@link T} to be persisted in memory in between {@link FlightSqlProducer} calls. - * - * @param the {@link Statement} to be persisted. - */ -public final class StatementContext implements AutoCloseable { - - private final T statement; - private final String query; - - public StatementContext(final T statement, final String query) { - this.statement = Objects.requireNonNull(statement, "statement cannot be null."); - this.query = query; - } - - /** - * Gets the statement wrapped by this {@link StatementContext}. - * - * @return the inner statement. - */ - public T getStatement() { - return statement; - } - - /** - * Gets the optional SQL query wrapped by this {@link StatementContext}. - * - * @return the SQL query if present; empty otherwise. 
- */ - public String getQuery() { - return query; - } - - @Override - public void close() throws Exception { - Connection connection = statement.getConnection(); - AutoCloseables.close(statement, connection); - } - - @Override - public boolean equals(final Object other) { - if (this == other) { - return true; - } - if (!(other instanceof StatementContext)) { - return false; - } - final StatementContext that = (StatementContext) other; - return statement.equals(that.statement); - } - - @Override - public int hashCode() { - return Objects.hash(statement); - } -} diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java deleted file mode 100644 index 3f769363fb64d..0000000000000 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java +++ /dev/null @@ -1,1605 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.sql.test; - -import static java.util.Arrays.asList; -import static java.util.Collections.emptyList; -import static java.util.Collections.singletonList; -import static org.apache.arrow.flight.sql.util.FlightStreamUtils.getResults; -import static org.apache.arrow.util.AutoCloseables.close; -import static org.hamcrest.CoreMatchers.containsString; -import static org.hamcrest.CoreMatchers.is; -import static org.hamcrest.CoreMatchers.notNullValue; -import static org.hamcrest.CoreMatchers.nullValue; -import static org.junit.jupiter.api.Assertions.assertAll; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import com.google.common.collect.ImmutableList; -import java.io.IOException; -import java.io.PipedInputStream; -import java.io.PipedOutputStream; -import java.nio.charset.StandardCharsets; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.stream.IntStream; -import org.apache.arrow.flight.CancelFlightInfoRequest; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightRuntimeException; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightStatusCode; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.RenewFlightEndpointRequest; -import org.apache.arrow.flight.sql.FlightSqlClient; -import org.apache.arrow.flight.sql.FlightSqlClient.PreparedStatement; -import org.apache.arrow.flight.sql.FlightSqlColumnMetadata; -import org.apache.arrow.flight.sql.FlightSqlProducer; -import org.apache.arrow.flight.sql.example.FlightSqlExample; -import org.apache.arrow.flight.sql.impl.FlightSql; -import 
org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementIngest.TableDefinitionOptions; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementIngest.TableDefinitionOptions.TableExistsOption; -import org.apache.arrow.flight.sql.impl.FlightSql.CommandStatementIngest.TableDefinitionOptions.TableNotExistOption; -import org.apache.arrow.flight.sql.impl.FlightSql.SqlSupportedCaseSensitivity; -import org.apache.arrow.flight.sql.util.TableRef; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowStreamReader; -import org.apache.arrow.vector.ipc.ArrowStreamWriter; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.Text; -import org.apache.arrow.vector.util.VectorBatchAppender; -import org.hamcrest.Matcher; -import org.hamcrest.MatcherAssert; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.function.Executable; - -/** Test direct usage of Flight SQL workflows. 
*/ -public class TestFlightSql { - - protected static final Schema SCHEMA_INT_TABLE = - new Schema( - asList( - new Field("ID", new FieldType(false, MinorType.INT.getType(), null), null), - Field.nullable("KEYNAME", MinorType.VARCHAR.getType()), - Field.nullable("VALUE", MinorType.INT.getType()), - Field.nullable("FOREIGNID", MinorType.INT.getType()))); - private static final List> EXPECTED_RESULTS_FOR_STAR_SELECT_QUERY = - ImmutableList.of( - asList("1", "one", "1", "1"), - asList("2", "zero", "0", "1"), - asList("3", "negative one", "-1", "1")); - protected static final List> EXPECTED_RESULTS_FOR_PARAMETER_BINDING = - ImmutableList.of(asList("1", "one", "1", "1")); - private static final Map GET_SQL_INFO_EXPECTED_RESULTS_MAP = - new LinkedHashMap<>(); - protected static final String LOCALHOST = "localhost"; - protected static BufferAllocator allocator; - protected static FlightServer server; - protected static FlightSqlClient sqlClient; - - private static void populateNext10RowsInIngestRootBatch( - int startRowNumber, - IntVector valueVector, - VarCharVector keyNameVector, - IntVector foreignIdVector, - VarCharVector keyNamesToBeDeletedVector, - VectorSchemaRoot ingestRoot) { - - final int NumRowsInBatch = 10; - - valueVector.reset(); - keyNameVector.reset(); - foreignIdVector.reset(); - - final IntStream range = IntStream.range(1, NumRowsInBatch); - - range.forEach( - i -> { - valueVector.setSafe(i - 1, (i + startRowNumber - 1) * NumRowsInBatch); - keyNameVector.setSafe(i - 1, new Text("value" + (i + startRowNumber - 1))); - foreignIdVector.setSafe(i - 1, 1); - }); - // put some comma and double-quote containing string as well - valueVector.setSafe(NumRowsInBatch - 1, (NumRowsInBatch + startRowNumber - 1) * NumRowsInBatch); - keyNameVector.setSafe( - NumRowsInBatch - 1, - new Text( - String.format( - "value%d, is \"%d\"", - (NumRowsInBatch + startRowNumber - 1), - (NumRowsInBatch + startRowNumber - 1) * NumRowsInBatch))); - foreignIdVector.setSafe(NumRowsInBatch 
- 1, 1); - ingestRoot.setRowCount(NumRowsInBatch); - - VectorBatchAppender.batchAppend(keyNamesToBeDeletedVector, keyNameVector); - } - - @BeforeAll - public static void setUp() throws Exception { - setUpClientServer(); - setUpExpectedResultsMap(); - } - - private static void setUpClientServer() throws Exception { - allocator = new RootAllocator(Integer.MAX_VALUE); - - final Location serverLocation = Location.forGrpcInsecure(LOCALHOST, 0); - server = - FlightServer.builder( - allocator, - serverLocation, - new FlightSqlExample(serverLocation, FlightSqlExample.DB_NAME)) - .build() - .start(); - - final Location clientLocation = Location.forGrpcInsecure(LOCALHOST, server.getPort()); - sqlClient = new FlightSqlClient(FlightClient.builder(allocator, clientLocation).build()); - } - - protected static void setUpExpectedResultsMap() { - GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( - Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME_VALUE), "Apache Derby"); - GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( - Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_VERSION_VALUE), - "10.15.2.0 - (1873585)"); - GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( - Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_ARROW_VERSION_VALUE), - "10.15.2.0 - (1873585)"); - GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( - Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY_VALUE), "false"); - GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( - Integer.toString(FlightSql.SqlInfo.SQL_ALL_TABLES_ARE_SELECTABLE_VALUE), "true"); - GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( - Integer.toString(FlightSql.SqlInfo.SQL_NULL_ORDERING_VALUE), - Integer.toString(FlightSql.SqlNullOrdering.SQL_NULLS_SORTED_AT_END_VALUE)); - GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( - Integer.toString(FlightSql.SqlInfo.SQL_DDL_CATALOG_VALUE), "false"); - GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( - Integer.toString(FlightSql.SqlInfo.SQL_DDL_SCHEMA_VALUE), "true"); - GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( - 
Integer.toString(FlightSql.SqlInfo.SQL_DDL_TABLE_VALUE), "true"); - GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( - Integer.toString(FlightSql.SqlInfo.SQL_IDENTIFIER_CASE_VALUE), - Integer.toString(SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_UPPERCASE_VALUE)); - GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( - Integer.toString(FlightSql.SqlInfo.SQL_IDENTIFIER_QUOTE_CHAR_VALUE), "\""); - GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( - Integer.toString(FlightSql.SqlInfo.SQL_QUOTED_IDENTIFIER_CASE_VALUE), - Integer.toString(SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_CASE_INSENSITIVE_VALUE)); - GET_SQL_INFO_EXPECTED_RESULTS_MAP.put( - Integer.toString(FlightSql.SqlInfo.SQL_MAX_COLUMNS_IN_TABLE_VALUE), "42"); - } - - @AfterAll - public static void tearDown() throws Exception { - close(sqlClient, server, allocator); - FlightSqlExample.removeDerbyDatabaseIfExists(FlightSqlExample.DB_NAME); - } - - private static List> getNonConformingResultsForGetSqlInfo( - final List> results) { - return getNonConformingResultsForGetSqlInfo( - results, - FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME, - FlightSql.SqlInfo.FLIGHT_SQL_SERVER_VERSION, - FlightSql.SqlInfo.FLIGHT_SQL_SERVER_ARROW_VERSION, - FlightSql.SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY, - FlightSql.SqlInfo.SQL_ALL_TABLES_ARE_SELECTABLE, - FlightSql.SqlInfo.SQL_NULL_ORDERING, - FlightSql.SqlInfo.SQL_DDL_CATALOG, - FlightSql.SqlInfo.SQL_DDL_SCHEMA, - FlightSql.SqlInfo.SQL_DDL_TABLE, - FlightSql.SqlInfo.SQL_IDENTIFIER_CASE, - FlightSql.SqlInfo.SQL_IDENTIFIER_QUOTE_CHAR, - FlightSql.SqlInfo.SQL_QUOTED_IDENTIFIER_CASE, - FlightSql.SqlInfo.SQL_MAX_COLUMNS_IN_TABLE); - } - - private static List> getNonConformingResultsForGetSqlInfo( - final List> results, final FlightSql.SqlInfo... 
args) { - final List> nonConformingResults = new ArrayList<>(); - if (results.size() == args.length) { - for (int index = 0; index < results.size(); index++) { - final List result = results.get(index); - final String providedName = result.get(0); - final String expectedName = Integer.toString(args[index].getNumber()); - System.err.println(expectedName); - if (!(GET_SQL_INFO_EXPECTED_RESULTS_MAP.get(providedName).equals(result.get(1)) - && providedName.equals(expectedName))) { - nonConformingResults.add(result); - break; - } - } - } - return nonConformingResults; - } - - @Test - public void testGetTablesSchema() { - final FlightInfo info = sqlClient.getTables(null, null, null, null, true); - MatcherAssert.assertThat( - info.getSchemaOptional(), is(Optional.of(FlightSqlProducer.Schemas.GET_TABLES_SCHEMA))); - } - - @Test - public void testGetTablesSchemaExcludeSchema() { - final FlightInfo info = sqlClient.getTables(null, null, null, null, false); - MatcherAssert.assertThat( - info.getSchemaOptional(), - is(Optional.of(FlightSqlProducer.Schemas.GET_TABLES_SCHEMA_NO_SCHEMA))); - } - - @Test - public void testGetTablesResultNoSchema() throws Exception { - try (final FlightStream stream = - sqlClient.getStream( - sqlClient.getTables(null, null, null, null, false).getEndpoints().get(0).getTicket())) { - assertAll( - () -> { - MatcherAssert.assertThat( - stream.getSchema(), is(FlightSqlProducer.Schemas.GET_TABLES_SCHEMA_NO_SCHEMA)); - }, - () -> { - final List> results = getResults(stream); - final List> expectedResults = - ImmutableList.of( - // catalog_name | schema_name | table_name | table_type | table_schema - asList(null /* TODO No catalog yet */, "SYS", "SYSALIASES", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSCHECKS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSCOLPERMS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSCOLUMNS", "SYSTEM TABLE"), - asList( - null /* TODO No catalog yet */, 
"SYS", "SYSCONGLOMERATES", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSCONSTRAINTS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSDEPENDS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSFILES", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSFOREIGNKEYS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSKEYS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSPERMS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSROLES", "SYSTEM TABLE"), - asList( - null /* TODO No catalog yet */, "SYS", "SYSROUTINEPERMS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSSCHEMAS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSSEQUENCES", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSSTATEMENTS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSSTATISTICS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSTABLEPERMS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSTABLES", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSTRIGGERS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSUSERS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYS", "SYSVIEWS", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "SYSIBM", "SYSDUMMY1", "SYSTEM TABLE"), - asList(null /* TODO No catalog yet */, "APP", "FOREIGNTABLE", "TABLE"), - asList(null /* TODO No catalog yet */, "APP", "INTTABLE", "TABLE")); - MatcherAssert.assertThat(results, is(expectedResults)); - }); - } - } - - @Test - public void testGetTablesResultFilteredNoSchema() throws Exception { - try (final FlightStream stream = - sqlClient.getStream( - sqlClient - .getTables(null, null, null, singletonList("TABLE"), false) - .getEndpoints() - .get(0) - .getTicket())) { - - assertAll( - () -> - 
MatcherAssert.assertThat( - stream.getSchema(), is(FlightSqlProducer.Schemas.GET_TABLES_SCHEMA_NO_SCHEMA)), - () -> { - final List> results = getResults(stream); - final List> expectedResults = - ImmutableList.of( - // catalog_name | schema_name | table_name | table_type | table_schema - asList(null /* TODO No catalog yet */, "APP", "FOREIGNTABLE", "TABLE"), - asList(null /* TODO No catalog yet */, "APP", "INTTABLE", "TABLE")); - MatcherAssert.assertThat(results, is(expectedResults)); - }); - } - } - - @Test - public void testGetTablesResultFilteredWithSchema() throws Exception { - try (final FlightStream stream = - sqlClient.getStream( - sqlClient - .getTables(null, null, null, singletonList("TABLE"), true) - .getEndpoints() - .get(0) - .getTicket())) { - assertAll( - () -> - MatcherAssert.assertThat( - stream.getSchema(), is(FlightSqlProducer.Schemas.GET_TABLES_SCHEMA)), - () -> { - MatcherAssert.assertThat( - stream.getSchema(), is(FlightSqlProducer.Schemas.GET_TABLES_SCHEMA)); - final List> results = getResults(stream); - final List> expectedResults = - ImmutableList.of( - // catalog_name | schema_name | table_name | table_type | table_schema - asList( - null /* TODO No catalog yet */, - "APP", - "FOREIGNTABLE", - "TABLE", - new Schema( - asList( - new Field( - "ID", - new FieldType( - false, - MinorType.INT.getType(), - null, - new FlightSqlColumnMetadata.Builder() - .catalogName("") - .typeName("INTEGER") - .schemaName("APP") - .tableName("FOREIGNTABLE") - .precision(10) - .scale(0) - .isAutoIncrement(true) - .build() - .getMetadataMap()), - null), - new Field( - "FOREIGNNAME", - new FieldType( - true, - MinorType.VARCHAR.getType(), - null, - new FlightSqlColumnMetadata.Builder() - .catalogName("") - .typeName("VARCHAR") - .schemaName("APP") - .tableName("FOREIGNTABLE") - .precision(100) - .scale(0) - .isAutoIncrement(false) - .build() - .getMetadataMap()), - null), - new Field( - "VALUE", - new FieldType( - true, - MinorType.INT.getType(), - null, - new 
FlightSqlColumnMetadata.Builder() - .catalogName("") - .typeName("INTEGER") - .schemaName("APP") - .tableName("FOREIGNTABLE") - .precision(10) - .scale(0) - .isAutoIncrement(false) - .build() - .getMetadataMap()), - null))) - .toJson()), - asList( - null /* TODO No catalog yet */, - "APP", - "INTTABLE", - "TABLE", - new Schema( - asList( - new Field( - "ID", - new FieldType( - false, - MinorType.INT.getType(), - null, - new FlightSqlColumnMetadata.Builder() - .catalogName("") - .typeName("INTEGER") - .schemaName("APP") - .tableName("INTTABLE") - .precision(10) - .scale(0) - .isAutoIncrement(true) - .build() - .getMetadataMap()), - null), - new Field( - "KEYNAME", - new FieldType( - true, - MinorType.VARCHAR.getType(), - null, - new FlightSqlColumnMetadata.Builder() - .catalogName("") - .typeName("VARCHAR") - .schemaName("APP") - .tableName("INTTABLE") - .precision(100) - .scale(0) - .isAutoIncrement(false) - .build() - .getMetadataMap()), - null), - new Field( - "VALUE", - new FieldType( - true, - MinorType.INT.getType(), - null, - new FlightSqlColumnMetadata.Builder() - .catalogName("") - .typeName("INTEGER") - .schemaName("APP") - .tableName("INTTABLE") - .precision(10) - .scale(0) - .isAutoIncrement(false) - .build() - .getMetadataMap()), - null), - new Field( - "FOREIGNID", - new FieldType( - true, - MinorType.INT.getType(), - null, - new FlightSqlColumnMetadata.Builder() - .catalogName("") - .typeName("INTEGER") - .schemaName("APP") - .tableName("INTTABLE") - .precision(10) - .scale(0) - .isAutoIncrement(false) - .build() - .getMetadataMap()), - null))) - .toJson())); - MatcherAssert.assertThat(results, is(expectedResults)); - }); - } - } - - @Test - public void testSimplePreparedStatementSchema() throws Exception { - try (final PreparedStatement preparedStatement = sqlClient.prepare("SELECT * FROM intTable")) { - assertAll( - () -> { - final Schema actualSchema = preparedStatement.getResultSetSchema(); - MatcherAssert.assertThat(actualSchema, 
is(SCHEMA_INT_TABLE)); - }, - () -> { - final FlightInfo info = preparedStatement.execute(); - MatcherAssert.assertThat(info.getSchemaOptional(), is(Optional.of(SCHEMA_INT_TABLE))); - }); - } - } - - @Test - public void testSimplePreparedStatementResults() throws Exception { - try (final PreparedStatement preparedStatement = sqlClient.prepare("SELECT * FROM intTable"); - final FlightStream stream = - sqlClient.getStream(preparedStatement.execute().getEndpoints().get(0).getTicket())) { - assertAll( - () -> MatcherAssert.assertThat(stream.getSchema(), is(SCHEMA_INT_TABLE)), - () -> - MatcherAssert.assertThat( - getResults(stream), is(EXPECTED_RESULTS_FOR_STAR_SELECT_QUERY))); - } - } - - @Test - public void testSimplePreparedStatementResultsWithParameterBinding() throws Exception { - try (PreparedStatement prepare = sqlClient.prepare("SELECT * FROM intTable WHERE id = ?")) { - final Schema parameterSchema = prepare.getParameterSchema(); - try (final VectorSchemaRoot insertRoot = - VectorSchemaRoot.create(parameterSchema, allocator)) { - insertRoot.allocateNew(); - - final IntVector valueVector = (IntVector) insertRoot.getVector(0); - valueVector.setSafe(0, 1); - insertRoot.setRowCount(1); - - prepare.setParameters(insertRoot); - FlightInfo flightInfo = prepare.execute(); - - FlightStream stream = sqlClient.getStream(flightInfo.getEndpoints().get(0).getTicket()); - - assertAll( - () -> MatcherAssert.assertThat(stream.getSchema(), is(SCHEMA_INT_TABLE)), - () -> - MatcherAssert.assertThat( - getResults(stream), is(EXPECTED_RESULTS_FOR_PARAMETER_BINDING))); - } - } - } - - @Test - public void testSimplePreparedStatementUpdateResults() throws SQLException { - try (PreparedStatement prepare = - sqlClient.prepare("INSERT INTO INTTABLE (keyName, value ) VALUES (?, ?)"); - PreparedStatement deletePrepare = - sqlClient.prepare("DELETE FROM INTTABLE WHERE keyName = ?")) { - final Schema parameterSchema = prepare.getParameterSchema(); - try (final VectorSchemaRoot insertRoot = - 
VectorSchemaRoot.create(parameterSchema, allocator)) { - final VarCharVector varCharVector = (VarCharVector) insertRoot.getVector(0); - final IntVector valueVector = (IntVector) insertRoot.getVector(1); - final int counter = 10; - insertRoot.allocateNew(); - - final IntStream range = IntStream.range(0, counter); - - range.forEach( - i -> { - valueVector.setSafe(i, i * counter); - varCharVector.setSafe(i, new Text("value" + i)); - }); - - insertRoot.setRowCount(counter); - - prepare.setParameters(insertRoot); - final long updatedRows = prepare.executeUpdate(); - - final long deletedRows; - try (final VectorSchemaRoot deleteRoot = VectorSchemaRoot.of(varCharVector)) { - deletePrepare.setParameters(deleteRoot); - deletedRows = deletePrepare.executeUpdate(); - } - assertAll( - () -> MatcherAssert.assertThat(updatedRows, is(10L)), - () -> MatcherAssert.assertThat(deletedRows, is(10L))); - } - } - } - - @Test - public void testBulkIngest() throws IOException { - // For bulk ingest DerbyDB requires uppercase column names - var keyName = new Field("KEYNAME", FieldType.nullable(new ArrowType.Utf8()), null); - var value = new Field("VALUE", FieldType.nullable(new ArrowType.Int(32, true)), null); - var foreignId = new Field("FOREIGNID", FieldType.nullable(new ArrowType.Int(32, true)), null); - - Schema dataSchema = new Schema(List.of(keyName, value, foreignId)); - - try (final VectorSchemaRoot ingestRoot = VectorSchemaRoot.create(dataSchema, allocator); - final VarCharVector keyNamesToBeDeletedVector = new VarCharVector(keyName, allocator)) { - final VarCharVector keyNameVector = (VarCharVector) ingestRoot.getVector(0); - final IntVector valueVector = (IntVector) ingestRoot.getVector(1); - final IntVector foreignIdVector = (IntVector) ingestRoot.getVector(2); - ingestRoot.allocateNew(); - keyNamesToBeDeletedVector.allocateNew(); - - try (PipedInputStream inPipe = new PipedInputStream(1024); - PipedOutputStream outPipe = new PipedOutputStream(inPipe); - ArrowStreamReader 
reader = new ArrowStreamReader(inPipe, allocator)) { - - new Thread( - () -> { - try (ArrowStreamWriter writer = - new ArrowStreamWriter(ingestRoot, null, outPipe)) { - writer.start(); - populateNext10RowsInIngestRootBatch( - 1, - valueVector, - keyNameVector, - foreignIdVector, - keyNamesToBeDeletedVector, - ingestRoot); - writer.writeBatch(); - populateNext10RowsInIngestRootBatch( - 11, - valueVector, - keyNameVector, - foreignIdVector, - keyNamesToBeDeletedVector, - ingestRoot); - writer.writeBatch(); - } catch (Exception e) { - throw new RuntimeException(e); - } - }) - .start(); - - // Ingest from a stream - final long updatedRows = - sqlClient.executeIngest( - reader, - new FlightSqlClient.ExecuteIngestOptions( - "INTTABLE", - TableDefinitionOptions.newBuilder() - .setIfExists(TableExistsOption.TABLE_EXISTS_OPTION_APPEND) - .setIfNotExist(TableNotExistOption.TABLE_NOT_EXIST_OPTION_FAIL) - .build(), - null, - null, - null)); - - MatcherAssert.assertThat(updatedRows, is(-1L)); - - // Ingest directly using VectorSchemaRoot - populateNext10RowsInIngestRootBatch( - 21, valueVector, keyNameVector, foreignIdVector, keyNamesToBeDeletedVector, ingestRoot); - sqlClient.executeIngest( - ingestRoot, - new FlightSqlClient.ExecuteIngestOptions( - "INTTABLE", - TableDefinitionOptions.newBuilder() - .setIfExists(TableExistsOption.TABLE_EXISTS_OPTION_APPEND) - .setIfNotExist(TableNotExistOption.TABLE_NOT_EXIST_OPTION_FAIL) - .build(), - null, - null, - null)); - - try (PreparedStatement deletePrepare = - sqlClient.prepare("DELETE FROM INTTABLE WHERE keyName = ?")) { - final long deletedRows; - try (final VectorSchemaRoot deleteRoot = VectorSchemaRoot.of(keyNamesToBeDeletedVector)) { - deletePrepare.setParameters(deleteRoot); - deletedRows = deletePrepare.executeUpdate(); - } - - MatcherAssert.assertThat(deletedRows, is(30L)); - } - } - } - } - - @Test - public void testBulkIngestTransaction() { - assertThrows( - RuntimeException.class, - () -> { - sqlClient.executeIngest( - 
VectorSchemaRoot.create(new Schema(List.of()), allocator), - new FlightSqlClient.ExecuteIngestOptions( - "INTTABLE", - TableDefinitionOptions.newBuilder() - .setIfExists(TableExistsOption.TABLE_EXISTS_OPTION_APPEND) - .setIfNotExist(TableNotExistOption.TABLE_NOT_EXIST_OPTION_FAIL) - .build(), - null, - null, - null), - new FlightSqlClient.Transaction("123".getBytes(StandardCharsets.UTF_8))); - }); - } - - @Test - public void testSimplePreparedStatementUpdateResultsWithoutParameters() throws SQLException { - try (PreparedStatement prepare = - sqlClient.prepare("INSERT INTO INTTABLE (keyName, value ) VALUES ('test', 1000)"); - PreparedStatement deletePrepare = - sqlClient.prepare("DELETE FROM INTTABLE WHERE keyName = 'test'")) { - final long updatedRows = prepare.executeUpdate(); - - final long deletedRows = deletePrepare.executeUpdate(); - - assertAll( - () -> MatcherAssert.assertThat(updatedRows, is(1L)), - () -> MatcherAssert.assertThat(deletedRows, is(1L))); - } - } - - @Test - public void testSimplePreparedStatementClosesProperly() { - final PreparedStatement preparedStatement = sqlClient.prepare("SELECT * FROM intTable"); - assertAll( - () -> { - MatcherAssert.assertThat(preparedStatement.isClosed(), is(false)); - }, - () -> { - preparedStatement.close(); - MatcherAssert.assertThat(preparedStatement.isClosed(), is(true)); - }); - } - - @Test - public void testGetCatalogsSchema() { - final FlightInfo info = sqlClient.getCatalogs(); - MatcherAssert.assertThat( - info.getSchemaOptional(), is(Optional.of(FlightSqlProducer.Schemas.GET_CATALOGS_SCHEMA))); - } - - @Test - public void testGetCatalogsResults() throws Exception { - try (final FlightStream stream = - sqlClient.getStream(sqlClient.getCatalogs().getEndpoints().get(0).getTicket())) { - assertAll( - () -> - MatcherAssert.assertThat( - stream.getSchema(), is(FlightSqlProducer.Schemas.GET_CATALOGS_SCHEMA)), - () -> { - List> catalogs = getResults(stream); - MatcherAssert.assertThat(catalogs, is(emptyList())); - 
}); - } - } - - @Test - public void testGetTableTypesSchema() { - final FlightInfo info = sqlClient.getTableTypes(); - MatcherAssert.assertThat( - info.getSchemaOptional(), - is(Optional.of(FlightSqlProducer.Schemas.GET_TABLE_TYPES_SCHEMA))); - } - - @Test - public void testGetTableTypesResult() throws Exception { - try (final FlightStream stream = - sqlClient.getStream(sqlClient.getTableTypes().getEndpoints().get(0).getTicket())) { - assertAll( - () -> { - MatcherAssert.assertThat( - stream.getSchema(), is(FlightSqlProducer.Schemas.GET_TABLE_TYPES_SCHEMA)); - }, - () -> { - final List> tableTypes = getResults(stream); - final List> expectedTableTypes = - ImmutableList.of( - // table_type - singletonList("SYNONYM"), - singletonList("SYSTEM TABLE"), - singletonList("TABLE"), - singletonList("VIEW")); - MatcherAssert.assertThat(tableTypes, is(expectedTableTypes)); - }); - } - } - - @Test - public void testGetSchemasSchema() { - final FlightInfo info = sqlClient.getSchemas(null, null); - MatcherAssert.assertThat( - info.getSchemaOptional(), is(Optional.of(FlightSqlProducer.Schemas.GET_SCHEMAS_SCHEMA))); - } - - @Test - public void testGetSchemasResult() throws Exception { - try (final FlightStream stream = - sqlClient.getStream(sqlClient.getSchemas(null, null).getEndpoints().get(0).getTicket())) { - assertAll( - () -> { - MatcherAssert.assertThat( - stream.getSchema(), is(FlightSqlProducer.Schemas.GET_SCHEMAS_SCHEMA)); - }, - () -> { - final List> schemas = getResults(stream); - final List> expectedSchemas = - ImmutableList.of( - // catalog_name | schema_name - asList(null /* TODO Add catalog. */, "APP"), - asList(null /* TODO Add catalog. */, "NULLID"), - asList(null /* TODO Add catalog. */, "SQLJ"), - asList(null /* TODO Add catalog. */, "SYS"), - asList(null /* TODO Add catalog. */, "SYSCAT"), - asList(null /* TODO Add catalog. */, "SYSCS_DIAG"), - asList(null /* TODO Add catalog. */, "SYSCS_UTIL"), - asList(null /* TODO Add catalog. 
*/, "SYSFUN"), - asList(null /* TODO Add catalog. */, "SYSIBM"), - asList(null /* TODO Add catalog. */, "SYSPROC"), - asList(null /* TODO Add catalog. */, "SYSSTAT")); - MatcherAssert.assertThat(schemas, is(expectedSchemas)); - }); - } - } - - @Test - public void testGetPrimaryKey() { - final FlightInfo flightInfo = sqlClient.getPrimaryKeys(TableRef.of(null, null, "INTTABLE")); - final FlightStream stream = sqlClient.getStream(flightInfo.getEndpoints().get(0).getTicket()); - - final List> results = getResults(stream); - - assertAll( - () -> MatcherAssert.assertThat(results.size(), is(1)), - () -> { - final List result = results.get(0); - assertAll( - () -> MatcherAssert.assertThat(result.get(0), is("")), - () -> MatcherAssert.assertThat(result.get(1), is("APP")), - () -> MatcherAssert.assertThat(result.get(2), is("INTTABLE")), - () -> MatcherAssert.assertThat(result.get(3), is("ID")), - () -> MatcherAssert.assertThat(result.get(4), is("1")), - () -> MatcherAssert.assertThat(result.get(5), notNullValue())); - }); - } - - @Test - public void testGetSqlInfoSchema() { - final FlightInfo info = sqlClient.getSqlInfo(); - MatcherAssert.assertThat( - info.getSchemaOptional(), is(Optional.of(FlightSqlProducer.Schemas.GET_SQL_INFO_SCHEMA))); - } - - @Test - public void testGetSqlInfoResults() throws Exception { - final FlightInfo info = sqlClient.getSqlInfo(); - try (final FlightStream stream = sqlClient.getStream(info.getEndpoints().get(0).getTicket())) { - assertAll( - () -> - MatcherAssert.assertThat( - stream.getSchema(), is(FlightSqlProducer.Schemas.GET_SQL_INFO_SCHEMA)), - () -> - MatcherAssert.assertThat( - getNonConformingResultsForGetSqlInfo(getResults(stream)), is(emptyList()))); - } - } - - @Test - public void testGetSqlInfoResultsWithSingleArg() throws Exception { - final FlightSql.SqlInfo arg = FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME; - final FlightInfo info = sqlClient.getSqlInfo(arg); - try (final FlightStream stream = 
sqlClient.getStream(info.getEndpoints().get(0).getTicket())) { - assertAll( - () -> - MatcherAssert.assertThat( - stream.getSchema(), is(FlightSqlProducer.Schemas.GET_SQL_INFO_SCHEMA)), - () -> - MatcherAssert.assertThat( - getNonConformingResultsForGetSqlInfo(getResults(stream), arg), is(emptyList()))); - } - } - - @Test - public void testGetSqlInfoResultsWithManyArgs() throws Exception { - final FlightSql.SqlInfo[] args = { - FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME, - FlightSql.SqlInfo.FLIGHT_SQL_SERVER_VERSION, - FlightSql.SqlInfo.FLIGHT_SQL_SERVER_ARROW_VERSION, - FlightSql.SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY, - FlightSql.SqlInfo.SQL_ALL_TABLES_ARE_SELECTABLE, - FlightSql.SqlInfo.SQL_NULL_ORDERING, - FlightSql.SqlInfo.SQL_DDL_CATALOG, - FlightSql.SqlInfo.SQL_DDL_SCHEMA, - FlightSql.SqlInfo.SQL_DDL_TABLE, - FlightSql.SqlInfo.SQL_IDENTIFIER_CASE, - FlightSql.SqlInfo.SQL_IDENTIFIER_QUOTE_CHAR, - FlightSql.SqlInfo.SQL_QUOTED_IDENTIFIER_CASE, - FlightSql.SqlInfo.SQL_MAX_COLUMNS_IN_TABLE - }; - final FlightInfo info = sqlClient.getSqlInfo(args); - try (final FlightStream stream = sqlClient.getStream(info.getEndpoints().get(0).getTicket())) { - assertAll( - () -> - MatcherAssert.assertThat( - stream.getSchema(), is(FlightSqlProducer.Schemas.GET_SQL_INFO_SCHEMA)), - () -> - MatcherAssert.assertThat( - getNonConformingResultsForGetSqlInfo(getResults(stream), args), is(emptyList()))); - } - } - - @Test - public void testGetCommandExportedKeys() throws Exception { - try (final FlightStream stream = - sqlClient.getStream( - sqlClient - .getExportedKeys(TableRef.of(null, null, "FOREIGNTABLE")) - .getEndpoints() - .get(0) - .getTicket())) { - - final List> results = getResults(stream); - - final List> matchers = - asList( - nullValue(String.class), // pk_catalog_name - is("APP"), // pk_schema_name - is("FOREIGNTABLE"), // pk_table_name - is("ID"), // pk_column_name - nullValue(String.class), // fk_catalog_name - is("APP"), // fk_schema_name - is("INTTABLE"), // 
fk_table_name - is("FOREIGNID"), // fk_column_name - is("1"), // key_sequence - containsString("SQL"), // fk_key_name - containsString("SQL"), // pk_key_name - is("3"), // update_rule - is("3")); // delete_rule - - final List assertions = new ArrayList<>(); - assertEquals(1, results.size()); - for (int i = 0; i < matchers.size(); i++) { - final String actual = results.get(0).get(i); - final Matcher expected = matchers.get(i); - assertions.add(() -> MatcherAssert.assertThat(actual, expected)); - } - assertAll(assertions); - } - } - - @Test - public void testGetCommandImportedKeys() throws Exception { - try (final FlightStream stream = - sqlClient.getStream( - sqlClient - .getImportedKeys(TableRef.of(null, null, "INTTABLE")) - .getEndpoints() - .get(0) - .getTicket())) { - - final List> results = getResults(stream); - - final List> matchers = - asList( - nullValue(String.class), // pk_catalog_name - is("APP"), // pk_schema_name - is("FOREIGNTABLE"), // pk_table_name - is("ID"), // pk_column_name - nullValue(String.class), // fk_catalog_name - is("APP"), // fk_schema_name - is("INTTABLE"), // fk_table_name - is("FOREIGNID"), // fk_column_name - is("1"), // key_sequence - containsString("SQL"), // fk_key_name - containsString("SQL"), // pk_key_name - is("3"), // update_rule - is("3")); // delete_rule - - assertEquals(1, results.size()); - final List assertions = new ArrayList<>(); - for (int i = 0; i < matchers.size(); i++) { - final String actual = results.get(0).get(i); - final Matcher expected = matchers.get(i); - assertions.add(() -> MatcherAssert.assertThat(actual, expected)); - } - assertAll(assertions); - } - } - - @Test - public void testGetTypeInfo() throws Exception { - FlightInfo flightInfo = sqlClient.getXdbcTypeInfo(); - - try (FlightStream stream = sqlClient.getStream(flightInfo.getEndpoints().get(0).getTicket())) { - - final List> results = getResults(stream); - - final List> matchers = - ImmutableList.of( - asList( - "BIGINT", - "-5", - "19", - null, - 
null, - emptyList().toString(), - "1", - "false", - "2", - "false", - "false", - "true", - "BIGINT", - "0", - "0", - null, - null, - "10", - null), - asList( - "LONG VARCHAR FOR BIT DATA", - "-4", - "32700", - "X'", - "'", - emptyList().toString(), - "1", - "false", - "0", - "true", - "false", - "false", - "LONG VARCHAR FOR BIT DATA", - null, - null, - null, - null, - null, - null), - asList( - "VARCHAR () FOR BIT DATA", - "-3", - "32672", - "X'", - "'", - singletonList("length").toString(), - "1", - "false", - "2", - "true", - "false", - "false", - "VARCHAR () FOR BIT DATA", - null, - null, - null, - null, - null, - null), - asList( - "CHAR () FOR BIT DATA", - "-2", - "254", - "X'", - "'", - singletonList("length").toString(), - "1", - "false", - "2", - "true", - "false", - "false", - "CHAR () FOR BIT DATA", - null, - null, - null, - null, - null, - null), - asList( - "LONG VARCHAR", - "-1", - "32700", - "'", - "'", - emptyList().toString(), - "1", - "true", - "1", - "true", - "false", - "false", - "LONG VARCHAR", - null, - null, - null, - null, - null, - null), - asList( - "CHAR", - "1", - "254", - "'", - "'", - singletonList("length").toString(), - "1", - "true", - "3", - "true", - "false", - "false", - "CHAR", - null, - null, - null, - null, - null, - null), - asList( - "NUMERIC", - "2", - "31", - null, - null, - Arrays.asList("precision", "scale").toString(), - "1", - "false", - "2", - "false", - "true", - "false", - "NUMERIC", - "0", - "31", - null, - null, - "10", - null), - asList( - "DECIMAL", - "3", - "31", - null, - null, - Arrays.asList("precision", "scale").toString(), - "1", - "false", - "2", - "false", - "true", - "false", - "DECIMAL", - "0", - "31", - null, - null, - "10", - null), - asList( - "INTEGER", - "4", - "10", - null, - null, - emptyList().toString(), - "1", - "false", - "2", - "false", - "false", - "true", - "INTEGER", - "0", - "0", - null, - null, - "10", - null), - asList( - "SMALLINT", - "5", - "5", - null, - null, - 
emptyList().toString(), - "1", - "false", - "2", - "false", - "false", - "true", - "SMALLINT", - "0", - "0", - null, - null, - "10", - null), - asList( - "FLOAT", - "6", - "52", - null, - null, - singletonList("precision").toString(), - "1", - "false", - "2", - "false", - "false", - "false", - "FLOAT", - null, - null, - null, - null, - "2", - null), - asList( - "REAL", - "7", - "23", - null, - null, - emptyList().toString(), - "1", - "false", - "2", - "false", - "false", - "false", - "REAL", - null, - null, - null, - null, - "2", - null), - asList( - "DOUBLE", - "8", - "52", - null, - null, - emptyList().toString(), - "1", - "false", - "2", - "false", - "false", - "false", - "DOUBLE", - null, - null, - null, - null, - "2", - null), - asList( - "VARCHAR", - "12", - "32672", - "'", - "'", - singletonList("length").toString(), - "1", - "true", - "3", - "true", - "false", - "false", - "VARCHAR", - null, - null, - null, - null, - null, - null), - asList( - "BOOLEAN", - "16", - "1", - null, - null, - emptyList().toString(), - "1", - "false", - "2", - "true", - "false", - "false", - "BOOLEAN", - null, - null, - null, - null, - null, - null), - asList( - "DATE", - "91", - "10", - "DATE'", - "'", - emptyList().toString(), - "1", - "false", - "2", - "true", - "false", - "false", - "DATE", - "0", - "0", - null, - null, - "10", - null), - asList( - "TIME", - "92", - "8", - "TIME'", - "'", - emptyList().toString(), - "1", - "false", - "2", - "true", - "false", - "false", - "TIME", - "0", - "0", - null, - null, - "10", - null), - asList( - "TIMESTAMP", - "93", - "29", - "TIMESTAMP'", - "'", - emptyList().toString(), - "1", - "false", - "2", - "true", - "false", - "false", - "TIMESTAMP", - "0", - "9", - null, - null, - "10", - null), - asList( - "OBJECT", - "2000", - null, - null, - null, - emptyList().toString(), - "1", - "false", - "2", - "true", - "false", - "false", - "OBJECT", - null, - null, - null, - null, - null, - null), - asList( - "BLOB", - "2004", - "2147483647", - 
null, - null, - singletonList("length").toString(), - "1", - "false", - "0", - null, - "false", - null, - "BLOB", - null, - null, - null, - null, - null, - null), - asList( - "CLOB", - "2005", - "2147483647", - "'", - "'", - singletonList("length").toString(), - "1", - "true", - "1", - null, - "false", - null, - "CLOB", - null, - null, - null, - null, - null, - null), - asList( - "XML", - "2009", - null, - null, - null, - emptyList().toString(), - "1", - "true", - "0", - "false", - "false", - "false", - "XML", - null, - null, - null, - null, - null, - null)); - MatcherAssert.assertThat(results, is(matchers)); - } - } - - @Test - public void testGetTypeInfoWithFiltering() throws Exception { - FlightInfo flightInfo = sqlClient.getXdbcTypeInfo(-5); - - try (FlightStream stream = sqlClient.getStream(flightInfo.getEndpoints().get(0).getTicket())) { - - final List> results = getResults(stream); - - final List> matchers = - ImmutableList.of( - asList( - "BIGINT", - "-5", - "19", - null, - null, - emptyList().toString(), - "1", - "false", - "2", - "false", - "false", - "true", - "BIGINT", - "0", - "0", - null, - null, - "10", - null)); - MatcherAssert.assertThat(results, is(matchers)); - } - } - - @Test - public void testGetCommandCrossReference() throws Exception { - final FlightInfo flightInfo = - sqlClient.getCrossReference( - TableRef.of(null, null, "FOREIGNTABLE"), TableRef.of(null, null, "INTTABLE")); - try (final FlightStream stream = - sqlClient.getStream(flightInfo.getEndpoints().get(0).getTicket())) { - - final List> results = getResults(stream); - - final List> matchers = - asList( - nullValue(String.class), // pk_catalog_name - is("APP"), // pk_schema_name - is("FOREIGNTABLE"), // pk_table_name - is("ID"), // pk_column_name - nullValue(String.class), // fk_catalog_name - is("APP"), // fk_schema_name - is("INTTABLE"), // fk_table_name - is("FOREIGNID"), // fk_column_name - is("1"), // key_sequence - containsString("SQL"), // fk_key_name - containsString("SQL"), 
// pk_key_name - is("3"), // update_rule - is("3")); // delete_rule - - assertEquals(1, results.size()); - final List assertions = new ArrayList<>(); - for (int i = 0; i < matchers.size(); i++) { - final String actual = results.get(0).get(i); - final Matcher expected = matchers.get(i); - assertions.add(() -> MatcherAssert.assertThat(actual, expected)); - } - assertAll(assertions); - } - } - - @Test - public void testCreateStatementSchema() throws Exception { - final FlightInfo info = sqlClient.execute("SELECT * FROM intTable"); - MatcherAssert.assertThat(info.getSchemaOptional(), is(Optional.of(SCHEMA_INT_TABLE))); - - // Consume statement to close connection before cache eviction - try (FlightStream stream = sqlClient.getStream(info.getEndpoints().get(0).getTicket())) { - while (stream.next()) { - // Do nothing - } - } - } - - @Test - public void testCreateStatementResults() throws Exception { - try (final FlightStream stream = - sqlClient.getStream( - sqlClient.execute("SELECT * FROM intTable").getEndpoints().get(0).getTicket())) { - assertAll( - () -> { - MatcherAssert.assertThat(stream.getSchema(), is(SCHEMA_INT_TABLE)); - }, - () -> { - MatcherAssert.assertThat( - getResults(stream), is(EXPECTED_RESULTS_FOR_STAR_SELECT_QUERY)); - }); - } - } - - @Test - public void testExecuteUpdate() { - assertAll( - () -> { - long insertedCount = - sqlClient.executeUpdate( - "INSERT INTO INTTABLE (keyName, value) VALUES " - + "('KEYNAME1', 1001), ('KEYNAME2', 1002), ('KEYNAME3', 1003)"); - MatcherAssert.assertThat(insertedCount, is(3L)); - }, - () -> { - long updatedCount = - sqlClient.executeUpdate( - "UPDATE INTTABLE SET keyName = 'KEYNAME1' " - + "WHERE keyName = 'KEYNAME2' OR keyName = 'KEYNAME3'"); - MatcherAssert.assertThat(updatedCount, is(2L)); - }, - () -> { - long deletedCount = - sqlClient.executeUpdate("DELETE FROM INTTABLE WHERE keyName = 'KEYNAME1'"); - MatcherAssert.assertThat(deletedCount, is(3L)); - }); - } - - @Test - public void 
testQueryWithNoResultsShouldNotHang() throws Exception { - try (final PreparedStatement preparedStatement = - sqlClient.prepare("SELECT * FROM intTable WHERE 1 = 0"); - final FlightStream stream = - sqlClient.getStream(preparedStatement.execute().getEndpoints().get(0).getTicket())) { - assertAll( - () -> MatcherAssert.assertThat(stream.getSchema(), is(SCHEMA_INT_TABLE)), - () -> { - final List> result = getResults(stream); - MatcherAssert.assertThat(result, is(emptyList())); - }); - } - } - - @Test - public void testCancelFlightInfo() { - FlightInfo info = sqlClient.getSqlInfo(); - CancelFlightInfoRequest request = new CancelFlightInfoRequest(info); - FlightRuntimeException fre = - assertThrows(FlightRuntimeException.class, () -> sqlClient.cancelFlightInfo(request)); - assertEquals(FlightStatusCode.UNIMPLEMENTED, fre.status().code()); - } - - @Test - public void testCancelQuery() { - FlightInfo info = sqlClient.getSqlInfo(); - FlightRuntimeException fre = - assertThrows(FlightRuntimeException.class, () -> sqlClient.cancelQuery(info)); - assertEquals(FlightStatusCode.UNIMPLEMENTED, fre.status().code()); - } - - @Test - public void testRenewEndpoint() { - FlightInfo info = sqlClient.getSqlInfo(); - FlightRuntimeException fre = - assertThrows( - FlightRuntimeException.class, - () -> - sqlClient.renewFlightEndpoint( - new RenewFlightEndpointRequest(info.getEndpoints().get(0)))); - assertEquals(FlightStatusCode.UNIMPLEMENTED, fre.status().code()); - } -} diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSqlStateless.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSqlStateless.java deleted file mode 100644 index 36d621ad64c6a..0000000000000 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSqlStateless.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql.test; - -import static org.apache.arrow.flight.sql.util.FlightStreamUtils.getResults; -import static org.apache.arrow.util.AutoCloseables.close; -import static org.hamcrest.CoreMatchers.is; -import static org.junit.jupiter.api.Assertions.assertAll; - -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.sql.FlightSqlClient; -import org.apache.arrow.flight.sql.FlightSqlClient.PreparedStatement; -import org.apache.arrow.flight.sql.example.FlightSqlStatelessExample; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; -import org.hamcrest.MatcherAssert; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -/** Test direct usage of Flight SQL workflows. 
*/ -public class TestFlightSqlStateless extends TestFlightSql { - - @BeforeAll - public static void setUp() throws Exception { - setUpClientServer(); - setUpExpectedResultsMap(); - } - - @AfterAll - public static void tearDown() throws Exception { - close(sqlClient, server, allocator); - FlightSqlStatelessExample.removeDerbyDatabaseIfExists(FlightSqlStatelessExample.DB_NAME); - } - - private static void setUpClientServer() throws Exception { - allocator = new RootAllocator(Integer.MAX_VALUE); - - final Location serverLocation = Location.forGrpcInsecure(LOCALHOST, 0); - server = - FlightServer.builder( - allocator, - serverLocation, - new FlightSqlStatelessExample(serverLocation, FlightSqlStatelessExample.DB_NAME)) - .build() - .start(); - - final Location clientLocation = Location.forGrpcInsecure(LOCALHOST, server.getPort()); - sqlClient = new FlightSqlClient(FlightClient.builder(allocator, clientLocation).build()); - } - - @Override - @Test - public void testSimplePreparedStatementResultsWithParameterBinding() throws Exception { - try (PreparedStatement prepare = sqlClient.prepare("SELECT * FROM intTable WHERE id = ?")) { - final Schema parameterSchema = prepare.getParameterSchema(); - try (final VectorSchemaRoot insertRoot = - VectorSchemaRoot.create(parameterSchema, allocator)) { - insertRoot.allocateNew(); - - final IntVector valueVector = (IntVector) insertRoot.getVector(0); - valueVector.setSafe(0, 1); - insertRoot.setRowCount(1); - - prepare.setParameters(insertRoot); - final FlightInfo flightInfo = prepare.execute(); - - for (FlightEndpoint endpoint : flightInfo.getEndpoints()) { - try (FlightStream stream = sqlClient.getStream(endpoint.getTicket())) { - assertAll( - () -> MatcherAssert.assertThat(stream.getSchema(), is(SCHEMA_INT_TABLE)), - () -> - MatcherAssert.assertThat( - getResults(stream), is(EXPECTED_RESULTS_FOR_PARAMETER_BINDING))); - } - } - } - } - } -} diff --git 
a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSqlStreams.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSqlStreams.java deleted file mode 100644 index 71c0dc88e4ec2..0000000000000 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSqlStreams.java +++ /dev/null @@ -1,326 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.sql.test; - -import static java.util.Arrays.asList; -import static java.util.Collections.emptyList; -import static java.util.Collections.singletonList; -import static org.apache.arrow.flight.sql.util.FlightStreamUtils.getResults; -import static org.apache.arrow.util.AutoCloseables.close; -import static org.apache.arrow.vector.types.Types.MinorType.INT; -import static org.hamcrest.CoreMatchers.is; -import static org.junit.jupiter.api.Assertions.assertAll; - -import com.google.common.collect.ImmutableList; -import com.google.protobuf.Any; -import com.google.protobuf.Message; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.flight.CallStatus; -import org.apache.arrow.flight.FlightClient; -import org.apache.arrow.flight.FlightDescriptor; -import org.apache.arrow.flight.FlightEndpoint; -import org.apache.arrow.flight.FlightInfo; -import org.apache.arrow.flight.FlightServer; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.flight.Location; -import org.apache.arrow.flight.Ticket; -import org.apache.arrow.flight.sql.BasicFlightSqlProducer; -import org.apache.arrow.flight.sql.FlightSqlClient; -import org.apache.arrow.flight.sql.FlightSqlProducer; -import org.apache.arrow.flight.sql.impl.FlightSql; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.Text; -import org.hamcrest.MatcherAssert; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -public class TestFlightSqlStreams { - - /** - * A limited {@link 
FlightSqlProducer} for testing GetTables, GetTableTypes, GetSqlInfo, and - * limited SQL commands. - */ - private static class FlightSqlTestProducer extends BasicFlightSqlProducer { - - // Note that for simplicity the getStream* implementations are blocking, but a proper - // FlightSqlProducer should - // have non-blocking implementations of getStream*. - - private static final String FIXED_QUERY = "SELECT 1 AS c1 FROM test_table"; - private static final Schema FIXED_SCHEMA = - new Schema(asList(Field.nullable("c1", Types.MinorType.INT.getType()))); - - private BufferAllocator allocator; - - FlightSqlTestProducer(BufferAllocator allocator) { - this.allocator = allocator; - } - - @Override - protected List determineEndpoints( - T request, FlightDescriptor flightDescriptor, Schema schema) { - if (request instanceof FlightSql.CommandGetTables - || request instanceof FlightSql.CommandGetTableTypes - || request instanceof FlightSql.CommandGetXdbcTypeInfo - || request instanceof FlightSql.CommandGetSqlInfo) { - return Collections.singletonList( - new FlightEndpoint(new Ticket(Any.pack(request).toByteArray()))); - } else if (request instanceof FlightSql.CommandStatementQuery - && ((FlightSql.CommandStatementQuery) request).getQuery().equals(FIXED_QUERY)) { - - // Tickets from CommandStatementQuery requests should be built using TicketStatementQuery - // then packed() into - // a ticket. The content of the statement handle is specific to the FlightSqlProducer. It - // does not need to - // be the query. It can be a query ID for example. 
- FlightSql.TicketStatementQuery ticketStatementQuery = - FlightSql.TicketStatementQuery.newBuilder() - .setStatementHandle(((FlightSql.CommandStatementQuery) request).getQueryBytes()) - .build(); - return Collections.singletonList( - new FlightEndpoint(new Ticket(Any.pack(ticketStatementQuery).toByteArray()))); - } - throw CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException(); - } - - @Override - public FlightInfo getFlightInfoStatement( - FlightSql.CommandStatementQuery command, CallContext context, FlightDescriptor descriptor) { - return generateFlightInfo(command, descriptor, FIXED_SCHEMA); - } - - @Override - public void getStreamStatement( - FlightSql.TicketStatementQuery ticket, CallContext context, ServerStreamListener listener) { - final String query = ticket.getStatementHandle().toStringUtf8(); - if (!query.equals(FIXED_QUERY)) { - listener.error( - CallStatus.UNIMPLEMENTED.withDescription("Not implemented.").toRuntimeException()); - } - - try (VectorSchemaRoot root = VectorSchemaRoot.create(FIXED_SCHEMA, allocator)) { - root.setRowCount(1); - ((IntVector) root.getVector("c1")).setSafe(0, 1); - listener.start(root); - listener.putNext(); - listener.completed(); - } - } - - @Override - public void getStreamSqlInfo( - FlightSql.CommandGetSqlInfo command, CallContext context, ServerStreamListener listener) { - try (VectorSchemaRoot root = - VectorSchemaRoot.create(Schemas.GET_SQL_INFO_SCHEMA, allocator)) { - root.setRowCount(0); - listener.start(root); - listener.putNext(); - listener.completed(); - } - } - - @Override - public void getStreamTypeInfo( - FlightSql.CommandGetXdbcTypeInfo request, - CallContext context, - ServerStreamListener listener) { - try (VectorSchemaRoot root = - VectorSchemaRoot.create(Schemas.GET_TYPE_INFO_SCHEMA, allocator)) { - root.setRowCount(1); - ((VarCharVector) root.getVector("type_name")).setSafe(0, new Text("Integer")); - ((IntVector) root.getVector("data_type")).setSafe(0, INT.ordinal()); - 
((IntVector) root.getVector("column_size")).setSafe(0, 400); - root.getVector("literal_prefix").setNull(0); - root.getVector("literal_suffix").setNull(0); - root.getVector("create_params").setNull(0); - ((IntVector) root.getVector("nullable")) - .setSafe(0, FlightSql.Nullable.NULLABILITY_NULLABLE.getNumber()); - ((BitVector) root.getVector("case_sensitive")).setSafe(0, 1); - ((IntVector) root.getVector("nullable")) - .setSafe(0, FlightSql.Searchable.SEARCHABLE_FULL.getNumber()); - ((BitVector) root.getVector("unsigned_attribute")).setSafe(0, 1); - root.getVector("fixed_prec_scale").setNull(0); - ((BitVector) root.getVector("auto_increment")).setSafe(0, 1); - ((VarCharVector) root.getVector("local_type_name")).setSafe(0, new Text("Integer")); - root.getVector("minimum_scale").setNull(0); - root.getVector("maximum_scale").setNull(0); - ((IntVector) root.getVector("sql_data_type")).setSafe(0, INT.ordinal()); - root.getVector("datetime_subcode").setNull(0); - ((IntVector) root.getVector("num_prec_radix")).setSafe(0, 10); - root.getVector("interval_precision").setNull(0); - - listener.start(root); - listener.putNext(); - listener.completed(); - } - } - - @Override - public void getStreamTables( - FlightSql.CommandGetTables command, CallContext context, ServerStreamListener listener) { - try (VectorSchemaRoot root = - VectorSchemaRoot.create(Schemas.GET_TABLES_SCHEMA_NO_SCHEMA, allocator)) { - root.setRowCount(1); - root.getVector("catalog_name").setNull(0); - root.getVector("db_schema_name").setNull(0); - ((VarCharVector) root.getVector("table_name")).setSafe(0, new Text("test_table")); - ((VarCharVector) root.getVector("table_type")).setSafe(0, new Text("TABLE")); - - listener.start(root); - listener.putNext(); - listener.completed(); - } - } - - @Override - public void getStreamTableTypes(CallContext context, ServerStreamListener listener) { - try (VectorSchemaRoot root = - VectorSchemaRoot.create(Schemas.GET_TABLE_TYPES_SCHEMA, allocator)) { - root.setRowCount(1); - 
((VarCharVector) root.getVector("table_type")).setSafe(0, new Text("TABLE")); - - listener.start(root); - listener.putNext(); - listener.completed(); - } - } - } - - private static BufferAllocator allocator; - - private static FlightServer server; - private static FlightSqlClient sqlClient; - - @BeforeAll - public static void setUp() throws Exception { - allocator = new RootAllocator(Integer.MAX_VALUE); - - final Location serverLocation = Location.forGrpcInsecure("localhost", 0); - server = - FlightServer.builder(allocator, serverLocation, new FlightSqlTestProducer(allocator)) - .build() - .start(); - - final Location clientLocation = Location.forGrpcInsecure("localhost", server.getPort()); - sqlClient = new FlightSqlClient(FlightClient.builder(allocator, clientLocation).build()); - } - - @AfterAll - public static void tearDown() throws Exception { - close(sqlClient, server); - - // Manually close all child allocators. - allocator.getChildAllocators().forEach(BufferAllocator::close); - close(allocator); - } - - @Test - public void testGetTablesResultNoSchema() throws Exception { - try (final FlightStream stream = - sqlClient.getStream( - sqlClient.getTables(null, null, null, null, false).getEndpoints().get(0).getTicket())) { - assertAll( - () -> - MatcherAssert.assertThat( - stream.getSchema(), is(FlightSqlProducer.Schemas.GET_TABLES_SCHEMA_NO_SCHEMA)), - () -> { - final List> results = getResults(stream); - final List> expectedResults = - ImmutableList.of( - // catalog_name | schema_name | table_name | table_type | table_schema - asList(null, null, "test_table", "TABLE")); - MatcherAssert.assertThat(results, is(expectedResults)); - }); - } - } - - @Test - public void testGetTableTypesResult() throws Exception { - try (final FlightStream stream = - sqlClient.getStream(sqlClient.getTableTypes().getEndpoints().get(0).getTicket())) { - assertAll( - () -> - MatcherAssert.assertThat( - stream.getSchema(), is(FlightSqlProducer.Schemas.GET_TABLE_TYPES_SCHEMA)), - () -> { 
- final List> tableTypes = getResults(stream); - final List> expectedTableTypes = - ImmutableList.of( - // table_type - singletonList("TABLE")); - MatcherAssert.assertThat(tableTypes, is(expectedTableTypes)); - }); - } - } - - @Test - public void testGetSqlInfoResults() throws Exception { - final FlightInfo info = sqlClient.getSqlInfo(); - try (final FlightStream stream = sqlClient.getStream(info.getEndpoints().get(0).getTicket())) { - assertAll( - () -> - MatcherAssert.assertThat( - stream.getSchema(), is(FlightSqlProducer.Schemas.GET_SQL_INFO_SCHEMA)), - () -> MatcherAssert.assertThat(getResults(stream), is(emptyList()))); - } - } - - @Test - public void testGetTypeInfo() throws Exception { - FlightInfo flightInfo = sqlClient.getXdbcTypeInfo(); - - try (FlightStream stream = sqlClient.getStream(flightInfo.getEndpoints().get(0).getTicket())) { - - final List> results = getResults(stream); - - final List> matchers = - ImmutableList.of( - asList( - "Integer", "4", "400", null, null, "3", "true", null, "true", null, "true", - "Integer", null, null, "4", null, "10", null)); - - MatcherAssert.assertThat(results, is(matchers)); - } - } - - @Test - public void testExecuteQuery() throws Exception { - try (final FlightStream stream = - sqlClient.getStream( - sqlClient - .execute(FlightSqlTestProducer.FIXED_QUERY) - .getEndpoints() - .get(0) - .getTicket())) { - assertAll( - () -> - MatcherAssert.assertThat(stream.getSchema(), is(FlightSqlTestProducer.FIXED_SCHEMA)), - () -> - MatcherAssert.assertThat(getResults(stream), is(singletonList(singletonList("1"))))); - } - } -} diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/util/AdhocTestOption.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/util/AdhocTestOption.java deleted file mode 100644 index 82aa883935f27..0000000000000 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/util/AdhocTestOption.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the 
Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql.util; - -import com.google.protobuf.Descriptors.EnumDescriptor; -import com.google.protobuf.Descriptors.EnumValueDescriptor; -import com.google.protobuf.ProtocolMessageEnum; - -enum AdhocTestOption implements ProtocolMessageEnum { - OPTION_A, - OPTION_B, - OPTION_C; - - @Override - public int getNumber() { - return ordinal(); - } - - @Override - public EnumValueDescriptor getValueDescriptor() { - throw getUnsupportedException(); - } - - @Override - public EnumDescriptor getDescriptorForType() { - throw getUnsupportedException(); - } - - private UnsupportedOperationException getUnsupportedException() { - return new UnsupportedOperationException( - "Unimplemented method is irrelevant for the scope of this test."); - } -} diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/util/FlightStreamUtils.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/util/FlightStreamUtils.java deleted file mode 100644 index 58981edd34802..0000000000000 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/util/FlightStreamUtils.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Licensed to the Apache Software Foundation 
(ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql.util; - -import static java.util.Objects.isNull; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import org.apache.arrow.flight.FlightStream; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.ipc.ReadChannel; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.util.Text; - -public class FlightStreamUtils { - - public static List> getResults(FlightStream stream) { - final List> results = new ArrayList<>(); - while (stream.next()) { - try (final VectorSchemaRoot root = stream.getRoot()) { - final long rowCount = root.getRowCount(); - for (int i = 0; i < 
rowCount; ++i) { - results.add(new ArrayList<>()); - } - - root.getSchema() - .getFields() - .forEach( - field -> { - try (final FieldVector fieldVector = root.getVector(field.getName())) { - if (fieldVector instanceof VarCharVector) { - final VarCharVector varcharVector = (VarCharVector) fieldVector; - for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) { - final Text data = varcharVector.getObject(rowIndex); - results.get(rowIndex).add(isNull(data) ? null : data.toString()); - } - } else if (fieldVector instanceof IntVector) { - for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) { - Object data = fieldVector.getObject(rowIndex); - results.get(rowIndex).add(isNull(data) ? null : Objects.toString(data)); - } - } else if (fieldVector instanceof VarBinaryVector) { - final VarBinaryVector varbinaryVector = (VarBinaryVector) fieldVector; - for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) { - final byte[] data = varbinaryVector.getObject(rowIndex); - final String output; - try { - output = - isNull(data) - ? null - : MessageSerializer.deserializeSchema( - new ReadChannel( - Channels.newChannel(new ByteArrayInputStream(data)))) - .toJson(); - } catch (final IOException e) { - throw new RuntimeException("Failed to deserialize schema", e); - } - results.get(rowIndex).add(output); - } - } else if (fieldVector instanceof DenseUnionVector) { - final DenseUnionVector denseUnionVector = (DenseUnionVector) fieldVector; - for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) { - final Object data = denseUnionVector.getObject(rowIndex); - results.get(rowIndex).add(isNull(data) ? 
null : Objects.toString(data)); - } - } else if (fieldVector instanceof ListVector) { - for (int i = 0; i < fieldVector.getValueCount(); i++) { - if (!fieldVector.isNull(i)) { - List elements = - (List) ((ListVector) fieldVector).getObject(i); - List values = new ArrayList<>(); - - for (Text element : elements) { - values.add(element.toString()); - } - results.get(i).add(values.toString()); - } - } - - } else if (fieldVector instanceof UInt4Vector) { - final UInt4Vector uInt4Vector = (UInt4Vector) fieldVector; - for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) { - final Object data = uInt4Vector.getObject(rowIndex); - results.get(rowIndex).add(isNull(data) ? null : Objects.toString(data)); - } - } else if (fieldVector instanceof UInt1Vector) { - final UInt1Vector uInt1Vector = (UInt1Vector) fieldVector; - for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) { - final Object data = uInt1Vector.getObject(rowIndex); - results.get(rowIndex).add(isNull(data) ? null : Objects.toString(data)); - } - } else if (fieldVector instanceof BitVector) { - for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) { - Object data = fieldVector.getObject(rowIndex); - results.get(rowIndex).add(isNull(data) ? null : Objects.toString(data)); - } - } else { - throw new UnsupportedOperationException("Not yet implemented"); - } - } - }); - } - } - - return results; - } -} diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/util/SqlInfoOptionsUtilsBitmaskCreationTest.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/util/SqlInfoOptionsUtilsBitmaskCreationTest.java deleted file mode 100644 index 3e7552a0a39f3..0000000000000 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/util/SqlInfoOptionsUtilsBitmaskCreationTest.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flight.sql.util; - -import static java.util.Arrays.asList; -import static org.apache.arrow.flight.sql.util.AdhocTestOption.OPTION_A; -import static org.apache.arrow.flight.sql.util.AdhocTestOption.OPTION_B; -import static org.apache.arrow.flight.sql.util.AdhocTestOption.OPTION_C; -import static org.apache.arrow.flight.sql.util.SqlInfoOptionsUtils.createBitmaskFromEnums; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.util.List; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.MethodSource; - -public final class SqlInfoOptionsUtilsBitmaskCreationTest { - - public static List provideParameters() { - return asList( - new Object[][] { - {new AdhocTestOption[0], 0L}, - {new AdhocTestOption[] {OPTION_A}, 1L}, - {new AdhocTestOption[] {OPTION_B}, 0b10L}, - {new AdhocTestOption[] {OPTION_A, OPTION_B}, 0b11L}, - {new AdhocTestOption[] {OPTION_C}, 0b100L}, - {new AdhocTestOption[] {OPTION_A, OPTION_C}, 0b101L}, - {new AdhocTestOption[] {OPTION_B, OPTION_C}, 0b110L}, - {AdhocTestOption.values(), 0b111L}, - }); - } - - @ParameterizedTest - @MethodSource("provideParameters") - public void testShouldBuildBitmaskFromEnums( - AdhocTestOption[] adhocTestOptions, long expectedBitmask) { - 
assertEquals(createBitmaskFromEnums(adhocTestOptions), expectedBitmask); - } -} diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/util/SqlInfoOptionsUtilsBitmaskParsingTest.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/util/SqlInfoOptionsUtilsBitmaskParsingTest.java deleted file mode 100644 index f3bac067edcb2..0000000000000 --- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/util/SqlInfoOptionsUtilsBitmaskParsingTest.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flight.sql.util; - -import static java.util.Arrays.asList; -import static java.util.Arrays.stream; -import static java.util.stream.Collectors.toCollection; -import static org.apache.arrow.flight.sql.util.AdhocTestOption.OPTION_A; -import static org.apache.arrow.flight.sql.util.AdhocTestOption.OPTION_B; -import static org.apache.arrow.flight.sql.util.AdhocTestOption.OPTION_C; -import static org.apache.arrow.flight.sql.util.SqlInfoOptionsUtils.doesBitmaskTranslateToEnum; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.util.EnumSet; -import java.util.List; -import java.util.Set; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.MethodSource; - -public final class SqlInfoOptionsUtilsBitmaskParsingTest { - - public static List provideParameters() { - return asList( - new Object[][] { - {0L, EnumSet.noneOf(AdhocTestOption.class)}, - {1L, EnumSet.of(OPTION_A)}, - {0b10L, EnumSet.of(OPTION_B)}, - {0b11L, EnumSet.of(OPTION_A, OPTION_B)}, - {0b100L, EnumSet.of(OPTION_C)}, - {0b101L, EnumSet.of(OPTION_A, OPTION_C)}, - {0b110L, EnumSet.of(OPTION_B, OPTION_C)}, - {0b111L, EnumSet.allOf(AdhocTestOption.class)}, - }); - } - - @ParameterizedTest - @MethodSource("provideParameters") - public void testShouldFilterOutEnumsBasedOnBitmask( - long bitmask, Set expectedOptions) { - final Set actualOptions = - stream(AdhocTestOption.values()) - .filter(enumInstance -> doesBitmaskTranslateToEnum(enumInstance, bitmask)) - .collect(toCollection(() -> EnumSet.noneOf(AdhocTestOption.class))); - assertEquals(actualOptions, expectedOptions); - } -} diff --git a/java/flight/flight-sql/src/test/resources/logback.xml b/java/flight/flight-sql/src/test/resources/logback.xml deleted file mode 100644 index 4c54d18a210ff..0000000000000 --- a/java/flight/flight-sql/src/test/resources/logback.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - 
- - - - - - - - diff --git a/java/flight/pom.xml b/java/flight/pom.xml deleted file mode 100644 index 2fc3e89ef8a22..0000000000000 --- a/java/flight/pom.xml +++ /dev/null @@ -1,40 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - - arrow-flight - - pom - - Arrow Flight - - - flight-core - flight-sql - flight-sql-jdbc-core - flight-sql-jdbc-driver - flight-integration-tests - - diff --git a/java/format/pom.xml b/java/format/pom.xml deleted file mode 100644 index d3578b63d2043..0000000000000 --- a/java/format/pom.xml +++ /dev/null @@ -1,75 +0,0 @@ - - - - 4.0.0 - - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - - - arrow-format - jar - Arrow Format - Generated Java files from the IPC Flatbuffer definitions. - - - - com.google.flatbuffers - flatbuffers-java - - - - - - - - org.apache.maven.plugins - maven-checkstyle-plugin - - true - - - - com.diffplug.spotless - spotless-maven-plugin - - - - - ${maven.multiModuleProjectDirectory}/dev/license/asf-java.license - package - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - -Werror - - - - - - diff --git a/java/format/src/main/java/module-info.java b/java/format/src/main/java/module-info.java deleted file mode 100644 index f8d740b726fde..0000000000000 --- a/java/format/src/main/java/module-info.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// TODO(https://github.com/apache/arrow/issues/44037): Google hasn't reviewed Flatbuffers fix -@SuppressWarnings({ "requires-automatic", "requires-transitive-automatic" }) -module org.apache.arrow.format { - exports org.apache.arrow.flatbuf; - requires transitive flatbuffers.java; -} diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Binary.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Binary.java deleted file mode 100644 index 1d076742850c3..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Binary.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Opaque binary data - */ -@SuppressWarnings("unused") -public final class Binary extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Binary getRootAsBinary(ByteBuffer _bb) { return getRootAsBinary(_bb, new Binary()); } - public static Binary getRootAsBinary(ByteBuffer _bb, Binary obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Binary __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startBinary(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endBinary(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Binary get(int j) { return get(new Binary(), j); } - public Binary get(Binary obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/BinaryView.java 
b/java/format/src/main/java/org/apache/arrow/flatbuf/BinaryView.java deleted file mode 100644 index d76cf70b9b3a5..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/BinaryView.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Logically the same as Binary, but the internal representation uses a view - * struct that contains the string length and either the string's entire data - * inline (for small strings) or an inlined prefix, an index of another buffer, - * and an offset pointing to a slice in that buffer (for non-small strings). - * - * Since it uses a variable number of data buffers, each Field with this type - * must have a corresponding entry in `variadicBufferCounts`. 
- */ -@SuppressWarnings("unused") -public final class BinaryView extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static BinaryView getRootAsBinaryView(ByteBuffer _bb) { return getRootAsBinaryView(_bb, new BinaryView()); } - public static BinaryView getRootAsBinaryView(ByteBuffer _bb, BinaryView obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public BinaryView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startBinaryView(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endBinaryView(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public BinaryView get(int j) { return get(new BinaryView(), j); } - public BinaryView get(BinaryView obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Block.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Block.java deleted file mode 100644 index f844e815cd7b9..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Block.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -@SuppressWarnings("unused") -public final class Block extends Struct { - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Block __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * Index to the start of the RecordBlock (note this is past the Message header) - */ - public long offset() { return bb.getLong(bb_pos + 0); } - /** - * Length of the metadata - */ - public int metaDataLength() { return bb.getInt(bb_pos + 8); } - /** - * Length of the data (this is aligned so there can be a gap between this and - * the metadata). 
- */ - public long bodyLength() { return bb.getLong(bb_pos + 16); } - - public static int createBlock(FlatBufferBuilder builder, long offset, int metaDataLength, long bodyLength) { - builder.prep(8, 24); - builder.putLong(bodyLength); - builder.pad(4); - builder.putInt(metaDataLength); - builder.putLong(offset); - return builder.offset(); - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Block get(int j) { return get(new Block(), j); } - public Block get(Block obj, int j) { return obj.__assign(__element(j), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java b/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java deleted file mode 100644 index e5c93a6b75c3e..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompression.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Optional compression for the memory buffers constituting IPC message - * bodies. Intended for use with RecordBatch but could be used for other - * message types - */ -@SuppressWarnings("unused") -public final class BodyCompression extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static BodyCompression getRootAsBodyCompression(ByteBuffer _bb) { return getRootAsBodyCompression(_bb, new BodyCompression()); } - public static BodyCompression getRootAsBodyCompression(ByteBuffer _bb, BodyCompression obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public BodyCompression __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * Compressor library. - * For LZ4_FRAME, each compressed buffer must consist of a single frame. - */ - public byte codec() { int o = __offset(4); return o != 0 ? bb.get(o + bb_pos) : 0; } - /** - * Indicates the way the record batch body was compressed - */ - public byte method() { int o = __offset(6); return o != 0 ? 
bb.get(o + bb_pos) : 0; } - - public static int createBodyCompression(FlatBufferBuilder builder, - byte codec, - byte method) { - builder.startTable(2); - BodyCompression.addMethod(builder, method); - BodyCompression.addCodec(builder, codec); - return BodyCompression.endBodyCompression(builder); - } - - public static void startBodyCompression(FlatBufferBuilder builder) { builder.startTable(2); } - public static void addCodec(FlatBufferBuilder builder, byte codec) { builder.addByte(0, codec, 0); } - public static void addMethod(FlatBufferBuilder builder, byte method) { builder.addByte(1, method, 0); } - public static int endBodyCompression(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public BodyCompression get(int j) { return get(new BodyCompression(), j); } - public BodyCompression get(BodyCompression obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompressionMethod.java b/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompressionMethod.java deleted file mode 100644 index 91f846cc60366..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/BodyCompressionMethod.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -/** - * Provided for forward compatibility in case we need to support different - * strategies for compressing the IPC message body (like whole-body - * compression rather than buffer-level) in the future - */ -@SuppressWarnings("unused") -public final class BodyCompressionMethod { - private BodyCompressionMethod() { } - /** - * Each constituent buffer is first compressed with the indicated - * compressor, and then written with the uncompressed length in the first 8 - * bytes as a 64-bit little-endian signed integer followed by the compressed - * buffer bytes (and then padding as required by the protocol). The - * uncompressed length may be set to -1 to indicate that the data that - * follows is not compressed, which can be useful for cases where - * compression does not yield appreciable savings. - */ - public static final byte BUFFER = 0; - - public static final String[] names = { "BUFFER", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Bool.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Bool.java deleted file mode 100644 index d1b61c3d30d0d..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Bool.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -@SuppressWarnings("unused") -public final class Bool extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Bool getRootAsBool(ByteBuffer _bb) { return getRootAsBool(_bb, new Bool()); } - public static Bool getRootAsBool(ByteBuffer _bb, Bool obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Bool __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startBool(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endBool(FlatBufferBuilder 
builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Bool get(int j) { return get(new Bool(), j); } - public Bool get(Bool obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Buffer.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Buffer.java deleted file mode 100644 index 6451f0fdaeec7..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Buffer.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * ---------------------------------------------------------------------- - * A Buffer represents a single contiguous memory segment - */ -@SuppressWarnings("unused") -public final class Buffer extends Struct { - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Buffer __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * The relative offset into the shared memory page where the bytes for this - * buffer starts - */ - public long offset() { return bb.getLong(bb_pos + 0); } - /** - * The absolute length (in bytes) of the memory buffer. The memory is found - * from offset (inclusive) to offset + length (non-inclusive). When building - * messages using the encapsulated IPC message, padding bytes may be written - * after a buffer, but such padding bytes do not need to be accounted for in - * the size here. 
- */ - public long length() { return bb.getLong(bb_pos + 8); } - - public static int createBuffer(FlatBufferBuilder builder, long offset, long length) { - builder.prep(8, 16); - builder.putLong(length); - builder.putLong(offset); - return builder.offset(); - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Buffer get(int j) { return get(new Buffer(), j); } - public Buffer get(Buffer obj, int j) { return obj.__assign(__element(j), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/CompressionType.java b/java/format/src/main/java/org/apache/arrow/flatbuf/CompressionType.java deleted file mode 100644 index 4a7467c0b2f54..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/CompressionType.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -@SuppressWarnings("unused") -public final class CompressionType { - private CompressionType() { } - public static final byte LZ4_FRAME = 0; - public static final byte ZSTD = 1; - - public static final String[] names = { "LZ4_FRAME", "ZSTD", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java deleted file mode 100644 index 9ad3a523fe777..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Date.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Date is either a 32-bit or 64-bit signed integer type representing an - * elapsed time since UNIX epoch (1970-01-01), stored in either of two units: - * - * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no - * leap seconds), where the values are evenly divisible by 86400000 - * * Days (32 bits) since the UNIX epoch - */ -@SuppressWarnings("unused") -public final class Date extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Date getRootAsDate(ByteBuffer _bb) { return getRootAsDate(_bb, new Date()); } - public static Date getRootAsDate(ByteBuffer _bb, Date obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Date __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - public short unit() { int o = __offset(4); return o != 0 ? 
bb.getShort(o + bb_pos) : 1; } - - public static int createDate(FlatBufferBuilder builder, - short unit) { - builder.startTable(1); - Date.addUnit(builder, unit); - return Date.endDate(builder); - } - - public static void startDate(FlatBufferBuilder builder) { builder.startTable(1); } - public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 1); } - public static int endDate(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Date get(int j) { return get(new Date(), j); } - public Date get(Date obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/DateUnit.java b/java/format/src/main/java/org/apache/arrow/flatbuf/DateUnit.java deleted file mode 100644 index 6d1116e7335f9..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/DateUnit.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -@SuppressWarnings("unused") -public final class DateUnit { - private DateUnit() { } - public static final short DAY = 0; - public static final short MILLISECOND = 1; - - public static final String[] names = { "DAY", "MILLISECOND", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Decimal.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Decimal.java deleted file mode 100644 index 13b13dd917083..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Decimal.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Exact decimal value represented as an integer value in two's - * complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers - * are used. The representation uses the endianness indicated - * in the Schema. - */ -@SuppressWarnings("unused") -public final class Decimal extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Decimal getRootAsDecimal(ByteBuffer _bb) { return getRootAsDecimal(_bb, new Decimal()); } - public static Decimal getRootAsDecimal(ByteBuffer _bb, Decimal obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Decimal __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * Total number of decimal digits - */ - public int precision() { int o = __offset(4); return o != 0 ? bb.getInt(o + bb_pos) : 0; } - /** - * Number of digits after the decimal point "." - */ - public int scale() { int o = __offset(6); return o != 0 ? bb.getInt(o + bb_pos) : 0; } - /** - * Number of bits per value. The only accepted widths are 128 and 256. - * We use bitWidth for consistency with Int::bitWidth. 
- */ - public int bitWidth() { int o = __offset(8); return o != 0 ? bb.getInt(o + bb_pos) : 128; } - - public static int createDecimal(FlatBufferBuilder builder, - int precision, - int scale, - int bitWidth) { - builder.startTable(3); - Decimal.addBitWidth(builder, bitWidth); - Decimal.addScale(builder, scale); - Decimal.addPrecision(builder, precision); - return Decimal.endDecimal(builder); - } - - public static void startDecimal(FlatBufferBuilder builder) { builder.startTable(3); } - public static void addPrecision(FlatBufferBuilder builder, int precision) { builder.addInt(0, precision, 0); } - public static void addScale(FlatBufferBuilder builder, int scale) { builder.addInt(1, scale, 0); } - public static void addBitWidth(FlatBufferBuilder builder, int bitWidth) { builder.addInt(2, bitWidth, 128); } - public static int endDecimal(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Decimal get(int j) { return get(new Decimal(), j); } - public Decimal get(Decimal obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryBatch.java b/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryBatch.java deleted file mode 100644 index b29097d8a562b..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryBatch.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * For sending dictionary encoding information. Any Field can be - * dictionary-encoded, but in this case none of its children may be - * dictionary-encoded. 
- * There is one vector / column per dictionary, but that vector / column - * may be spread across multiple dictionary batches by using the isDelta - * flag - */ -@SuppressWarnings("unused") -public final class DictionaryBatch extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static DictionaryBatch getRootAsDictionaryBatch(ByteBuffer _bb) { return getRootAsDictionaryBatch(_bb, new DictionaryBatch()); } - public static DictionaryBatch getRootAsDictionaryBatch(ByteBuffer _bb, DictionaryBatch obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public DictionaryBatch __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - public long id() { int o = __offset(4); return o != 0 ? bb.getLong(o + bb_pos) : 0L; } - public org.apache.arrow.flatbuf.RecordBatch data() { return data(new org.apache.arrow.flatbuf.RecordBatch()); } - public org.apache.arrow.flatbuf.RecordBatch data(org.apache.arrow.flatbuf.RecordBatch obj) { int o = __offset(6); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; } - /** - * If isDelta is true the values in the dictionary are to be appended to a - * dictionary with the indicated id. If isDelta is false this dictionary - * should replace the existing dictionary. - */ - public boolean isDelta() { int o = __offset(8); return o != 0 ? 
0!=bb.get(o + bb_pos) : false; } - - public static int createDictionaryBatch(FlatBufferBuilder builder, - long id, - int dataOffset, - boolean isDelta) { - builder.startTable(3); - DictionaryBatch.addId(builder, id); - DictionaryBatch.addData(builder, dataOffset); - DictionaryBatch.addIsDelta(builder, isDelta); - return DictionaryBatch.endDictionaryBatch(builder); - } - - public static void startDictionaryBatch(FlatBufferBuilder builder) { builder.startTable(3); } - public static void addId(FlatBufferBuilder builder, long id) { builder.addLong(0, id, 0L); } - public static void addData(FlatBufferBuilder builder, int dataOffset) { builder.addOffset(1, dataOffset, 0); } - public static void addIsDelta(FlatBufferBuilder builder, boolean isDelta) { builder.addBoolean(2, isDelta, false); } - public static int endDictionaryBatch(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public DictionaryBatch get(int j) { return get(new DictionaryBatch(), j); } - public DictionaryBatch get(DictionaryBatch obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryEncoding.java b/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryEncoding.java deleted file mode 100644 index 6b94889dc8fed..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryEncoding.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -@SuppressWarnings("unused") -public final class DictionaryEncoding extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static DictionaryEncoding getRootAsDictionaryEncoding(ByteBuffer _bb) { return getRootAsDictionaryEncoding(_bb, new DictionaryEncoding()); } - public static DictionaryEncoding getRootAsDictionaryEncoding(ByteBuffer _bb, DictionaryEncoding obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public DictionaryEncoding __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * The 
known dictionary id in the application where this data is used. In - * the file or streaming formats, the dictionary ids are found in the - * DictionaryBatch messages - */ - public long id() { int o = __offset(4); return o != 0 ? bb.getLong(o + bb_pos) : 0L; } - /** - * The dictionary indices are constrained to be non-negative integers. If - * this field is null, the indices must be signed int32. To maximize - * cross-language compatibility and performance, implementations are - * recommended to prefer signed integer types over unsigned integer types - * and to avoid uint64 indices unless they are required by an application. - */ - public org.apache.arrow.flatbuf.Int indexType() { return indexType(new org.apache.arrow.flatbuf.Int()); } - public org.apache.arrow.flatbuf.Int indexType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(6); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; } - /** - * By default, dictionaries are not ordered, or the order does not have - * semantic meaning. In some statistical, applications, dictionary-encoding - * is used to represent ordered categorical data, and we provide a way to - * preserve that metadata here - */ - public boolean isOrdered() { int o = __offset(8); return o != 0 ? 0!=bb.get(o + bb_pos) : false; } - public short dictionaryKind() { int o = __offset(10); return o != 0 ? 
bb.getShort(o + bb_pos) : 0; } - - public static int createDictionaryEncoding(FlatBufferBuilder builder, - long id, - int indexTypeOffset, - boolean isOrdered, - short dictionaryKind) { - builder.startTable(4); - DictionaryEncoding.addId(builder, id); - DictionaryEncoding.addIndexType(builder, indexTypeOffset); - DictionaryEncoding.addDictionaryKind(builder, dictionaryKind); - DictionaryEncoding.addIsOrdered(builder, isOrdered); - return DictionaryEncoding.endDictionaryEncoding(builder); - } - - public static void startDictionaryEncoding(FlatBufferBuilder builder) { builder.startTable(4); } - public static void addId(FlatBufferBuilder builder, long id) { builder.addLong(0, id, 0L); } - public static void addIndexType(FlatBufferBuilder builder, int indexTypeOffset) { builder.addOffset(1, indexTypeOffset, 0); } - public static void addIsOrdered(FlatBufferBuilder builder, boolean isOrdered) { builder.addBoolean(2, isOrdered, false); } - public static void addDictionaryKind(FlatBufferBuilder builder, short dictionaryKind) { builder.addShort(3, dictionaryKind, 0); } - public static int endDictionaryEncoding(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public DictionaryEncoding get(int j) { return get(new DictionaryEncoding(), j); } - public DictionaryEncoding get(DictionaryEncoding obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryKind.java b/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryKind.java deleted file mode 100644 index 7039b0522c27a..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/DictionaryKind.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - 
* contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -/** - * ---------------------------------------------------------------------- - * Dictionary encoding metadata - * Maintained for forwards compatibility, in the future - * Dictionaries might be explicit maps between integers and values - * allowing for non-contiguous index values - */ -@SuppressWarnings("unused") -public final class DictionaryKind { - private DictionaryKind() { } - public static final short DenseArray = 0; - - public static final String[] names = { "DenseArray", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Duration.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Duration.java deleted file mode 100644 index 86974b611b354..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Duration.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -@SuppressWarnings("unused") -public final class Duration extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Duration getRootAsDuration(ByteBuffer _bb) { return getRootAsDuration(_bb, new Duration()); } - public static Duration getRootAsDuration(ByteBuffer _bb, Duration obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Duration __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - public short unit() { int o = __offset(4); return o != 0 ? 
bb.getShort(o + bb_pos) : 1; } - - public static int createDuration(FlatBufferBuilder builder, - short unit) { - builder.startTable(1); - Duration.addUnit(builder, unit); - return Duration.endDuration(builder); - } - - public static void startDuration(FlatBufferBuilder builder) { builder.startTable(1); } - public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 1); } - public static int endDuration(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Duration get(int j) { return get(new Duration(), j); } - public Duration get(Duration obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Endianness.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Endianness.java deleted file mode 100644 index f033ccb5832c3..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Endianness.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -/** - * ---------------------------------------------------------------------- - * Endianness of the platform producing the data - */ -@SuppressWarnings("unused") -public final class Endianness { - private Endianness() { } - public static final short Little = 0; - public static final short Big = 1; - - public static final String[] names = { "Little", "Big", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Feature.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Feature.java deleted file mode 100644 index cad9b75c1621e..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Feature.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -/** - * Represents Arrow Features that might not have full support - * within implementations. This is intended to be used in - * two scenarios: - * 1. 
A mechanism for readers of Arrow Streams - * and files to understand that the stream or file makes - * use of a feature that isn't supported or unknown to - * the implementation (and therefore can meet the Arrow - * forward compatibility guarantees). - * 2. A means of negotiating between a client and server - * what features a stream is allowed to use. The enums - * values here are intented to represent higher level - * features, additional details maybe negotiated - * with key-value pairs specific to the protocol. - * - * Enums added to this list should be assigned power-of-two values - * to facilitate exchanging and comparing bitmaps for supported - * features. - */ -@SuppressWarnings("unused") -public final class Feature { - private Feature() { } - /** - * Needed to make flatbuffers happy. - */ - public static final long UNUSED = 0L; - /** - * The stream makes use of multiple full dictionaries with the - * same ID and assumes clients implement dictionary replacement - * correctly. - */ - public static final long DICTIONARY_REPLACEMENT = 1L; - /** - * The stream makes use of compressed bodies as described - * in Message.fbs. - */ - public static final long COMPRESSED_BODY = 2L; -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Field.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Field.java deleted file mode 100644 index 28d6fd1bad755..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Field.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * ---------------------------------------------------------------------- - * A field represents a named column in a record / row batch or child of a - * nested type. - */ -@SuppressWarnings("unused") -public final class Field extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Field getRootAsField(ByteBuffer _bb) { return getRootAsField(_bb, new Field()); } - public static Field getRootAsField(ByteBuffer _bb, Field obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Field __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * Name is not required, in i.e. a List - */ - public String name() { int o = __offset(4); return o != 0 ? 
__string(o + bb_pos) : null; } - public ByteBuffer nameAsByteBuffer() { return __vector_as_bytebuffer(4, 1); } - public ByteBuffer nameInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 4, 1); } - /** - * Whether or not this field can contain nulls. Should be true in general. - */ - public boolean nullable() { int o = __offset(6); return o != 0 ? 0!=bb.get(o + bb_pos) : false; } - public byte typeType() { int o = __offset(8); return o != 0 ? bb.get(o + bb_pos) : 0; } - /** - * This is the type of the decoded value if the field is dictionary encoded. - */ - public Table type(Table obj) { int o = __offset(10); return o != 0 ? __union(obj, o + bb_pos) : null; } - /** - * Present only if the field is dictionary encoded. - */ - public org.apache.arrow.flatbuf.DictionaryEncoding dictionary() { return dictionary(new org.apache.arrow.flatbuf.DictionaryEncoding()); } - public org.apache.arrow.flatbuf.DictionaryEncoding dictionary(org.apache.arrow.flatbuf.DictionaryEncoding obj) { int o = __offset(12); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; } - /** - * children apply only to nested data types like Struct, List and Union. For - * primitive types children will have length 0. - */ - public org.apache.arrow.flatbuf.Field children(int j) { return children(new org.apache.arrow.flatbuf.Field(), j); } - public org.apache.arrow.flatbuf.Field children(org.apache.arrow.flatbuf.Field obj, int j) { int o = __offset(14); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; } - public int childrenLength() { int o = __offset(14); return o != 0 ? __vector_len(o) : 0; } - public org.apache.arrow.flatbuf.Field.Vector childrenVector() { return childrenVector(new org.apache.arrow.flatbuf.Field.Vector()); } - public org.apache.arrow.flatbuf.Field.Vector childrenVector(org.apache.arrow.flatbuf.Field.Vector obj) { int o = __offset(14); return o != 0 ? 
obj.__assign(__vector(o), 4, bb) : null; } - /** - * User-defined metadata - */ - public org.apache.arrow.flatbuf.KeyValue customMetadata(int j) { return customMetadata(new org.apache.arrow.flatbuf.KeyValue(), j); } - public org.apache.arrow.flatbuf.KeyValue customMetadata(org.apache.arrow.flatbuf.KeyValue obj, int j) { int o = __offset(16); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; } - public int customMetadataLength() { int o = __offset(16); return o != 0 ? __vector_len(o) : 0; } - public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector() { return customMetadataVector(new org.apache.arrow.flatbuf.KeyValue.Vector()); } - public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector(org.apache.arrow.flatbuf.KeyValue.Vector obj) { int o = __offset(16); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; } - - public static int createField(FlatBufferBuilder builder, - int nameOffset, - boolean nullable, - byte typeType, - int typeOffset, - int dictionaryOffset, - int childrenOffset, - int customMetadataOffset) { - builder.startTable(7); - Field.addCustomMetadata(builder, customMetadataOffset); - Field.addChildren(builder, childrenOffset); - Field.addDictionary(builder, dictionaryOffset); - Field.addType(builder, typeOffset); - Field.addName(builder, nameOffset); - Field.addTypeType(builder, typeType); - Field.addNullable(builder, nullable); - return Field.endField(builder); - } - - public static void startField(FlatBufferBuilder builder) { builder.startTable(7); } - public static void addName(FlatBufferBuilder builder, int nameOffset) { builder.addOffset(0, nameOffset, 0); } - public static void addNullable(FlatBufferBuilder builder, boolean nullable) { builder.addBoolean(1, nullable, false); } - public static void addTypeType(FlatBufferBuilder builder, byte typeType) { builder.addByte(2, typeType, 0); } - public static void addType(FlatBufferBuilder builder, int typeOffset) { builder.addOffset(3, typeOffset, 
0); } - public static void addDictionary(FlatBufferBuilder builder, int dictionaryOffset) { builder.addOffset(4, dictionaryOffset, 0); } - public static void addChildren(FlatBufferBuilder builder, int childrenOffset) { builder.addOffset(5, childrenOffset, 0); } - public static int createChildrenVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); } - public static void startChildrenVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); } - public static void addCustomMetadata(FlatBufferBuilder builder, int customMetadataOffset) { builder.addOffset(6, customMetadataOffset, 0); } - public static int createCustomMetadataVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); } - public static void startCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); } - public static int endField(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Field get(int j) { return get(new Field(), j); } - public Field get(Field obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/FieldNode.java b/java/format/src/main/java/org/apache/arrow/flatbuf/FieldNode.java deleted file mode 100644 index 77f2cd1ca1a8e..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/FieldNode.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * ---------------------------------------------------------------------- - * Data structures for describing a table row batch (a collection of - * equal-length Arrow arrays) - * Metadata about a field at some level of a nested type tree (but not - * its children). 
- * - * For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` - * would have {length: 5, null_count: 2} for its List node, and {length: 6, - * null_count: 0} for its Int16 node, as separate FieldNode structs - */ -@SuppressWarnings("unused") -public final class FieldNode extends Struct { - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public FieldNode __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * The number of value slots in the Arrow array at this level of a nested - * tree - */ - public long length() { return bb.getLong(bb_pos + 0); } - /** - * The number of observed nulls. Fields with null_count == 0 may choose not - * to write their physical validity bitmap out as a materialized buffer, - * instead setting the length of the bitmap buffer to 0. - */ - public long nullCount() { return bb.getLong(bb_pos + 8); } - - public static int createFieldNode(FlatBufferBuilder builder, long length, long nullCount) { - builder.prep(8, 16); - builder.putLong(nullCount); - builder.putLong(length); - return builder.offset(); - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public FieldNode get(int j) { return get(new FieldNode(), j); } - public FieldNode get(FieldNode obj, int j) { return obj.__assign(__element(j), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeBinary.java b/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeBinary.java deleted file mode 100644 index c09e428d6f683..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeBinary.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -@SuppressWarnings("unused") -public final class FixedSizeBinary extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static FixedSizeBinary getRootAsFixedSizeBinary(ByteBuffer _bb) { return getRootAsFixedSizeBinary(_bb, new FixedSizeBinary()); } - public static FixedSizeBinary getRootAsFixedSizeBinary(ByteBuffer _bb, FixedSizeBinary obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public FixedSizeBinary 
__assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * Number of bytes per value - */ - public int byteWidth() { int o = __offset(4); return o != 0 ? bb.getInt(o + bb_pos) : 0; } - - public static int createFixedSizeBinary(FlatBufferBuilder builder, - int byteWidth) { - builder.startTable(1); - FixedSizeBinary.addByteWidth(builder, byteWidth); - return FixedSizeBinary.endFixedSizeBinary(builder); - } - - public static void startFixedSizeBinary(FlatBufferBuilder builder) { builder.startTable(1); } - public static void addByteWidth(FlatBufferBuilder builder, int byteWidth) { builder.addInt(0, byteWidth, 0); } - public static int endFixedSizeBinary(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public FixedSizeBinary get(int j) { return get(new FixedSizeBinary(), j); } - public FixedSizeBinary get(FixedSizeBinary obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeList.java b/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeList.java deleted file mode 100644 index d36cd8359f99e..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/FixedSizeList.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -@SuppressWarnings("unused") -public final class FixedSizeList extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static FixedSizeList getRootAsFixedSizeList(ByteBuffer _bb) { return getRootAsFixedSizeList(_bb, new FixedSizeList()); } - public static FixedSizeList getRootAsFixedSizeList(ByteBuffer _bb, FixedSizeList obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public FixedSizeList __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * Number of list items per value - */ - public int listSize() { int o = __offset(4); return o != 0 ? 
bb.getInt(o + bb_pos) : 0; } - - public static int createFixedSizeList(FlatBufferBuilder builder, - int listSize) { - builder.startTable(1); - FixedSizeList.addListSize(builder, listSize); - return FixedSizeList.endFixedSizeList(builder); - } - - public static void startFixedSizeList(FlatBufferBuilder builder) { builder.startTable(1); } - public static void addListSize(FlatBufferBuilder builder, int listSize) { builder.addInt(0, listSize, 0); } - public static int endFixedSizeList(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public FixedSizeList get(int j) { return get(new FixedSizeList(), j); } - public FixedSizeList get(FixedSizeList obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/FloatingPoint.java b/java/format/src/main/java/org/apache/arrow/flatbuf/FloatingPoint.java deleted file mode 100644 index 72ec44d3add35..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/FloatingPoint.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -@SuppressWarnings("unused") -public final class FloatingPoint extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static FloatingPoint getRootAsFloatingPoint(ByteBuffer _bb) { return getRootAsFloatingPoint(_bb, new FloatingPoint()); } - public static FloatingPoint getRootAsFloatingPoint(ByteBuffer _bb, FloatingPoint obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public FloatingPoint __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - public short precision() { int o = __offset(4); return o != 0 ? 
bb.getShort(o + bb_pos) : 0; } - - public static int createFloatingPoint(FlatBufferBuilder builder, - short precision) { - builder.startTable(1); - FloatingPoint.addPrecision(builder, precision); - return FloatingPoint.endFloatingPoint(builder); - } - - public static void startFloatingPoint(FlatBufferBuilder builder) { builder.startTable(1); } - public static void addPrecision(FlatBufferBuilder builder, short precision) { builder.addShort(0, precision, 0); } - public static int endFloatingPoint(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public FloatingPoint get(int j) { return get(new FloatingPoint(), j); } - public FloatingPoint get(FloatingPoint obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Footer.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Footer.java deleted file mode 100644 index 63f32d33cafae..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Footer.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * ---------------------------------------------------------------------- - * Arrow File metadata - * - */ -@SuppressWarnings("unused") -public final class Footer extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Footer getRootAsFooter(ByteBuffer _bb) { return getRootAsFooter(_bb, new Footer()); } - public static Footer getRootAsFooter(ByteBuffer _bb, Footer obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Footer __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - public short version() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; } - public org.apache.arrow.flatbuf.Schema schema() { return schema(new org.apache.arrow.flatbuf.Schema()); } - public org.apache.arrow.flatbuf.Schema schema(org.apache.arrow.flatbuf.Schema obj) { int o = __offset(6); return o != 0 ? 
obj.__assign(__indirect(o + bb_pos), bb) : null; } - public org.apache.arrow.flatbuf.Block dictionaries(int j) { return dictionaries(new org.apache.arrow.flatbuf.Block(), j); } - public org.apache.arrow.flatbuf.Block dictionaries(org.apache.arrow.flatbuf.Block obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o) + j * 24, bb) : null; } - public int dictionariesLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; } - public org.apache.arrow.flatbuf.Block.Vector dictionariesVector() { return dictionariesVector(new org.apache.arrow.flatbuf.Block.Vector()); } - public org.apache.arrow.flatbuf.Block.Vector dictionariesVector(org.apache.arrow.flatbuf.Block.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 24, bb) : null; } - public org.apache.arrow.flatbuf.Block recordBatches(int j) { return recordBatches(new org.apache.arrow.flatbuf.Block(), j); } - public org.apache.arrow.flatbuf.Block recordBatches(org.apache.arrow.flatbuf.Block obj, int j) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o) + j * 24, bb) : null; } - public int recordBatchesLength() { int o = __offset(10); return o != 0 ? __vector_len(o) : 0; } - public org.apache.arrow.flatbuf.Block.Vector recordBatchesVector() { return recordBatchesVector(new org.apache.arrow.flatbuf.Block.Vector()); } - public org.apache.arrow.flatbuf.Block.Vector recordBatchesVector(org.apache.arrow.flatbuf.Block.Vector obj) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o), 24, bb) : null; } - /** - * User-defined metadata - */ - public org.apache.arrow.flatbuf.KeyValue customMetadata(int j) { return customMetadata(new org.apache.arrow.flatbuf.KeyValue(), j); } - public org.apache.arrow.flatbuf.KeyValue customMetadata(org.apache.arrow.flatbuf.KeyValue obj, int j) { int o = __offset(12); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; } - public int customMetadataLength() { int o = __offset(12); return o != 0 ? 
__vector_len(o) : 0; } - public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector() { return customMetadataVector(new org.apache.arrow.flatbuf.KeyValue.Vector()); } - public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector(org.apache.arrow.flatbuf.KeyValue.Vector obj) { int o = __offset(12); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; } - - public static int createFooter(FlatBufferBuilder builder, - short version, - int schemaOffset, - int dictionariesOffset, - int recordBatchesOffset, - int customMetadataOffset) { - builder.startTable(5); - Footer.addCustomMetadata(builder, customMetadataOffset); - Footer.addRecordBatches(builder, recordBatchesOffset); - Footer.addDictionaries(builder, dictionariesOffset); - Footer.addSchema(builder, schemaOffset); - Footer.addVersion(builder, version); - return Footer.endFooter(builder); - } - - public static void startFooter(FlatBufferBuilder builder) { builder.startTable(5); } - public static void addVersion(FlatBufferBuilder builder, short version) { builder.addShort(0, version, 0); } - public static void addSchema(FlatBufferBuilder builder, int schemaOffset) { builder.addOffset(1, schemaOffset, 0); } - public static void addDictionaries(FlatBufferBuilder builder, int dictionariesOffset) { builder.addOffset(2, dictionariesOffset, 0); } - public static void startDictionariesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(24, numElems, 8); } - public static void addRecordBatches(FlatBufferBuilder builder, int recordBatchesOffset) { builder.addOffset(3, recordBatchesOffset, 0); } - public static void startRecordBatchesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(24, numElems, 8); } - public static void addCustomMetadata(FlatBufferBuilder builder, int customMetadataOffset) { builder.addOffset(4, customMetadataOffset, 0); } - public static int createCustomMetadataVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 
4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); } - public static void startCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); } - public static int endFooter(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - public static void finishFooterBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); } - public static void finishSizePrefixedFooterBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Footer get(int j) { return get(new Footer(), j); } - public Footer get(Footer obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Int.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Int.java deleted file mode 100644 index 5a12d82ee7b57..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Int.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -@SuppressWarnings("unused") -public final class Int extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Int getRootAsInt(ByteBuffer _bb) { return getRootAsInt(_bb, new Int()); } - public static Int getRootAsInt(ByteBuffer _bb, Int obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Int __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - public int bitWidth() { int o = __offset(4); return o != 0 ? bb.getInt(o + bb_pos) : 0; } - public boolean isSigned() { int o = __offset(6); return o != 0 ? 
0!=bb.get(o + bb_pos) : false; } - - public static int createInt(FlatBufferBuilder builder, - int bitWidth, - boolean isSigned) { - builder.startTable(2); - Int.addBitWidth(builder, bitWidth); - Int.addIsSigned(builder, isSigned); - return Int.endInt(builder); - } - - public static void startInt(FlatBufferBuilder builder) { builder.startTable(2); } - public static void addBitWidth(FlatBufferBuilder builder, int bitWidth) { builder.addInt(0, bitWidth, 0); } - public static void addIsSigned(FlatBufferBuilder builder, boolean isSigned) { builder.addBoolean(1, isSigned, false); } - public static int endInt(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Int get(int j) { return get(new Int(), j); } - public Int get(Int obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Interval.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Interval.java deleted file mode 100644 index 6598d4858033b..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Interval.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -@SuppressWarnings("unused") -public final class Interval extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Interval getRootAsInterval(ByteBuffer _bb) { return getRootAsInterval(_bb, new Interval()); } - public static Interval getRootAsInterval(ByteBuffer _bb, Interval obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Interval __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - public short unit() { int o = __offset(4); return o != 0 ? 
bb.getShort(o + bb_pos) : 0; } - - public static int createInterval(FlatBufferBuilder builder, - short unit) { - builder.startTable(1); - Interval.addUnit(builder, unit); - return Interval.endInterval(builder); - } - - public static void startInterval(FlatBufferBuilder builder) { builder.startTable(1); } - public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 0); } - public static int endInterval(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Interval get(int j) { return get(new Interval(), j); } - public Interval get(Interval obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/IntervalUnit.java b/java/format/src/main/java/org/apache/arrow/flatbuf/IntervalUnit.java deleted file mode 100644 index 45b47fdf67810..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/IntervalUnit.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -@SuppressWarnings("unused") -public final class IntervalUnit { - private IntervalUnit() { } - public static final short YEAR_MONTH = 0; - public static final short DAY_TIME = 1; - public static final short MONTH_DAY_NANO = 2; - - public static final String[] names = { "YEAR_MONTH", "DAY_TIME", "MONTH_DAY_NANO", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/KeyValue.java b/java/format/src/main/java/org/apache/arrow/flatbuf/KeyValue.java deleted file mode 100644 index bc64bb32e59c1..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/KeyValue.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * ---------------------------------------------------------------------- - * user defined key value pairs to add custom metadata to arrow - * key namespacing is the responsibility of the user - */ -@SuppressWarnings("unused") -public final class KeyValue extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static KeyValue getRootAsKeyValue(ByteBuffer _bb) { return getRootAsKeyValue(_bb, new KeyValue()); } - public static KeyValue getRootAsKeyValue(ByteBuffer _bb, KeyValue obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public KeyValue __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - public String key() { int o = __offset(4); return o != 0 ? __string(o + bb_pos) : null; } - public ByteBuffer keyAsByteBuffer() { return __vector_as_bytebuffer(4, 1); } - public ByteBuffer keyInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 4, 1); } - public String value() { int o = __offset(6); return o != 0 ? 
__string(o + bb_pos) : null; } - public ByteBuffer valueAsByteBuffer() { return __vector_as_bytebuffer(6, 1); } - public ByteBuffer valueInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 1); } - - public static int createKeyValue(FlatBufferBuilder builder, - int keyOffset, - int valueOffset) { - builder.startTable(2); - KeyValue.addValue(builder, valueOffset); - KeyValue.addKey(builder, keyOffset); - return KeyValue.endKeyValue(builder); - } - - public static void startKeyValue(FlatBufferBuilder builder) { builder.startTable(2); } - public static void addKey(FlatBufferBuilder builder, int keyOffset) { builder.addOffset(0, keyOffset, 0); } - public static void addValue(FlatBufferBuilder builder, int valueOffset) { builder.addOffset(1, valueOffset, 0); } - public static int endKeyValue(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public KeyValue get(int j) { return get(new KeyValue(), j); } - public KeyValue get(KeyValue obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/LargeBinary.java b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeBinary.java deleted file mode 100644 index d32b284644ba4..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/LargeBinary.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Same as Binary, but with 64-bit offsets, allowing to represent - * extremely large data values. 
- */ -@SuppressWarnings("unused") -public final class LargeBinary extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static LargeBinary getRootAsLargeBinary(ByteBuffer _bb) { return getRootAsLargeBinary(_bb, new LargeBinary()); } - public static LargeBinary getRootAsLargeBinary(ByteBuffer _bb, LargeBinary obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public LargeBinary __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startLargeBinary(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endLargeBinary(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public LargeBinary get(int j) { return get(new LargeBinary(), j); } - public LargeBinary get(LargeBinary obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/LargeList.java b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeList.java deleted file mode 100644 index d5ad4d1600f98..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/LargeList.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Same as List, but with 64-bit offsets, allowing to represent - * extremely large data values. 
- */ -@SuppressWarnings("unused") -public final class LargeList extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static LargeList getRootAsLargeList(ByteBuffer _bb) { return getRootAsLargeList(_bb, new LargeList()); } - public static LargeList getRootAsLargeList(ByteBuffer _bb, LargeList obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public LargeList __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startLargeList(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endLargeList(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public LargeList get(int j) { return get(new LargeList(), j); } - public LargeList get(LargeList obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/LargeListView.java b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeListView.java deleted file mode 100644 index fff70d20e2320..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/LargeListView.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Same as ListView, but with 64-bit offsets and sizes, allowing to represent - * extremely large data values. 
- */ -@SuppressWarnings("unused") -public final class LargeListView extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static LargeListView getRootAsLargeListView(ByteBuffer _bb) { return getRootAsLargeListView(_bb, new LargeListView()); } - public static LargeListView getRootAsLargeListView(ByteBuffer _bb, LargeListView obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public LargeListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startLargeListView(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endLargeListView(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public LargeListView get(int j) { return get(new LargeListView(), j); } - public LargeListView get(LargeListView obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/LargeUtf8.java b/java/format/src/main/java/org/apache/arrow/flatbuf/LargeUtf8.java deleted file mode 100644 index 83e280020403a..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/LargeUtf8.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Same as Utf8, but with 64-bit offsets, allowing to represent - * extremely large data values. 
- */ -@SuppressWarnings("unused") -public final class LargeUtf8 extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static LargeUtf8 getRootAsLargeUtf8(ByteBuffer _bb) { return getRootAsLargeUtf8(_bb, new LargeUtf8()); } - public static LargeUtf8 getRootAsLargeUtf8(ByteBuffer _bb, LargeUtf8 obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public LargeUtf8 __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startLargeUtf8(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endLargeUtf8(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public LargeUtf8 get(int j) { return get(new LargeUtf8(), j); } - public LargeUtf8 get(LargeUtf8 obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/List.java b/java/format/src/main/java/org/apache/arrow/flatbuf/List.java deleted file mode 100644 index 2dbd9171443de..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/List.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -@SuppressWarnings("unused") -public final class List extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static List getRootAsList(ByteBuffer _bb) { return getRootAsList(_bb, new List()); } - public static List getRootAsList(ByteBuffer _bb, List obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public List __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startList(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endList(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, 
ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public List get(int j) { return get(new List(), j); } - public List get(List obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/ListView.java b/java/format/src/main/java/org/apache/arrow/flatbuf/ListView.java deleted file mode 100644 index 387b2b9662297..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/ListView.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Represents the same logical types that List can, but contains offsets and - * sizes allowing for writes in any order and sharing of child values among - * list values. - */ -@SuppressWarnings("unused") -public final class ListView extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static ListView getRootAsListView(ByteBuffer _bb) { return getRootAsListView(_bb, new ListView()); } - public static ListView getRootAsListView(ByteBuffer _bb, ListView obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public ListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startListView(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endListView(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public ListView get(int j) { return get(new ListView(), j); } - public ListView get(ListView obj, int j) { return 
obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Map.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Map.java deleted file mode 100644 index 5a100fb1735f8..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Map.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * A Map is a logical nested type that is represented as - * - * List> - * - * In this layout, the keys and values are each respectively contiguous. We do - * not constrain the key and value types, so the application is responsible - * for ensuring that the keys are hashable and unique. Whether the keys are sorted - * may be set in the metadata for this field. - * - * In a field with Map type, the field has a child Struct field, which then - * has two children: key type and the second the value type. The names of the - * child fields may be respectively "entries", "key", and "value", but this is - * not enforced. - * - * Map - * ```text - * - child[0] entries: Struct - * - child[0] key: K - * - child[1] value: V - * ``` - * Neither the "entries" field nor the "key" field may be nullable. - * - * The metadata is structured so that Arrow systems without special handling - * for Map can make Map an alias for List. The "layout" attribute for the Map - * field must have the same contents as a List. 
- */ -@SuppressWarnings("unused") -public final class Map extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Map getRootAsMap(ByteBuffer _bb) { return getRootAsMap(_bb, new Map()); } - public static Map getRootAsMap(ByteBuffer _bb, Map obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Map __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * Set to true if the keys within each value are sorted - */ - public boolean keysSorted() { int o = __offset(4); return o != 0 ? 0!=bb.get(o + bb_pos) : false; } - - public static int createMap(FlatBufferBuilder builder, - boolean keysSorted) { - builder.startTable(1); - Map.addKeysSorted(builder, keysSorted); - return Map.endMap(builder); - } - - public static void startMap(FlatBufferBuilder builder) { builder.startTable(1); } - public static void addKeysSorted(FlatBufferBuilder builder, boolean keysSorted) { builder.addBoolean(0, keysSorted, false); } - public static int endMap(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Map get(int j) { return get(new Map(), j); } - public Map get(Map obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Message.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Message.java deleted file mode 100644 index 2349b8053a170..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Message.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -@SuppressWarnings("unused") -public final class Message extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Message getRootAsMessage(ByteBuffer _bb) { return getRootAsMessage(_bb, new Message()); } - public static Message getRootAsMessage(ByteBuffer _bb, Message obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Message __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - 
public short version() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; } - public byte headerType() { int o = __offset(6); return o != 0 ? bb.get(o + bb_pos) : 0; } - public Table header(Table obj) { int o = __offset(8); return o != 0 ? __union(obj, o + bb_pos) : null; } - public long bodyLength() { int o = __offset(10); return o != 0 ? bb.getLong(o + bb_pos) : 0L; } - public org.apache.arrow.flatbuf.KeyValue customMetadata(int j) { return customMetadata(new org.apache.arrow.flatbuf.KeyValue(), j); } - public org.apache.arrow.flatbuf.KeyValue customMetadata(org.apache.arrow.flatbuf.KeyValue obj, int j) { int o = __offset(12); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; } - public int customMetadataLength() { int o = __offset(12); return o != 0 ? __vector_len(o) : 0; } - public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector() { return customMetadataVector(new org.apache.arrow.flatbuf.KeyValue.Vector()); } - public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector(org.apache.arrow.flatbuf.KeyValue.Vector obj) { int o = __offset(12); return o != 0 ? 
obj.__assign(__vector(o), 4, bb) : null; } - - public static int createMessage(FlatBufferBuilder builder, - short version, - byte headerType, - int headerOffset, - long bodyLength, - int customMetadataOffset) { - builder.startTable(5); - Message.addBodyLength(builder, bodyLength); - Message.addCustomMetadata(builder, customMetadataOffset); - Message.addHeader(builder, headerOffset); - Message.addVersion(builder, version); - Message.addHeaderType(builder, headerType); - return Message.endMessage(builder); - } - - public static void startMessage(FlatBufferBuilder builder) { builder.startTable(5); } - public static void addVersion(FlatBufferBuilder builder, short version) { builder.addShort(0, version, 0); } - public static void addHeaderType(FlatBufferBuilder builder, byte headerType) { builder.addByte(1, headerType, 0); } - public static void addHeader(FlatBufferBuilder builder, int headerOffset) { builder.addOffset(2, headerOffset, 0); } - public static void addBodyLength(FlatBufferBuilder builder, long bodyLength) { builder.addLong(3, bodyLength, 0L); } - public static void addCustomMetadata(FlatBufferBuilder builder, int customMetadataOffset) { builder.addOffset(4, customMetadataOffset, 0); } - public static int createCustomMetadataVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); } - public static void startCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); } - public static int endMessage(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - public static void finishMessageBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); } - public static void finishSizePrefixedMessageBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); } - - public static final class Vector extends BaseVector { - public Vector 
__assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Message get(int j) { return get(new Message(), j); } - public Message get(Message obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/MessageHeader.java b/java/format/src/main/java/org/apache/arrow/flatbuf/MessageHeader.java deleted file mode 100644 index f2be733f4b1b7..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/MessageHeader.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -/** - * ---------------------------------------------------------------------- - * The root Message type - * This union enables us to easily send different message types without - * redundant storage, and in the future we can easily add new message types. - * - * Arrow implementations do not need to implement all of the message types, - * which may include experimental metadata types. 
For maximum compatibility, - * it is best to send data using RecordBatch - */ -@SuppressWarnings("unused") -public final class MessageHeader { - private MessageHeader() { } - public static final byte NONE = 0; - public static final byte Schema = 1; - public static final byte DictionaryBatch = 2; - public static final byte RecordBatch = 3; - public static final byte Tensor = 4; - public static final byte SparseTensor = 5; - - public static final String[] names = { "NONE", "Schema", "DictionaryBatch", "RecordBatch", "Tensor", "SparseTensor", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/MetadataVersion.java b/java/format/src/main/java/org/apache/arrow/flatbuf/MetadataVersion.java deleted file mode 100644 index cbfa0ee90f32f..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/MetadataVersion.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -@SuppressWarnings("unused") -public final class MetadataVersion { - private MetadataVersion() { } - /** - * 0.1.0 (October 2016). - */ - public static final short V1 = 0; - /** - * 0.2.0 (February 2017). 
Non-backwards compatible with V1. - */ - public static final short V2 = 1; - /** - * 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2. - */ - public static final short V3 = 2; - /** - * >= 0.8.0 (December 2017). Non-backwards compatible with V3. - */ - public static final short V4 = 3; - /** - * >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4 - * metadata and IPC messages). Implementations are recommended to provide a - * V4 compatibility mode with V5 format changes disabled. - * - * Incompatible changes between V4 and V5: - * - Union buffer layout has changed. In V5, Unions don't have a validity - * bitmap buffer. - */ - public static final short V5 = 4; - - public static final String[] names = { "V1", "V2", "V3", "V4", "V5", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Null.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Null.java deleted file mode 100644 index 37cb705ec10cb..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Null.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * These are stored in the flatbuffer in the Type union below - */ -@SuppressWarnings("unused") -public final class Null extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Null getRootAsNull(ByteBuffer _bb) { return getRootAsNull(_bb, new Null()); } - public static Null getRootAsNull(ByteBuffer _bb, Null obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Null __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startNull(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endNull(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Null get(int j) { return get(new Null(), j); } - public Null get(Null obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Precision.java 
b/java/format/src/main/java/org/apache/arrow/flatbuf/Precision.java deleted file mode 100644 index e2140834ff4da..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Precision.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -@SuppressWarnings("unused") -public final class Precision { - private Precision() { } - public static final short HALF = 0; - public static final short SINGLE = 1; - public static final short DOUBLE = 2; - - public static final String[] names = { "HALF", "SINGLE", "DOUBLE", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java b/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java deleted file mode 100644 index 78208064ec018..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/RecordBatch.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * A data header describing the shared memory layout of a "record" or "row" - * batch. Some systems call this a "row batch" internally and others a "record - * batch". 
- */ -@SuppressWarnings("unused") -public final class RecordBatch extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static RecordBatch getRootAsRecordBatch(ByteBuffer _bb) { return getRootAsRecordBatch(_bb, new RecordBatch()); } - public static RecordBatch getRootAsRecordBatch(ByteBuffer _bb, RecordBatch obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public RecordBatch __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * number of records / rows. The arrays in the batch should all have this - * length - */ - public long length() { int o = __offset(4); return o != 0 ? bb.getLong(o + bb_pos) : 0L; } - /** - * Nodes correspond to the pre-ordered flattened logical schema - */ - public org.apache.arrow.flatbuf.FieldNode nodes(int j) { return nodes(new org.apache.arrow.flatbuf.FieldNode(), j); } - public org.apache.arrow.flatbuf.FieldNode nodes(org.apache.arrow.flatbuf.FieldNode obj, int j) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; } - public int nodesLength() { int o = __offset(6); return o != 0 ? __vector_len(o) : 0; } - public org.apache.arrow.flatbuf.FieldNode.Vector nodesVector() { return nodesVector(new org.apache.arrow.flatbuf.FieldNode.Vector()); } - public org.apache.arrow.flatbuf.FieldNode.Vector nodesVector(org.apache.arrow.flatbuf.FieldNode.Vector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), 16, bb) : null; } - /** - * Buffers correspond to the pre-ordered flattened buffer tree - * - * The number of buffers appended to this list depends on the schema. For - * example, most primitive arrays will have 2 buffers, 1 for the validity - * bitmap and 1 for the values. 
For struct arrays, there will only be a - * single buffer for the validity (nulls) bitmap - */ - public org.apache.arrow.flatbuf.Buffer buffers(int j) { return buffers(new org.apache.arrow.flatbuf.Buffer(), j); } - public org.apache.arrow.flatbuf.Buffer buffers(org.apache.arrow.flatbuf.Buffer obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; } - public int buffersLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; } - public org.apache.arrow.flatbuf.Buffer.Vector buffersVector() { return buffersVector(new org.apache.arrow.flatbuf.Buffer.Vector()); } - public org.apache.arrow.flatbuf.Buffer.Vector buffersVector(org.apache.arrow.flatbuf.Buffer.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 16, bb) : null; } - /** - * Optional compression of the message body - */ - public org.apache.arrow.flatbuf.BodyCompression compression() { return compression(new org.apache.arrow.flatbuf.BodyCompression()); } - public org.apache.arrow.flatbuf.BodyCompression compression(org.apache.arrow.flatbuf.BodyCompression obj) { int o = __offset(10); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; } - /** - * Some types such as Utf8View are represented using a variable number of buffers. - * For each such Field in the pre-ordered flattened logical schema, there will be - * an entry in variadicBufferCounts to indicate the number of number of variadic - * buffers which belong to that Field in the current RecordBatch. - * - * For example, the schema - * col1: Struct - * col2: Utf8View - * contains two Fields with variadic buffers so variadicBufferCounts will have - * two entries, the first counting the variadic buffers of `col1.beta` and the - * second counting `col2`'s. - * - * This field may be omitted if and only if the schema contains no Fields with - * a variable number of buffers, such as BinaryView and Utf8View. 
- */ - public long variadicBufferCounts(int j) { int o = __offset(12); return o != 0 ? bb.getLong(__vector(o) + j * 8) : 0; } - public int variadicBufferCountsLength() { int o = __offset(12); return o != 0 ? __vector_len(o) : 0; } - public LongVector variadicBufferCountsVector() { return variadicBufferCountsVector(new LongVector()); } - public LongVector variadicBufferCountsVector(LongVector obj) { int o = __offset(12); return o != 0 ? obj.__assign(__vector(o), bb) : null; } - public ByteBuffer variadicBufferCountsAsByteBuffer() { return __vector_as_bytebuffer(12, 8); } - public ByteBuffer variadicBufferCountsInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 12, 8); } - - public static int createRecordBatch(FlatBufferBuilder builder, - long length, - int nodesOffset, - int buffersOffset, - int compressionOffset, - int variadicBufferCountsOffset) { - builder.startTable(5); - RecordBatch.addLength(builder, length); - RecordBatch.addVariadicBufferCounts(builder, variadicBufferCountsOffset); - RecordBatch.addCompression(builder, compressionOffset); - RecordBatch.addBuffers(builder, buffersOffset); - RecordBatch.addNodes(builder, nodesOffset); - return RecordBatch.endRecordBatch(builder); - } - - public static void startRecordBatch(FlatBufferBuilder builder) { builder.startTable(5); } - public static void addLength(FlatBufferBuilder builder, long length) { builder.addLong(0, length, 0L); } - public static void addNodes(FlatBufferBuilder builder, int nodesOffset) { builder.addOffset(1, nodesOffset, 0); } - public static void startNodesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); } - public static void addBuffers(FlatBufferBuilder builder, int buffersOffset) { builder.addOffset(2, buffersOffset, 0); } - public static void startBuffersVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); } - public static void addCompression(FlatBufferBuilder builder, int compressionOffset) { 
builder.addOffset(3, compressionOffset, 0); } - public static void addVariadicBufferCounts(FlatBufferBuilder builder, int variadicBufferCountsOffset) { builder.addOffset(4, variadicBufferCountsOffset, 0); } - public static int createVariadicBufferCountsVector(FlatBufferBuilder builder, long[] data) { builder.startVector(8, data.length, 8); for (int i = data.length - 1; i >= 0; i--) builder.addLong(data[i]); return builder.endVector(); } - public static void startVariadicBufferCountsVector(FlatBufferBuilder builder, int numElems) { builder.startVector(8, numElems, 8); } - public static int endRecordBatch(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public RecordBatch get(int j) { return get(new RecordBatch(), j); } - public RecordBatch get(RecordBatch obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/RunEndEncoded.java b/java/format/src/main/java/org/apache/arrow/flatbuf/RunEndEncoded.java deleted file mode 100644 index 4ad73827b971f..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/RunEndEncoded.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Contains two child arrays, run_ends and values. - * The run_ends child array must be a 16/32/64-bit integer array - * which encodes the indices at which the run with the value in - * each corresponding index in the values child array ends. - * Like list/struct types, the value array can be of any type. 
- */ -@SuppressWarnings("unused") -public final class RunEndEncoded extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static RunEndEncoded getRootAsRunEndEncoded(ByteBuffer _bb) { return getRootAsRunEndEncoded(_bb, new RunEndEncoded()); } - public static RunEndEncoded getRootAsRunEndEncoded(ByteBuffer _bb, RunEndEncoded obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public RunEndEncoded __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startRunEndEncoded(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endRunEndEncoded(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public RunEndEncoded get(int j) { return get(new RunEndEncoded(), j); } - public RunEndEncoded get(RunEndEncoded obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Schema.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Schema.java deleted file mode 100644 index 52c0ab26f0fe9..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Schema.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * ---------------------------------------------------------------------- - * A Schema describes the columns in a row batch - */ -@SuppressWarnings("unused") -public final class Schema extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Schema getRootAsSchema(ByteBuffer _bb) { return getRootAsSchema(_bb, new Schema()); } - public static Schema getRootAsSchema(ByteBuffer _bb, Schema obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Schema __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * endianness of the buffer - * it is Little Endian by default - * if endianness doesn't match the underlying system then the vectors need to be 
converted - */ - public short endianness() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; } - public org.apache.arrow.flatbuf.Field fields(int j) { return fields(new org.apache.arrow.flatbuf.Field(), j); } - public org.apache.arrow.flatbuf.Field fields(org.apache.arrow.flatbuf.Field obj, int j) { int o = __offset(6); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; } - public int fieldsLength() { int o = __offset(6); return o != 0 ? __vector_len(o) : 0; } - public org.apache.arrow.flatbuf.Field.Vector fieldsVector() { return fieldsVector(new org.apache.arrow.flatbuf.Field.Vector()); } - public org.apache.arrow.flatbuf.Field.Vector fieldsVector(org.apache.arrow.flatbuf.Field.Vector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; } - public org.apache.arrow.flatbuf.KeyValue customMetadata(int j) { return customMetadata(new org.apache.arrow.flatbuf.KeyValue(), j); } - public org.apache.arrow.flatbuf.KeyValue customMetadata(org.apache.arrow.flatbuf.KeyValue obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; } - public int customMetadataLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; } - public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector() { return customMetadataVector(new org.apache.arrow.flatbuf.KeyValue.Vector()); } - public org.apache.arrow.flatbuf.KeyValue.Vector customMetadataVector(org.apache.arrow.flatbuf.KeyValue.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; } - /** - * Features used in the stream/file. - */ - public long features(int j) { int o = __offset(10); return o != 0 ? bb.getLong(__vector(o) + j * 8) : 0; } - public int featuresLength() { int o = __offset(10); return o != 0 ? 
__vector_len(o) : 0; } - public LongVector featuresVector() { return featuresVector(new LongVector()); } - public LongVector featuresVector(LongVector obj) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o), bb) : null; } - public ByteBuffer featuresAsByteBuffer() { return __vector_as_bytebuffer(10, 8); } - public ByteBuffer featuresInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 10, 8); } - - public static int createSchema(FlatBufferBuilder builder, - short endianness, - int fieldsOffset, - int customMetadataOffset, - int featuresOffset) { - builder.startTable(4); - Schema.addFeatures(builder, featuresOffset); - Schema.addCustomMetadata(builder, customMetadataOffset); - Schema.addFields(builder, fieldsOffset); - Schema.addEndianness(builder, endianness); - return Schema.endSchema(builder); - } - - public static void startSchema(FlatBufferBuilder builder) { builder.startTable(4); } - public static void addEndianness(FlatBufferBuilder builder, short endianness) { builder.addShort(0, endianness, 0); } - public static void addFields(FlatBufferBuilder builder, int fieldsOffset) { builder.addOffset(1, fieldsOffset, 0); } - public static int createFieldsVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); } - public static void startFieldsVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); } - public static void addCustomMetadata(FlatBufferBuilder builder, int customMetadataOffset) { builder.addOffset(2, customMetadataOffset, 0); } - public static int createCustomMetadataVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); } - public static void startCustomMetadataVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, 
numElems, 4); } - public static void addFeatures(FlatBufferBuilder builder, int featuresOffset) { builder.addOffset(3, featuresOffset, 0); } - public static int createFeaturesVector(FlatBufferBuilder builder, long[] data) { builder.startVector(8, data.length, 8); for (int i = data.length - 1; i >= 0; i--) builder.addLong(data[i]); return builder.endVector(); } - public static void startFeaturesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(8, numElems, 8); } - public static int endSchema(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - public static void finishSchemaBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); } - public static void finishSizePrefixedSchemaBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Schema get(int j) { return get(new Schema(), j); } - public Schema get(Schema obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.java b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.java deleted file mode 100644 index 160b783fe2d34..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixCompressedAxis.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -@SuppressWarnings("unused") -public final class SparseMatrixCompressedAxis { - private SparseMatrixCompressedAxis() { } - public static final short Row = 0; - public static final short Column = 1; - - public static final String[] names = { "Row", "Column", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.java b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.java deleted file mode 100644 index 9fc345aa4aa82..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseMatrixIndexCSX.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Compressed Sparse format, that is matrix-specific. - */ -@SuppressWarnings("unused") -public final class SparseMatrixIndexCSX extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static SparseMatrixIndexCSX getRootAsSparseMatrixIndexCSX(ByteBuffer _bb) { return getRootAsSparseMatrixIndexCSX(_bb, new SparseMatrixIndexCSX()); } - public static SparseMatrixIndexCSX getRootAsSparseMatrixIndexCSX(ByteBuffer _bb, SparseMatrixIndexCSX obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public SparseMatrixIndexCSX __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * Which axis, row or column, is compressed - */ - public short compressedAxis() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; } - /** - * The type of values in indptrBuffer - */ - public org.apache.arrow.flatbuf.Int indptrType() { return indptrType(new org.apache.arrow.flatbuf.Int()); } - public org.apache.arrow.flatbuf.Int indptrType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(6); return o != 0 ? 
obj.__assign(__indirect(o + bb_pos), bb) : null; } - /** - * indptrBuffer stores the location and size of indptr array that - * represents the range of the rows. - * The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. - * The length of this array is 1 + (the number of rows), and the type - * of index value is long. - * - * For example, let X be the following 6x4 matrix: - * ```text - * X := [[0, 1, 2, 0], - * [0, 0, 3, 0], - * [0, 4, 0, 5], - * [0, 0, 0, 0], - * [6, 0, 7, 8], - * [0, 9, 0, 0]]. - * ``` - * The array of non-zero values in X is: - * ```text - * values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. - * ``` - * And the indptr of X is: - * ```text - * indptr(X) = [0, 2, 3, 5, 5, 8, 10]. - * ``` - */ - public org.apache.arrow.flatbuf.Buffer indptrBuffer() { return indptrBuffer(new org.apache.arrow.flatbuf.Buffer()); } - public org.apache.arrow.flatbuf.Buffer indptrBuffer(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(8); return o != 0 ? obj.__assign(o + bb_pos, bb) : null; } - /** - * The type of values in indicesBuffer - */ - public org.apache.arrow.flatbuf.Int indicesType() { return indicesType(new org.apache.arrow.flatbuf.Int()); } - public org.apache.arrow.flatbuf.Int indicesType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(10); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; } - /** - * indicesBuffer stores the location and size of the array that - * contains the column indices of the corresponding non-zero values. - * The type of index value is long. - * - * For example, the indices of the above X is: - * ```text - * indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. - * ``` - * Note that the indices are sorted in lexicographical order for each row. - */ - public org.apache.arrow.flatbuf.Buffer indicesBuffer() { return indicesBuffer(new org.apache.arrow.flatbuf.Buffer()); } - public org.apache.arrow.flatbuf.Buffer indicesBuffer(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(12); return o != 0 ? 
obj.__assign(o + bb_pos, bb) : null; } - - public static void startSparseMatrixIndexCSX(FlatBufferBuilder builder) { builder.startTable(5); } - public static void addCompressedAxis(FlatBufferBuilder builder, short compressedAxis) { builder.addShort(0, compressedAxis, 0); } - public static void addIndptrType(FlatBufferBuilder builder, int indptrTypeOffset) { builder.addOffset(1, indptrTypeOffset, 0); } - public static void addIndptrBuffer(FlatBufferBuilder builder, int indptrBufferOffset) { builder.addStruct(2, indptrBufferOffset, 0); } - public static void addIndicesType(FlatBufferBuilder builder, int indicesTypeOffset) { builder.addOffset(3, indicesTypeOffset, 0); } - public static void addIndicesBuffer(FlatBufferBuilder builder, int indicesBufferOffset) { builder.addStruct(4, indicesBufferOffset, 0); } - public static int endSparseMatrixIndexCSX(FlatBufferBuilder builder) { - int o = builder.endTable(); - builder.required(o, 6); // indptrType - builder.required(o, 8); // indptrBuffer - builder.required(o, 10); // indicesType - builder.required(o, 12); // indicesBuffer - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public SparseMatrixIndexCSX get(int j) { return get(new SparseMatrixIndexCSX(), j); } - public SparseMatrixIndexCSX get(SparseMatrixIndexCSX obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensor.java b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensor.java deleted file mode 100644 index 3d01281204549..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensor.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -@SuppressWarnings("unused") -public final class SparseTensor extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static SparseTensor getRootAsSparseTensor(ByteBuffer _bb) { return getRootAsSparseTensor(_bb, new SparseTensor()); } - public static SparseTensor getRootAsSparseTensor(ByteBuffer _bb, SparseTensor obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public SparseTensor __assign(int _i, ByteBuffer 
_bb) { __init(_i, _bb); return this; } - - public byte typeType() { int o = __offset(4); return o != 0 ? bb.get(o + bb_pos) : 0; } - /** - * The type of data contained in a value cell. - * Currently only fixed-width value types are supported, - * no strings or nested types. - */ - public Table type(Table obj) { int o = __offset(6); return o != 0 ? __union(obj, o + bb_pos) : null; } - /** - * The dimensions of the tensor, optionally named. - */ - public org.apache.arrow.flatbuf.TensorDim shape(int j) { return shape(new org.apache.arrow.flatbuf.TensorDim(), j); } - public org.apache.arrow.flatbuf.TensorDim shape(org.apache.arrow.flatbuf.TensorDim obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; } - public int shapeLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; } - public org.apache.arrow.flatbuf.TensorDim.Vector shapeVector() { return shapeVector(new org.apache.arrow.flatbuf.TensorDim.Vector()); } - public org.apache.arrow.flatbuf.TensorDim.Vector shapeVector(org.apache.arrow.flatbuf.TensorDim.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; } - /** - * The number of non-zero values in a sparse tensor. - */ - public long nonZeroLength() { int o = __offset(10); return o != 0 ? bb.getLong(o + bb_pos) : 0L; } - public byte sparseIndexType() { int o = __offset(12); return o != 0 ? bb.get(o + bb_pos) : 0; } - /** - * Sparse tensor index - */ - public Table sparseIndex(Table obj) { int o = __offset(14); return o != 0 ? __union(obj, o + bb_pos) : null; } - /** - * The location and size of the tensor's data - */ - public org.apache.arrow.flatbuf.Buffer data() { return data(new org.apache.arrow.flatbuf.Buffer()); } - public org.apache.arrow.flatbuf.Buffer data(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(16); return o != 0 ? 
obj.__assign(o + bb_pos, bb) : null; } - - public static void startSparseTensor(FlatBufferBuilder builder) { builder.startTable(7); } - public static void addTypeType(FlatBufferBuilder builder, byte typeType) { builder.addByte(0, typeType, 0); } - public static void addType(FlatBufferBuilder builder, int typeOffset) { builder.addOffset(1, typeOffset, 0); } - public static void addShape(FlatBufferBuilder builder, int shapeOffset) { builder.addOffset(2, shapeOffset, 0); } - public static int createShapeVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); } - public static void startShapeVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); } - public static void addNonZeroLength(FlatBufferBuilder builder, long nonZeroLength) { builder.addLong(3, nonZeroLength, 0L); } - public static void addSparseIndexType(FlatBufferBuilder builder, byte sparseIndexType) { builder.addByte(4, sparseIndexType, 0); } - public static void addSparseIndex(FlatBufferBuilder builder, int sparseIndexOffset) { builder.addOffset(5, sparseIndexOffset, 0); } - public static void addData(FlatBufferBuilder builder, int dataOffset) { builder.addStruct(6, dataOffset, 0); } - public static int endSparseTensor(FlatBufferBuilder builder) { - int o = builder.endTable(); - builder.required(o, 6); // type - builder.required(o, 8); // shape - builder.required(o, 14); // sparseIndex - builder.required(o, 16); // data - return o; - } - public static void finishSparseTensorBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); } - public static void finishSizePrefixedSparseTensorBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, 
_bb); return this; } - - public SparseTensor get(int j) { return get(new SparseTensor(), j); } - public SparseTensor get(SparseTensor obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndex.java b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndex.java deleted file mode 100644 index b05aef44b2573..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndex.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -@SuppressWarnings("unused") -public final class SparseTensorIndex { - private SparseTensorIndex() { } - public static final byte NONE = 0; - public static final byte SparseTensorIndexCOO = 1; - public static final byte SparseMatrixIndexCSX = 2; - public static final byte SparseTensorIndexCSF = 3; - - public static final String[] names = { "NONE", "SparseTensorIndexCOO", "SparseMatrixIndexCSX", "SparseTensorIndexCSF", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCOO.java b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCOO.java deleted file mode 100644 index 3ce20921334b8..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCOO.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * ---------------------------------------------------------------------- - * EXPERIMENTAL: Data structures for sparse tensors - * Coordinate (COO) format of sparse tensor index. - * - * COO's index list are represented as a NxM matrix, - * where N is the number of non-zero values, - * and M is the number of dimensions of a sparse tensor. - * - * indicesBuffer stores the location and size of the data of this indices - * matrix. The value type and the stride of the indices matrix is - * specified in indicesType and indicesStrides fields. - * - * For example, let X be a 2x3x4x5 tensor, and it has the following - * 6 non-zero values: - * ```text - * X[0, 1, 2, 0] := 1 - * X[1, 1, 2, 3] := 2 - * X[0, 2, 1, 0] := 3 - * X[0, 1, 3, 0] := 4 - * X[0, 1, 2, 1] := 5 - * X[1, 2, 0, 4] := 6 - * ``` - * In COO format, the index matrix of X is the following 4x6 matrix: - * ```text - * [[0, 0, 0, 0, 1, 1], - * [1, 1, 1, 2, 1, 2], - * [2, 2, 3, 1, 2, 0], - * [0, 1, 0, 0, 3, 4]] - * ``` - * When isCanonical is true, the indices is sorted in lexicographical order - * (row-major order), and it does not have duplicated entries. Otherwise, - * the indices may not be sorted, or may have duplicated entries. 
- */ -@SuppressWarnings("unused") -public final class SparseTensorIndexCOO extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static SparseTensorIndexCOO getRootAsSparseTensorIndexCOO(ByteBuffer _bb) { return getRootAsSparseTensorIndexCOO(_bb, new SparseTensorIndexCOO()); } - public static SparseTensorIndexCOO getRootAsSparseTensorIndexCOO(ByteBuffer _bb, SparseTensorIndexCOO obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public SparseTensorIndexCOO __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * The type of values in indicesBuffer - */ - public org.apache.arrow.flatbuf.Int indicesType() { return indicesType(new org.apache.arrow.flatbuf.Int()); } - public org.apache.arrow.flatbuf.Int indicesType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(4); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; } - /** - * Non-negative byte offsets to advance one value cell along each dimension - * If omitted, default to row-major order (C-like). - */ - public long indicesStrides(int j) { int o = __offset(6); return o != 0 ? bb.getLong(__vector(o) + j * 8) : 0; } - public int indicesStridesLength() { int o = __offset(6); return o != 0 ? __vector_len(o) : 0; } - public LongVector indicesStridesVector() { return indicesStridesVector(new LongVector()); } - public LongVector indicesStridesVector(LongVector obj) { int o = __offset(6); return o != 0 ? 
obj.__assign(__vector(o), bb) : null; } - public ByteBuffer indicesStridesAsByteBuffer() { return __vector_as_bytebuffer(6, 8); } - public ByteBuffer indicesStridesInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 8); } - /** - * The location and size of the indices matrix's data - */ - public org.apache.arrow.flatbuf.Buffer indicesBuffer() { return indicesBuffer(new org.apache.arrow.flatbuf.Buffer()); } - public org.apache.arrow.flatbuf.Buffer indicesBuffer(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(8); return o != 0 ? obj.__assign(o + bb_pos, bb) : null; } - /** - * This flag is true if and only if the indices matrix is sorted in - * row-major order, and does not have duplicated entries. - * This sort order is the same as of Tensorflow's SparseTensor, - * but it is inverse order of SciPy's canonical coo_matrix - * (SciPy employs column-major order for its coo_matrix). - */ - public boolean isCanonical() { int o = __offset(10); return o != 0 ? 0!=bb.get(o + bb_pos) : false; } - - public static void startSparseTensorIndexCOO(FlatBufferBuilder builder) { builder.startTable(4); } - public static void addIndicesType(FlatBufferBuilder builder, int indicesTypeOffset) { builder.addOffset(0, indicesTypeOffset, 0); } - public static void addIndicesStrides(FlatBufferBuilder builder, int indicesStridesOffset) { builder.addOffset(1, indicesStridesOffset, 0); } - public static int createIndicesStridesVector(FlatBufferBuilder builder, long[] data) { builder.startVector(8, data.length, 8); for (int i = data.length - 1; i >= 0; i--) builder.addLong(data[i]); return builder.endVector(); } - public static void startIndicesStridesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(8, numElems, 8); } - public static void addIndicesBuffer(FlatBufferBuilder builder, int indicesBufferOffset) { builder.addStruct(2, indicesBufferOffset, 0); } - public static void addIsCanonical(FlatBufferBuilder builder, boolean isCanonical) { 
builder.addBoolean(3, isCanonical, false); } - public static int endSparseTensorIndexCOO(FlatBufferBuilder builder) { - int o = builder.endTable(); - builder.required(o, 4); // indicesType - builder.required(o, 8); // indicesBuffer - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public SparseTensorIndexCOO get(int j) { return get(new SparseTensorIndexCOO(), j); } - public SparseTensorIndexCOO get(SparseTensorIndexCOO obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCSF.java b/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCSF.java deleted file mode 100644 index 91bb3574fd078..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/SparseTensorIndexCSF.java +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Compressed Sparse Fiber (CSF) sparse tensor index. - */ -@SuppressWarnings("unused") -public final class SparseTensorIndexCSF extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static SparseTensorIndexCSF getRootAsSparseTensorIndexCSF(ByteBuffer _bb) { return getRootAsSparseTensorIndexCSF(_bb, new SparseTensorIndexCSF()); } - public static SparseTensorIndexCSF getRootAsSparseTensorIndexCSF(ByteBuffer _bb, SparseTensorIndexCSF obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public SparseTensorIndexCSF __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * CSF is a generalization of compressed sparse row (CSR) index. - * See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) - * - * CSF index recursively compresses each dimension of a tensor into a set - * of prefix trees. Each path from a root to leaf forms one tensor - * non-zero index. CSF is implemented with two arrays of buffers and one - * arrays of integers. 
- * - * For example, let X be a 2x3x4x5 tensor and let it have the following - * 8 non-zero values: - * ```text - * X[0, 0, 0, 1] := 1 - * X[0, 0, 0, 2] := 2 - * X[0, 1, 0, 0] := 3 - * X[0, 1, 0, 2] := 4 - * X[0, 1, 1, 0] := 5 - * X[1, 1, 1, 0] := 6 - * X[1, 1, 1, 1] := 7 - * X[1, 1, 1, 2] := 8 - * ``` - * As a prefix tree this would be represented as: - * ```text - * 0 1 - * / \ | - * 0 1 1 - * / / \ | - * 0 0 1 1 - * /| /| | /| | - * 1 2 0 2 0 0 1 2 - * ``` - * The type of values in indptrBuffers - */ - public org.apache.arrow.flatbuf.Int indptrType() { return indptrType(new org.apache.arrow.flatbuf.Int()); } - public org.apache.arrow.flatbuf.Int indptrType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(4); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; } - /** - * indptrBuffers stores the sparsity structure. - * Each two consecutive dimensions in a tensor correspond to a buffer in - * indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` - * and `indptrBuffers[dim][i + 1]` signify a range of nodes in - * `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. - * - * For example, the indptrBuffers for the above X is: - * ```text - * indptrBuffer(X) = [ - * [0, 2, 3], - * [0, 1, 3, 4], - * [0, 2, 4, 5, 8] - * ]. - * ``` - */ - public org.apache.arrow.flatbuf.Buffer indptrBuffers(int j) { return indptrBuffers(new org.apache.arrow.flatbuf.Buffer(), j); } - public org.apache.arrow.flatbuf.Buffer indptrBuffers(org.apache.arrow.flatbuf.Buffer obj, int j) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; } - public int indptrBuffersLength() { int o = __offset(6); return o != 0 ? 
__vector_len(o) : 0; } - public org.apache.arrow.flatbuf.Buffer.Vector indptrBuffersVector() { return indptrBuffersVector(new org.apache.arrow.flatbuf.Buffer.Vector()); } - public org.apache.arrow.flatbuf.Buffer.Vector indptrBuffersVector(org.apache.arrow.flatbuf.Buffer.Vector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), 16, bb) : null; } - /** - * The type of values in indicesBuffers - */ - public org.apache.arrow.flatbuf.Int indicesType() { return indicesType(new org.apache.arrow.flatbuf.Int()); } - public org.apache.arrow.flatbuf.Int indicesType(org.apache.arrow.flatbuf.Int obj) { int o = __offset(8); return o != 0 ? obj.__assign(__indirect(o + bb_pos), bb) : null; } - /** - * indicesBuffers stores values of nodes. - * Each tensor dimension corresponds to a buffer in indicesBuffers. - * For example, the indicesBuffers for the above X is: - * ```text - * indicesBuffer(X) = [ - * [0, 1], - * [0, 1, 1], - * [0, 0, 1, 1], - * [1, 2, 0, 2, 0, 0, 1, 2] - * ]. - * ``` - */ - public org.apache.arrow.flatbuf.Buffer indicesBuffers(int j) { return indicesBuffers(new org.apache.arrow.flatbuf.Buffer(), j); } - public org.apache.arrow.flatbuf.Buffer indicesBuffers(org.apache.arrow.flatbuf.Buffer obj, int j) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o) + j * 16, bb) : null; } - public int indicesBuffersLength() { int o = __offset(10); return o != 0 ? __vector_len(o) : 0; } - public org.apache.arrow.flatbuf.Buffer.Vector indicesBuffersVector() { return indicesBuffersVector(new org.apache.arrow.flatbuf.Buffer.Vector()); } - public org.apache.arrow.flatbuf.Buffer.Vector indicesBuffersVector(org.apache.arrow.flatbuf.Buffer.Vector obj) { int o = __offset(10); return o != 0 ? obj.__assign(__vector(o), 16, bb) : null; } - /** - * axisOrder stores the sequence in which dimensions were traversed to - * produce the prefix tree. - * For example, the axisOrder for the above X is: - * ```text - * axisOrder(X) = [0, 1, 2, 3]. 
- * ``` - */ - public int axisOrder(int j) { int o = __offset(12); return o != 0 ? bb.getInt(__vector(o) + j * 4) : 0; } - public int axisOrderLength() { int o = __offset(12); return o != 0 ? __vector_len(o) : 0; } - public IntVector axisOrderVector() { return axisOrderVector(new IntVector()); } - public IntVector axisOrderVector(IntVector obj) { int o = __offset(12); return o != 0 ? obj.__assign(__vector(o), bb) : null; } - public ByteBuffer axisOrderAsByteBuffer() { return __vector_as_bytebuffer(12, 4); } - public ByteBuffer axisOrderInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 12, 4); } - - public static int createSparseTensorIndexCSF(FlatBufferBuilder builder, - int indptrTypeOffset, - int indptrBuffersOffset, - int indicesTypeOffset, - int indicesBuffersOffset, - int axisOrderOffset) { - builder.startTable(5); - SparseTensorIndexCSF.addAxisOrder(builder, axisOrderOffset); - SparseTensorIndexCSF.addIndicesBuffers(builder, indicesBuffersOffset); - SparseTensorIndexCSF.addIndicesType(builder, indicesTypeOffset); - SparseTensorIndexCSF.addIndptrBuffers(builder, indptrBuffersOffset); - SparseTensorIndexCSF.addIndptrType(builder, indptrTypeOffset); - return SparseTensorIndexCSF.endSparseTensorIndexCSF(builder); - } - - public static void startSparseTensorIndexCSF(FlatBufferBuilder builder) { builder.startTable(5); } - public static void addIndptrType(FlatBufferBuilder builder, int indptrTypeOffset) { builder.addOffset(0, indptrTypeOffset, 0); } - public static void addIndptrBuffers(FlatBufferBuilder builder, int indptrBuffersOffset) { builder.addOffset(1, indptrBuffersOffset, 0); } - public static void startIndptrBuffersVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); } - public static void addIndicesType(FlatBufferBuilder builder, int indicesTypeOffset) { builder.addOffset(2, indicesTypeOffset, 0); } - public static void addIndicesBuffers(FlatBufferBuilder builder, int indicesBuffersOffset) { 
builder.addOffset(3, indicesBuffersOffset, 0); } - public static void startIndicesBuffersVector(FlatBufferBuilder builder, int numElems) { builder.startVector(16, numElems, 8); } - public static void addAxisOrder(FlatBufferBuilder builder, int axisOrderOffset) { builder.addOffset(4, axisOrderOffset, 0); } - public static int createAxisOrderVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addInt(data[i]); return builder.endVector(); } - public static void startAxisOrderVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); } - public static int endSparseTensorIndexCSF(FlatBufferBuilder builder) { - int o = builder.endTable(); - builder.required(o, 4); // indptrType - builder.required(o, 6); // indptrBuffers - builder.required(o, 8); // indicesType - builder.required(o, 10); // indicesBuffers - builder.required(o, 12); // axisOrder - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public SparseTensorIndexCSF get(int j) { return get(new SparseTensorIndexCSF(), j); } - public SparseTensorIndexCSF get(SparseTensorIndexCSF obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Struct_.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Struct_.java deleted file mode 100644 index 3af1c91f08c80..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Struct_.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * A Struct_ in the flatbuffer metadata is the same as an Arrow Struct - * (according to the physical memory layout). 
We used Struct_ here as - * Struct is a reserved word in Flatbuffers - */ -@SuppressWarnings("unused") -public final class Struct_ extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Struct_ getRootAsStruct_(ByteBuffer _bb) { return getRootAsStruct_(_bb, new Struct_()); } - public static Struct_ getRootAsStruct_(ByteBuffer _bb, Struct_ obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Struct_ __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startStruct_(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endStruct_(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Struct_ get(int j) { return get(new Struct_(), j); } - public Struct_ get(Struct_ obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Tensor.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Tensor.java deleted file mode 100644 index 4fd46467389b7..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Tensor.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -@SuppressWarnings("unused") -public final class Tensor extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Tensor getRootAsTensor(ByteBuffer _bb) { return getRootAsTensor(_bb, new Tensor()); } - public static Tensor getRootAsTensor(ByteBuffer _bb, Tensor obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Tensor __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - public byte typeType() { int o = __offset(4); return o != 0 ? bb.get(o + bb_pos) : 0; } - /** - * The type of data contained in a value cell. 
Currently only fixed-width - * value types are supported, no strings or nested types - */ - public Table type(Table obj) { int o = __offset(6); return o != 0 ? __union(obj, o + bb_pos) : null; } - /** - * The dimensions of the tensor, optionally named - */ - public org.apache.arrow.flatbuf.TensorDim shape(int j) { return shape(new org.apache.arrow.flatbuf.TensorDim(), j); } - public org.apache.arrow.flatbuf.TensorDim shape(org.apache.arrow.flatbuf.TensorDim obj, int j) { int o = __offset(8); return o != 0 ? obj.__assign(__indirect(__vector(o) + j * 4), bb) : null; } - public int shapeLength() { int o = __offset(8); return o != 0 ? __vector_len(o) : 0; } - public org.apache.arrow.flatbuf.TensorDim.Vector shapeVector() { return shapeVector(new org.apache.arrow.flatbuf.TensorDim.Vector()); } - public org.apache.arrow.flatbuf.TensorDim.Vector shapeVector(org.apache.arrow.flatbuf.TensorDim.Vector obj) { int o = __offset(8); return o != 0 ? obj.__assign(__vector(o), 4, bb) : null; } - /** - * Non-negative byte offsets to advance one value cell along each dimension - * If omitted, default to row-major order (C-like). - */ - public long strides(int j) { int o = __offset(10); return o != 0 ? bb.getLong(__vector(o) + j * 8) : 0; } - public int stridesLength() { int o = __offset(10); return o != 0 ? __vector_len(o) : 0; } - public LongVector stridesVector() { return stridesVector(new LongVector()); } - public LongVector stridesVector(LongVector obj) { int o = __offset(10); return o != 0 ? 
obj.__assign(__vector(o), bb) : null; } - public ByteBuffer stridesAsByteBuffer() { return __vector_as_bytebuffer(10, 8); } - public ByteBuffer stridesInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 10, 8); } - /** - * The location and size of the tensor's data - */ - public org.apache.arrow.flatbuf.Buffer data() { return data(new org.apache.arrow.flatbuf.Buffer()); } - public org.apache.arrow.flatbuf.Buffer data(org.apache.arrow.flatbuf.Buffer obj) { int o = __offset(12); return o != 0 ? obj.__assign(o + bb_pos, bb) : null; } - - public static void startTensor(FlatBufferBuilder builder) { builder.startTable(5); } - public static void addTypeType(FlatBufferBuilder builder, byte typeType) { builder.addByte(0, typeType, 0); } - public static void addType(FlatBufferBuilder builder, int typeOffset) { builder.addOffset(1, typeOffset, 0); } - public static void addShape(FlatBufferBuilder builder, int shapeOffset) { builder.addOffset(2, shapeOffset, 0); } - public static int createShapeVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addOffset(data[i]); return builder.endVector(); } - public static void startShapeVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); } - public static void addStrides(FlatBufferBuilder builder, int stridesOffset) { builder.addOffset(3, stridesOffset, 0); } - public static int createStridesVector(FlatBufferBuilder builder, long[] data) { builder.startVector(8, data.length, 8); for (int i = data.length - 1; i >= 0; i--) builder.addLong(data[i]); return builder.endVector(); } - public static void startStridesVector(FlatBufferBuilder builder, int numElems) { builder.startVector(8, numElems, 8); } - public static void addData(FlatBufferBuilder builder, int dataOffset) { builder.addStruct(4, dataOffset, 0); } - public static int endTensor(FlatBufferBuilder builder) { - int o = builder.endTable(); - 
builder.required(o, 6); // type - builder.required(o, 8); // shape - builder.required(o, 12); // data - return o; - } - public static void finishTensorBuffer(FlatBufferBuilder builder, int offset) { builder.finish(offset); } - public static void finishSizePrefixedTensorBuffer(FlatBufferBuilder builder, int offset) { builder.finishSizePrefixed(offset); } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Tensor get(int j) { return get(new Tensor(), j); } - public Tensor get(Tensor obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/TensorDim.java b/java/format/src/main/java/org/apache/arrow/flatbuf/TensorDim.java deleted file mode 100644 index 1ebaa8c4339ff..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/TensorDim.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * ---------------------------------------------------------------------- - * Data structures for dense tensors - * Shape data for a single axis in a tensor - */ -@SuppressWarnings("unused") -public final class TensorDim extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static TensorDim getRootAsTensorDim(ByteBuffer _bb) { return getRootAsTensorDim(_bb, new TensorDim()); } - public static TensorDim getRootAsTensorDim(ByteBuffer _bb, TensorDim obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public TensorDim __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - /** - * Length of dimension - */ - public long size() { int o = __offset(4); return o != 0 ? bb.getLong(o + bb_pos) : 0L; } - /** - * Name of the dimension, optional - */ - public String name() { int o = __offset(6); return o != 0 ? 
__string(o + bb_pos) : null; } - public ByteBuffer nameAsByteBuffer() { return __vector_as_bytebuffer(6, 1); } - public ByteBuffer nameInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 1); } - - public static int createTensorDim(FlatBufferBuilder builder, - long size, - int nameOffset) { - builder.startTable(2); - TensorDim.addSize(builder, size); - TensorDim.addName(builder, nameOffset); - return TensorDim.endTensorDim(builder); - } - - public static void startTensorDim(FlatBufferBuilder builder) { builder.startTable(2); } - public static void addSize(FlatBufferBuilder builder, long size) { builder.addLong(0, size, 0L); } - public static void addName(FlatBufferBuilder builder, int nameOffset) { builder.addOffset(1, nameOffset, 0); } - public static int endTensorDim(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public TensorDim get(int j) { return get(new TensorDim(), j); } - public TensorDim get(TensorDim obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java deleted file mode 100644 index 63bee08ce64e4..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Time.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Time is either a 32-bit or 64-bit signed integer type representing an - * elapsed time since midnight, stored in either of four units: seconds, - * milliseconds, microseconds or nanoseconds. - * - * The integer `bitWidth` depends on the `unit` and must be one of the following: - * * SECOND and MILLISECOND: 32 bits - * * MICROSECOND and NANOSECOND: 64 bits - * - * The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds - * (exclusive), adjusted for the time unit (for example, up to 86400000 - * exclusive for the MILLISECOND unit). - * This definition doesn't allow for leap seconds. Time values from - * measurements with leap seconds will need to be corrected when ingesting - * into Arrow (for example by replacing the value 86400 with 86399). 
- */ -@SuppressWarnings("unused") -public final class Time extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Time getRootAsTime(ByteBuffer _bb) { return getRootAsTime(_bb, new Time()); } - public static Time getRootAsTime(ByteBuffer _bb, Time obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Time __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 1; } - public int bitWidth() { int o = __offset(6); return o != 0 ? bb.getInt(o + bb_pos) : 32; } - - public static int createTime(FlatBufferBuilder builder, - short unit, - int bitWidth) { - builder.startTable(2); - Time.addBitWidth(builder, bitWidth); - Time.addUnit(builder, unit); - return Time.endTime(builder); - } - - public static void startTime(FlatBufferBuilder builder) { builder.startTable(2); } - public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 1); } - public static void addBitWidth(FlatBufferBuilder builder, int bitWidth) { builder.addInt(1, bitWidth, 32); } - public static int endTime(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Time get(int j) { return get(new Time(), j); } - public Time get(Time obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/TimeUnit.java b/java/format/src/main/java/org/apache/arrow/flatbuf/TimeUnit.java deleted file mode 100644 index 0df6867609b61..0000000000000 --- 
a/java/format/src/main/java/org/apache/arrow/flatbuf/TimeUnit.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -@SuppressWarnings("unused") -public final class TimeUnit { - private TimeUnit() { } - public static final short SECOND = 0; - public static final short MILLISECOND = 1; - public static final short MICROSECOND = 2; - public static final short NANOSECOND = 3; - - public static final String[] names = { "SECOND", "MILLISECOND", "MICROSECOND", "NANOSECOND", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java deleted file mode 100644 index d45d866daf3e1..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Timestamp.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Timestamp is a 64-bit signed integer representing an elapsed time since a - * fixed epoch, stored in either of four units: seconds, milliseconds, - * microseconds or nanoseconds, and is optionally annotated with a timezone. - * - * Timestamp values do not include any leap seconds (in other words, all - * days are considered 86400 seconds long). - * - * Timestamps with a non-empty timezone - * ------------------------------------ - * - * If a Timestamp column has a non-empty timezone value, its epoch is - * 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone - * (the Unix epoch), regardless of the Timestamp's own timezone. 
- * - * Therefore, timestamp values with a non-empty timezone correspond to - * physical points in time together with some additional information about - * how the data was obtained and/or how to display it (the timezone). - * - * For example, the timestamp value 0 with the timezone string "Europe/Paris" - * corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the - * application may prefer to display it as "January 1st 1970, 01h00" in - * the Europe/Paris timezone (which is the same physical point in time). - * - * One consequence is that timestamp values with a non-empty timezone - * can be compared and ordered directly, since they all share the same - * well-known point of reference (the Unix epoch). - * - * Timestamps with an unset / empty timezone - * ----------------------------------------- - * - * If a Timestamp column has no timezone value, its epoch is - * 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. - * - * Therefore, timestamp values without a timezone cannot be meaningfully - * interpreted as physical points in time, but only as calendar / clock - * indications ("wall clock time") in an unspecified timezone. - * - * For example, the timestamp value 0 with an empty timezone string - * corresponds to "January 1st 1970, 00h00" in an unknown timezone: there - * is not enough information to interpret it as a well-defined physical - * point in time. - * - * One consequence is that timestamp values without a timezone cannot - * be reliably compared or ordered, since they may have different points of - * reference. In particular, it is *not* possible to interpret an unset - * or empty timezone as the same as "UTC". 
- * - * Conversion between timezones - * ---------------------------- - * - * If a Timestamp column has a non-empty timezone, changing the timezone - * to a different non-empty value is a metadata-only operation: - * the timestamp values need not change as their point of reference remains - * the same (the Unix epoch). - * - * However, if a Timestamp column has no timezone value, changing it to a - * non-empty value requires to think about the desired semantics. - * One possibility is to assume that the original timestamp values are - * relative to the epoch of the timezone being set; timestamp values should - * then adjusted to the Unix epoch (for example, changing the timezone from - * empty to "Europe/Paris" would require converting the timestamp values - * from "Europe/Paris" to "UTC", which seems counter-intuitive but is - * nevertheless correct). - * - * Guidelines for encoding data from external libraries - * ---------------------------------------------------- - * - * Date & time libraries often have multiple different data types for temporal - * data. In order to ease interoperability between different implementations the - * Arrow project has some recommendations for encoding these types into a Timestamp - * column. - * - * An "instant" represents a physical point in time that has no relevant timezone - * (for example, astronomical data). To encode an instant, use a Timestamp with - * the timezone string set to "UTC", and make sure the Timestamp values - * are relative to the UTC epoch (January 1st 1970, midnight). - * - * A "zoned date-time" represents a physical point in time annotated with an - * informative timezone (for example, the timezone in which the data was - * recorded). To encode a zoned date-time, use a Timestamp with the timezone - * string set to the name of the timezone, and make sure the Timestamp values - * are relative to the UTC epoch (January 1st 1970, midnight). 
- * - * (There is some ambiguity between an instant and a zoned date-time with the - * UTC timezone. Both of these are stored the same in Arrow. Typically, - * this distinction does not matter. If it does, then an application should - * use custom metadata or an extension type to distinguish between the two cases.) - * - * An "offset date-time" represents a physical point in time combined with an - * explicit offset from UTC. To encode an offset date-time, use a Timestamp - * with the timezone string set to the numeric timezone offset string - * (e.g. "+03:00"), and make sure the Timestamp values are relative to - * the UTC epoch (January 1st 1970, midnight). - * - * A "naive date-time" (also called "local date-time" in some libraries) - * represents a wall clock time combined with a calendar date, but with - * no indication of how to map this information to a physical point in time. - * Naive date-times must be handled with care because of this missing - * information, and also because daylight saving time (DST) may make - * some values ambiguous or nonexistent. A naive date-time may be - * stored as a struct with Date and Time fields. However, it may also be - * encoded into a Timestamp column with an empty timezone. The timestamp - * values should be computed "as if" the timezone of the date-time values - * was UTC; for example, the naive date-time "January 1st 1970, 00h00" would - * be encoded as timestamp value 0. 
- */ -@SuppressWarnings("unused") -public final class Timestamp extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Timestamp getRootAsTimestamp(ByteBuffer _bb) { return getRootAsTimestamp(_bb, new Timestamp()); } - public static Timestamp getRootAsTimestamp(ByteBuffer _bb, Timestamp obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Timestamp __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - public short unit() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; } - /** - * The timezone is an optional string indicating the name of a timezone, - * one of: - * - * * As used in the Olson timezone database (the "tz database" or - * "tzdata"), such as "America/New_York". - * * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", - * such as "+07:30". - * - * Whether a timezone string is present indicates different semantics about - * the data (see above). - */ - public String timezone() { int o = __offset(6); return o != 0 ? 
__string(o + bb_pos) : null; } - public ByteBuffer timezoneAsByteBuffer() { return __vector_as_bytebuffer(6, 1); } - public ByteBuffer timezoneInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 1); } - - public static int createTimestamp(FlatBufferBuilder builder, - short unit, - int timezoneOffset) { - builder.startTable(2); - Timestamp.addTimezone(builder, timezoneOffset); - Timestamp.addUnit(builder, unit); - return Timestamp.endTimestamp(builder); - } - - public static void startTimestamp(FlatBufferBuilder builder) { builder.startTable(2); } - public static void addUnit(FlatBufferBuilder builder, short unit) { builder.addShort(0, unit, 0); } - public static void addTimezone(FlatBufferBuilder builder, int timezoneOffset) { builder.addOffset(1, timezoneOffset, 0); } - public static int endTimestamp(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Timestamp get(int j) { return get(new Timestamp(), j); } - public Timestamp get(Timestamp obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java deleted file mode 100644 index ba0f665f647d4..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Type.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -/** - * ---------------------------------------------------------------------- - * Top-level Type value, enabling extensible type-specific metadata. We can - * add new logical types to Type without breaking backwards compatibility - */ -@SuppressWarnings("unused") -public final class Type { - private Type() { } - public static final byte NONE = 0; - public static final byte Null = 1; - public static final byte Int = 2; - public static final byte FloatingPoint = 3; - public static final byte Binary = 4; - public static final byte Utf8 = 5; - public static final byte Bool = 6; - public static final byte Decimal = 7; - public static final byte Date = 8; - public static final byte Time = 9; - public static final byte Timestamp = 10; - public static final byte Interval = 11; - public static final byte List = 12; - public static final byte Struct_ = 13; - public static final byte Union = 14; - public static final byte FixedSizeBinary = 15; - public static final byte FixedSizeList = 16; - public static final byte Map = 17; - public static final byte Duration = 18; - public static final byte LargeBinary = 19; - public static final byte LargeUtf8 = 20; - public static final byte LargeList = 21; - public static final byte RunEndEncoded = 22; - public static final byte BinaryView = 23; - public static final byte Utf8View = 24; - public static final byte ListView = 25; - public static final byte LargeListView = 26; - - public static final String[] names = { "NONE", "Null", "Int", "FloatingPoint", "Binary", "Utf8", "Bool", 
"Decimal", "Date", "Time", "Timestamp", "Interval", "List", "Struct_", "Union", "FixedSizeBinary", "FixedSizeList", "Map", "Duration", "LargeBinary", "LargeUtf8", "LargeList", "RunEndEncoded", "BinaryView", "Utf8View", "ListView", "LargeListView", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Union.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Union.java deleted file mode 100644 index 70f007833d14f..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Union.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * A union is a complex type with children in Field - * By default ids in the type vector refer to the offsets in the children - * optionally typeIds provides an indirection between the child offset and the type id - * for each child `typeIds[offset]` is the id used in the type vector - */ -@SuppressWarnings("unused") -public final class Union extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Union getRootAsUnion(ByteBuffer _bb) { return getRootAsUnion(_bb, new Union()); } - public static Union getRootAsUnion(ByteBuffer _bb, Union obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Union __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - public short mode() { int o = __offset(4); return o != 0 ? bb.getShort(o + bb_pos) : 0; } - public int typeIds(int j) { int o = __offset(6); return o != 0 ? bb.getInt(__vector(o) + j * 4) : 0; } - public int typeIdsLength() { int o = __offset(6); return o != 0 ? 
__vector_len(o) : 0; } - public IntVector typeIdsVector() { return typeIdsVector(new IntVector()); } - public IntVector typeIdsVector(IntVector obj) { int o = __offset(6); return o != 0 ? obj.__assign(__vector(o), bb) : null; } - public ByteBuffer typeIdsAsByteBuffer() { return __vector_as_bytebuffer(6, 4); } - public ByteBuffer typeIdsInByteBuffer(ByteBuffer _bb) { return __vector_in_bytebuffer(_bb, 6, 4); } - - public static int createUnion(FlatBufferBuilder builder, - short mode, - int typeIdsOffset) { - builder.startTable(2); - Union.addTypeIds(builder, typeIdsOffset); - Union.addMode(builder, mode); - return Union.endUnion(builder); - } - - public static void startUnion(FlatBufferBuilder builder) { builder.startTable(2); } - public static void addMode(FlatBufferBuilder builder, short mode) { builder.addShort(0, mode, 0); } - public static void addTypeIds(FlatBufferBuilder builder, int typeIdsOffset) { builder.addOffset(1, typeIdsOffset, 0); } - public static int createTypeIdsVector(FlatBufferBuilder builder, int[] data) { builder.startVector(4, data.length, 4); for (int i = data.length - 1; i >= 0; i--) builder.addInt(data[i]); return builder.endVector(); } - public static void startTypeIdsVector(FlatBufferBuilder builder, int numElems) { builder.startVector(4, numElems, 4); } - public static int endUnion(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Union get(int j) { return get(new Union(), j); } - public Union get(Union obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/UnionMode.java b/java/format/src/main/java/org/apache/arrow/flatbuf/UnionMode.java deleted file mode 100644 index 3f3eb8c728339..0000000000000 --- 
a/java/format/src/main/java/org/apache/arrow/flatbuf/UnionMode.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -@SuppressWarnings("unused") -public final class UnionMode { - private UnionMode() { } - public static final short Sparse = 0; - public static final short Dense = 1; - - public static final String[] names = { "Sparse", "Dense", }; - - public static String name(int e) { return names[e]; } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8.java deleted file mode 100644 index 62e2eb45d5e63..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Unicode with UTF-8 encoding - */ -@SuppressWarnings("unused") -public final class Utf8 extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Utf8 getRootAsUtf8(ByteBuffer _bb) { return getRootAsUtf8(_bb, new Utf8()); } - public static Utf8 getRootAsUtf8(ByteBuffer _bb, Utf8 obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Utf8 __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startUtf8(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endUtf8(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector 
__assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Utf8 get(int j) { return get(new Utf8(), j); } - public Utf8 get(Utf8 obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8View.java b/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8View.java deleted file mode 100644 index 792969563507a..0000000000000 --- a/java/format/src/main/java/org/apache/arrow/flatbuf/Utf8View.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.flatbuf; - -import com.google.flatbuffers.BaseVector; -import com.google.flatbuffers.BooleanVector; -import com.google.flatbuffers.ByteVector; -import com.google.flatbuffers.Constants; -import com.google.flatbuffers.DoubleVector; -import com.google.flatbuffers.FlatBufferBuilder; -import com.google.flatbuffers.FloatVector; -import com.google.flatbuffers.IntVector; -import com.google.flatbuffers.LongVector; -import com.google.flatbuffers.ShortVector; -import com.google.flatbuffers.StringVector; -import com.google.flatbuffers.Struct; -import com.google.flatbuffers.Table; -import com.google.flatbuffers.UnionVector; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -/** - * Logically the same as Utf8, but the internal representation uses a view - * struct that contains the string length and either the string's entire data - * inline (for small strings) or an inlined prefix, an index of another buffer, - * and an offset pointing to a slice in that buffer (for non-small strings). - * - * Since it uses a variable number of data buffers, each Field with this type - * must have a corresponding entry in `variadicBufferCounts`. 
- */ -@SuppressWarnings("unused") -public final class Utf8View extends Table { - public static void ValidateVersion() { Constants.FLATBUFFERS_24_3_25(); } - public static Utf8View getRootAsUtf8View(ByteBuffer _bb) { return getRootAsUtf8View(_bb, new Utf8View()); } - public static Utf8View getRootAsUtf8View(ByteBuffer _bb, Utf8View obj) { _bb.order(ByteOrder.LITTLE_ENDIAN); return (obj.__assign(_bb.getInt(_bb.position()) + _bb.position(), _bb)); } - public void __init(int _i, ByteBuffer _bb) { __reset(_i, _bb); } - public Utf8View __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } - - - public static void startUtf8View(FlatBufferBuilder builder) { builder.startTable(0); } - public static int endUtf8View(FlatBufferBuilder builder) { - int o = builder.endTable(); - return o; - } - - public static final class Vector extends BaseVector { - public Vector __assign(int _vector, int _element_size, ByteBuffer _bb) { __reset(_vector, _element_size, _bb); return this; } - - public Utf8View get(int j) { return get(new Utf8View(), j); } - public Utf8View get(Utf8View obj, int j) { return obj.__assign(__indirect(__element(j), bb), bb); } - } -} - diff --git a/java/gandiva/CMakeLists.txt b/java/gandiva/CMakeLists.txt deleted file mode 100644 index 369829d7a30d5..0000000000000 --- a/java/gandiva/CMakeLists.txt +++ /dev/null @@ -1,94 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -find_package(Gandiva REQUIRED) - -include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} - ${JNI_INCLUDE_DIRS} ${JNI_HEADERS_DIR}) - -add_jar(arrow_java_jni_gandiva_jar - src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java - src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java - src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java - src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java - GENERATE_NATIVE_HEADERS - arrow_java_jni_gandiva_headers) - -set(GANDIVA_PROTO_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) -set(GANDIVA_PROTO_OUTPUT_FILES "${GANDIVA_PROTO_OUTPUT_DIR}/gandiva/types.pb.cc" - "${GANDIVA_PROTO_OUTPUT_DIR}/gandiva/types.pb.h") - -set_source_files_properties(${GANDIVA_PROTO_OUTPUT_FILES} PROPERTIES GENERATED TRUE) - -set(GANDIVA_PROTO_DIR ${CMAKE_CURRENT_SOURCE_DIR}/proto) -get_filename_component(GANDIVA_PROTO_FILE_ABSOLUTE - ${GANDIVA_PROTO_DIR}/gandiva/types.proto ABSOLUTE) - -find_package(Protobuf REQUIRED) -add_custom_command(OUTPUT ${GANDIVA_PROTO_OUTPUT_FILES} - COMMAND protobuf::protoc --proto_path ${GANDIVA_PROTO_DIR} --cpp_out - ${GANDIVA_PROTO_OUTPUT_DIR} ${GANDIVA_PROTO_FILE_ABSOLUTE} - DEPENDS ${GANDIVA_PROTO_FILE_ABSOLUTE} - COMMENT "Running Protobuf compiler on gandiva/types.proto" - VERBATIM) - -add_custom_target(garrow_java_jni_gandiva_proto ALL DEPENDS ${GANDIVA_PROTO_OUTPUT_FILES}) -add_library(arrow_java_jni_gandiva SHARED - src/main/cpp/config_builder.cc - src/main/cpp/config_holder.cc - 
src/main/cpp/expression_registry_helper.cc - src/main/cpp/jni_common.cc - ${GANDIVA_PROTO_OUTPUT_FILES}) -set_property(TARGET arrow_java_jni_gandiva PROPERTY OUTPUT_NAME "gandiva_jni") -target_link_libraries(arrow_java_jni_gandiva - arrow_java_jni_gandiva_headers - jni - protobuf::libprotobuf - Gandiva::gandiva_static) - -# Localize thirdparty symbols using a linker version script. This hides them -# from the client application. The OS X linker does not support the -# version-script option. -if(CMAKE_VERSION VERSION_LESS 3.18) - if(APPLE OR WIN32) - set(CXX_LINKER_SUPPORTS_VERSION_SCRIPT FALSE) - else() - set(CXX_LINKER_SUPPORTS_VERSION_SCRIPT TRUE) - endif() -else() - include(CheckLinkerFlag) - check_linker_flag(CXX - "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/src/main/cpp/symbols.map" - CXX_LINKER_SUPPORTS_VERSION_SCRIPT) -endif() -# filter out everything that is not needed for the jni bridge -# statically linked stdc++ has conflicts with stdc++ loaded by other libraries. -if(CXX_LINKER_SUPPORTS_VERSION_SCRIPT) - set_target_properties(arrow_java_jni_gandiva - PROPERTIES LINK_FLAGS - "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/src/main/cpp/symbols.map" - ) -endif() - -set(ARROW_JAVA_JNI_GANDIVA_LIBDIR - "${CMAKE_INSTALL_PREFIX}/lib/gandiva_jni/${ARROW_JAVA_JNI_ARCH_DIR}") -set(ARROW_JAVA_JNI_GANDIVA_BINDIR - "${CMAKE_INSTALL_PREFIX}/bin/gandiva_jni/${ARROW_JAVA_JNI_ARCH_DIR}") - -install(TARGETS arrow_java_jni_gandiva - LIBRARY DESTINATION ${ARROW_JAVA_JNI_GANDIVA_LIBDIR} - RUNTIME DESTINATION ${ARROW_JAVA_JNI_GANDIVA_BINDIR}) diff --git a/java/gandiva/README.md b/java/gandiva/README.md deleted file mode 100644 index 22a292eaff028..0000000000000 --- a/java/gandiva/README.md +++ /dev/null @@ -1,32 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -# Gandiva Java - -## Setup Build Environment - -install: - - java 7 or later - - maven 3.3 or later - -## Building and running tests - -``` -cd java -mvn install -Dgandiva.cpp.build.dir= -``` diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml deleted file mode 100644 index 5367bfdedfdff..0000000000000 --- a/java/gandiva/pom.xml +++ /dev/null @@ -1,145 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - - - org.apache.arrow.gandiva - arrow-gandiva - jar - Arrow Gandiva - Java wrappers around the native Gandiva SQL expression compiler. 
- - - true - ../../../cpp/release-build - - - - - org.apache.arrow - arrow-memory-core - - - org.immutables - value-annotations - - - org.apache.arrow - arrow-memory-netty - test - - - org.apache.arrow - arrow-vector - ${arrow.vector.classifier} - - - com.google.protobuf - protobuf-java - - - com.google.guava - guava - - - org.slf4j - slf4j-api - - - - - - - ${arrow.cpp.build.dir} - - **/*gandiva_jni.* - - - - - - org.xolstice.maven.plugins - protobuf-maven-plugin - - - src - - compile - - - proto - - - - - - - - - release - - - - org.apache.maven.plugins - maven-source-plugin - - - attach-sources - - jar-no-fork - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - - - attach-javadocs - - jar - - - - - - org.apache.maven.plugins - maven-gpg-plugin - - - sign-artifacts - - sign - - verify - - - - - - - - diff --git a/java/gandiva/proto/gandiva/types.proto b/java/gandiva/proto/gandiva/types.proto deleted file mode 100644 index 4ce342681d614..0000000000000 --- a/java/gandiva/proto/gandiva/types.proto +++ /dev/null @@ -1,255 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -syntax = "proto3"; -package gandiva.types; - -option java_package = "org.apache.arrow.gandiva.ipc"; -option java_outer_classname = "GandivaTypes"; -option optimize_for = SPEED; - -enum GandivaType { - NONE = 0; // arrow::Type::NA - BOOL = 1; // arrow::Type::BOOL - UINT8 = 2; // arrow::Type::UINT8 - INT8 = 3; // arrow::Type::INT8 - UINT16 = 4; // represents arrow::Type fields in src/arrow/type.h - INT16 = 5; - UINT32 = 6; - INT32 = 7; - UINT64 = 8; - INT64 = 9; - HALF_FLOAT = 10; - FLOAT = 11; - DOUBLE = 12; - UTF8 = 13; - BINARY = 14; - FIXED_SIZE_BINARY = 15; - DATE32 = 16; - DATE64 = 17; - TIMESTAMP = 18; - TIME32 = 19; - TIME64 = 20; - INTERVAL = 21; - DECIMAL = 22; - LIST = 23; - STRUCT = 24; - UNION = 25; - DICTIONARY = 26; - MAP = 27; -} - -enum DateUnit { - DAY = 0; - MILLI = 1; -} - -enum TimeUnit { - SEC = 0; - MILLISEC = 1; - MICROSEC = 2; - NANOSEC = 3; -} - -enum IntervalType { - YEAR_MONTH = 0; - DAY_TIME = 1; -} - -enum SelectionVectorType { - SV_NONE = 0; - SV_INT16 = 1; - SV_INT32 = 2; -} - -message ExtGandivaType { - optional GandivaType type = 1; - optional uint32 width = 2; // used by FIXED_SIZE_BINARY - optional int32 precision = 3; // used by DECIMAL - optional int32 scale = 4; // used by DECIMAL - optional DateUnit dateUnit = 5; // used by DATE32/DATE64 - optional TimeUnit timeUnit = 6; // used by TIME32/TIME64 - optional string timeZone = 7; // used by TIMESTAMP - optional IntervalType intervalType = 8; // used by INTERVAL -} - -message Field { - // name of the field - optional string name = 1; - optional ExtGandivaType type = 2; - optional bool nullable = 3; - // for complex data types like structs, unions - repeated Field children = 4; -} - -message FieldNode { - optional Field field = 1; -} - -message FunctionNode { - optional string functionName = 1; - repeated TreeNode inArgs = 2; - optional ExtGandivaType returnType = 3; -} - -message IfNode { - optional TreeNode cond = 1; - optional TreeNode thenNode = 2; - optional TreeNode 
elseNode = 3; - optional ExtGandivaType returnType = 4; -} - -message AndNode { - repeated TreeNode args = 1; -} - -message OrNode { - repeated TreeNode args = 1; -} - -message NullNode { - optional ExtGandivaType type = 1; -} - -message IntNode { - optional int32 value = 1; -} - -message FloatNode { - optional float value = 1; -} - -message DoubleNode { - optional double value = 1; -} - -message BooleanNode { - optional bool value = 1; -} - -message LongNode { - optional int64 value = 1; -} - -message StringNode { - optional bytes value = 1; -} - -message BinaryNode { - optional bytes value = 1; -} - -message DecimalNode { - optional string value = 1; - optional int32 precision = 2; - optional int32 scale = 3; -} - - -message TreeNode { - optional FieldNode fieldNode = 1; - optional FunctionNode fnNode = 2; - - // control expressions - optional IfNode ifNode = 6; - optional AndNode andNode = 7; - optional OrNode orNode = 8; - - // literals - optional NullNode nullNode = 11; - optional IntNode intNode = 12; - optional FloatNode floatNode = 13; - optional LongNode longNode = 14; - optional BooleanNode booleanNode = 15; - optional DoubleNode doubleNode = 16; - optional StringNode stringNode = 17; - optional BinaryNode binaryNode = 18; - optional DecimalNode decimalNode = 19; - - // in expr - optional InNode inNode = 21; -} - -message ExpressionRoot { - optional TreeNode root = 1; - optional Field resultType = 2; -} - -message ExpressionList { - repeated ExpressionRoot exprs = 2; -} - -message Condition { - optional TreeNode root = 1; -} - -message Schema { - repeated Field columns = 1; -} - -message GandivaDataTypes { - repeated ExtGandivaType dataType = 1; -} - -message GandivaFunctions { - repeated FunctionSignature function = 1; -} - -message FunctionSignature { - optional string name = 1; - optional ExtGandivaType returnType = 2; - repeated ExtGandivaType paramTypes = 3; -} - -message InNode { - optional TreeNode node = 1; - optional IntConstants intValues = 2; - 
optional LongConstants longValues = 3; - optional StringConstants stringValues = 4; - optional BinaryConstants binaryValues = 5; - optional DecimalConstants decimalValues = 6; - optional FloatConstants floatValues = 7; - optional DoubleConstants doubleValues = 8; -} - -message IntConstants { - repeated IntNode intValues = 1; -} - -message LongConstants { - repeated LongNode longValues = 1; -} - -message DecimalConstants { - repeated DecimalNode decimalValues = 1; -} - -message FloatConstants { - repeated FloatNode floatValues = 1; -} - -message DoubleConstants { - repeated DoubleNode doubleValues = 1; -} - -message StringConstants { - repeated StringNode stringValues = 1; -} - -message BinaryConstants { - repeated BinaryNode binaryValues = 1; -} diff --git a/java/gandiva/src/main/cpp/config_builder.cc b/java/gandiva/src/main/cpp/config_builder.cc deleted file mode 100644 index 85c661ee94330..0000000000000 --- a/java/gandiva/src/main/cpp/config_builder.cc +++ /dev/null @@ -1,54 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include - -#include - -#include "config_holder.h" -#include "env_helper.h" -#include "org_apache_arrow_gandiva_evaluator_ConfigurationBuilder.h" - -using gandiva::ConfigHolder; -using gandiva::Configuration; -using gandiva::ConfigurationBuilder; - -/* - * Class: org_apache_arrow_gandiva_evaluator_ConfigBuilder - * Method: buildConfigInstance - * Signature: (ZZ)J - */ -JNIEXPORT jlong JNICALL -Java_org_apache_arrow_gandiva_evaluator_ConfigurationBuilder_buildConfigInstance( - JNIEnv* env, jobject configuration, jboolean optimize, jboolean target_host_cpu) { - ConfigurationBuilder configuration_builder; - std::shared_ptr config = configuration_builder.build(); - config->set_optimize(optimize); - config->target_host_cpu(target_host_cpu); - return ConfigHolder::MapInsert(config); -} - -/* - * Class: org_apache_arrow_gandiva_evaluator_ConfigBuilder - * Method: releaseConfigInstance - * Signature: (J)V - */ -JNIEXPORT void JNICALL -Java_org_apache_arrow_gandiva_evaluator_ConfigurationBuilder_releaseConfigInstance( - JNIEnv* env, jobject configuration, jlong config_id) { - ConfigHolder::MapErase(config_id); -} diff --git a/java/gandiva/src/main/cpp/config_holder.h b/java/gandiva/src/main/cpp/config_holder.h deleted file mode 100644 index ae031495ab2e6..0000000000000 --- a/java/gandiva/src/main/cpp/config_holder.h +++ /dev/null @@ -1,68 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include - -#include - -namespace gandiva { - -class ConfigHolder { - public: - static int64_t MapInsert(std::shared_ptr config) { - g_mtx_.lock(); - - int64_t result = config_id_++; - configuration_map_.insert( - std::pair>(result, config)); - - g_mtx_.unlock(); - return result; - } - - static void MapErase(int64_t config_id_) { - g_mtx_.lock(); - configuration_map_.erase(config_id_); - g_mtx_.unlock(); - } - - static std::shared_ptr MapLookup(int64_t config_id_) { - std::shared_ptr result = nullptr; - - try { - result = configuration_map_.at(config_id_); - } catch (const std::out_of_range&) { - } - - return result; - } - - private: - // map of configuration objects created so far - static std::unordered_map> configuration_map_; - - static std::mutex g_mtx_; - - // atomic counter for projector module ids - static int64_t config_id_; -}; -} // namespace gandiva diff --git a/java/gandiva/src/main/cpp/expression_registry_helper.cc b/java/gandiva/src/main/cpp/expression_registry_helper.cc deleted file mode 100644 index 66b97c8b9ef44..0000000000000 --- a/java/gandiva/src/main/cpp/expression_registry_helper.cc +++ /dev/null @@ -1,189 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include -#include -#include -#include -#include - -using gandiva::DataTypePtr; -using gandiva::ExpressionRegistry; - -gandiva::types::TimeUnit MapTimeUnit(arrow::TimeUnit::type& unit) { - switch (unit) { - case arrow::TimeUnit::MILLI: - return gandiva::types::TimeUnit::MILLISEC; - case arrow::TimeUnit::SECOND: - return gandiva::types::TimeUnit::SEC; - case arrow::TimeUnit::MICRO: - return gandiva::types::TimeUnit::MICROSEC; - case arrow::TimeUnit::NANO: - return gandiva::types::TimeUnit::NANOSEC; - } - // satisfy gcc. should be unreachable. 
- return gandiva::types::TimeUnit::SEC; -} - -void ArrowToProtobuf(DataTypePtr type, gandiva::types::ExtGandivaType* gandiva_data_type) { - switch (type->id()) { - case arrow::Type::BOOL: - gandiva_data_type->set_type(gandiva::types::GandivaType::BOOL); - break; - case arrow::Type::UINT8: - gandiva_data_type->set_type(gandiva::types::GandivaType::UINT8); - break; - case arrow::Type::INT8: - gandiva_data_type->set_type(gandiva::types::GandivaType::INT8); - break; - case arrow::Type::UINT16: - gandiva_data_type->set_type(gandiva::types::GandivaType::UINT16); - break; - case arrow::Type::INT16: - gandiva_data_type->set_type(gandiva::types::GandivaType::INT16); - break; - case arrow::Type::UINT32: - gandiva_data_type->set_type(gandiva::types::GandivaType::UINT32); - break; - case arrow::Type::INT32: - gandiva_data_type->set_type(gandiva::types::GandivaType::INT32); - break; - case arrow::Type::UINT64: - gandiva_data_type->set_type(gandiva::types::GandivaType::UINT64); - break; - case arrow::Type::INT64: - gandiva_data_type->set_type(gandiva::types::GandivaType::INT64); - break; - case arrow::Type::HALF_FLOAT: - gandiva_data_type->set_type(gandiva::types::GandivaType::HALF_FLOAT); - break; - case arrow::Type::FLOAT: - gandiva_data_type->set_type(gandiva::types::GandivaType::FLOAT); - break; - case arrow::Type::DOUBLE: - gandiva_data_type->set_type(gandiva::types::GandivaType::DOUBLE); - break; - case arrow::Type::STRING: - gandiva_data_type->set_type(gandiva::types::GandivaType::UTF8); - break; - case arrow::Type::BINARY: - gandiva_data_type->set_type(gandiva::types::GandivaType::BINARY); - break; - case arrow::Type::DATE32: - gandiva_data_type->set_type(gandiva::types::GandivaType::DATE32); - break; - case arrow::Type::DATE64: - gandiva_data_type->set_type(gandiva::types::GandivaType::DATE64); - break; - case arrow::Type::TIMESTAMP: { - gandiva_data_type->set_type(gandiva::types::GandivaType::TIMESTAMP); - std::shared_ptr cast_time_stamp_type = - 
std::dynamic_pointer_cast(type); - arrow::TimeUnit::type unit = cast_time_stamp_type->unit(); - gandiva::types::TimeUnit time_unit = MapTimeUnit(unit); - gandiva_data_type->set_timeunit(time_unit); - break; - } - case arrow::Type::TIME32: { - gandiva_data_type->set_type(gandiva::types::GandivaType::TIME32); - std::shared_ptr cast_time_32_type = - std::dynamic_pointer_cast(type); - arrow::TimeUnit::type unit = cast_time_32_type->unit(); - gandiva::types::TimeUnit time_unit = MapTimeUnit(unit); - gandiva_data_type->set_timeunit(time_unit); - break; - } - case arrow::Type::TIME64: { - gandiva_data_type->set_type(gandiva::types::GandivaType::TIME32); - std::shared_ptr cast_time_64_type = - std::dynamic_pointer_cast(type); - arrow::TimeUnit::type unit = cast_time_64_type->unit(); - gandiva::types::TimeUnit time_unit = MapTimeUnit(unit); - gandiva_data_type->set_timeunit(time_unit); - break; - } - case arrow::Type::NA: - gandiva_data_type->set_type(gandiva::types::GandivaType::NONE); - break; - case arrow::Type::DECIMAL: { - gandiva_data_type->set_type(gandiva::types::GandivaType::DECIMAL); - gandiva_data_type->set_precision(0); - gandiva_data_type->set_scale(0); - break; - } - case arrow::Type::INTERVAL_MONTHS: - gandiva_data_type->set_type(gandiva::types::GandivaType::INTERVAL); - gandiva_data_type->set_intervaltype(gandiva::types::IntervalType::YEAR_MONTH); - break; - case arrow::Type::INTERVAL_DAY_TIME: - gandiva_data_type->set_type(gandiva::types::GandivaType::INTERVAL); - gandiva_data_type->set_intervaltype(gandiva::types::IntervalType::DAY_TIME); - break; - default: - // un-supported types. test ensures that - // when one of these are added build breaks. 
- DCHECK(false); - } -} - -JNIEXPORT jbyteArray JNICALL -Java_org_apache_arrow_gandiva_evaluator_ExpressionRegistryJniHelper_getGandivaSupportedDataTypes( // NOLINT - JNIEnv* env, jobject types_helper) { - gandiva::types::GandivaDataTypes gandiva_data_types; - auto supported_types = ExpressionRegistry::supported_types(); - for (auto const& type : supported_types) { - gandiva::types::ExtGandivaType* gandiva_data_type = gandiva_data_types.add_datatype(); - ArrowToProtobuf(type, gandiva_data_type); - } - auto size = static_cast(gandiva_data_types.ByteSizeLong()); - std::unique_ptr buffer{new jbyte[size]}; - gandiva_data_types.SerializeToArray(reinterpret_cast(buffer.get()), size); - jbyteArray ret = env->NewByteArray(size); - env->SetByteArrayRegion(ret, 0, size, buffer.get()); - return ret; -} - -/* - * Class: org_apache_arrow_gandiva_types_ExpressionRegistryJniHelper - * Method: getGandivaSupportedFunctions - * Signature: ()[B - */ -JNIEXPORT jbyteArray JNICALL -Java_org_apache_arrow_gandiva_evaluator_ExpressionRegistryJniHelper_getGandivaSupportedFunctions( // NOLINT - JNIEnv* env, jobject types_helper) { - ExpressionRegistry expr_registry; - gandiva::types::GandivaFunctions gandiva_functions; - for (auto function = expr_registry.function_signature_begin(); - function != expr_registry.function_signature_end(); function++) { - gandiva::types::FunctionSignature* function_signature = gandiva_functions.add_function(); - function_signature->set_name((*function).base_name()); - gandiva::types::ExtGandivaType* return_type = function_signature->mutable_returntype(); - ArrowToProtobuf((*function).ret_type(), return_type); - for (auto& param_type : (*function).param_types()) { - gandiva::types::ExtGandivaType* proto_param_type = function_signature->add_paramtypes(); - ArrowToProtobuf(param_type, proto_param_type); - } - } - auto size = static_cast(gandiva_functions.ByteSizeLong()); - std::unique_ptr buffer{new jbyte[size]}; - 
gandiva_functions.SerializeToArray(reinterpret_cast(buffer.get()), size); - jbyteArray ret = env->NewByteArray(size); - env->SetByteArrayRegion(ret, 0, size, buffer.get()); - return ret; -} diff --git a/java/gandiva/src/main/cpp/id_to_module_map.h b/java/gandiva/src/main/cpp/id_to_module_map.h deleted file mode 100644 index 98100955b5bea..0000000000000 --- a/java/gandiva/src/main/cpp/id_to_module_map.h +++ /dev/null @@ -1,66 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include -#include -#include -#include - -namespace gandiva { - -template -class IdToModuleMap { - public: - IdToModuleMap() : module_id_(kInitModuleId) {} - - jlong Insert(HOLDER holder) { - mtx_.lock(); - jlong result = module_id_++; - map_.insert(std::pair(result, holder)); - mtx_.unlock(); - return result; - } - - void Erase(jlong module_id) { - mtx_.lock(); - map_.erase(module_id); - mtx_.unlock(); - } - - HOLDER Lookup(jlong module_id) { - HOLDER result = nullptr; - mtx_.lock(); - try { - result = map_.at(module_id); - } catch (const std::out_of_range&) { - } - mtx_.unlock(); - return result; - } - - private: - static const int kInitModuleId = 4; - - int64_t module_id_; - std::mutex mtx_; - // map from module ids returned to Java and module pointers - std::unordered_map map_; -}; - -} // namespace gandiva diff --git a/java/gandiva/src/main/cpp/jni_common.cc b/java/gandiva/src/main/cpp/jni_common.cc deleted file mode 100644 index ec1bb7623413a..0000000000000 --- a/java/gandiva/src/main/cpp/jni_common.cc +++ /dev/null @@ -1,1059 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "config_holder.h" -#include "env_helper.h" -#include "id_to_module_map.h" -#include "module_holder.h" - -using gandiva::ConditionPtr; -using gandiva::DataTypePtr; -using gandiva::ExpressionPtr; -using gandiva::ExpressionVector; -using gandiva::FieldPtr; -using gandiva::FieldVector; -using gandiva::Filter; -using gandiva::NodePtr; -using gandiva::NodeVector; -using gandiva::Projector; -using gandiva::SchemaPtr; -using gandiva::Status; -using gandiva::TreeExprBuilder; - -using gandiva::ArrayDataVector; -using gandiva::ConfigHolder; -using gandiva::Configuration; -using gandiva::ConfigurationBuilder; -using gandiva::FilterHolder; -using gandiva::ProjectorHolder; - -// forward declarations -NodePtr ProtoTypeToNode(const gandiva::types::TreeNode& node); - -static jint JNI_VERSION = JNI_VERSION_10; - -// extern refs - initialized for other modules. -jclass configuration_builder_class_; - -// refs for self. 
-static jclass gandiva_exception_; -static jclass vector_expander_class_; -static jclass vector_expander_ret_class_; -static jmethodID vector_expander_method_; -static jfieldID vector_expander_ret_address_; -static jfieldID vector_expander_ret_capacity_; - -// module maps -gandiva::IdToModuleMap> projector_modules_; -gandiva::IdToModuleMap> filter_modules_; - -jint JNI_OnLoad(JavaVM* vm, void* reserved) { - JNIEnv* env; - if (vm->GetEnv(reinterpret_cast(&env), JNI_VERSION) != JNI_OK) { - return JNI_ERR; - } - jclass local_configuration_builder_class_ = - env->FindClass("org/apache/arrow/gandiva/evaluator/ConfigurationBuilder"); - configuration_builder_class_ = - (jclass)env->NewGlobalRef(local_configuration_builder_class_); - env->DeleteLocalRef(local_configuration_builder_class_); - - jclass localExceptionClass = - env->FindClass("org/apache/arrow/gandiva/exceptions/GandivaException"); - gandiva_exception_ = (jclass)env->NewGlobalRef(localExceptionClass); - env->ExceptionDescribe(); - env->DeleteLocalRef(localExceptionClass); - - jclass local_expander_class = - env->FindClass("org/apache/arrow/gandiva/evaluator/VectorExpander"); - vector_expander_class_ = (jclass)env->NewGlobalRef(local_expander_class); - env->DeleteLocalRef(local_expander_class); - - vector_expander_method_ = env->GetMethodID( - vector_expander_class_, "expandOutputVectorAtIndex", - "(IJ)Lorg/apache/arrow/gandiva/evaluator/VectorExpander$ExpandResult;"); - - jclass local_expander_ret_class = - env->FindClass("org/apache/arrow/gandiva/evaluator/VectorExpander$ExpandResult"); - vector_expander_ret_class_ = (jclass)env->NewGlobalRef(local_expander_ret_class); - env->DeleteLocalRef(local_expander_ret_class); - - vector_expander_ret_address_ = - env->GetFieldID(vector_expander_ret_class_, "address", "J"); - vector_expander_ret_capacity_ = - env->GetFieldID(vector_expander_ret_class_, "capacity", "J"); - return JNI_VERSION; -} - -void JNI_OnUnload(JavaVM* vm, void* reserved) { - JNIEnv* env; - 
vm->GetEnv(reinterpret_cast(&env), JNI_VERSION); - env->DeleteGlobalRef(configuration_builder_class_); - env->DeleteGlobalRef(gandiva_exception_); - env->DeleteGlobalRef(vector_expander_class_); - env->DeleteGlobalRef(vector_expander_ret_class_); -} - -DataTypePtr ProtoTypeToTime32(const gandiva::types::ExtGandivaType& ext_type) { - switch (ext_type.timeunit()) { - case gandiva::types::SEC: - return arrow::time32(arrow::TimeUnit::SECOND); - case gandiva::types::MILLISEC: - return arrow::time32(arrow::TimeUnit::MILLI); - default: - std::cerr << "Unknown time unit: " << ext_type.timeunit() << " for time32\n"; - return nullptr; - } -} - -DataTypePtr ProtoTypeToTime64(const gandiva::types::ExtGandivaType& ext_type) { - switch (ext_type.timeunit()) { - case gandiva::types::MICROSEC: - return arrow::time64(arrow::TimeUnit::MICRO); - case gandiva::types::NANOSEC: - return arrow::time64(arrow::TimeUnit::NANO); - default: - std::cerr << "Unknown time unit: " << ext_type.timeunit() << " for time64\n"; - return nullptr; - } -} - -DataTypePtr ProtoTypeToTimestamp(const gandiva::types::ExtGandivaType& ext_type) { - switch (ext_type.timeunit()) { - case gandiva::types::SEC: - return arrow::timestamp(arrow::TimeUnit::SECOND); - case gandiva::types::MILLISEC: - return arrow::timestamp(arrow::TimeUnit::MILLI); - case gandiva::types::MICROSEC: - return arrow::timestamp(arrow::TimeUnit::MICRO); - case gandiva::types::NANOSEC: - return arrow::timestamp(arrow::TimeUnit::NANO); - default: - std::cerr << "Unknown time unit: " << ext_type.timeunit() << " for timestamp\n"; - return nullptr; - } -} - -DataTypePtr ProtoTypeToInterval(const gandiva::types::ExtGandivaType& ext_type) { - switch (ext_type.intervaltype()) { - case gandiva::types::YEAR_MONTH: - return arrow::month_interval(); - case gandiva::types::DAY_TIME: - return arrow::day_time_interval(); - default: - std::cerr << "Unknown interval type: " << ext_type.intervaltype() << "\n"; - return nullptr; - } -} - -DataTypePtr 
ProtoTypeToDataType(const gandiva::types::ExtGandivaType& ext_type) { - switch (ext_type.type()) { - case gandiva::types::NONE: - return arrow::null(); - case gandiva::types::BOOL: - return arrow::boolean(); - case gandiva::types::UINT8: - return arrow::uint8(); - case gandiva::types::INT8: - return arrow::int8(); - case gandiva::types::UINT16: - return arrow::uint16(); - case gandiva::types::INT16: - return arrow::int16(); - case gandiva::types::UINT32: - return arrow::uint32(); - case gandiva::types::INT32: - return arrow::int32(); - case gandiva::types::UINT64: - return arrow::uint64(); - case gandiva::types::INT64: - return arrow::int64(); - case gandiva::types::HALF_FLOAT: - return arrow::float16(); - case gandiva::types::FLOAT: - return arrow::float32(); - case gandiva::types::DOUBLE: - return arrow::float64(); - case gandiva::types::UTF8: - return arrow::utf8(); - case gandiva::types::BINARY: - return arrow::binary(); - case gandiva::types::DATE32: - return arrow::date32(); - case gandiva::types::DATE64: - return arrow::date64(); - case gandiva::types::DECIMAL: - // TODO: error handling - return arrow::decimal(ext_type.precision(), ext_type.scale()); - case gandiva::types::TIME32: - return ProtoTypeToTime32(ext_type); - case gandiva::types::TIME64: - return ProtoTypeToTime64(ext_type); - case gandiva::types::TIMESTAMP: - return ProtoTypeToTimestamp(ext_type); - case gandiva::types::INTERVAL: - return ProtoTypeToInterval(ext_type); - case gandiva::types::FIXED_SIZE_BINARY: - case gandiva::types::LIST: - case gandiva::types::STRUCT: - case gandiva::types::UNION: - case gandiva::types::DICTIONARY: - case gandiva::types::MAP: - std::cerr << "Unhandled data type: " << ext_type.type() << "\n"; - return nullptr; - - default: - std::cerr << "Unknown data type: " << ext_type.type() << "\n"; - return nullptr; - } -} - -FieldPtr ProtoTypeToField(const gandiva::types::Field& f) { - const std::string& name = f.name(); - DataTypePtr type = ProtoTypeToDataType(f.type()); - 
bool nullable = true; - if (f.has_nullable()) { - nullable = f.nullable(); - } - - return field(name, type, nullable); -} - -NodePtr ProtoTypeToFieldNode(const gandiva::types::FieldNode& node) { - FieldPtr field_ptr = ProtoTypeToField(node.field()); - if (field_ptr == nullptr) { - std::cerr << "Unable to create field node from protobuf\n"; - return nullptr; - } - - return TreeExprBuilder::MakeField(field_ptr); -} - -NodePtr ProtoTypeToFnNode(const gandiva::types::FunctionNode& node) { - const std::string& name = node.functionname(); - NodeVector children; - - for (int i = 0; i < node.inargs_size(); i++) { - const gandiva::types::TreeNode& arg = node.inargs(i); - - NodePtr n = ProtoTypeToNode(arg); - if (n == nullptr) { - std::cerr << "Unable to create argument for function: " << name << "\n"; - return nullptr; - } - - children.push_back(n); - } - - DataTypePtr return_type = ProtoTypeToDataType(node.returntype()); - if (return_type == nullptr) { - std::cerr << "Unknown return type for function: " << name << "\n"; - return nullptr; - } - - return TreeExprBuilder::MakeFunction(name, children, return_type); -} - -NodePtr ProtoTypeToIfNode(const gandiva::types::IfNode& node) { - NodePtr cond = ProtoTypeToNode(node.cond()); - if (cond == nullptr) { - std::cerr << "Unable to create cond node for if node\n"; - return nullptr; - } - - NodePtr then_node = ProtoTypeToNode(node.thennode()); - if (then_node == nullptr) { - std::cerr << "Unable to create then node for if node\n"; - return nullptr; - } - - NodePtr else_node = ProtoTypeToNode(node.elsenode()); - if (else_node == nullptr) { - std::cerr << "Unable to create else node for if node\n"; - return nullptr; - } - - DataTypePtr return_type = ProtoTypeToDataType(node.returntype()); - if (return_type == nullptr) { - std::cerr << "Unknown return type for if node\n"; - return nullptr; - } - - return TreeExprBuilder::MakeIf(cond, then_node, else_node, return_type); -} - -NodePtr ProtoTypeToAndNode(const gandiva::types::AndNode& 
node) { - NodeVector children; - - for (int i = 0; i < node.args_size(); i++) { - const gandiva::types::TreeNode& arg = node.args(i); - - NodePtr n = ProtoTypeToNode(arg); - if (n == nullptr) { - std::cerr << "Unable to create argument for boolean and\n"; - return nullptr; - } - children.push_back(n); - } - return TreeExprBuilder::MakeAnd(children); -} - -NodePtr ProtoTypeToOrNode(const gandiva::types::OrNode& node) { - NodeVector children; - - for (int i = 0; i < node.args_size(); i++) { - const gandiva::types::TreeNode& arg = node.args(i); - - NodePtr n = ProtoTypeToNode(arg); - if (n == nullptr) { - std::cerr << "Unable to create argument for boolean or\n"; - return nullptr; - } - children.push_back(n); - } - return TreeExprBuilder::MakeOr(children); -} - -NodePtr ProtoTypeToInNode(const gandiva::types::InNode& node) { - NodePtr field = ProtoTypeToNode(node.node()); - - if (node.has_intvalues()) { - std::unordered_set int_values; - for (int i = 0; i < node.intvalues().intvalues_size(); i++) { - int_values.insert(node.intvalues().intvalues(i).value()); - } - return TreeExprBuilder::MakeInExpressionInt32(field, int_values); - } - - if (node.has_longvalues()) { - std::unordered_set long_values; - for (int i = 0; i < node.longvalues().longvalues_size(); i++) { - long_values.insert(node.longvalues().longvalues(i).value()); - } - return TreeExprBuilder::MakeInExpressionInt64(field, long_values); - } - - if (node.has_decimalvalues()) { - std::unordered_set decimal_values; - for (int i = 0; i < node.decimalvalues().decimalvalues_size(); i++) { - decimal_values.insert( - gandiva::DecimalScalar128(node.decimalvalues().decimalvalues(i).value(), - node.decimalvalues().decimalvalues(i).precision(), - node.decimalvalues().decimalvalues(i).scale())); - } - return TreeExprBuilder::MakeInExpressionDecimal(field, decimal_values); - } - - if (node.has_floatvalues()) { - std::unordered_set float_values; - for (int i = 0; i < node.floatvalues().floatvalues_size(); i++) { - 
float_values.insert(node.floatvalues().floatvalues(i).value()); - } - return TreeExprBuilder::MakeInExpressionFloat(field, float_values); - } - - if (node.has_doublevalues()) { - std::unordered_set double_values; - for (int i = 0; i < node.doublevalues().doublevalues_size(); i++) { - double_values.insert(node.doublevalues().doublevalues(i).value()); - } - return TreeExprBuilder::MakeInExpressionDouble(field, double_values); - } - - if (node.has_stringvalues()) { - std::unordered_set stringvalues; - for (int i = 0; i < node.stringvalues().stringvalues_size(); i++) { - stringvalues.insert(node.stringvalues().stringvalues(i).value()); - } - return TreeExprBuilder::MakeInExpressionString(field, stringvalues); - } - - if (node.has_binaryvalues()) { - std::unordered_set stringvalues; - for (int i = 0; i < node.binaryvalues().binaryvalues_size(); i++) { - stringvalues.insert(node.binaryvalues().binaryvalues(i).value()); - } - return TreeExprBuilder::MakeInExpressionBinary(field, stringvalues); - } - // not supported yet. 
- std::cerr << "Unknown constant type for in expression.\n"; - return nullptr; -} - -NodePtr ProtoTypeToNullNode(const gandiva::types::NullNode& node) { - DataTypePtr data_type = ProtoTypeToDataType(node.type()); - if (data_type == nullptr) { - std::cerr << "Unknown type " << data_type->ToString() << " for null node\n"; - return nullptr; - } - - return TreeExprBuilder::MakeNull(data_type); -} - -NodePtr ProtoTypeToNode(const gandiva::types::TreeNode& node) { - if (node.has_fieldnode()) { - return ProtoTypeToFieldNode(node.fieldnode()); - } - - if (node.has_fnnode()) { - return ProtoTypeToFnNode(node.fnnode()); - } - - if (node.has_ifnode()) { - return ProtoTypeToIfNode(node.ifnode()); - } - - if (node.has_andnode()) { - return ProtoTypeToAndNode(node.andnode()); - } - - if (node.has_ornode()) { - return ProtoTypeToOrNode(node.ornode()); - } - - if (node.has_innode()) { - return ProtoTypeToInNode(node.innode()); - } - - if (node.has_nullnode()) { - return ProtoTypeToNullNode(node.nullnode()); - } - - if (node.has_intnode()) { - return TreeExprBuilder::MakeLiteral(node.intnode().value()); - } - - if (node.has_floatnode()) { - return TreeExprBuilder::MakeLiteral(node.floatnode().value()); - } - - if (node.has_longnode()) { - return TreeExprBuilder::MakeLiteral(node.longnode().value()); - } - - if (node.has_booleannode()) { - return TreeExprBuilder::MakeLiteral(node.booleannode().value()); - } - - if (node.has_doublenode()) { - return TreeExprBuilder::MakeLiteral(node.doublenode().value()); - } - - if (node.has_stringnode()) { - return TreeExprBuilder::MakeStringLiteral(node.stringnode().value()); - } - - if (node.has_binarynode()) { - return TreeExprBuilder::MakeBinaryLiteral(node.binarynode().value()); - } - - if (node.has_decimalnode()) { - std::string value = node.decimalnode().value(); - gandiva::DecimalScalar128 literal(value, node.decimalnode().precision(), - node.decimalnode().scale()); - return TreeExprBuilder::MakeDecimalLiteral(literal); - } - std::cerr << 
"Unknown node type in protobuf\n"; - return nullptr; -} - -ExpressionPtr ProtoTypeToExpression(const gandiva::types::ExpressionRoot& root) { - NodePtr root_node = ProtoTypeToNode(root.root()); - if (root_node == nullptr) { - std::cerr << "Unable to create expression node from expression protobuf\n"; - return nullptr; - } - - FieldPtr field = ProtoTypeToField(root.resulttype()); - if (field == nullptr) { - std::cerr << "Unable to extra return field from expression protobuf\n"; - return nullptr; - } - - return TreeExprBuilder::MakeExpression(root_node, field); -} - -ConditionPtr ProtoTypeToCondition(const gandiva::types::Condition& condition) { - NodePtr root_node = ProtoTypeToNode(condition.root()); - if (root_node == nullptr) { - return nullptr; - } - - return TreeExprBuilder::MakeCondition(root_node); -} - -SchemaPtr ProtoTypeToSchema(const gandiva::types::Schema& schema) { - std::vector fields; - - for (int i = 0; i < schema.columns_size(); i++) { - FieldPtr field = ProtoTypeToField(schema.columns(i)); - if (field == nullptr) { - std::cerr << "Unable to extract arrow field from schema\n"; - return nullptr; - } - - fields.push_back(field); - } - - return arrow::schema(fields); -} - -// Common for both projector and filters. 
- -bool ParseProtobuf(uint8_t* buf, int bufLen, google::protobuf::Message* msg) { - google::protobuf::io::CodedInputStream cis(buf, bufLen); - cis.SetRecursionLimit(2000); - return msg->ParseFromCodedStream(&cis); -} - -Status make_record_batch_with_buf_addrs(SchemaPtr schema, int num_rows, - jlong* in_buf_addrs, jlong* in_buf_sizes, - int in_bufs_len, - std::shared_ptr* batch) { - std::vector> columns; - auto num_fields = schema->num_fields(); - int buf_idx = 0; - int sz_idx = 0; - - for (int i = 0; i < num_fields; i++) { - auto field = schema->field(i); - std::vector> buffers; - - if (buf_idx >= in_bufs_len) { - return Status::Invalid("insufficient number of in_buf_addrs"); - } - jlong validity_addr = in_buf_addrs[buf_idx++]; - jlong validity_size = in_buf_sizes[sz_idx++]; - auto validity = std::shared_ptr( - new arrow::Buffer(reinterpret_cast(validity_addr), validity_size)); - buffers.push_back(validity); - - if (buf_idx >= in_bufs_len) { - return Status::Invalid("insufficient number of in_buf_addrs"); - } - jlong value_addr = in_buf_addrs[buf_idx++]; - jlong value_size = in_buf_sizes[sz_idx++]; - auto data = std::shared_ptr( - new arrow::Buffer(reinterpret_cast(value_addr), value_size)); - buffers.push_back(data); - - if (arrow::is_binary_like(field->type()->id())) { - if (buf_idx >= in_bufs_len) { - return Status::Invalid("insufficient number of in_buf_addrs"); - } - - // add offsets buffer for variable-len fields. - jlong offsets_addr = in_buf_addrs[buf_idx++]; - jlong offsets_size = in_buf_sizes[sz_idx++]; - auto offsets = std::shared_ptr( - new arrow::Buffer(reinterpret_cast(offsets_addr), offsets_size)); - buffers.push_back(offsets); - } - - auto array_data = arrow::ArrayData::Make(field->type(), num_rows, std::move(buffers)); - columns.push_back(array_data); - } - *batch = arrow::RecordBatch::Make(schema, num_rows, columns); - return Status::OK(); -} - -// projector related functions. 
-void releaseProjectorInput(jbyteArray schema_arr, jbyte* schema_bytes, - jbyteArray exprs_arr, jbyte* exprs_bytes, JNIEnv* env) { - env->ReleaseByteArrayElements(schema_arr, schema_bytes, JNI_ABORT); - env->ReleaseByteArrayElements(exprs_arr, exprs_bytes, JNI_ABORT); -} - -JNIEXPORT jlong JNICALL Java_org_apache_arrow_gandiva_evaluator_JniWrapper_buildProjector( - JNIEnv* env, jobject obj, jbyteArray schema_arr, jbyteArray exprs_arr, - jint selection_vector_type, jlong configuration_id) { - jlong module_id = 0LL; - std::shared_ptr projector; - std::shared_ptr holder; - - gandiva::types::Schema schema; - jsize schema_len = env->GetArrayLength(schema_arr); - jbyte* schema_bytes = env->GetByteArrayElements(schema_arr, 0); - - gandiva::types::ExpressionList exprs; - jsize exprs_len = env->GetArrayLength(exprs_arr); - jbyte* exprs_bytes = env->GetByteArrayElements(exprs_arr, 0); - - ExpressionVector expr_vector; - SchemaPtr schema_ptr; - FieldVector ret_types; - gandiva::Status status; - auto mode = gandiva::SelectionVector::MODE_NONE; - - std::shared_ptr config = ConfigHolder::MapLookup(configuration_id); - std::stringstream ss; - - if (config == nullptr) { - ss << "configuration is mandatory."; - releaseProjectorInput(schema_arr, schema_bytes, exprs_arr, exprs_bytes, env); - goto err_out; - } - - if (!ParseProtobuf(reinterpret_cast(schema_bytes), schema_len, &schema)) { - ss << "Unable to parse schema protobuf\n"; - releaseProjectorInput(schema_arr, schema_bytes, exprs_arr, exprs_bytes, env); - goto err_out; - } - - if (!ParseProtobuf(reinterpret_cast(exprs_bytes), exprs_len, &exprs)) { - releaseProjectorInput(schema_arr, schema_bytes, exprs_arr, exprs_bytes, env); - ss << "Unable to parse expressions protobuf\n"; - goto err_out; - } - - // convert gandiva::types::Schema to arrow::Schema - schema_ptr = ProtoTypeToSchema(schema); - if (schema_ptr == nullptr) { - ss << "Unable to construct arrow schema object from schema protobuf\n"; - releaseProjectorInput(schema_arr, 
schema_bytes, exprs_arr, exprs_bytes, env); - goto err_out; - } - - // create Expression out of the list of exprs - for (int i = 0; i < exprs.exprs_size(); i++) { - ExpressionPtr root = ProtoTypeToExpression(exprs.exprs(i)); - - if (root == nullptr) { - ss << "Unable to construct expression object from expression protobuf\n"; - releaseProjectorInput(schema_arr, schema_bytes, exprs_arr, exprs_bytes, env); - goto err_out; - } - - expr_vector.push_back(root); - ret_types.push_back(root->result()); - } - - switch (selection_vector_type) { - case gandiva::types::SV_NONE: - mode = gandiva::SelectionVector::MODE_NONE; - break; - case gandiva::types::SV_INT16: - mode = gandiva::SelectionVector::MODE_UINT16; - break; - case gandiva::types::SV_INT32: - mode = gandiva::SelectionVector::MODE_UINT32; - break; - } - // good to invoke the evaluator now - status = Projector::Make(schema_ptr, expr_vector, mode, config, &projector); - - if (!status.ok()) { - ss << "Failed to make LLVM module due to " << status.message() << "\n"; - releaseProjectorInput(schema_arr, schema_bytes, exprs_arr, exprs_bytes, env); - goto err_out; - } - - // store the result in a map - holder = std::shared_ptr( - new ProjectorHolder(schema_ptr, ret_types, std::move(projector))); - module_id = projector_modules_.Insert(holder); - releaseProjectorInput(schema_arr, schema_bytes, exprs_arr, exprs_bytes, env); - return module_id; - -err_out: - env->ThrowNew(gandiva_exception_, ss.str().c_str()); - return module_id; -} - -/// -/// \brief Resizable buffer which resizes by doing a callback into java. 
-/// -class JavaResizableBuffer : public arrow::ResizableBuffer { - public: - JavaResizableBuffer(JNIEnv* env, jobject jexpander, int32_t vector_idx, uint8_t* buffer, - int32_t len) - : ResizableBuffer(buffer, len), - env_(env), - jexpander_(jexpander), - vector_idx_(vector_idx) { - size_ = 0; - } - - Status Resize(const int64_t new_size, bool shrink_to_fit) override; - - Status Reserve(const int64_t new_capacity) override; - - private: - JNIEnv* env_; - jobject jexpander_; - int32_t vector_idx_; -}; - -Status JavaResizableBuffer::Reserve(const int64_t new_capacity) { - // callback into java to expand the buffer - jobject ret = env_->CallObjectMethod(jexpander_, vector_expander_method_, vector_idx_, - new_capacity); - if (env_->ExceptionCheck()) { - env_->ExceptionDescribe(); - env_->ExceptionClear(); - return Status::OutOfMemory("buffer expand failed in java"); - } - - jlong ret_address = env_->GetLongField(ret, vector_expander_ret_address_); - jlong ret_capacity = env_->GetLongField(ret, vector_expander_ret_capacity_); - - data_ = reinterpret_cast(ret_address); - capacity_ = ret_capacity; - return Status::OK(); -} - -Status JavaResizableBuffer::Resize(const int64_t new_size, bool shrink_to_fit) { - if (shrink_to_fit == true) { - return Status::NotImplemented("shrink not implemented"); - } - - if (ARROW_PREDICT_TRUE(new_size <= capacity())) { - // no need to expand. 
- size_ = new_size; - return Status::OK(); - } - - RETURN_NOT_OK(Reserve(new_size)); - DCHECK_GE(capacity_, new_size); - size_ = new_size; - return Status::OK(); -} - -#define CHECK_OUT_BUFFER_IDX_AND_BREAK(idx, len) \ - if (idx >= len) { \ - status = gandiva::Status::Invalid("insufficient number of out_buf_addrs"); \ - break; \ - } - -JNIEXPORT void JNICALL -Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector( - JNIEnv* env, jobject object, jobject jexpander, jlong module_id, jint num_rows, - jlongArray buf_addrs, jlongArray buf_sizes, jint sel_vec_type, jint sel_vec_rows, - jlong sel_vec_addr, jlong sel_vec_size, jlongArray out_buf_addrs, - jlongArray out_buf_sizes) { - Status status; - std::shared_ptr holder = projector_modules_.Lookup(module_id); - if (holder == nullptr) { - std::stringstream ss; - ss << "Unknown module id " << module_id; - env->ThrowNew(gandiva_exception_, ss.str().c_str()); - return; - } - - int in_bufs_len = env->GetArrayLength(buf_addrs); - if (in_bufs_len != env->GetArrayLength(buf_sizes)) { - env->ThrowNew(gandiva_exception_, "mismatch in arraylen of buf_addrs and buf_sizes"); - return; - } - - int out_bufs_len = env->GetArrayLength(out_buf_addrs); - if (out_bufs_len != env->GetArrayLength(out_buf_sizes)) { - env->ThrowNew(gandiva_exception_, - "mismatch in arraylen of out_buf_addrs and out_buf_sizes"); - return; - } - - jlong* in_buf_addrs = env->GetLongArrayElements(buf_addrs, 0); - jlong* in_buf_sizes = env->GetLongArrayElements(buf_sizes, 0); - - jlong* out_bufs = env->GetLongArrayElements(out_buf_addrs, 0); - jlong* out_sizes = env->GetLongArrayElements(out_buf_sizes, 0); - - do { - std::shared_ptr in_batch; - status = make_record_batch_with_buf_addrs(holder->schema(), num_rows, in_buf_addrs, - in_buf_sizes, in_bufs_len, &in_batch); - if (!status.ok()) { - break; - } - - std::shared_ptr selection_vector; - auto selection_buffer = std::make_shared( - reinterpret_cast(sel_vec_addr), sel_vec_size); - int 
output_row_count = 0; - switch (sel_vec_type) { - case gandiva::types::SV_NONE: { - output_row_count = num_rows; - break; - } - case gandiva::types::SV_INT16: { - status = gandiva::SelectionVector::MakeImmutableInt16( - sel_vec_rows, selection_buffer, &selection_vector); - output_row_count = sel_vec_rows; - break; - } - case gandiva::types::SV_INT32: { - status = gandiva::SelectionVector::MakeImmutableInt32( - sel_vec_rows, selection_buffer, &selection_vector); - output_row_count = sel_vec_rows; - break; - } - } - if (!status.ok()) { - break; - } - - auto ret_types = holder->rettypes(); - ArrayDataVector output; - int buf_idx = 0; - int sz_idx = 0; - int output_vector_idx = 0; - for (FieldPtr field : ret_types) { - std::vector> buffers; - - CHECK_OUT_BUFFER_IDX_AND_BREAK(buf_idx, out_bufs_len); - uint8_t* validity_buf = reinterpret_cast(out_bufs[buf_idx++]); - jlong bitmap_sz = out_sizes[sz_idx++]; - buffers.push_back(std::make_shared(validity_buf, bitmap_sz)); - - if (arrow::is_binary_like(field->type()->id())) { - CHECK_OUT_BUFFER_IDX_AND_BREAK(buf_idx, out_bufs_len); - uint8_t* offsets_buf = reinterpret_cast(out_bufs[buf_idx++]); - jlong offsets_sz = out_sizes[sz_idx++]; - buffers.push_back( - std::make_shared(offsets_buf, offsets_sz)); - } - - CHECK_OUT_BUFFER_IDX_AND_BREAK(buf_idx, out_bufs_len); - uint8_t* value_buf = reinterpret_cast(out_bufs[buf_idx++]); - jlong data_sz = out_sizes[sz_idx++]; - if (arrow::is_binary_like(field->type()->id())) { - if (jexpander == nullptr) { - status = Status::Invalid( - "expression has variable len output columns, but the expander object is " - "null"); - break; - } - buffers.push_back(std::make_shared( - env, jexpander, output_vector_idx, value_buf, data_sz)); - } else { - buffers.push_back(std::make_shared(value_buf, data_sz)); - } - - auto array_data = arrow::ArrayData::Make(field->type(), output_row_count, buffers); - output.push_back(array_data); - ++output_vector_idx; - } - if (!status.ok()) { - break; - } - status = 
holder->projector()->Evaluate(*in_batch, selection_vector.get(), output); - } while (0); - - env->ReleaseLongArrayElements(buf_addrs, in_buf_addrs, JNI_ABORT); - env->ReleaseLongArrayElements(buf_sizes, in_buf_sizes, JNI_ABORT); - env->ReleaseLongArrayElements(out_buf_addrs, out_bufs, JNI_ABORT); - env->ReleaseLongArrayElements(out_buf_sizes, out_sizes, JNI_ABORT); - - if (!status.ok()) { - std::stringstream ss; - ss << "Evaluate returned " << status.message() << "\n"; - env->ThrowNew(gandiva_exception_, status.message().c_str()); - return; - } -} - -JNIEXPORT void JNICALL Java_org_apache_arrow_gandiva_evaluator_JniWrapper_closeProjector( - JNIEnv* env, jobject cls, jlong module_id) { - projector_modules_.Erase(module_id); -} - -// filter related functions. -void releaseFilterInput(jbyteArray schema_arr, jbyte* schema_bytes, - jbyteArray condition_arr, jbyte* condition_bytes, JNIEnv* env) { - env->ReleaseByteArrayElements(schema_arr, schema_bytes, JNI_ABORT); - env->ReleaseByteArrayElements(condition_arr, condition_bytes, JNI_ABORT); -} - -JNIEXPORT jlong JNICALL Java_org_apache_arrow_gandiva_evaluator_JniWrapper_buildFilter( - JNIEnv* env, jobject obj, jbyteArray schema_arr, jbyteArray condition_arr, - jlong configuration_id) { - jlong module_id = 0LL; - std::shared_ptr filter; - std::shared_ptr holder; - - gandiva::types::Schema schema; - jsize schema_len = env->GetArrayLength(schema_arr); - jbyte* schema_bytes = env->GetByteArrayElements(schema_arr, 0); - - gandiva::types::Condition condition; - jsize condition_len = env->GetArrayLength(condition_arr); - jbyte* condition_bytes = env->GetByteArrayElements(condition_arr, 0); - - ConditionPtr condition_ptr; - SchemaPtr schema_ptr; - gandiva::Status status; - - std::shared_ptr config = ConfigHolder::MapLookup(configuration_id); - std::stringstream ss; - - if (config == nullptr) { - ss << "configuration is mandatory."; - releaseFilterInput(schema_arr, schema_bytes, condition_arr, condition_bytes, env); - goto 
err_out; - } - - if (!ParseProtobuf(reinterpret_cast(schema_bytes), schema_len, &schema)) { - ss << "Unable to parse schema protobuf\n"; - releaseFilterInput(schema_arr, schema_bytes, condition_arr, condition_bytes, env); - goto err_out; - } - - if (!ParseProtobuf(reinterpret_cast(condition_bytes), condition_len, - &condition)) { - ss << "Unable to parse condition protobuf\n"; - releaseFilterInput(schema_arr, schema_bytes, condition_arr, condition_bytes, env); - goto err_out; - } - - // convert gandiva::types::Schema to arrow::Schema - schema_ptr = ProtoTypeToSchema(schema); - if (schema_ptr == nullptr) { - ss << "Unable to construct arrow schema object from schema protobuf\n"; - releaseFilterInput(schema_arr, schema_bytes, condition_arr, condition_bytes, env); - goto err_out; - } - - condition_ptr = ProtoTypeToCondition(condition); - if (condition_ptr == nullptr) { - ss << "Unable to construct condition object from condition protobuf\n"; - releaseFilterInput(schema_arr, schema_bytes, condition_arr, condition_bytes, env); - goto err_out; - } - - // good to invoke the filter builder now - status = Filter::Make(schema_ptr, condition_ptr, config, &filter); - if (!status.ok()) { - ss << "Failed to make LLVM module due to " << status.message() << "\n"; - releaseFilterInput(schema_arr, schema_bytes, condition_arr, condition_bytes, env); - goto err_out; - } - - // store the result in a map - holder = std::shared_ptr(new FilterHolder(schema_ptr, std::move(filter))); - module_id = filter_modules_.Insert(holder); - releaseFilterInput(schema_arr, schema_bytes, condition_arr, condition_bytes, env); - return module_id; - -err_out: - env->ThrowNew(gandiva_exception_, ss.str().c_str()); - return module_id; -} - -JNIEXPORT jint JNICALL Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateFilter( - JNIEnv* env, jobject cls, jlong module_id, jint num_rows, jlongArray buf_addrs, - jlongArray buf_sizes, jint jselection_vector_type, jlong out_buf_addr, - jlong out_buf_size) { - 
gandiva::Status status; - std::shared_ptr holder = filter_modules_.Lookup(module_id); - if (holder == nullptr) { - env->ThrowNew(gandiva_exception_, "Unknown module id\n"); - return -1; - } - - int in_bufs_len = env->GetArrayLength(buf_addrs); - if (in_bufs_len != env->GetArrayLength(buf_sizes)) { - env->ThrowNew(gandiva_exception_, "mismatch in arraylen of buf_addrs and buf_sizes"); - return -1; - } - - jlong* in_buf_addrs = env->GetLongArrayElements(buf_addrs, 0); - jlong* in_buf_sizes = env->GetLongArrayElements(buf_sizes, 0); - std::shared_ptr selection_vector; - - do { - std::shared_ptr in_batch; - - status = make_record_batch_with_buf_addrs(holder->schema(), num_rows, in_buf_addrs, - in_buf_sizes, in_bufs_len, &in_batch); - if (!status.ok()) { - break; - } - - auto selection_vector_type = - static_cast(jselection_vector_type); - auto out_buffer = std::make_shared( - reinterpret_cast(out_buf_addr), out_buf_size); - switch (selection_vector_type) { - case gandiva::types::SV_INT16: - status = - gandiva::SelectionVector::MakeInt16(num_rows, out_buffer, &selection_vector); - break; - case gandiva::types::SV_INT32: - status = - gandiva::SelectionVector::MakeInt32(num_rows, out_buffer, &selection_vector); - break; - default: - status = gandiva::Status::Invalid("unknown selection vector type"); - } - if (!status.ok()) { - break; - } - - status = holder->filter()->Evaluate(*in_batch, selection_vector); - } while (0); - - env->ReleaseLongArrayElements(buf_addrs, in_buf_addrs, JNI_ABORT); - env->ReleaseLongArrayElements(buf_sizes, in_buf_sizes, JNI_ABORT); - - if (!status.ok()) { - std::stringstream ss; - ss << "Evaluate returned " << status.message() << "\n"; - env->ThrowNew(gandiva_exception_, status.message().c_str()); - return -1; - } else { - int64_t num_slots = selection_vector->GetNumSlots(); - // Check integer overflow - if (num_slots > INT_MAX) { - std::stringstream ss; - ss << "The selection vector has " << num_slots - << " slots, which is larger than the " << 
INT_MAX << " limit.\n"; - const std::string message = ss.str(); - env->ThrowNew(gandiva_exception_, message.c_str()); - return -1; - } - return static_cast(num_slots); - } -} - -JNIEXPORT void JNICALL Java_org_apache_arrow_gandiva_evaluator_JniWrapper_closeFilter( - JNIEnv* env, jobject cls, jlong module_id) { - filter_modules_.Erase(module_id); -} diff --git a/java/gandiva/src/main/cpp/module_holder.h b/java/gandiva/src/main/cpp/module_holder.h deleted file mode 100644 index 74bad29e68c94..0000000000000 --- a/java/gandiva/src/main/cpp/module_holder.h +++ /dev/null @@ -1,59 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include -#include - -#include - -namespace gandiva { - -class Projector; -class Filter; - -class ProjectorHolder { - public: - ProjectorHolder(SchemaPtr schema, FieldVector ret_types, - std::shared_ptr projector) - : schema_(schema), ret_types_(ret_types), projector_(std::move(projector)) {} - - SchemaPtr schema() { return schema_; } - FieldVector rettypes() { return ret_types_; } - std::shared_ptr projector() { return projector_; } - - private: - SchemaPtr schema_; - FieldVector ret_types_; - std::shared_ptr projector_; -}; - -class FilterHolder { - public: - FilterHolder(SchemaPtr schema, std::shared_ptr filter) - : schema_(schema), filter_(std::move(filter)) {} - - SchemaPtr schema() { return schema_; } - std::shared_ptr filter() { return filter_; } - - private: - SchemaPtr schema_; - std::shared_ptr filter_; -}; - -} // namespace gandiva diff --git a/java/gandiva/src/main/java/module-info.java b/java/gandiva/src/main/java/module-info.java deleted file mode 100644 index 49deed1857691..0000000000000 --- a/java/gandiva/src/main/java/module-info.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -open module org.apache.arrow.gandiva { - exports org.apache.arrow.gandiva.expression; - exports org.apache.arrow.gandiva.exceptions; - exports org.apache.arrow.gandiva.evaluator; - exports org.apache.arrow.gandiva.ipc; - - requires com.google.common; - requires com.google.protobuf; - requires org.apache.arrow.format; - requires org.apache.arrow.memory.core; - requires org.apache.arrow.vector; - requires org.slf4j; -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java deleted file mode 100644 index 70c181e8ee0b1..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.evaluator; - -import java.util.Objects; - -/** Used to construct gandiva configuration objects. 
*/ -public class ConfigurationBuilder { - - public long buildConfigInstance(ConfigOptions configOptions) { - return buildConfigInstance(configOptions.optimize, configOptions.targetCPU); - } - - private native long buildConfigInstance(boolean optimize, boolean detectHostCPU); - - public native void releaseConfigInstance(long configId); - - /** ConfigOptions contains the configuration parameters to provide to gandiva. */ - public static class ConfigOptions { - private boolean optimize = true; - private boolean targetCPU = true; - - public static ConfigOptions getDefault() { - return new ConfigOptions(); - } - - public ConfigOptions() {} - - public ConfigOptions withOptimize(boolean optimize) { - this.optimize = optimize; - return this; - } - - public ConfigOptions withTargetCPU(boolean targetCPU) { - this.targetCPU = targetCPU; - return this; - } - - @Override - public int hashCode() { - return Objects.hash(optimize, targetCPU); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof ConfigOptions)) { - return false; - } - return this.optimize == ((ConfigOptions) obj).optimize - && this.targetCPU == ((ConfigOptions) obj).targetCPU; - } - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java deleted file mode 100644 index 13c86285c0aa5..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtil.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.evaluator; - -import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; - -/** Utility methods for working with {@link Decimal} values. */ -public class DecimalTypeUtil { - private DecimalTypeUtil() {} - - /** Enum for supported mathematical operations. */ - public enum OperationType { - ADD, - SUBTRACT, - MULTIPLY, - DIVIDE, - MOD - } - - private static final int MIN_ADJUSTED_SCALE = 6; - /// The maximum precision representable by a 16-byte decimal - private static final int MAX_PRECISION = 38; - - /** Determines the scale and precision of applying the given operation to the operands. 
*/ - public static Decimal getResultTypeForOperation( - OperationType operation, Decimal operand1, Decimal operand2) { - int s1 = operand1.getScale(); - int s2 = operand2.getScale(); - int p1 = operand1.getPrecision(); - int p2 = operand2.getPrecision(); - int resultScale = 0; - int resultPrecision = 0; - switch (operation) { - case ADD: - case SUBTRACT: - resultScale = Math.max(operand1.getScale(), operand2.getScale()); - resultPrecision = - resultScale - + Math.max( - operand1.getPrecision() - operand1.getScale(), - operand2.getPrecision() - operand2.getScale()) - + 1; - break; - case MULTIPLY: - resultScale = s1 + s2; - resultPrecision = p1 + p2 + 1; - break; - case DIVIDE: - resultScale = - Math.max(MIN_ADJUSTED_SCALE, operand1.getScale() + operand2.getPrecision() + 1); - resultPrecision = - operand1.getPrecision() - operand1.getScale() + operand2.getScale() + resultScale; - break; - case MOD: - resultScale = Math.max(operand1.getScale(), operand2.getScale()); - resultPrecision = - Math.min( - operand1.getPrecision() - operand1.getScale(), - operand2.getPrecision() - operand2.getScale()) - + resultScale; - break; - default: - throw new RuntimeException("Needs support"); - } - return adjustScaleIfNeeded(resultPrecision, resultScale); - } - - private static Decimal adjustScaleIfNeeded(int precision, int scale) { - if (precision > MAX_PRECISION) { - int minScale = Math.min(scale, MIN_ADJUSTED_SCALE); - int delta = precision - MAX_PRECISION; - precision = MAX_PRECISION; - scale = Math.max(scale - delta, minScale); - } - return new Decimal(precision, scale, 128); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java deleted file mode 100644 index 49625edf27a53..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Licensed to the Apache 
Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.evaluator; - -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.google.protobuf.InvalidProtocolBufferException; -import java.util.List; -import java.util.Set; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; -import org.apache.arrow.gandiva.ipc.GandivaTypes.ExtGandivaType; -import org.apache.arrow.gandiva.ipc.GandivaTypes.GandivaDataTypes; -import org.apache.arrow.gandiva.ipc.GandivaTypes.GandivaFunctions; -import org.apache.arrow.gandiva.ipc.GandivaTypes.GandivaType; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.IntervalUnit; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** - * Used to get the functions and data types supported by Gandiva. All types are in Arrow namespace. 
- */ -public class ExpressionRegistry { - - private static final int BIT_WIDTH8 = 8; - private static final int BIT_WIDTH_16 = 16; - private static final int BIT_WIDTH_32 = 32; - private static final int BIT_WIDTH_64 = 64; - private static final boolean IS_SIGNED_FALSE = false; - private static final boolean IS_SIGNED_TRUE = true; - - private final Set supportedTypes; - private final Set functionSignatures; - - private static volatile ExpressionRegistry INSTANCE; - - private ExpressionRegistry( - Set supportedTypes, Set functionSignatures) { - this.supportedTypes = supportedTypes; - this.functionSignatures = functionSignatures; - } - - /** - * Returns a singleton instance of the class. - * - * @return singleton instance - * @throws GandivaException if error in Gandiva Library integration. - */ - public static ExpressionRegistry getInstance() throws GandivaException { - if (INSTANCE == null) { - synchronized (ExpressionRegistry.class) { - if (INSTANCE == null) { - // ensure library is setup. 
- JniLoader.getInstance(); - Set typesFromGandiva = getSupportedTypesFromGandiva(); - Set functionsFromGandiva = getSupportedFunctionsFromGandiva(); - INSTANCE = new ExpressionRegistry(typesFromGandiva, functionsFromGandiva); - } - } - } - return INSTANCE; - } - - public Set getSupportedFunctions() { - return functionSignatures; - } - - public Set getSupportedTypes() { - return supportedTypes; - } - - private static Set getSupportedTypesFromGandiva() throws GandivaException { - Set supportedTypes = Sets.newHashSet(); - try { - byte[] gandivaSupportedDataTypes = - new ExpressionRegistryJniHelper().getGandivaSupportedDataTypes(); - GandivaDataTypes gandivaDataTypes = GandivaDataTypes.parseFrom(gandivaSupportedDataTypes); - for (ExtGandivaType type : gandivaDataTypes.getDataTypeList()) { - supportedTypes.add(getArrowType(type)); - } - } catch (InvalidProtocolBufferException invalidProtException) { - throw new GandivaException("Could not get supported types.", invalidProtException); - } - return supportedTypes; - } - - private static Set getSupportedFunctionsFromGandiva() throws GandivaException { - Set supportedTypes = Sets.newHashSet(); - try { - byte[] gandivaSupportedFunctions = - new ExpressionRegistryJniHelper().getGandivaSupportedFunctions(); - GandivaFunctions gandivaFunctions = GandivaFunctions.parseFrom(gandivaSupportedFunctions); - for (GandivaTypes.FunctionSignature protoFunctionSignature : - gandivaFunctions.getFunctionList()) { - - String functionName = protoFunctionSignature.getName(); - ArrowType returnType = getArrowType(protoFunctionSignature.getReturnType()); - List paramTypes = Lists.newArrayList(); - for (ExtGandivaType type : protoFunctionSignature.getParamTypesList()) { - paramTypes.add(getArrowType(type)); - } - FunctionSignature functionSignature = - new FunctionSignature(functionName, returnType, paramTypes); - supportedTypes.add(functionSignature); - } - } catch (InvalidProtocolBufferException invalidProtException) { - throw new 
GandivaException("Could not get supported functions.", invalidProtException); - } - return supportedTypes; - } - - private static ArrowType getArrowType(ExtGandivaType type) { - switch (type.getType().getNumber()) { - case GandivaType.BOOL_VALUE: - return ArrowType.Bool.INSTANCE; - case GandivaType.UINT8_VALUE: - return new ArrowType.Int(BIT_WIDTH8, IS_SIGNED_FALSE); - case GandivaType.INT8_VALUE: - return new ArrowType.Int(BIT_WIDTH8, IS_SIGNED_TRUE); - case GandivaType.UINT16_VALUE: - return new ArrowType.Int(BIT_WIDTH_16, IS_SIGNED_FALSE); - case GandivaType.INT16_VALUE: - return new ArrowType.Int(BIT_WIDTH_16, IS_SIGNED_TRUE); - case GandivaType.UINT32_VALUE: - return new ArrowType.Int(BIT_WIDTH_32, IS_SIGNED_FALSE); - case GandivaType.INT32_VALUE: - return new ArrowType.Int(BIT_WIDTH_32, IS_SIGNED_TRUE); - case GandivaType.UINT64_VALUE: - return new ArrowType.Int(BIT_WIDTH_64, IS_SIGNED_FALSE); - case GandivaType.INT64_VALUE: - return new ArrowType.Int(BIT_WIDTH_64, IS_SIGNED_TRUE); - case GandivaType.HALF_FLOAT_VALUE: - return new ArrowType.FloatingPoint(FloatingPointPrecision.HALF); - case GandivaType.FLOAT_VALUE: - return new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE); - case GandivaType.DOUBLE_VALUE: - return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); - case GandivaType.UTF8_VALUE: - return new ArrowType.Utf8(); - case GandivaType.BINARY_VALUE: - return new ArrowType.Binary(); - case GandivaType.DATE32_VALUE: - return new ArrowType.Date(DateUnit.DAY); - case GandivaType.DATE64_VALUE: - return new ArrowType.Date(DateUnit.MILLISECOND); - case GandivaType.TIMESTAMP_VALUE: - return new ArrowType.Timestamp(mapArrowTimeUnit(type.getTimeUnit()), null); - case GandivaType.TIME32_VALUE: - return new ArrowType.Time(mapArrowTimeUnit(type.getTimeUnit()), BIT_WIDTH_32); - case GandivaType.TIME64_VALUE: - return new ArrowType.Time(mapArrowTimeUnit(type.getTimeUnit()), BIT_WIDTH_64); - case GandivaType.NONE_VALUE: - return new 
ArrowType.Null(); - case GandivaType.DECIMAL_VALUE: - return new ArrowType.Decimal(0, 0, 128); - case GandivaType.INTERVAL_VALUE: - return new ArrowType.Interval(mapArrowIntervalUnit(type.getIntervalType())); - case GandivaType.FIXED_SIZE_BINARY_VALUE: - case GandivaType.MAP_VALUE: - case GandivaType.DICTIONARY_VALUE: - case GandivaType.LIST_VALUE: - case GandivaType.STRUCT_VALUE: - case GandivaType.UNION_VALUE: - default: - assert false; - } - return null; - } - - private static TimeUnit mapArrowTimeUnit(GandivaTypes.TimeUnit timeUnit) { - switch (timeUnit.getNumber()) { - case GandivaTypes.TimeUnit.MICROSEC_VALUE: - return TimeUnit.MICROSECOND; - case GandivaTypes.TimeUnit.MILLISEC_VALUE: - return TimeUnit.MILLISECOND; - case GandivaTypes.TimeUnit.NANOSEC_VALUE: - return TimeUnit.NANOSECOND; - case GandivaTypes.TimeUnit.SEC_VALUE: - return TimeUnit.SECOND; - default: - return null; - } - } - - private static IntervalUnit mapArrowIntervalUnit(GandivaTypes.IntervalType intervalType) { - switch (intervalType.getNumber()) { - case GandivaTypes.IntervalType.YEAR_MONTH_VALUE: - return IntervalUnit.YEAR_MONTH; - case GandivaTypes.IntervalType.DAY_TIME_VALUE: - return IntervalUnit.DAY_TIME; - default: - return null; - } - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java deleted file mode 100644 index 6c0ee1f2751a7..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryJniHelper.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.evaluator; - -/** JNI Adapter used to get supported types and functions from Gandiva. */ -class ExpressionRegistryJniHelper { - - native byte[] getGandivaSupportedDataTypes(); - - native byte[] getGandivaSupportedFunctions(); -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java deleted file mode 100644 index f584564e0920c..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Filter.java +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.gandiva.evaluator; - -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.gandiva.exceptions.EvaluatorClosedException; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.expression.ArrowTypeHelper; -import org.apache.arrow.gandiva.expression.Condition; -import org.apache.arrow.gandiva.ipc.GandivaTypes; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.ipc.message.ArrowBuffer; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.Schema; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This class provides a mechanism to filter a RecordBatch by evaluating a condition expression. - * Follow these steps to use this class: 1) Use the static method make() to create an instance of - * this class that evaluates a condition. 2) Invoke the method evaluate() to evaluate the filter - * against a RecordBatch 3) Invoke close() to release resources - */ -public class Filter { - - private static final Logger logger = LoggerFactory.getLogger(Filter.class); - - private final JniWrapper wrapper; - private final long moduleId; - private final Schema schema; - private boolean closed; - - private Filter(JniWrapper wrapper, long moduleId, Schema schema) { - this.wrapper = wrapper; - this.moduleId = moduleId; - this.schema = schema; - this.closed = false; - } - - /** - * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke - * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch - * - * @param schema Table schema. 
The field names in the schema should match the fields used to - * create the TreeNodes - * @param condition condition to be evaluated against data - * @return A native filter object that can be used to invoke on a RecordBatch - */ - public static Filter make(Schema schema, Condition condition) throws GandivaException { - return make(schema, condition, JniLoader.getDefaultConfiguration()); - } - - /** - * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke - * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch - * - * @param schema Table schema. The field names in the schema should match the fields used to - * create the TreeNodes - * @param condition condition to be evaluated against data - * @param configOptions ConfigOptions parameter - * @return A native filter object that can be used to invoke on a RecordBatch - */ - public static Filter make( - Schema schema, Condition condition, ConfigurationBuilder.ConfigOptions configOptions) - throws GandivaException { - return make(schema, condition, JniLoader.getConfiguration(configOptions)); - } - - /** - * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke - * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch - * - * @param schema Table schema. 
The field names in the schema should match the fields used to - * create the TreeNodes - * @param condition condition to be evaluated against data - * @param optimize Flag to choose if the generated llvm code is to be optimized - * @return A native filter object that can be used to invoke on a RecordBatch - */ - @Deprecated - public static Filter make(Schema schema, Condition condition, boolean optimize) - throws GandivaException { - return make( - schema, - condition, - JniLoader.getConfiguration( - (new ConfigurationBuilder.ConfigOptions()).withOptimize(optimize))); - } - - /** - * Invoke this function to generate LLVM code to evaluate the condition expression. Invoke - * Filter::Evaluate() against a RecordBatch to evaluate the filter on this record batch - * - * @param schema Table schema. The field names in the schema should match the fields used to - * create the TreeNodes - * @param condition condition to be evaluated against data - * @param configurationId Custom configuration created through config builder. - * @return A native evaluator object that can be used to invoke these projections on a RecordBatch - */ - public static Filter make(Schema schema, Condition condition, long configurationId) - throws GandivaException { - // Invoke the JNI layer to create the LLVM module representing the filter. - GandivaTypes.Condition conditionBuf = condition.toProtobuf(); - GandivaTypes.Schema schemaBuf = ArrowTypeHelper.arrowSchemaToProtobuf(schema); - JniWrapper wrapper = JniLoader.getInstance().getWrapper(); - long moduleId = - wrapper.buildFilter(schemaBuf.toByteArray(), conditionBuf.toByteArray(), configurationId); - logger.debug("Created module for the filter with id {}", moduleId); - return new Filter(wrapper, moduleId, schema); - } - - /** - * Invoke this function to evaluate a filter against a recordBatch. 
- * - * @param recordBatch Record batch including the data - * @param selectionVector Result of applying the filter on the data - */ - public void evaluate(ArrowRecordBatch recordBatch, SelectionVector selectionVector) - throws GandivaException { - evaluate( - recordBatch.getLength(), - recordBatch.getBuffers(), - recordBatch.getBuffersLayout(), - selectionVector); - } - - /** - * Invoke this function to evaluate filter against a set of arrow buffers. (this is an optimised - * version that skips taking references). - * - * @param numRows number of rows. - * @param buffers List of input arrow buffers - * @param selectionVector Result of applying the filter on the data - */ - public void evaluate(int numRows, List buffers, SelectionVector selectionVector) - throws GandivaException { - List buffersLayout = new ArrayList<>(); - long offset = 0; - for (ArrowBuf arrowBuf : buffers) { - long size = arrowBuf.readableBytes(); - buffersLayout.add(new ArrowBuffer(offset, size)); - offset += size; - } - evaluate(numRows, buffers, buffersLayout, selectionVector); - } - - private void evaluate( - int numRows, - List buffers, - List buffersLayout, - SelectionVector selectionVector) - throws GandivaException { - if (this.closed) { - throw new EvaluatorClosedException(); - } - if (selectionVector.getMaxRecords() < numRows) { - logger.error( - "selectionVector has capacity for " - + selectionVector.getMaxRecords() - + " rows, minimum required " - + numRows); - throw new GandivaException("SelectionVector too small"); - } - - long[] bufAddrs = new long[buffers.size()]; - long[] bufSizes = new long[buffers.size()]; - - int idx = 0; - for (ArrowBuf buf : buffers) { - bufAddrs[idx++] = buf.memoryAddress(); - } - - idx = 0; - for (ArrowBuffer bufLayout : buffersLayout) { - bufSizes[idx++] = bufLayout.getSize(); - } - - int numRecords = - wrapper.evaluateFilter( - this.moduleId, - numRows, - bufAddrs, - bufSizes, - selectionVector.getType().getNumber(), - 
selectionVector.getBuffer().memoryAddress(), - selectionVector.getBuffer().capacity()); - if (numRecords >= 0) { - selectionVector.setRecordCount(numRecords); - } - } - - /** Closes the LLVM module representing this filter. */ - public void close() throws GandivaException { - if (this.closed) { - return; - } - - wrapper.closeFilter(this.moduleId); - this.closed = true; - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java deleted file mode 100644 index 164b6aaac6684..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/FunctionSignature.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.evaluator; - -import com.google.common.base.MoreObjects; -import com.google.common.base.Objects; -import java.util.List; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** POJO to define a function signature. 
*/ -public class FunctionSignature { - private final String name; - private final ArrowType returnType; - private final List paramTypes; - - public ArrowType getReturnType() { - return returnType; - } - - public List getParamTypes() { - return paramTypes; - } - - public String getName() { - return name; - } - - /** - * Ctor. - * - * @param name - name of the function. - * @param returnType - data type of return - * @param paramTypes - data type of input args. - */ - public FunctionSignature(String name, ArrowType returnType, List paramTypes) { - this.name = name; - this.returnType = returnType; - this.paramTypes = paramTypes; - } - - /** - * Override equals. - * - * @param signature - signature to compare - * @return true if equal and false if not. - */ - public boolean equals(Object signature) { - if (signature == null) { - return false; - } - if (getClass() != signature.getClass()) { - return false; - } - final FunctionSignature other = (FunctionSignature) signature; - return this.name.equalsIgnoreCase(other.name) - && Objects.equal(this.returnType, other.returnType) - && Objects.equal(this.paramTypes, other.paramTypes); - } - - @Override - public int hashCode() { - return Objects.hashCode(this.name.toLowerCase(), this.returnType, this.paramTypes); - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("name ", name) - .add("return type ", returnType) - .add("param types ", paramTypes) - .toString(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java deleted file mode 100644 index 6f4cdc58c5a98..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.evaluator; - -import static java.util.UUID.randomUUID; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.StandardCopyOption; -import java.util.Locale; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import org.apache.arrow.gandiva.exceptions.GandivaException; - -/** This class handles loading of the jni library, and acts as a bridge for the native functions. 
*/ -class JniLoader { - private static final String LIBRARY_NAME = "gandiva_jni"; - - private static volatile JniLoader INSTANCE; - private static volatile long defaultConfiguration = 0L; - private static final ConcurrentMap configurationMap = - new ConcurrentHashMap<>(); - - private final JniWrapper wrapper; - - private JniLoader() { - this.wrapper = new JniWrapper(); - } - - static JniLoader getInstance() throws GandivaException { - if (INSTANCE == null) { - synchronized (JniLoader.class) { - if (INSTANCE == null) { - INSTANCE = setupInstance(); - } - } - } - return INSTANCE; - } - - private static JniLoader setupInstance() throws GandivaException { - try { - String tempDir = System.getProperty("java.io.tmpdir"); - loadGandivaLibraryFromJar(tempDir); - return new JniLoader(); - } catch (IOException ioException) { - throw new GandivaException("unable to create native instance", ioException); - } - } - - private static void loadGandivaLibraryFromJar(final String tmpDir) - throws IOException, GandivaException { - final String libraryToLoad = - LIBRARY_NAME + "/" + getNormalizedArch() + "/" + System.mapLibraryName(LIBRARY_NAME); - final File libraryFile = moveFileFromJarToTemp(tmpDir, libraryToLoad, LIBRARY_NAME); - System.load(libraryFile.getAbsolutePath()); - } - - private static String getNormalizedArch() { - String arch = System.getProperty("os.arch").toLowerCase(Locale.US); - switch (arch) { - case "amd64": - arch = "x86_64"; - break; - case "aarch64": - arch = "aarch_64"; - break; - default: - break; - } - return arch; - } - - private static File moveFileFromJarToTemp( - final String tmpDir, String libraryToLoad, String libraryName) - throws IOException, GandivaException { - final File temp = setupFile(tmpDir, libraryName); - try (final InputStream is = - JniLoader.class.getClassLoader().getResourceAsStream(libraryToLoad)) { - if (is == null) { - throw new GandivaException(libraryToLoad + " was not found inside JAR."); - } else { - Files.copy(is, temp.toPath(), 
StandardCopyOption.REPLACE_EXISTING); - } - } - return temp; - } - - private static File setupFile(String tmpDir, String libraryToLoad) - throws IOException, GandivaException { - // accommodate multiple processes running with gandiva jar. - // length should be ok since uuid is only 36 characters. - final String randomizeFileName = libraryToLoad + randomUUID(); - final File temp = new File(tmpDir, randomizeFileName); - if (temp.exists() && !temp.delete()) { - throw new GandivaException( - "File: " + temp.getAbsolutePath() + " already exists and cannot be removed."); - } - if (!temp.createNewFile()) { - throw new GandivaException("File: " + temp.getAbsolutePath() + " could not be created."); - } - temp.deleteOnExit(); - return temp; - } - - /** Returns the jni wrapper. */ - JniWrapper getWrapper() throws GandivaException { - return wrapper; - } - - static long getConfiguration(ConfigurationBuilder.ConfigOptions configOptions) - throws GandivaException { - if (!configurationMap.containsKey(configOptions)) { - synchronized (ConfigurationBuilder.class) { - if (!configurationMap.containsKey(configOptions)) { - JniLoader.getInstance(); // setup - long configInstance = new ConfigurationBuilder().buildConfigInstance(configOptions); - configurationMap.put(configOptions, configInstance); - if (ConfigurationBuilder.ConfigOptions.getDefault().equals(configOptions)) { - defaultConfiguration = configInstance; - } - return configInstance; - } - } - } - return configurationMap.get(configOptions); - } - - /** - * Get the default configuration to invoke gandiva. - * - * @return default configuration - * @throws GandivaException if unable to get native builder instance. 
- */ - static long getDefaultConfiguration() throws GandivaException { - if (defaultConfiguration == 0L) { - synchronized (ConfigurationBuilder.class) { - if (defaultConfiguration == 0L) { - JniLoader.getInstance(); // setup - ConfigurationBuilder.ConfigOptions defaultConfigOptions = - ConfigurationBuilder.ConfigOptions.getDefault(); - defaultConfiguration = - new ConfigurationBuilder().buildConfigInstance(defaultConfigOptions); - configurationMap.put(defaultConfigOptions, defaultConfiguration); - } - } - } - return defaultConfiguration; - } - - /** Remove the configuration. */ - static void removeConfiguration(ConfigurationBuilder.ConfigOptions configOptions) { - if (configurationMap.containsKey(configOptions)) { - synchronized (ConfigurationBuilder.class) { - if (configurationMap.containsKey(configOptions)) { - (new ConfigurationBuilder()) - .releaseConfigInstance(configurationMap.remove(configOptions)); - if (configOptions.equals(ConfigurationBuilder.ConfigOptions.getDefault())) { - defaultConfiguration = 0; - } - } - } - } - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java deleted file mode 100644 index acb25ebae28a9..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.evaluator; - -import org.apache.arrow.gandiva.exceptions.GandivaException; - -/** - * This class is implemented in JNI. This provides the Java interface to invoke functions in JNI. - * This file is used to generated the .h files required for jni. Avoid all external dependencies in - * this file. - */ -public class JniWrapper { - - /** - * Generates the projector module to evaluate the expressions with custom configuration. - * - * @param schemaBuf The schema serialized as a protobuf. See Types.proto to see the protobuf - * specification - * @param exprListBuf The serialized protobuf of the expression vector. Each expression is created - * using TreeBuilder::MakeExpression. - * @param selectionVectorType type of selection vector - * @param configId Configuration to gandiva. - * @return A moduleId that is passed to the evaluateProjector() and closeProjector() methods - */ - native long buildProjector( - byte[] schemaBuf, byte[] exprListBuf, int selectionVectorType, long configId) - throws GandivaException; - - /** - * Evaluate the expressions represented by the moduleId on a record batch and store the output in - * ValueVectors. Throws an exception in case of errors - * - * @param expander VectorExpander object. Used for callbacks from cpp. - * @param moduleId moduleId representing expressions. Created using a call to buildNativeCode - * @param numRows Number of rows in the record batch - * @param bufAddrs An array of memory addresses. 
Each memory address points to a validity vector - * or a data vector (will add support for offset vectors later). - * @param bufSizes An array of buffer sizes. For each memory address in bufAddrs, the size of the - * buffer is present in bufSizes - * @param outAddrs An array of output buffers, including the validity and data addresses. - * @param outSizes The allocated size of the output buffers. On successful evaluation, the result - * is stored in the output buffers - */ - native void evaluateProjector( - Object expander, - long moduleId, - int numRows, - long[] bufAddrs, - long[] bufSizes, - int selectionVectorType, - int selectionVectorSize, - long selectionVectorBufferAddr, - long selectionVectorBufferSize, - long[] outAddrs, - long[] outSizes) - throws GandivaException; - - /** - * Closes the projector referenced by moduleId. - * - * @param moduleId moduleId that needs to be closed - */ - native void closeProjector(long moduleId); - - /** - * Generates the filter module to evaluate the condition expression with custom configuration. - * - * @param schemaBuf The schema serialized as a protobuf. See Types.proto to see the protobuf - * specification - * @param conditionBuf The serialized protobuf of the condition expression. Each expression is - * created using TreeBuilder::MakeCondition - * @param configId Configuration to gandiva. - * @return A moduleId that is passed to the evaluateFilter() and closeFilter() methods - */ - native long buildFilter(byte[] schemaBuf, byte[] conditionBuf, long configId) - throws GandivaException; - - /** - * Evaluate the filter represented by the moduleId on a record batch and store the output in - * buffer 'outAddr'. Throws an exception in case of errors - * - * @param moduleId moduleId representing expressions. Created using a call to buildNativeCode - * @param numRows Number of rows in the record batch - * @param bufAddrs An array of memory addresses. 
Each memory address points to a validity vector - * or a data vector (will add support for offset vectors later). - * @param bufSizes An array of buffer sizes. For each memory address in bufAddrs, the size of the - * buffer is present in bufSizes - * @param selectionVectorType type of selection vector - * @param outAddr output buffer, whose type is represented by selectionVectorType - * @param outSize The allocated size of the output buffer. On successful evaluation, the result is - * stored in the output buffer - */ - native int evaluateFilter( - long moduleId, - int numRows, - long[] bufAddrs, - long[] bufSizes, - int selectionVectorType, - long outAddr, - long outSize) - throws GandivaException; - - /** - * Closes the filter referenced by moduleId. - * - * @param moduleId moduleId that needs to be closed - */ - native void closeFilter(long moduleId); -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java deleted file mode 100644 index 5c16c46e5ea46..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java +++ /dev/null @@ -1,403 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.evaluator; - -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.gandiva.exceptions.EvaluatorClosedException; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.exceptions.UnsupportedTypeException; -import org.apache.arrow.gandiva.expression.ArrowTypeHelper; -import org.apache.arrow.gandiva.expression.ExpressionTree; -import org.apache.arrow.gandiva.ipc.GandivaTypes; -import org.apache.arrow.gandiva.ipc.GandivaTypes.SelectionVectorType; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.FixedWidthVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VariableWidthVector; -import org.apache.arrow.vector.ipc.message.ArrowBuffer; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.Schema; - -/** - * This class provides a mechanism to evaluate a set of expressions against a RecordBatch. 
Follow - * these steps to use this class: 1) Use the static method make() to create an instance of this - * class that evaluates a set of expressions 2) Invoke the method evaluate() to evaluate these - * expressions against a RecordBatch 3) Invoke close() to release resources - */ -public class Projector { - private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(Projector.class); - - private JniWrapper wrapper; - private final long moduleId; - private final Schema schema; - private final int numExprs; - private boolean closed; - - private Projector(JniWrapper wrapper, long moduleId, Schema schema, int numExprs) { - this.wrapper = wrapper; - this.moduleId = moduleId; - this.schema = schema; - this.numExprs = numExprs; - this.closed = false; - } - - /** - * Invoke this function to generate LLVM code to evaluate the list of project expressions. Invoke - * Projector::Evaluate() against a RecordBatch to evaluate the record batch against these - * projections. - * - * @param schema Table schema. The field names in the schema should match the fields used to - * create the TreeNodes - * @param exprs List of expressions to be evaluated against data - * @return A native evaluator object that can be used to invoke these projections on a RecordBatch - */ - public static Projector make(Schema schema, List exprs) throws GandivaException { - return make(schema, exprs, SelectionVectorType.SV_NONE, JniLoader.getDefaultConfiguration()); - } - - /** - * Invoke this function to generate LLVM code to evaluate the list of project expressions. Invoke - * Projector::Evaluate() against a RecordBatch to evaluate the record batch against these - * projections. - * - * @param schema Table schema. 
The field names in the schema should match the fields used to - * create the TreeNodes - * @param exprs List of expressions to be evaluated against data - * @param configOptions ConfigOptions parameter - * @return A native evaluator object that can be used to invoke these projections on a RecordBatch - */ - public static Projector make( - Schema schema, List exprs, ConfigurationBuilder.ConfigOptions configOptions) - throws GandivaException { - return make( - schema, exprs, SelectionVectorType.SV_NONE, JniLoader.getConfiguration(configOptions)); - } - - /** - * Invoke this function to generate LLVM code to evaluate the list of project expressions. Invoke - * Projector::Evaluate() against a RecordBatch to evaluate the record batch against these - * projections. - * - * @param schema Table schema. The field names in the schema should match the fields used to - * create the TreeNodes - * @param exprs List of expressions to be evaluated against data - * @param optimize Flag to choose if the generated llvm code is to be optimized - * @return A native evaluator object that can be used to invoke these projections on a RecordBatch - */ - @Deprecated - public static Projector make(Schema schema, List exprs, boolean optimize) - throws GandivaException { - return make( - schema, - exprs, - SelectionVectorType.SV_NONE, - JniLoader.getConfiguration( - (new ConfigurationBuilder.ConfigOptions()).withOptimize(optimize))); - } - - /** - * Invoke this function to generate LLVM code to evaluate the list of project expressions. Invoke - * Projector::Evaluate() against a RecordBatch to evaluate the record batch against these - * projections. - * - * @param schema Table schema. 
The field names in the schema should match the fields used to - * create the TreeNodes - * @param exprs List of expressions to be evaluated against data - * @param selectionVectorType type of selection vector - * @return A native evaluator object that can be used to invoke these projections on a RecordBatch - */ - public static Projector make( - Schema schema, List exprs, SelectionVectorType selectionVectorType) - throws GandivaException { - return make(schema, exprs, selectionVectorType, JniLoader.getDefaultConfiguration()); - } - - /** - * Invoke this function to generate LLVM code to evaluate the list of project expressions. Invoke - * Projector::Evaluate() against a RecordBatch to evaluate the record batch against these - * projections. - * - * @param schema Table schema. The field names in the schema should match the fields used to - * create the TreeNodes - * @param exprs List of expressions to be evaluated against data - * @param selectionVectorType type of selection vector - * @param configOptions ConfigOptions parameter - * @return A native evaluator object that can be used to invoke these projections on a RecordBatch - */ - public static Projector make( - Schema schema, - List exprs, - SelectionVectorType selectionVectorType, - ConfigurationBuilder.ConfigOptions configOptions) - throws GandivaException { - return make(schema, exprs, selectionVectorType, JniLoader.getConfiguration(configOptions)); - } - - /** - * Invoke this function to generate LLVM code to evaluate the list of project expressions. Invoke - * Projector::Evaluate() against a RecordBatch to evaluate the record batch against these - * projections. - * - * @param schema Table schema. 
The field names in the schema should match the fields used to - * create the TreeNodes - * @param exprs List of expressions to be evaluated against data - * @param selectionVectorType type of selection vector - * @param optimize Flag to choose if the generated llvm code is to be optimized - * @return A native evaluator object that can be used to invoke these projections on a RecordBatch - */ - @Deprecated - public static Projector make( - Schema schema, - List exprs, - SelectionVectorType selectionVectorType, - boolean optimize) - throws GandivaException { - return make( - schema, - exprs, - selectionVectorType, - JniLoader.getConfiguration( - (new ConfigurationBuilder.ConfigOptions()).withOptimize(optimize))); - } - - /** - * Invoke this function to generate LLVM code to evaluate the list of project expressions. Invoke - * Projector::Evaluate() against a RecordBatch to evaluate the record batch against these - * projections. - * - * @param schema Table schema. The field names in the schema should match the fields used to - * create the TreeNodes - * @param exprs List of expressions to be evaluated against data - * @param selectionVectorType type of selection vector - * @param configurationId Custom configuration created through config builder. 
- * @return A native evaluator object that can be used to invoke these projections on a RecordBatch - */ - public static Projector make( - Schema schema, - List exprs, - SelectionVectorType selectionVectorType, - long configurationId) - throws GandivaException { - // serialize the schema and the list of expressions as a protobuf - GandivaTypes.ExpressionList.Builder builder = GandivaTypes.ExpressionList.newBuilder(); - for (ExpressionTree expr : exprs) { - builder.addExprs(expr.toProtobuf()); - } - - // Invoke the JNI layer to create the LLVM module representing the expressions - GandivaTypes.Schema schemaBuf = ArrowTypeHelper.arrowSchemaToProtobuf(schema); - JniWrapper wrapper = JniLoader.getInstance().getWrapper(); - long moduleId = - wrapper.buildProjector( - schemaBuf.toByteArray(), - builder.build().toByteArray(), - selectionVectorType.getNumber(), - configurationId); - logger.debug("Created module for the projector with id {}", moduleId); - return new Projector(wrapper, moduleId, schema, exprs.size()); - } - - /** - * Invoke this function to evaluate a set of expressions against a recordBatch. - * - * @param recordBatch Record batch including the data - * @param outColumns Result of applying the project on the data - */ - public void evaluate(ArrowRecordBatch recordBatch, List outColumns) - throws GandivaException { - evaluate( - recordBatch.getLength(), - recordBatch.getBuffers(), - recordBatch.getBuffersLayout(), - SelectionVectorType.SV_NONE.getNumber(), - recordBatch.getLength(), - 0, - 0, - outColumns); - } - - /** - * Invoke this function to evaluate a set of expressions against a set of arrow buffers. (this is - * an optimised version that skips taking references). - * - * @param numRows number of rows. 
- * @param buffers List of input arrow buffers - * @param outColumns Result of applying the project on the data - */ - public void evaluate(int numRows, List buffers, List outColumns) - throws GandivaException { - List buffersLayout = new ArrayList<>(); - long offset = 0; - for (ArrowBuf arrowBuf : buffers) { - long size = arrowBuf.readableBytes(); - buffersLayout.add(new ArrowBuffer(offset, size)); - offset += size; - } - evaluate( - numRows, - buffers, - buffersLayout, - SelectionVectorType.SV_NONE.getNumber(), - numRows, - 0, - 0, - outColumns); - } - - /** - * Invoke this function to evaluate a set of expressions against a {@link ArrowRecordBatch}. - * - * @param recordBatch The data to evaluate against. - * @param selectionVector Selection vector which stores the selected rows. - * @param outColumns Result of applying the project on the data - */ - public void evaluate( - ArrowRecordBatch recordBatch, SelectionVector selectionVector, List outColumns) - throws GandivaException { - evaluate( - recordBatch.getLength(), - recordBatch.getBuffers(), - recordBatch.getBuffersLayout(), - selectionVector.getType().getNumber(), - selectionVector.getRecordCount(), - selectionVector.getBuffer().memoryAddress(), - selectionVector.getBuffer().capacity(), - outColumns); - } - - /** - * Invoke this function to evaluate a set of expressions against a set of arrow buffers on the - * selected positions. (this is an optimised version that skips taking references). - * - * @param numRows number of rows. - * @param buffers List of input arrow buffers - * @param selectionVector Selection vector which stores the selected rows. 
- * @param outColumns Result of applying the project on the data - */ - public void evaluate( - int numRows, - List buffers, - SelectionVector selectionVector, - List outColumns) - throws GandivaException { - List buffersLayout = new ArrayList<>(); - long offset = 0; - for (ArrowBuf arrowBuf : buffers) { - long size = arrowBuf.readableBytes(); - buffersLayout.add(new ArrowBuffer(offset, size)); - offset += size; - } - evaluate( - numRows, - buffers, - buffersLayout, - selectionVector.getType().getNumber(), - selectionVector.getRecordCount(), - selectionVector.getBuffer().memoryAddress(), - selectionVector.getBuffer().capacity(), - outColumns); - } - - private void evaluate( - int numRows, - List buffers, - List buffersLayout, - int selectionVectorType, - int selectionVectorRecordCount, - long selectionVectorAddr, - long selectionVectorSize, - List outColumns) - throws GandivaException { - if (this.closed) { - throw new EvaluatorClosedException(); - } - - if (numExprs != outColumns.size()) { - logger.info("Expected " + numExprs + " columns, got " + outColumns.size()); - throw new GandivaException("Incorrect number of columns for the output vector"); - } - - long[] bufAddrs = new long[buffers.size()]; - long[] bufSizes = new long[buffers.size()]; - - int idx = 0; - for (ArrowBuf buf : buffers) { - bufAddrs[idx++] = buf.memoryAddress(); - } - - idx = 0; - for (ArrowBuffer bufLayout : buffersLayout) { - bufSizes[idx++] = bufLayout.getSize(); - } - - boolean hasVariableWidthColumns = false; - BaseVariableWidthVector[] resizableVectors = new BaseVariableWidthVector[outColumns.size()]; - long[] outAddrs = new long[3 * outColumns.size()]; - long[] outSizes = new long[3 * outColumns.size()]; - idx = 0; - int outColumnIdx = 0; - for (ValueVector valueVector : outColumns) { - boolean isFixedWith = valueVector instanceof FixedWidthVector; - boolean isVarWidth = valueVector instanceof VariableWidthVector; - if (!isFixedWith && !isVarWidth) { - throw new 
UnsupportedTypeException( - "Unsupported value vector type " + valueVector.getField().getFieldType()); - } - - outAddrs[idx] = valueVector.getValidityBuffer().memoryAddress(); - outSizes[idx++] = valueVector.getValidityBuffer().capacity(); - if (isVarWidth) { - outAddrs[idx] = valueVector.getOffsetBuffer().memoryAddress(); - outSizes[idx++] = valueVector.getOffsetBuffer().capacity(); - hasVariableWidthColumns = true; - - // save vector to allow for resizing. - resizableVectors[outColumnIdx] = (BaseVariableWidthVector) valueVector; - } - outAddrs[idx] = valueVector.getDataBuffer().memoryAddress(); - outSizes[idx++] = valueVector.getDataBuffer().capacity(); - - valueVector.setValueCount(selectionVectorRecordCount); - outColumnIdx++; - } - - wrapper.evaluateProjector( - hasVariableWidthColumns ? new VectorExpander(resizableVectors) : null, - this.moduleId, - numRows, - bufAddrs, - bufSizes, - selectionVectorType, - selectionVectorRecordCount, - selectionVectorAddr, - selectionVectorSize, - outAddrs, - outSizes); - } - - /** Closes the LLVM module representing this evaluator. */ - public void close() throws GandivaException { - if (this.closed) { - return; - } - - wrapper.closeProjector(this.moduleId); - this.closed = true; - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVector.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVector.java deleted file mode 100644 index 4bb1f2be74a0e..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVector.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.evaluator; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; - -import org.apache.arrow.gandiva.ipc.GandivaTypes.SelectionVectorType; -import org.apache.arrow.memory.ArrowBuf; - -/** - * A selection vector contains the indexes of "selected" records in a row batch. It is backed by an - * arrow buffer. Client manages the lifecycle of the arrow buffer - to release the reference. - */ -public abstract class SelectionVector { - private int recordCount; - private ArrowBuf buffer; - - public SelectionVector(ArrowBuf buffer) { - this.buffer = buffer; - } - - public final ArrowBuf getBuffer() { - return this.buffer; - } - - /* - * The maximum number of records that the selection vector can hold. - */ - public final int getMaxRecords() { - return capAtMaxInt(buffer.capacity() / getRecordSize()); - } - - /* - * The number of records held by the selection vector. - */ - public final int getRecordCount() { - return this.recordCount; - } - - /* - * Set the number of records in the selection vector. - */ - final void setRecordCount(int recordCount) { - if (recordCount * getRecordSize() > buffer.capacity()) { - throw new IllegalArgumentException( - "recordCount " - + recordCount - + " of size " - + getRecordSize() - + " exceeds buffer capacity " - + buffer.capacity()); - } - - this.recordCount = recordCount; - } - - /* - * Get the value at specified index. - */ - public abstract int getIndex(int index); - - /* - * Get the record size of the selection vector itself. 
- */ - abstract int getRecordSize(); - - abstract SelectionVectorType getType(); - - final void checkReadBounds(int index) { - if (index >= this.recordCount) { - throw new IllegalArgumentException("index " + index + " is >= recordCount " + recordCount); - } - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt16.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt16.java deleted file mode 100644 index 8ef35d0073cbb..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt16.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.evaluator; - -import org.apache.arrow.gandiva.ipc.GandivaTypes.SelectionVectorType; -import org.apache.arrow.memory.ArrowBuf; - -/** Selection vector with records of arrow type INT16. 
*/ -public class SelectionVectorInt16 extends SelectionVector { - - public SelectionVectorInt16(ArrowBuf buffer) { - super(buffer); - } - - @Override - public int getRecordSize() { - return 2; - } - - @Override - public SelectionVectorType getType() { - return SelectionVectorType.SV_INT16; - } - - @Override - public int getIndex(int index) { - checkReadBounds(index); - - char value = getBuffer().getChar(index * getRecordSize()); - return (int) value; - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt32.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt32.java deleted file mode 100644 index 324dd54be244b..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/SelectionVectorInt32.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.evaluator; - -import org.apache.arrow.gandiva.ipc.GandivaTypes.SelectionVectorType; -import org.apache.arrow.memory.ArrowBuf; - -/** Selection vector with records of arrow type INT32. 
*/ -public class SelectionVectorInt32 extends SelectionVector { - - public SelectionVectorInt32(ArrowBuf buffer) { - super(buffer); - } - - @Override - public int getRecordSize() { - return 4; - } - - @Override - public SelectionVectorType getType() { - return SelectionVectorType.SV_INT32; - } - - @Override - public int getIndex(int index) { - checkReadBounds(index); - - return getBuffer().getInt(index * getRecordSize()); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java deleted file mode 100644 index a50a1f5677006..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.evaluator; - -import org.apache.arrow.vector.BaseVariableWidthVector; - -/** - * This class provides the functionality to expand output vectors using a callback mechanism from - * gandiva. 
- */ -public class VectorExpander { - private final BaseVariableWidthVector[] vectors; - - public VectorExpander(BaseVariableWidthVector[] vectors) { - this.vectors = vectors; - } - - /** Result of vector expansion. */ - public static class ExpandResult { - public long address; - public long capacity; - - public ExpandResult(long address, long capacity) { - this.address = address; - this.capacity = capacity; - } - } - - /** - * Expand vector at specified index. This is used as a back call from jni, and is only relevant - * for variable width vectors. - * - * @param index index of buffer in the list passed to jni. - * @param toCapacity the size to which the buffer should be expanded to. - * @return address and size of the buffer after expansion. - */ - public ExpandResult expandOutputVectorAtIndex(int index, long toCapacity) { - if (index >= vectors.length || vectors[index] == null) { - throw new IllegalArgumentException("invalid index " + index); - } - - BaseVariableWidthVector vector = vectors[index]; - while (vector.getDataBuffer().capacity() < toCapacity) { - vector.reallocDataBuffer(); - } - return new ExpandResult( - vector.getDataBuffer().memoryAddress(), vector.getDataBuffer().capacity()); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/EvaluatorClosedException.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/EvaluatorClosedException.java deleted file mode 100644 index 0526e2cdeaf67..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/EvaluatorClosedException.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.exceptions; - -/** Indicates an attempted call to methods on a closed evaluator. */ -public class EvaluatorClosedException extends GandivaException { - public EvaluatorClosedException() { - super("Cannot invoke methods on evaluator after closing it"); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java deleted file mode 100644 index 4f3f0f8fcf29d..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/GandivaException.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.gandiva.exceptions; - -/** Base class for all specialized exceptions this package uses. */ -public class GandivaException extends Exception { - - public GandivaException(String msg) { - super(msg); - } - - public GandivaException(String msg, Exception cause) { - super(msg, cause); - } - - @Override - public String toString() { - return getMessage(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/UnsupportedTypeException.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/UnsupportedTypeException.java deleted file mode 100644 index 82b4b371d2bee..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/exceptions/UnsupportedTypeException.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.exceptions; - -/** Represents an exception thrown while dealing with unsupported types. 
*/ -public class UnsupportedTypeException extends GandivaException { - public UnsupportedTypeException(String msg) { - super(msg); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/AndNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/AndNode.java deleted file mode 100644 index 5fdff20b2a8c5..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/AndNode.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import java.util.List; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; - -/** Node representing a logical And expression. 
*/ -class AndNode implements TreeNode { - private final List children; - - AndNode(List children) { - this.children = children; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.AndNode.Builder andNode = GandivaTypes.AndNode.newBuilder(); - - for (TreeNode arg : children) { - andNode.addArgs(arg.toProtobuf()); - } - - GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); - builder.setAndNode(andNode.build()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java deleted file mode 100644 index 1bfac58b270c1..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java +++ /dev/null @@ -1,400 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.gandiva.expression; - -import org.apache.arrow.flatbuf.DateUnit; -import org.apache.arrow.flatbuf.IntervalUnit; -import org.apache.arrow.flatbuf.TimeUnit; -import org.apache.arrow.flatbuf.Type; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.exceptions.UnsupportedTypeException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; - -/** Utility methods to convert between Arrow and Gandiva types. */ -public class ArrowTypeHelper { - private ArrowTypeHelper() {} - - static final int WIDTH_8 = 8; - static final int WIDTH_16 = 16; - static final int WIDTH_32 = 32; - static final int WIDTH_64 = 64; - - private static void initArrowTypeInt( - ArrowType.Int intType, GandivaTypes.ExtGandivaType.Builder builder) throws GandivaException { - int width = intType.getBitWidth(); - - if (intType.getIsSigned()) { - switch (width) { - case WIDTH_8: - { - builder.setType(GandivaTypes.GandivaType.INT8); - return; - } - case WIDTH_16: - { - builder.setType(GandivaTypes.GandivaType.INT16); - return; - } - case WIDTH_32: - { - builder.setType(GandivaTypes.GandivaType.INT32); - return; - } - case WIDTH_64: - { - builder.setType(GandivaTypes.GandivaType.INT64); - return; - } - default: - { - throw new UnsupportedTypeException("Unsupported width for integer type"); - } - } - } - - // unsigned int - switch (width) { - case WIDTH_8: - { - builder.setType(GandivaTypes.GandivaType.UINT8); - return; - } - case WIDTH_16: - { - builder.setType(GandivaTypes.GandivaType.UINT16); - return; - } - case WIDTH_32: - { - builder.setType(GandivaTypes.GandivaType.UINT32); - return; - } - case WIDTH_64: - { - builder.setType(GandivaTypes.GandivaType.UINT64); - return; - } - default: - { - throw new UnsupportedTypeException("Unsupported 
width for integer type"); - } - } - } - - private static void initArrowTypeFloat( - ArrowType.FloatingPoint floatType, GandivaTypes.ExtGandivaType.Builder builder) - throws GandivaException { - switch (floatType.getPrecision()) { - case HALF: - { - builder.setType(GandivaTypes.GandivaType.HALF_FLOAT); - break; - } - case SINGLE: - { - builder.setType(GandivaTypes.GandivaType.FLOAT); - break; - } - case DOUBLE: - { - builder.setType(GandivaTypes.GandivaType.DOUBLE); - break; - } - default: - { - throw new UnsupportedTypeException("Floating point type with unknown precision"); - } - } - } - - private static void initArrowTypeDecimal( - ArrowType.Decimal decimalType, GandivaTypes.ExtGandivaType.Builder builder) { - Preconditions.checkArgument( - decimalType.getPrecision() > 0 && decimalType.getPrecision() <= 38, - "Gandiva only supports decimals of upto 38 " - + "precision. Input precision : " - + decimalType.getPrecision()); - builder.setPrecision(decimalType.getPrecision()); - builder.setScale(decimalType.getScale()); - builder.setType(GandivaTypes.GandivaType.DECIMAL); - } - - private static void initArrowTypeDate( - ArrowType.Date dateType, GandivaTypes.ExtGandivaType.Builder builder) { - short dateUnit = dateType.getUnit().getFlatbufID(); - switch (dateUnit) { - case DateUnit.DAY: - { - builder.setType(GandivaTypes.GandivaType.DATE32); - break; - } - case DateUnit.MILLISECOND: - { - builder.setType(GandivaTypes.GandivaType.DATE64); - break; - } - default: - { - // not supported - break; - } - } - } - - private static void initArrowTypeTime( - ArrowType.Time timeType, GandivaTypes.ExtGandivaType.Builder builder) { - short timeUnit = timeType.getUnit().getFlatbufID(); - switch (timeUnit) { - case TimeUnit.SECOND: - { - builder.setType(GandivaTypes.GandivaType.TIME32); - builder.setTimeUnit(GandivaTypes.TimeUnit.SEC); - break; - } - case TimeUnit.MILLISECOND: - { - builder.setType(GandivaTypes.GandivaType.TIME32); - 
builder.setTimeUnit(GandivaTypes.TimeUnit.MILLISEC); - break; - } - case TimeUnit.MICROSECOND: - { - builder.setType(GandivaTypes.GandivaType.TIME64); - builder.setTimeUnit(GandivaTypes.TimeUnit.MICROSEC); - break; - } - case TimeUnit.NANOSECOND: - { - builder.setType(GandivaTypes.GandivaType.TIME64); - builder.setTimeUnit(GandivaTypes.TimeUnit.NANOSEC); - break; - } - default: - { - // not supported - } - } - } - - private static void initArrowTypeTimestamp( - ArrowType.Timestamp timestampType, GandivaTypes.ExtGandivaType.Builder builder) { - short timeUnit = timestampType.getUnit().getFlatbufID(); - switch (timeUnit) { - case TimeUnit.SECOND: - { - builder.setType(GandivaTypes.GandivaType.TIMESTAMP); - builder.setTimeUnit(GandivaTypes.TimeUnit.SEC); - break; - } - case TimeUnit.MILLISECOND: - { - builder.setType(GandivaTypes.GandivaType.TIMESTAMP); - builder.setTimeUnit(GandivaTypes.TimeUnit.MILLISEC); - break; - } - case TimeUnit.MICROSECOND: - { - builder.setType(GandivaTypes.GandivaType.TIMESTAMP); - builder.setTimeUnit(GandivaTypes.TimeUnit.MICROSEC); - break; - } - case TimeUnit.NANOSECOND: - { - builder.setType(GandivaTypes.GandivaType.TIMESTAMP); - builder.setTimeUnit(GandivaTypes.TimeUnit.NANOSEC); - break; - } - default: - { - // not supported - } - } - } - - private static void initArrowTypeInterval( - ArrowType.Interval interval, GandivaTypes.ExtGandivaType.Builder builder) { - short intervalUnit = interval.getUnit().getFlatbufID(); - switch (intervalUnit) { - case IntervalUnit.YEAR_MONTH: - { - builder.setType(GandivaTypes.GandivaType.INTERVAL); - builder.setIntervalType(GandivaTypes.IntervalType.YEAR_MONTH); - break; - } - case IntervalUnit.DAY_TIME: - { - builder.setType(GandivaTypes.GandivaType.INTERVAL); - builder.setIntervalType(GandivaTypes.IntervalType.DAY_TIME); - break; - } - default: - { - // not supported - } - } - } - - /** - * Converts an arrow type into a protobuf. 
- * - * @param arrowType Arrow type to be converted - * @return Protobuf representing the arrow type - */ - public static GandivaTypes.ExtGandivaType arrowTypeToProtobuf(ArrowType arrowType) - throws GandivaException { - GandivaTypes.ExtGandivaType.Builder builder = GandivaTypes.ExtGandivaType.newBuilder(); - - byte typeId = arrowType.getTypeID().getFlatbufID(); - switch (typeId) { - case Type.NONE: - { // 0 - builder.setType(GandivaTypes.GandivaType.NONE); - break; - } - case Type.Null: - { // 1 - // TODO: Need to handle this later - break; - } - case Type.Int: - { // 2 - ArrowTypeHelper.initArrowTypeInt((ArrowType.Int) arrowType, builder); - break; - } - case Type.FloatingPoint: - { // 3 - ArrowTypeHelper.initArrowTypeFloat((ArrowType.FloatingPoint) arrowType, builder); - break; - } - case Type.Binary: - { // 4 - builder.setType(GandivaTypes.GandivaType.BINARY); - break; - } - case Type.Utf8: - { // 5 - builder.setType(GandivaTypes.GandivaType.UTF8); - break; - } - case Type.Bool: - { // 6 - builder.setType(GandivaTypes.GandivaType.BOOL); - break; - } - case Type.Decimal: - { // 7 - ArrowTypeHelper.initArrowTypeDecimal((ArrowType.Decimal) arrowType, builder); - break; - } - case Type.Date: - { // 8 - ArrowTypeHelper.initArrowTypeDate((ArrowType.Date) arrowType, builder); - break; - } - case Type.Time: - { // 9 - ArrowTypeHelper.initArrowTypeTime((ArrowType.Time) arrowType, builder); - break; - } - case Type.Timestamp: - { // 10 - ArrowTypeHelper.initArrowTypeTimestamp((ArrowType.Timestamp) arrowType, builder); - break; - } - case Type.Interval: - { // 11 - ArrowTypeHelper.initArrowTypeInterval((ArrowType.Interval) arrowType, builder); - break; - } - case Type.List: - { // 12 - break; - } - case Type.Struct_: - { // 13 - break; - } - case Type.Union: - { // 14 - break; - } - case Type.FixedSizeBinary: - { // 15 - break; - } - case Type.FixedSizeList: - { // 16 - break; - } - case Type.Map: - { // 17 - break; - } - default: - { - break; - } - } - - if 
(!builder.hasType()) { - // type has not been set - // throw an exception - throw new UnsupportedTypeException("Unsupported type " + arrowType.toString()); - } - - return builder.build(); - } - - /** - * Converts an arrow field object to a protobuf. - * - * @param field Arrow field to be converted - * @return Protobuf representing the arrow field - */ - public static GandivaTypes.Field arrowFieldToProtobuf(Field field) throws GandivaException { - GandivaTypes.Field.Builder builder = GandivaTypes.Field.newBuilder(); - builder.setName(field.getName()); - builder.setType(ArrowTypeHelper.arrowTypeToProtobuf(field.getType())); - builder.setNullable(field.isNullable()); - - for (Field child : field.getChildren()) { - builder.addChildren(ArrowTypeHelper.arrowFieldToProtobuf(child)); - } - - return builder.build(); - } - - /** - * Converts a schema object to a protobuf. - * - * @param schema Schema object to be converted - * @return Protobuf representing a schema object - */ - public static GandivaTypes.Schema arrowSchemaToProtobuf(Schema schema) throws GandivaException { - GandivaTypes.Schema.Builder builder = GandivaTypes.Schema.newBuilder(); - - for (Field field : schema.getFields()) { - builder.addColumns(ArrowTypeHelper.arrowFieldToProtobuf(field)); - } - - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BinaryNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BinaryNode.java deleted file mode 100644 index 2559a398fdfe8..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BinaryNode.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import com.google.protobuf.ByteString; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; - -/** Used to represent expression tree nodes representing binary constants. */ -class BinaryNode implements TreeNode { - private final byte[] value; - - public BinaryNode(byte[] value) { - this.value = value; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.BinaryNode binaryNode = - GandivaTypes.BinaryNode.newBuilder().setValue(ByteString.copyFrom(value)).build(); - - return GandivaTypes.TreeNode.newBuilder().setBinaryNode(binaryNode).build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BooleanNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BooleanNode.java deleted file mode 100644 index 5cbc0ace3ea7e..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/BooleanNode.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; - -/** - * Used to represent expression tree nodes representing boolean constants. Used while creating - * expressions like if (!x). - */ -class BooleanNode implements TreeNode { - private final Boolean value; - - BooleanNode(Boolean value) { - this.value = value; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.BooleanNode.Builder boolBuilder = GandivaTypes.BooleanNode.newBuilder(); - boolBuilder.setValue(value.booleanValue()); - - GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); - builder.setBooleanNode(boolBuilder.build()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/Condition.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/Condition.java deleted file mode 100644 index 89cc39812cd10..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/Condition.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; - -/** Opaque class representing a filter condition. */ -public class Condition { - private final TreeNode root; - - Condition(TreeNode root) { - this.root = root; - } - - /** - * Converts an condition expression into a protobuf. - * - * @return A protobuf representing the condition expression tree - */ - public GandivaTypes.Condition toProtobuf() throws GandivaException { - GandivaTypes.Condition.Builder builder = GandivaTypes.Condition.newBuilder(); - builder.setRoot(root.toProtobuf()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java deleted file mode 100644 index 0d0f72226e847..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DecimalNode.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; - -/** - * Used to represent expression tree nodes representing decimal constants. Used in the expression (x - * + 5.0) - */ -class DecimalNode implements TreeNode { - private final String value; - private final int precision; - private final int scale; - - DecimalNode(String value, int precision, int scale) { - this.value = value; - this.precision = precision; - this.scale = scale; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.DecimalNode.Builder decimalNode = GandivaTypes.DecimalNode.newBuilder(); - decimalNode.setValue(value); - decimalNode.setPrecision(precision); - decimalNode.setScale(scale); - - GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); - builder.setDecimalNode(decimalNode.build()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DoubleNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DoubleNode.java deleted file mode 100644 index 0fbb92d8a5678..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/DoubleNode.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more 
- * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; - -/** - * Used to represent expression tree nodes representing double constants. Used in the expression (x - * + 5.0) - */ -class DoubleNode implements TreeNode { - private final Double value; - - DoubleNode(Double value) { - this.value = value; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.DoubleNode.Builder doubleBuilder = GandivaTypes.DoubleNode.newBuilder(); - doubleBuilder.setValue(value.doubleValue()); - - GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); - builder.setDoubleNode(doubleBuilder.build()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ExpressionTree.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ExpressionTree.java deleted file mode 100644 index 67ad8eef53b8f..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ExpressionTree.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license 
agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; -import org.apache.arrow.vector.types.pojo.Field; - -/** Opaque class representing an expression. */ -public class ExpressionTree { - private final TreeNode root; - private final Field resultField; - - ExpressionTree(TreeNode root, Field resultField) { - this.root = root; - this.resultField = resultField; - } - - /** - * Converts an expression tree into a protobuf. 
- * - * @return A protobuf representing the expression tree - */ - public GandivaTypes.ExpressionRoot toProtobuf() throws GandivaException { - GandivaTypes.ExpressionRoot.Builder builder = GandivaTypes.ExpressionRoot.newBuilder(); - builder.setRoot(root.toProtobuf()); - builder.setResultType(ArrowTypeHelper.arrowFieldToProtobuf(resultField)); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FieldNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FieldNode.java deleted file mode 100644 index b1667fdaec6bf..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FieldNode.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; -import org.apache.arrow.vector.types.pojo.Field; - -/** Opaque class that represents a tree node. 
*/ -class FieldNode implements TreeNode { - private final Field field; - - FieldNode(Field field) { - this.field = field; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.FieldNode.Builder fieldNode = GandivaTypes.FieldNode.newBuilder(); - fieldNode.setField(ArrowTypeHelper.arrowFieldToProtobuf(field)); - - GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); - builder.setFieldNode(fieldNode.build()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FloatNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FloatNode.java deleted file mode 100644 index ae152372212ef..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FloatNode.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; - -/** - * Used to represent expression tree nodes representing float constants. 
Used in the expression (x + - * 5.0) - */ -class FloatNode implements TreeNode { - private final Float value; - - public FloatNode(Float value) { - this.value = value; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.FloatNode.Builder floatBuilder = GandivaTypes.FloatNode.newBuilder(); - floatBuilder.setValue(value.floatValue()); - - GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); - builder.setFloatNode(floatBuilder.build()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FunctionNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FunctionNode.java deleted file mode 100644 index a5c6fb77f9623..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/FunctionNode.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.gandiva.expression; - -import java.util.List; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** Node representing an arbitrary function in an expression. */ -class FunctionNode implements TreeNode { - private final String function; - private final List children; - private final ArrowType retType; - - FunctionNode(String function, List children, ArrowType retType) { - this.function = function; - this.children = children; - this.retType = retType; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.FunctionNode.Builder fnNode = GandivaTypes.FunctionNode.newBuilder(); - fnNode.setFunctionName(function); - fnNode.setReturnType(ArrowTypeHelper.arrowTypeToProtobuf(retType)); - - for (TreeNode arg : children) { - fnNode.addInArgs(arg.toProtobuf()); - } - - GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); - builder.setFnNode(fnNode.build()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IfNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IfNode.java deleted file mode 100644 index cbb3f80f4e357..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IfNode.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** Node representing a if-then-else block expression. */ -class IfNode implements TreeNode { - private final TreeNode condition; - private final TreeNode thenNode; - private final TreeNode elseNode; - private final ArrowType retType; - - IfNode(TreeNode condition, TreeNode thenNode, TreeNode elseNode, ArrowType retType) { - this.condition = condition; - this.thenNode = thenNode; - this.elseNode = elseNode; - this.retType = retType; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.IfNode.Builder ifNodeBuilder = GandivaTypes.IfNode.newBuilder(); - ifNodeBuilder.setCond(condition.toProtobuf()); - ifNodeBuilder.setThenNode(thenNode.toProtobuf()); - ifNodeBuilder.setElseNode(elseNode.toProtobuf()); - ifNodeBuilder.setReturnType(ArrowTypeHelper.arrowTypeToProtobuf(retType)); - - GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); - builder.setIfNode(ifNodeBuilder.build()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java deleted file mode 100644 index 2089c9b7cbe67..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Licensed to the 
Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import com.google.protobuf.ByteString; -import java.math.BigDecimal; -import java.nio.charset.Charset; -import java.util.Set; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; - -/** In Node representation in java. 
*/ -public class InNode implements TreeNode { - private static final Charset charset = Charset.forName("UTF-8"); - - private final Set intValues; - private final Set longValues; - private final Set floatValues; - private final Set doubleValues; - private final Set decimalValues; - private final Set stringValues; - private final Set binaryValues; - private final TreeNode input; - - private final Integer precision; - private final Integer scale; - - private InNode( - Set values, - Set longValues, - Set stringValues, - Set binaryValues, - Set decimalValues, - Integer precision, - Integer scale, - Set floatValues, - Set doubleValues, - TreeNode node) { - this.intValues = values; - this.longValues = longValues; - this.decimalValues = decimalValues; - this.precision = precision; - this.scale = scale; - this.stringValues = stringValues; - this.binaryValues = binaryValues; - this.floatValues = floatValues; - this.doubleValues = doubleValues; - this.input = node; - } - - /** - * Makes an IN node for int values. - * - * @param node Node with the 'IN' clause. - * @param intValues Int values to build the IN node. - * @return InNode referring to tree node. - */ - public static InNode makeIntInExpr(TreeNode node, Set intValues) { - return new InNode(intValues, null, null, null, null, null, null, null, null, node); - } - - /** - * Makes an IN node for long values. - * - * @param node Node with the 'IN' clause. - * @param longValues Long values to build the IN node. - * @return InNode referring to tree node. - */ - public static InNode makeLongInExpr(TreeNode node, Set longValues) { - return new InNode(null, longValues, null, null, null, null, null, null, null, node); - } - - /** - * Makes an IN node for float values. - * - * @param node Node with the 'IN' clause. - * @param floatValues Float values to build the IN node. - * @return InNode referring to tree node. 
- */ - public static InNode makeFloatInExpr(TreeNode node, Set floatValues) { - return new InNode(null, null, null, null, null, null, null, floatValues, null, node); - } - - /** - * Makes an IN node for double values. - * - * @param node Node with the 'IN' clause. - * @param doubleValues Double values to build the IN node. - * @return InNode referring to tree node. - */ - public static InNode makeDoubleInExpr(TreeNode node, Set doubleValues) { - return new InNode(null, null, null, null, null, null, null, null, doubleValues, node); - } - - public static InNode makeDecimalInExpr( - TreeNode node, Set decimalValues, Integer precision, Integer scale) { - return new InNode(null, null, null, null, decimalValues, precision, scale, null, null, node); - } - - public static InNode makeStringInExpr(TreeNode node, Set stringValues) { - return new InNode(null, null, stringValues, null, null, null, null, null, null, node); - } - - public static InNode makeBinaryInExpr(TreeNode node, Set binaryValues) { - return new InNode(null, null, null, binaryValues, null, null, null, null, null, node); - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.InNode.Builder inNode = GandivaTypes.InNode.newBuilder(); - - inNode.setNode(input.toProtobuf()); - - if (intValues != null) { - GandivaTypes.IntConstants.Builder intConstants = GandivaTypes.IntConstants.newBuilder(); - intValues.stream() - .forEach( - val -> - intConstants.addIntValues( - GandivaTypes.IntNode.newBuilder().setValue(val).build())); - inNode.setIntValues(intConstants.build()); - } else if (longValues != null) { - GandivaTypes.LongConstants.Builder longConstants = GandivaTypes.LongConstants.newBuilder(); - longValues.stream() - .forEach( - val -> - longConstants.addLongValues( - GandivaTypes.LongNode.newBuilder().setValue(val).build())); - inNode.setLongValues(longConstants.build()); - } else if (floatValues != null) { - GandivaTypes.FloatConstants.Builder floatConstants = 
GandivaTypes.FloatConstants.newBuilder(); - floatValues.stream() - .forEach( - val -> - floatConstants.addFloatValues( - GandivaTypes.FloatNode.newBuilder().setValue(val).build())); - inNode.setFloatValues(floatConstants.build()); - } else if (doubleValues != null) { - GandivaTypes.DoubleConstants.Builder doubleConstants = - GandivaTypes.DoubleConstants.newBuilder(); - doubleValues.stream() - .forEach( - val -> - doubleConstants.addDoubleValues( - GandivaTypes.DoubleNode.newBuilder().setValue(val).build())); - inNode.setDoubleValues(doubleConstants.build()); - } else if (decimalValues != null) { - GandivaTypes.DecimalConstants.Builder decimalConstants = - GandivaTypes.DecimalConstants.newBuilder(); - decimalValues.stream() - .forEach( - val -> - decimalConstants.addDecimalValues( - GandivaTypes.DecimalNode.newBuilder() - .setValue(val.toPlainString()) - .setPrecision(precision) - .setScale(scale) - .build())); - inNode.setDecimalValues(decimalConstants.build()); - } else if (stringValues != null) { - GandivaTypes.StringConstants.Builder stringConstants = - GandivaTypes.StringConstants.newBuilder(); - stringValues.stream() - .forEach( - val -> - stringConstants.addStringValues( - GandivaTypes.StringNode.newBuilder() - .setValue(ByteString.copyFrom(val.getBytes(charset))) - .build())); - inNode.setStringValues(stringConstants.build()); - } else if (binaryValues != null) { - GandivaTypes.BinaryConstants.Builder binaryConstants = - GandivaTypes.BinaryConstants.newBuilder(); - binaryValues.stream() - .forEach( - val -> - binaryConstants.addBinaryValues( - GandivaTypes.BinaryNode.newBuilder() - .setValue(ByteString.copyFrom(val)) - .build())); - inNode.setBinaryValues(binaryConstants.build()); - } - GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); - builder.setInNode(inNode.build()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IntNode.java 
b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IntNode.java deleted file mode 100644 index 26321b4dfd4e7..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/IntNode.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; - -/** - * Used to represent expression tree nodes representing int constants. 
Used in the expression (x + - * 5) - */ -class IntNode implements TreeNode { - private final Integer value; - - IntNode(Integer value) { - this.value = value; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.IntNode.Builder intBuilder = GandivaTypes.IntNode.newBuilder(); - intBuilder.setValue(value.intValue()); - - GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); - builder.setIntNode(intBuilder.build()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/LongNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/LongNode.java deleted file mode 100644 index 8a2c8812603f6..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/LongNode.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; - -/** - * Used to represent expression tree nodes representing long constants. 
Used in the expression (x + - * 5L) - */ -class LongNode implements TreeNode { - private final Long value; - - LongNode(Long value) { - this.value = value; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.LongNode.Builder longBuilder = GandivaTypes.LongNode.newBuilder(); - longBuilder.setValue(value.longValue()); - - GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); - builder.setLongNode(longBuilder.build()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/NullNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/NullNode.java deleted file mode 100644 index 110b367ff835b..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/NullNode.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** An expression indicating a null value. 
*/ -class NullNode implements TreeNode { - private final ArrowType type; - - NullNode(ArrowType type) { - this.type = type; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.NullNode.Builder nullNode = GandivaTypes.NullNode.newBuilder(); - nullNode.setType(ArrowTypeHelper.arrowTypeToProtobuf(type)); - - GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); - builder.setNullNode(nullNode.build()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/OrNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/OrNode.java deleted file mode 100644 index b8e273a5faf90..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/OrNode.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import java.util.List; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; - -/** Represents a logical OR Node. 
*/ -class OrNode implements TreeNode { - private final List children; - - OrNode(List children) { - this.children = children; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.OrNode.Builder orNode = GandivaTypes.OrNode.newBuilder(); - - for (TreeNode arg : children) { - orNode.addArgs(arg.toProtobuf()); - } - - GandivaTypes.TreeNode.Builder builder = GandivaTypes.TreeNode.newBuilder(); - builder.setOrNode(orNode.build()); - return builder.build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/StringNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/StringNode.java deleted file mode 100644 index ad7ff9197aedf..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/StringNode.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import com.google.protobuf.ByteString; -import java.nio.charset.Charset; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; - -/** Used to represent expression tree nodes representing utf8 constants. 
*/ -class StringNode implements TreeNode { - private static final Charset charset = Charset.forName("UTF-8"); - private final String value; - - public StringNode(String value) { - this.value = value; - } - - @Override - public GandivaTypes.TreeNode toProtobuf() throws GandivaException { - GandivaTypes.StringNode stringNode = - GandivaTypes.StringNode.newBuilder() - .setValue(ByteString.copyFrom(value.getBytes(charset))) - .build(); - - return GandivaTypes.TreeNode.newBuilder().setStringNode(stringNode).build(); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java deleted file mode 100644 index 96838e5ad1533..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeBuilder.java +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.gandiva.expression; - -import java.math.BigDecimal; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; - -/** Contains helper functions for constructing expression trees. */ -public class TreeBuilder { - private TreeBuilder() {} - - /** Helper functions to create literal constants. */ - public static TreeNode makeLiteral(Boolean booleanConstant) { - return new BooleanNode(booleanConstant); - } - - public static TreeNode makeLiteral(Float floatConstant) { - return new FloatNode(floatConstant); - } - - public static TreeNode makeLiteral(Double doubleConstant) { - return new DoubleNode(doubleConstant); - } - - public static TreeNode makeLiteral(Integer integerConstant) { - return new IntNode(integerConstant); - } - - public static TreeNode makeLiteral(Long longConstant) { - return new LongNode(longConstant); - } - - public static TreeNode makeStringLiteral(String stringConstant) { - return new StringNode(stringConstant); - } - - public static TreeNode makeBinaryLiteral(byte[] binaryConstant) { - return new BinaryNode(binaryConstant); - } - - public static TreeNode makeDecimalLiteral(String decimalConstant, int precision, int scale) { - return new DecimalNode(decimalConstant, precision, scale); - } - - /** create a null literal. */ - public static TreeNode makeNull(ArrowType type) { - return new NullNode(type); - } - - /** - * Invoke this function to create a node representing a field, e.g. a column name. - * - * @param field represents the input argument - includes the name and type of the field - * @return Node representing a field - */ - public static TreeNode makeField(Field field) { - return new FieldNode(field); - } - - /** - * Invoke this function to create a node representing a function. - * - * @param function Name of the function, e.g. 
add - * @param children The arguments to the function - * @param retType The type of the return value of the operator - * @return Node representing a function - */ - public static TreeNode makeFunction(String function, List children, ArrowType retType) { - return new FunctionNode(function, children, retType); - } - - /** - * Invoke this function to create a node representing an if-clause. - * - * @param condition Node representing the condition - * @param thenNode Node representing the if-block - * @param elseNode Node representing the else-block - * @param retType Return type of the node - * @return Node representing an if-clause - */ - public static TreeNode makeIf( - TreeNode condition, TreeNode thenNode, TreeNode elseNode, ArrowType retType) { - return new IfNode(condition, thenNode, elseNode, retType); - } - - /** - * Invoke this function to create a node representing an and-clause. - * - * @param nodes Nodes in the 'and' clause. - * @return Node representing an and-clause - */ - public static TreeNode makeAnd(List nodes) { - return new AndNode(nodes); - } - - /** - * Invoke this function to create a node representing an or-clause. - * - * @param nodes Nodes in the 'or' clause. - * @return Node representing an or-clause - */ - public static TreeNode makeOr(List nodes) { - return new OrNode(nodes); - } - - /** - * Invoke this function to create an expression tree. - * - * @param root is returned by a call to MakeField, MakeFunction, or MakeIf - * @param resultField represents the return value of the expression - * @return ExpressionTree referring to the root of an expression tree - */ - public static ExpressionTree makeExpression(TreeNode root, Field resultField) { - return new ExpressionTree(root, resultField); - } - - /** - * Short cut to create an expression tree involving a single function, e.g. a+b+c. - * - * @param function Name of the function, e.g. 
add() - * @param inFields In arguments to the function - * @param resultField represents the return value of the expression - * @return ExpressionTree referring to the root of an expression tree - */ - public static ExpressionTree makeExpression( - String function, List inFields, Field resultField) { - List children = new ArrayList(inFields.size()); - for (Field field : inFields) { - children.add(makeField(field)); - } - - TreeNode root = makeFunction(function, children, resultField.getType()); - return makeExpression(root, resultField); - } - - /** - * Invoke this function to create a condition. - * - * @param root is returned by a call to MakeField, MakeFunction, MakeIf, .. - * @return condition referring to the root of an expression tree - */ - public static Condition makeCondition(TreeNode root) { - return new Condition(root); - } - - /** - * Short cut to create an expression tree involving a single function, e.g. a+b+c. - * - * @param function Name of the function, e.g. add() - * @param inFields In arguments to the function - * @return condition referring to the root of an expression tree - */ - public static Condition makeCondition(String function, List inFields) { - List children = new ArrayList<>(inFields.size()); - for (Field field : inFields) { - children.add(makeField(field)); - } - - TreeNode root = makeFunction(function, children, new ArrowType.Bool()); - return makeCondition(root); - } - - public static TreeNode makeInExpressionInt32(TreeNode resultNode, Set intValues) { - return InNode.makeIntInExpr(resultNode, intValues); - } - - public static TreeNode makeInExpressionBigInt(TreeNode resultNode, Set longValues) { - return InNode.makeLongInExpr(resultNode, longValues); - } - - public static TreeNode makeInExpressionDecimal( - TreeNode resultNode, Set decimalValues, Integer precision, Integer scale) { - return InNode.makeDecimalInExpr(resultNode, decimalValues, precision, scale); - } - - public static TreeNode makeInExpressionFloat(TreeNode 
resultNode, Set floatValues) { - return InNode.makeFloatInExpr(resultNode, floatValues); - } - - public static TreeNode makeInExpressionDouble(TreeNode resultNode, Set doubleValues) { - return InNode.makeDoubleInExpr(resultNode, doubleValues); - } - - public static TreeNode makeInExpressionString(TreeNode resultNode, Set stringValues) { - return InNode.makeStringInExpr(resultNode, stringValues); - } - - public static TreeNode makeInExpressionBinary(TreeNode resultNode, Set binaryValues) { - return InNode.makeBinaryInExpr(resultNode, binaryValues); - } -} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeNode.java deleted file mode 100644 index b79ec6df2153c..0000000000000 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/TreeNode.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; - -/** Defines an internal node in the expression tree. 
*/ -public interface TreeNode { - /** - * Converts a TreeNode into a protobuf. - * - * @return A treenode protobuf - * @throws GandivaException in case the TreeNode cannot be processed - */ - GandivaTypes.TreeNode toProtobuf() throws GandivaException; -} diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java deleted file mode 100644 index 5f585e671f676..0000000000000 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java +++ /dev/null @@ -1,409 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.gandiva.evaluator; - -import java.math.BigDecimal; -import java.time.Instant; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Random; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.expression.Condition; -import org.apache.arrow.gandiva.expression.ExpressionTree; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; - -class BaseEvaluatorTest { - - interface BaseEvaluator { - - void evaluate(ArrowRecordBatch recordBatch, BufferAllocator allocator) throws GandivaException; - - long getElapsedMillis(); - } - - class ProjectEvaluator implements BaseEvaluator { - - private Projector projector; - private DataAndVectorGenerator generator; - private int numExprs; - private int maxRowsInBatch; - private long elapsedTime = 0; - private List outputVectors = new ArrayList<>(); - - public ProjectEvaluator( - Projector projector, DataAndVectorGenerator generator, int numExprs, int maxRowsInBatch) { - this.projector = projector; - this.generator = generator; - this.numExprs = numExprs; - this.maxRowsInBatch = maxRowsInBatch; - } - - @Override - public void evaluate(ArrowRecordBatch recordBatch, BufferAllocator allocator) - throws GandivaException { - // set up output 
vectors - // for each expression, generate the output vector - for (int i = 0; i < numExprs; i++) { - ValueVector valueVector = generator.generateOutputVector(maxRowsInBatch); - outputVectors.add(valueVector); - } - - try { - long start = System.nanoTime(); - projector.evaluate(recordBatch, outputVectors); - long finish = System.nanoTime(); - elapsedTime += (finish - start); - } finally { - for (ValueVector valueVector : outputVectors) { - valueVector.close(); - } - } - outputVectors.clear(); - } - - @Override - public long getElapsedMillis() { - return TimeUnit.NANOSECONDS.toMillis(elapsedTime); - } - } - - class FilterEvaluator implements BaseEvaluator { - - private Filter filter; - private long elapsedTime = 0; - - public FilterEvaluator(Filter filter) { - this.filter = filter; - } - - @Override - public void evaluate(ArrowRecordBatch recordBatch, BufferAllocator allocator) - throws GandivaException { - ArrowBuf selectionBuffer = allocator.buffer(recordBatch.getLength() * 2); - SelectionVectorInt16 selectionVector = new SelectionVectorInt16(selectionBuffer); - - try { - long start = System.nanoTime(); - filter.evaluate(recordBatch, selectionVector); - long finish = System.nanoTime(); - elapsedTime += (finish - start); - } finally { - selectionBuffer.close(); - } - } - - @Override - public long getElapsedMillis() { - return TimeUnit.NANOSECONDS.toMillis(elapsedTime); - } - } - - interface DataAndVectorGenerator { - - void writeData(ArrowBuf buffer); - - ValueVector generateOutputVector(int numRowsInBatch); - } - - class Int32DataAndVectorGenerator implements DataAndVectorGenerator { - - protected final BufferAllocator allocator; - protected final Random rand; - - Int32DataAndVectorGenerator(BufferAllocator allocator) { - this.allocator = allocator; - this.rand = new Random(); - } - - @Override - public void writeData(ArrowBuf buffer) { - buffer.writeInt(rand.nextInt()); - } - - @Override - public ValueVector generateOutputVector(int numRowsInBatch) { - IntVector 
intVector = new IntVector(BaseEvaluatorTest.EMPTY_SCHEMA_PATH, allocator); - intVector.allocateNew(numRowsInBatch); - return intVector; - } - } - - class BoundedInt32DataAndVectorGenerator extends Int32DataAndVectorGenerator { - - private final int upperBound; - - BoundedInt32DataAndVectorGenerator(BufferAllocator allocator, int upperBound) { - super(allocator); - this.upperBound = upperBound; - } - - @Override - public void writeData(ArrowBuf buffer) { - buffer.writeInt(rand.nextInt(upperBound)); - } - } - - protected static final int THOUSAND = 1000; - protected static final int MILLION = THOUSAND * THOUSAND; - - protected static final String EMPTY_SCHEMA_PATH = ""; - - protected BufferAllocator allocator; - protected ArrowType boolType; - protected ArrowType int8; - protected ArrowType int32; - protected ArrowType int64; - protected ArrowType float64; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - boolType = new ArrowType.Bool(); - int8 = new ArrowType.Int(8, true); - int32 = new ArrowType.Int(32, true); - int64 = new ArrowType.Int(64, true); - float64 = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); - } - - @AfterEach - public void tearDown() { - allocator.close(); - } - - ArrowBuf buf(int length) { - ArrowBuf buffer = allocator.buffer(length); - return buffer; - } - - ArrowBuf buf(byte[] bytes) { - ArrowBuf buffer = allocator.buffer(bytes.length); - buffer.writeBytes(bytes); - return buffer; - } - - ArrowBuf arrowBufWithAllValid(int size) { - int bufLen = (size + 7) / 8; - ArrowBuf buffer = allocator.buffer(bufLen); - for (int i = 0; i < bufLen; i++) { - buffer.writeByte(255); - } - - return buffer; - } - - ArrowBuf intBuf(int[] ints) { - ArrowBuf buffer = allocator.buffer(ints.length * 4); - for (int i = 0; i < ints.length; i++) { - buffer.writeInt(ints[i]); - } - return buffer; - } - - DecimalVector decimalVector(String[] values, int precision, int scale) { - DecimalVector vector = - new 
DecimalVector("decimal" + Math.random(), allocator, precision, scale); - vector.allocateNew(); - for (int i = 0; i < values.length; i++) { - BigDecimal decimal = new BigDecimal(values[i]).setScale(scale); - vector.setSafe(i, decimal); - } - - vector.setValueCount(values.length); - return vector; - } - - Set decimalSet(String[] values, Integer scale) { - Set decimalSet = new HashSet<>(); - for (int i = 0; i < values.length; i++) { - decimalSet.add(new BigDecimal(values[i]).setScale(scale)); - } - - return decimalSet; - } - - VarCharVector varcharVector(String[] values) { - VarCharVector vector = new VarCharVector("VarCharVector" + Math.random(), allocator); - vector.allocateNew(); - for (int i = 0; i < values.length; i++) { - vector.setSafe(i, values[i].getBytes(), 0, values[i].length()); - } - - vector.setValueCount(values.length); - return vector; - } - - ArrowBuf longBuf(long[] longs) { - ArrowBuf buffer = allocator.buffer(longs.length * 8); - for (int i = 0; i < longs.length; i++) { - buffer.writeLong(longs[i]); - } - return buffer; - } - - ArrowBuf doubleBuf(double[] data) { - ArrowBuf buffer = allocator.buffer(data.length * 8); - for (int i = 0; i < data.length; i++) { - buffer.writeDouble(data[i]); - } - - return buffer; - } - - ArrowBuf stringToMillis(String[] dates) { - ArrowBuf buffer = allocator.buffer(dates.length * 8); - for (int i = 0; i < dates.length; i++) { - Instant instant = Instant.parse(dates[i]); - buffer.writeLong(instant.toEpochMilli()); - } - - return buffer; - } - - ArrowBuf stringToDayInterval(String[] values) { - ArrowBuf buffer = allocator.buffer(values.length * 8); - for (int i = 0; i < values.length; i++) { - buffer.writeInt(Integer.parseInt(values[i].split(" ")[0])); // days - buffer.writeInt(Integer.parseInt(values[i].split(" ")[1])); // millis - } - return buffer; - } - - void releaseRecordBatch(ArrowRecordBatch recordBatch) { - // There are 2 references to the buffers - // One in the recordBatch - release that by calling close() - 
// One in the allocator - release that explicitly - List buffers = recordBatch.getBuffers(); - recordBatch.close(); - for (ArrowBuf buf : buffers) { - buf.getReferenceManager().release(); - } - } - - void releaseValueVectors(List valueVectors) { - for (ValueVector valueVector : valueVectors) { - valueVector.close(); - } - } - - void generateData(DataAndVectorGenerator generator, int numRecords, ArrowBuf buffer) { - for (int i = 0; i < numRecords; i++) { - generator.writeData(buffer); - } - } - - private void generateDataAndEvaluate( - DataAndVectorGenerator generator, - BaseEvaluator evaluator, - int numFields, - int numRows, - int maxRowsInBatch, - int inputFieldSize) - throws GandivaException, Exception { - int numRemaining = numRows; - List inputData = new ArrayList(); - List fieldNodes = new ArrayList(); - - // set the bitmap - while (numRemaining > 0) { - int numRowsInBatch = maxRowsInBatch; - if (numRowsInBatch > numRemaining) { - numRowsInBatch = numRemaining; - } - - // generate data - for (int i = 0; i < numFields; i++) { - ArrowBuf buf = allocator.buffer(numRowsInBatch * inputFieldSize); - ArrowBuf validity = arrowBufWithAllValid(maxRowsInBatch); - generateData(generator, numRowsInBatch, buf); - - fieldNodes.add(new ArrowFieldNode(numRowsInBatch, 0)); - inputData.add(validity); - inputData.add(buf); - } - - // create record batch - ArrowRecordBatch recordBatch = new ArrowRecordBatch(numRowsInBatch, fieldNodes, inputData); - - evaluator.evaluate(recordBatch, allocator); - - // fix numRemaining - numRemaining -= numRowsInBatch; - - // release refs - releaseRecordBatch(recordBatch); - - inputData.clear(); - fieldNodes.clear(); - } - } - - long timedProject( - DataAndVectorGenerator generator, - Schema schema, - List exprs, - int numRows, - int maxRowsInBatch, - int inputFieldSize) - throws GandivaException, Exception { - Projector projector = Projector.make(schema, exprs); - try { - ProjectEvaluator evaluator = - new ProjectEvaluator(projector, generator, 
exprs.size(), maxRowsInBatch); - generateDataAndEvaluate( - generator, evaluator, schema.getFields().size(), numRows, maxRowsInBatch, inputFieldSize); - return evaluator.getElapsedMillis(); - } finally { - projector.close(); - } - } - - long timedFilter( - DataAndVectorGenerator generator, - Schema schema, - Condition condition, - int numRows, - int maxRowsInBatch, - int inputFieldSize) - throws GandivaException, Exception { - - Filter filter = Filter.make(schema, condition); - try { - FilterEvaluator evaluator = new FilterEvaluator(filter); - generateDataAndEvaluate( - generator, evaluator, schema.getFields().size(), numRows, maxRowsInBatch, inputFieldSize); - return evaluator.getElapsedMillis(); - } finally { - filter.close(); - } - } -} diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java deleted file mode 100644 index b5a7af23bebb7..0000000000000 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/DecimalTypeUtilTest.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.gandiva.evaluator; - -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.jupiter.api.Test; - -public class DecimalTypeUtilTest { - - @Test - public void testOutputTypesForAdd() { - ArrowType.Decimal operand1 = getDecimal(30, 10); - ArrowType.Decimal operand2 = getDecimal(30, 10); - ArrowType.Decimal resultType = - DecimalTypeUtil.getResultTypeForOperation( - DecimalTypeUtil.OperationType.ADD, operand1, operand2); - assertTrue(getDecimal(31, 10).equals(resultType)); - - operand1 = getDecimal(30, 6); - operand2 = getDecimal(30, 5); - resultType = - DecimalTypeUtil.getResultTypeForOperation( - DecimalTypeUtil.OperationType.ADD, operand1, operand2); - assertTrue(getDecimal(32, 6).equals(resultType)); - - operand1 = getDecimal(30, 10); - operand2 = getDecimal(38, 10); - resultType = - DecimalTypeUtil.getResultTypeForOperation( - DecimalTypeUtil.OperationType.ADD, operand1, operand2); - assertTrue(getDecimal(38, 9).equals(resultType)); - - operand1 = getDecimal(38, 10); - operand2 = getDecimal(38, 38); - resultType = - DecimalTypeUtil.getResultTypeForOperation( - DecimalTypeUtil.OperationType.ADD, operand1, operand2); - assertTrue(getDecimal(38, 9).equals(resultType)); - - operand1 = getDecimal(38, 10); - operand2 = getDecimal(38, 2); - resultType = - DecimalTypeUtil.getResultTypeForOperation( - DecimalTypeUtil.OperationType.ADD, operand1, operand2); - assertTrue(getDecimal(38, 6).equals(resultType)); - } - - @Test - public void testOutputTypesForMultiply() { - ArrowType.Decimal operand1 = getDecimal(30, 10); - ArrowType.Decimal operand2 = getDecimal(30, 10); - ArrowType.Decimal resultType = - DecimalTypeUtil.getResultTypeForOperation( - DecimalTypeUtil.OperationType.MULTIPLY, operand1, operand2); - assertTrue(getDecimal(38, 6).equals(resultType)); - - operand1 = getDecimal(38, 10); - operand2 = getDecimal(9, 2); - resultType = - 
DecimalTypeUtil.getResultTypeForOperation( - DecimalTypeUtil.OperationType.MULTIPLY, operand1, operand2); - assertTrue(getDecimal(38, 6).equals(resultType)); - } - - @Test - public void testOutputTypesForMod() { - ArrowType.Decimal operand1 = getDecimal(30, 10); - ArrowType.Decimal operand2 = getDecimal(28, 7); - ArrowType.Decimal resultType = - DecimalTypeUtil.getResultTypeForOperation( - DecimalTypeUtil.OperationType.MOD, operand1, operand2); - assertTrue(getDecimal(30, 10).equals(resultType)); - } - - private ArrowType.Decimal getDecimal(int precision, int scale) { - return new ArrowType.Decimal(precision, scale, 128); - } -} diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java deleted file mode 100644 index 58279b15c3b54..0000000000000 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.gandiva.evaluator; - -import static org.junit.jupiter.api.Assertions.assertTrue; - -import com.google.common.collect.Lists; -import java.util.Set; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.jupiter.api.Test; - -public class ExpressionRegistryTest { - - @Test - public void testTypes() throws GandivaException { - Set types = ExpressionRegistry.getInstance().getSupportedTypes(); - ArrowType.Int uint8 = new ArrowType.Int(8, false); - assertTrue(types.contains(uint8)); - } - - @Test - public void testFunctions() throws GandivaException { - ArrowType.Int uint8 = new ArrowType.Int(8, false); - FunctionSignature signature = - new FunctionSignature("add", uint8, Lists.newArrayList(uint8, uint8)); - Set functions = ExpressionRegistry.getInstance().getSupportedFunctions(); - assertTrue(functions.contains(signature)); - } - - @Test - public void testFunctionAliases() throws GandivaException { - ArrowType.Int int64 = new ArrowType.Int(64, true); - FunctionSignature signature = - new FunctionSignature("modulo", int64, Lists.newArrayList(int64, int64)); - Set functions = ExpressionRegistry.getInstance().getSupportedFunctions(); - assertTrue(functions.contains(signature)); - } - - @Test - public void testCaseInsensitiveFunctionName() throws GandivaException { - ArrowType.Utf8 utf8 = new ArrowType.Utf8(); - ArrowType.Int int64 = new ArrowType.Int(64, true); - FunctionSignature signature = - new FunctionSignature("castvarchar", utf8, Lists.newArrayList(utf8, int64)); - Set functions = ExpressionRegistry.getInstance().getSupportedFunctions(); - assertTrue(functions.contains(signature)); - } -} diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterProjectTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterProjectTest.java deleted file mode 100644 index 75169a37a95d7..0000000000000 --- 
a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterProjectTest.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.evaluator; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; - -import com.google.common.collect.Lists; -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.expression.Condition; -import org.apache.arrow.gandiva.expression.ExpressionTree; -import org.apache.arrow.gandiva.expression.TreeBuilder; -import org.apache.arrow.gandiva.ipc.GandivaTypes.SelectionVectorType; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -public class FilterProjectTest extends BaseEvaluatorTest { - - 
@Test - @Disabled("GH-43576 - Fix and enable this test") - public void testSimpleSV16() throws GandivaException, Exception { - Field a = Field.nullable("a", int32); - Field b = Field.nullable("b", int32); - Field c = Field.nullable("c", int32); - List args = Lists.newArrayList(a, b); - - Condition condition = TreeBuilder.makeCondition("less_than", args); - - Schema schema = new Schema(args); - Filter filter = Filter.make(schema, condition); - - ExpressionTree expression = TreeBuilder.makeExpression("add", Lists.newArrayList(a, b), c); - Projector projector = - Projector.make(schema, Lists.newArrayList(expression), SelectionVectorType.SV_INT16); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - // second half is "undefined" - int[] aValues = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - int[] bValues = new int[] {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 14, 15}; - int[] expected = {3, 7, 11, 15}; - - verifyTestCaseFor16(filter, projector, numRows, validity, aValues, bValues, expected); - } - - private void verifyTestCaseFor16( - Filter filter, - Projector projector, - int numRows, - byte[] validity, - int[] aValues, - int[] bValues, - int[] expected) - throws GandivaException { - ArrowBuf validitya = buf(validity); - ArrowBuf valuesa = intBuf(aValues); - ArrowBuf validityb = buf(validity); - ArrowBuf valuesb = intBuf(bValues); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)), - Lists.newArrayList(validitya, valuesa, validityb, valuesb)); - - ArrowBuf selectionBuffer = buf(numRows * 2); - SelectionVectorInt16 selectionVector = new SelectionVectorInt16(selectionBuffer); - - filter.evaluate(batch, selectionVector); - - IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator); - intVector.allocateNew(selectionVector.getRecordCount()); - - List output = new ArrayList(); - output.add(intVector); - 
projector.evaluate(batch, selectionVector, output); - for (int i = 0; i < selectionVector.getRecordCount(); i++) { - assertFalse(intVector.isNull(i)); - assertEquals(expected[i], intVector.get(i)); - } - // free buffers - releaseRecordBatch(batch); - releaseValueVectors(output); - selectionBuffer.close(); - filter.close(); - projector.close(); - } -} diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterTest.java deleted file mode 100644 index a98a7cb6b5466..0000000000000 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/FilterTest.java +++ /dev/null @@ -1,337 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.gandiva.evaluator; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; - -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import java.nio.charset.Charset; -import java.util.Arrays; -import java.util.List; -import java.util.stream.IntStream; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.expression.Condition; -import org.apache.arrow.gandiva.expression.TreeBuilder; -import org.apache.arrow.gandiva.expression.TreeNode; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -public class FilterTest extends BaseEvaluatorTest { - - private int[] selectionVectorToArray(SelectionVector vector) { - int[] actual = new int[vector.getRecordCount()]; - for (int i = 0; i < vector.getRecordCount(); ++i) { - actual[i] = vector.getIndex(i); - } - return actual; - } - - private Charset utf8Charset = Charset.forName("UTF-8"); - private Charset utf16Charset = Charset.forName("UTF-16"); - - List varBufs(String[] strings, Charset charset) { - ArrowBuf offsetsBuffer = allocator.buffer((strings.length + 1) * 4); - ArrowBuf dataBuffer = allocator.buffer(strings.length * 8); - - int startOffset = 0; - for (int i = 0; i < strings.length; i++) { - offsetsBuffer.writeInt(startOffset); - - final byte[] bytes = strings[i].getBytes(charset); - dataBuffer = dataBuffer.reallocIfNeeded(dataBuffer.writerIndex() + bytes.length); - dataBuffer.setBytes(startOffset, bytes, 0, bytes.length); - startOffset += bytes.length; - } - offsetsBuffer.writeInt(startOffset); // offset for the last element - - return Arrays.asList(offsetsBuffer, 
dataBuffer); - } - - List stringBufs(String[] strings) { - return varBufs(strings, utf8Charset); - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void testSimpleInString() throws GandivaException, Exception { - Field c1 = Field.nullable("c1", new ArrowType.Utf8()); - TreeNode l1 = TreeBuilder.makeLiteral(1L); - TreeNode l2 = TreeBuilder.makeLiteral(3L); - - List argsSchema = Lists.newArrayList(c1); - List args = Lists.newArrayList(TreeBuilder.makeField(c1), l1, l2); - TreeNode substr = TreeBuilder.makeFunction("substr", args, new ArrowType.Utf8()); - TreeNode inExpr = - TreeBuilder.makeInExpressionString(substr, Sets.newHashSet("one", "two", "thr", "fou")); - - Condition condition = TreeBuilder.makeCondition(inExpr); - - Schema schema = new Schema(argsSchema); - Filter filter = Filter.make(schema, condition); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - // second half is "undefined" - String[] c1Values = - new String[] { - "one", - "two", - "three", - "four", - "five", - "six", - "seven", - "eight", - "nine", - "ten", - "eleven", - "twelve", - "thirteen", - "fourteen", - "fifteen", - "sixteen" - }; - int[] expected = {0, 1, 2, 3}; - ArrowBuf c1Validity = buf(validity); - ArrowBuf c2Validity = buf(validity); - List dataBufsX = stringBufs(c1Values); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode), - Lists.newArrayList(c1Validity, dataBufsX.get(0), dataBufsX.get(1), c2Validity)); - - ArrowBuf selectionBuffer = buf(numRows * 2); - SelectionVectorInt16 selectionVector = new SelectionVectorInt16(selectionBuffer); - - filter.evaluate(batch, selectionVector); - - int[] actual = selectionVectorToArray(selectionVector); - releaseRecordBatch(batch); - selectionBuffer.close(); - filter.close(); - assertArrayEquals(expected, actual); - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void 
testSimpleInInt() throws GandivaException, Exception { - Field c1 = Field.nullable("c1", int32); - - List argsSchema = Lists.newArrayList(c1); - TreeNode inExpr = - TreeBuilder.makeInExpressionInt32(TreeBuilder.makeField(c1), Sets.newHashSet(1, 2, 3, 4)); - - Condition condition = TreeBuilder.makeCondition(inExpr); - - Schema schema = new Schema(argsSchema); - Filter filter = Filter.make(schema, condition); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - // second half is "undefined" - int[] aValues = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - int[] expected = {0, 1, 2, 3}; - - ArrowBuf validitya = buf(validity); - ArrowBuf validityb = buf(validity); - ArrowBuf valuesa = intBuf(aValues); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode), - Lists.newArrayList(validitya, valuesa, validityb)); - - ArrowBuf selectionBuffer = buf(numRows * 2); - SelectionVectorInt16 selectionVector = new SelectionVectorInt16(selectionBuffer); - - filter.evaluate(batch, selectionVector); - - // free buffers - int[] actual = selectionVectorToArray(selectionVector); - releaseRecordBatch(batch); - selectionBuffer.close(); - filter.close(); - assertArrayEquals(expected, actual); - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void testSimpleSV16() throws GandivaException, Exception { - Field a = Field.nullable("a", int32); - Field b = Field.nullable("b", int32); - List args = Lists.newArrayList(a, b); - - Condition condition = TreeBuilder.makeCondition("less_than", args); - - Schema schema = new Schema(args); - Filter filter = Filter.make(schema, condition); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - // second half is "undefined" - int[] aValues = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - int[] bValues = new int[] {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 
13, 14, 15}; - int[] expected = {0, 2, 4, 6}; - - verifyTestCase(filter, numRows, validity, aValues, bValues, expected); - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void testSimpleSV16_AllMatched() throws GandivaException, Exception { - Field a = Field.nullable("a", int32); - Field b = Field.nullable("b", int32); - List args = Lists.newArrayList(a, b); - - Condition condition = TreeBuilder.makeCondition("less_than", args); - - Schema schema = new Schema(args); - Filter filter = Filter.make(schema, condition); - - int numRows = 32; - - byte[] validity = new byte[numRows / 8]; - - IntStream.range(0, numRows / 8).forEach(i -> validity[i] = (byte) 255); - - int[] aValues = new int[numRows]; - IntStream.range(0, numRows).forEach(i -> aValues[i] = i); - - int[] bValues = new int[numRows]; - IntStream.range(0, numRows).forEach(i -> bValues[i] = i + 1); - - int[] expected = new int[numRows]; - IntStream.range(0, numRows).forEach(i -> expected[i] = i); - - verifyTestCase(filter, numRows, validity, aValues, bValues, expected); - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void testSimpleSV16_GreaterThan64Recs() throws GandivaException, Exception { - Field a = Field.nullable("a", int32); - Field b = Field.nullable("b", int32); - List args = Lists.newArrayList(a, b); - - Condition condition = TreeBuilder.makeCondition("greater_than", args); - - Schema schema = new Schema(args); - Filter filter = Filter.make(schema, condition); - - int numRows = 1000; - - byte[] validity = new byte[numRows / 8]; - - IntStream.range(0, numRows / 8).forEach(i -> validity[i] = (byte) 255); - - int[] aValues = new int[numRows]; - IntStream.range(0, numRows).forEach(i -> aValues[i] = i); - - int[] bValues = new int[numRows]; - IntStream.range(0, numRows).forEach(i -> bValues[i] = i + 1); - - aValues[0] = 5; - bValues[0] = 0; - - int[] expected = {0}; - - verifyTestCase(filter, numRows, validity, aValues, bValues, expected); - } - - @Test - 
@Disabled("GH-43576 - Fix and enable this test") - public void testSimpleSV32() throws GandivaException, Exception { - Field a = Field.nullable("a", int32); - Field b = Field.nullable("b", int32); - List args = Lists.newArrayList(a, b); - - Condition condition = TreeBuilder.makeCondition("less_than", args); - - Schema schema = new Schema(args); - Filter filter = Filter.make(schema, condition); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - // second half is "undefined" - int[] aValues = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - int[] bValues = new int[] {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 14, 15}; - int[] expected = {0, 2, 4, 6}; - - verifyTestCase(filter, numRows, validity, aValues, bValues, expected); - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void testSimpleFilterWithNoOptimisation() throws GandivaException, Exception { - Field a = Field.nullable("a", int32); - Field b = Field.nullable("b", int32); - List args = Lists.newArrayList(a, b); - - Condition condition = TreeBuilder.makeCondition("less_than", args); - - Schema schema = new Schema(args); - Filter filter = Filter.make(schema, condition, false); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - // second half is "undefined" - int[] aValues = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - int[] bValues = new int[] {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 14, 15}; - int[] expected = {0, 2, 4, 6}; - - verifyTestCase(filter, numRows, validity, aValues, bValues, expected); - } - - private void verifyTestCase( - Filter filter, int numRows, byte[] validity, int[] aValues, int[] bValues, int[] expected) - throws GandivaException { - ArrowBuf validitya = buf(validity); - ArrowBuf valuesa = intBuf(aValues); - ArrowBuf validityb = buf(validity); - ArrowBuf valuesb = intBuf(bValues); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new 
ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)), - Lists.newArrayList(validitya, valuesa, validityb, valuesb)); - - ArrowBuf selectionBuffer = buf(numRows * 2); - SelectionVectorInt16 selectionVector = new SelectionVectorInt16(selectionBuffer); - - filter.evaluate(batch, selectionVector); - - // free buffers - int[] actual = selectionVectorToArray(selectionVector); - releaseRecordBatch(batch); - selectionBuffer.close(); - filter.close(); - - assertArrayEquals(expected, actual); - } -} diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java deleted file mode 100644 index 65409938a9142..0000000000000 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/MicroBenchmarkTest.java +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.gandiva.evaluator; - -import static org.junit.jupiter.api.Assertions.assertTrue; - -import com.google.common.collect.Lists; -import java.util.List; -import org.apache.arrow.gandiva.expression.Condition; -import org.apache.arrow.gandiva.expression.ExpressionTree; -import org.apache.arrow.gandiva.expression.TreeBuilder; -import org.apache.arrow.gandiva.expression.TreeNode; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -@Disabled -public class MicroBenchmarkTest extends BaseEvaluatorTest { - - private double toleranceRatio = 4.0; - - @Test - public void testAdd3() throws Exception { - Field x = Field.nullable("x", int32); - Field n2x = Field.nullable("n2x", int32); - Field n3x = Field.nullable("n3x", int32); - - // x + n2x + n3x - TreeNode add1 = - TreeBuilder.makeFunction( - "add", Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeField(n2x)), int32); - TreeNode add = - TreeBuilder.makeFunction( - "add", Lists.newArrayList(add1, TreeBuilder.makeField(n3x)), int32); - ExpressionTree expr = TreeBuilder.makeExpression(add, x); - - List cols = Lists.newArrayList(x, n2x, n3x); - Schema schema = new Schema(cols); - - long timeTaken = - timedProject( - new Int32DataAndVectorGenerator(allocator), - schema, - Lists.newArrayList(expr), - 1 * MILLION, - 16 * THOUSAND, - 4); - System.out.println("Time taken for projecting 1m records of add3 is " + timeTaken + "ms"); - assertTrue(timeTaken <= 13 * toleranceRatio); - } - - @Test - public void testIf() throws Exception { - /* - * when x < 10 then 0 - * when x < 20 then 1 - * when x < 30 then 2 - * when x < 40 then 3 - * when x < 50 then 4 - * when x < 60 then 5 - * when x < 70 then 6 - * when x < 80 then 7 - * when x < 90 then 8 - * when x < 100 then 9 - * when x < 110 then 10 - * when x < 120 then 11 - * when x < 130 then 12 - * when x < 140 then 13 - * when 
x < 150 then 14 - * when x < 160 then 15 - * when x < 170 then 16 - * when x < 180 then 17 - * when x < 190 then 18 - * when x < 200 then 19 - * else 20 - */ - Field x = Field.nullable("x", int32); - TreeNode xNode = TreeBuilder.makeField(x); - - // if (x < 100) then 9 else 10 - int returnValue = 20; - TreeNode topNode = TreeBuilder.makeLiteral(returnValue); - int compareWith = 200; - while (compareWith >= 10) { - // cond (x < compareWith) - TreeNode condNode = - TreeBuilder.makeFunction( - "less_than", - Lists.newArrayList(xNode, TreeBuilder.makeLiteral(compareWith)), - boolType); - topNode = - TreeBuilder.makeIf( - condNode, // cond (x < compareWith) - TreeBuilder.makeLiteral(returnValue), // then returnValue - topNode, // else topNode - int32); - compareWith -= 10; - returnValue--; - } - - ExpressionTree expr = TreeBuilder.makeExpression(topNode, x); - Schema schema = new Schema(Lists.newArrayList(x)); - - long timeTaken = - timedProject( - new BoundedInt32DataAndVectorGenerator(allocator, 250), - schema, - Lists.newArrayList(expr), - 1 * MILLION, - 16 * THOUSAND, - 4); - System.out.println("Time taken for projecting 10m records of nestedIf is " + timeTaken + "ms"); - assertTrue(timeTaken <= 15 * toleranceRatio); - } - - @Test - public void testFilterAdd2() throws Exception { - Field x = Field.nullable("x", int32); - Field n2x = Field.nullable("n2x", int32); - Field n3x = Field.nullable("n3x", int32); - - // x + n2x < n3x - TreeNode add = - TreeBuilder.makeFunction( - "add", Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeField(n2x)), int32); - TreeNode lessThan = - TreeBuilder.makeFunction( - "less_than", Lists.newArrayList(add, TreeBuilder.makeField(n3x)), boolType); - Condition condition = TreeBuilder.makeCondition(lessThan); - - List cols = Lists.newArrayList(x, n2x, n3x); - Schema schema = new Schema(cols); - - long timeTaken = - timedFilter( - new Int32DataAndVectorGenerator(allocator), - schema, - condition, - 1 * MILLION, - 16 * THOUSAND, - 
4); - System.out.println("Time taken for filtering 10m records of a+b args = Lists.newArrayList(a, b); - - ArrowType.Decimal outputType = - DecimalTypeUtil.getResultTypeForOperation( - DecimalTypeUtil.OperationType.ADD, decimal, decimal); - Field retType = Field.nullable("c", outputType); - ExpressionTree root = TreeBuilder.makeExpression("add", args, retType); - - List exprs = Lists.newArrayList(root); - - Schema schema = new Schema(args); - Projector eval = Projector.make(schema, exprs); - - int numRows = 4; - byte[] validity = new byte[] {(byte) 255}; - String[] aValues = new String[] {"1.12345678", "2.12345678", "3.12345678", "4.12345678"}; - String[] bValues = new String[] {"2.12345678", "3.12345678", "4.12345678", "5.12345678"}; - - DecimalVector valuesa = decimalVector(aValues, precision, scale); - DecimalVector valuesb = decimalVector(bValues, precision, scale); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)), - Lists.newArrayList( - valuesa.getValidityBuffer(), - valuesa.getDataBuffer(), - valuesb.getValidityBuffer(), - valuesb.getDataBuffer())); - - DecimalVector outVector = - new DecimalVector( - "decimal_output", allocator, outputType.getPrecision(), outputType.getScale()); - outVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(outVector); - eval.evaluate(batch, output); - - // should have scaled down. 
- BigDecimal[] expOutput = - new BigDecimal[] { - BigDecimal.valueOf(3.2469136), - BigDecimal.valueOf(5.2469136), - BigDecimal.valueOf(7.2469136), - BigDecimal.valueOf(9.2469136) - }; - - for (int i = 0; i < 4; i++) { - assertFalse(outVector.isNull(i)); - assertTrue( - expOutput[i].compareTo(outVector.getObject(i)) == 0, "index : " + i + " failed compare"); - } - - // free buffers - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void test_add_literal() throws GandivaException { - int precision = 2; - int scale = 0; - ArrowType.Decimal decimal = new ArrowType.Decimal(precision, scale, 128); - ArrowType.Decimal literalType = new ArrowType.Decimal(2, 1, 128); - Field a = Field.nullable("a", decimal); - - ArrowType.Decimal outputType = - DecimalTypeUtil.getResultTypeForOperation( - DecimalTypeUtil.OperationType.ADD, decimal, literalType); - Field retType = Field.nullable("c", outputType); - TreeNode field = TreeBuilder.makeField(a); - TreeNode literal = TreeBuilder.makeDecimalLiteral("6", 2, 1); - List args = Lists.newArrayList(field, literal); - TreeNode root = TreeBuilder.makeFunction("add", args, outputType); - ExpressionTree tree = TreeBuilder.makeExpression(root, retType); - - List exprs = Lists.newArrayList(tree); - - Schema schema = new Schema(Lists.newArrayList(a)); - Projector eval = Projector.make(schema, exprs); - - int numRows = 4; - String[] aValues = new String[] {"1", "2", "3", "4"}; - - DecimalVector valuesa = decimalVector(aValues, precision, scale); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0)), - Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer())); - - DecimalVector outVector = - new DecimalVector( - "decimal_output", allocator, outputType.getPrecision(), outputType.getScale()); - outVector.allocateNew(numRows); - - List output = new ArrayList(); - 
output.add(outVector); - eval.evaluate(batch, output); - - BigDecimal[] expOutput = - new BigDecimal[] { - BigDecimal.valueOf(1.6), - BigDecimal.valueOf(2.6), - BigDecimal.valueOf(3.6), - BigDecimal.valueOf(4.6) - }; - - for (int i = 0; i < 4; i++) { - assertFalse(outVector.isNull(i)); - assertTrue(expOutput[i].compareTo(outVector.getObject(i)) == 0); - } - - // free buffers - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void test_multiply() throws GandivaException { - int precision = 38; - int scale = 8; - ArrowType.Decimal decimal = new ArrowType.Decimal(precision, scale, 128); - Field a = Field.nullable("a", decimal); - Field b = Field.nullable("b", decimal); - List args = Lists.newArrayList(a, b); - - ArrowType.Decimal outputType = - DecimalTypeUtil.getResultTypeForOperation( - DecimalTypeUtil.OperationType.MULTIPLY, decimal, decimal); - Field retType = Field.nullable("c", outputType); - ExpressionTree root = TreeBuilder.makeExpression("multiply", args, retType); - - List exprs = Lists.newArrayList(root); - - Schema schema = new Schema(args); - Projector eval = Projector.make(schema, exprs); - - int numRows = 4; - byte[] validity = new byte[] {(byte) 255}; - String[] aValues = - new String[] {"1.12345678", "2.12345678", "3.12345678", "999999999999.99999999"}; - String[] bValues = - new String[] {"2.12345678", "3.12345678", "4.12345678", "999999999999.99999999"}; - - DecimalVector valuesa = decimalVector(aValues, precision, scale); - DecimalVector valuesb = decimalVector(bValues, precision, scale); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)), - Lists.newArrayList( - valuesa.getValidityBuffer(), - valuesa.getDataBuffer(), - valuesb.getValidityBuffer(), - valuesb.getDataBuffer())); - - DecimalVector outVector = - new DecimalVector( - "decimal_output", allocator, 
outputType.getPrecision(), outputType.getScale()); - outVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(outVector); - eval.evaluate(batch, output); - - // should have scaled down. - BigDecimal[] expOutput = - new BigDecimal[] { - BigDecimal.valueOf(2.385612), - BigDecimal.valueOf(6.632525), - BigDecimal.valueOf(12.879439), - new BigDecimal("999999999999999999980000.000000") - }; - - for (int i = 0; i < 4; i++) { - assertFalse(outVector.isNull(i)); - assertTrue( - expOutput[i].compareTo(outVector.getObject(i)) == 0, "index : " + i + " failed compare"); - } - - // free buffers - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void testCompare() throws GandivaException { - Decimal aType = new Decimal(38, 3, 128); - Decimal bType = new Decimal(38, 2, 128); - Field a = Field.nullable("a", aType); - Field b = Field.nullable("b", bType); - List args = Lists.newArrayList(a, b); - - List exprs = - new ArrayList<>( - Arrays.asList( - TreeBuilder.makeExpression("equal", args, Field.nullable("eq", boolType)), - TreeBuilder.makeExpression("not_equal", args, Field.nullable("ne", boolType)), - TreeBuilder.makeExpression("less_than", args, Field.nullable("lt", boolType)), - TreeBuilder.makeExpression( - "less_than_or_equal_to", args, Field.nullable("le", boolType)), - TreeBuilder.makeExpression("greater_than", args, Field.nullable("gt", boolType)), - TreeBuilder.makeExpression( - "greater_than_or_equal_to", args, Field.nullable("ge", boolType)))); - - Schema schema = new Schema(args); - Projector eval = Projector.make(schema, exprs); - - List output = null; - ArrowRecordBatch batch = null; - try { - int numRows = 4; - String[] aValues = new String[] {"7.620", "2.380", "3.860", "-18.160"}; - String[] bValues = new String[] {"7.62", "3.50", "1.90", "-1.45"}; - - DecimalVector valuesa = decimalVector(aValues, aType.getPrecision(), aType.getScale()); - 
DecimalVector valuesb = decimalVector(bValues, bType.getPrecision(), bType.getScale()); - batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)), - Lists.newArrayList( - valuesa.getValidityBuffer(), - valuesa.getDataBuffer(), - valuesb.getValidityBuffer(), - valuesb.getDataBuffer())); - - // expected results. - boolean[][] expected = { - {true, false, false, false}, // eq - {false, true, true, true}, // ne - {false, true, false, true}, // lt - {true, true, false, true}, // le - {false, false, true, false}, // gt - {true, false, true, false}, // ge - }; - - // Allocate output vectors. - output = - new ArrayList<>( - Arrays.asList( - new BitVector("eq", allocator), - new BitVector("ne", allocator), - new BitVector("lt", allocator), - new BitVector("le", allocator), - new BitVector("gt", allocator), - new BitVector("ge", allocator))); - for (ValueVector v : output) { - v.allocateNew(); - } - - // evaluate expressions. - eval.evaluate(batch, output); - - // compare the outputs. 
- for (int idx = 0; idx < output.size(); ++idx) { - boolean[] expectedArray = expected[idx]; - BitVector resultVector = (BitVector) output.get(idx); - - for (int i = 0; i < numRows; i++) { - assertFalse(resultVector.isNull(i)); - assertEquals( - expectedArray[i], - resultVector.getObject(i).booleanValue(), - "mismatch in result for expr at idx " + idx + " for row " + i); - } - } - } finally { - // free buffers - if (batch != null) { - releaseRecordBatch(batch); - } - if (output != null) { - releaseValueVectors(output); - } - eval.close(); - } - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void testRound() throws GandivaException { - Decimal aType = new Decimal(38, 2, 128); - Decimal aWithScaleZero = new Decimal(38, 0, 128); - Decimal aWithScaleOne = new Decimal(38, 1, 128); - Field a = Field.nullable("a", aType); - List args = Lists.newArrayList(a); - - List exprs = - new ArrayList<>( - Arrays.asList( - TreeBuilder.makeExpression("abs", args, Field.nullable("abs", aType)), - TreeBuilder.makeExpression("ceil", args, Field.nullable("ceil", aWithScaleZero)), - TreeBuilder.makeExpression("floor", args, Field.nullable("floor", aWithScaleZero)), - TreeBuilder.makeExpression("round", args, Field.nullable("round", aWithScaleZero)), - TreeBuilder.makeExpression( - "truncate", args, Field.nullable("truncate", aWithScaleZero)), - TreeBuilder.makeExpression( - TreeBuilder.makeFunction( - "round", - Lists.newArrayList(TreeBuilder.makeField(a), TreeBuilder.makeLiteral(1)), - aWithScaleOne), - Field.nullable("round_scale_1", aWithScaleOne)), - TreeBuilder.makeExpression( - TreeBuilder.makeFunction( - "truncate", - Lists.newArrayList(TreeBuilder.makeField(a), TreeBuilder.makeLiteral(1)), - aWithScaleOne), - Field.nullable("truncate_scale_1", aWithScaleOne)))); - - Schema schema = new Schema(args); - Projector eval = Projector.make(schema, exprs); - - List output = null; - ArrowRecordBatch batch = null; - try { - int numRows = 4; - String[] aValues = new 
String[] {"1.23", "1.58", "-1.23", "-1.58"}; - - DecimalVector valuesa = decimalVector(aValues, aType.getPrecision(), aType.getScale()); - batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0)), - Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer())); - - // expected results. - BigDecimal[][] expected = { - { - BigDecimal.valueOf(1.23), - BigDecimal.valueOf(1.58), - BigDecimal.valueOf(1.23), - BigDecimal.valueOf(1.58) - }, // abs - { - BigDecimal.valueOf(2), - BigDecimal.valueOf(2), - BigDecimal.valueOf(-1), - BigDecimal.valueOf(-1) - }, // ceil - { - BigDecimal.valueOf(1), - BigDecimal.valueOf(1), - BigDecimal.valueOf(-2), - BigDecimal.valueOf(-2) - }, // floor - { - BigDecimal.valueOf(1), - BigDecimal.valueOf(2), - BigDecimal.valueOf(-1), - BigDecimal.valueOf(-2) - }, // round - { - BigDecimal.valueOf(1), - BigDecimal.valueOf(1), - BigDecimal.valueOf(-1), - BigDecimal.valueOf(-1) - }, // truncate - { - BigDecimal.valueOf(1.2), - BigDecimal.valueOf(1.6), - BigDecimal.valueOf(-1.2), - BigDecimal.valueOf(-1.6) - }, // round-to-scale-1 - { - BigDecimal.valueOf(1.2), - BigDecimal.valueOf(1.5), - BigDecimal.valueOf(-1.2), - BigDecimal.valueOf(-1.5) - }, // truncate-to-scale-1 - }; - - // Allocate output vectors. - output = - new ArrayList<>( - Arrays.asList( - new DecimalVector("abs", allocator, aType.getPrecision(), aType.getScale()), - new DecimalVector("ceil", allocator, aType.getPrecision(), 0), - new DecimalVector("floor", allocator, aType.getPrecision(), 0), - new DecimalVector("round", allocator, aType.getPrecision(), 0), - new DecimalVector("truncate", allocator, aType.getPrecision(), 0), - new DecimalVector("round_to_scale_1", allocator, aType.getPrecision(), 1), - new DecimalVector("truncate_to_scale_1", allocator, aType.getPrecision(), 1))); - for (ValueVector v : output) { - v.allocateNew(); - } - - // evaluate expressions. - eval.evaluate(batch, output); - - // compare the outputs. 
- for (int idx = 0; idx < output.size(); ++idx) { - BigDecimal[] expectedArray = expected[idx]; - DecimalVector resultVector = (DecimalVector) output.get(idx); - - for (int i = 0; i < numRows; i++) { - assertFalse(resultVector.isNull(i)); - assertTrue( - expectedArray[i].compareTo(resultVector.getObject(i)) == 0, - "mismatch in result for " - + "field " - + resultVector.getField().getName() - + " for row " - + i - + " expected " - + expectedArray[i] - + ", got " - + resultVector.getObject(i)); - } - } - } finally { - // free buffers - if (batch != null) { - releaseRecordBatch(batch); - } - if (output != null) { - releaseValueVectors(output); - } - eval.close(); - } - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void testCastToDecimal() throws GandivaException { - Decimal decimalType = new Decimal(38, 2, 128); - Decimal decimalWithScaleOne = new Decimal(38, 1, 128); - Field dec = Field.nullable("dec", decimalType); - Field int64f = Field.nullable("int64", int64); - Field doublef = Field.nullable("float64", float64); - - List exprs = - new ArrayList<>( - Arrays.asList( - TreeBuilder.makeExpression( - "castDECIMAL", - Lists.newArrayList(int64f), - Field.nullable("int64_to_dec", decimalType)), - TreeBuilder.makeExpression( - "castDECIMAL", - Lists.newArrayList(doublef), - Field.nullable("float64_to_dec", decimalType)), - TreeBuilder.makeExpression( - "castDECIMAL", - Lists.newArrayList(dec), - Field.nullable("dec_to_dec", decimalWithScaleOne)))); - - Schema schema = new Schema(Lists.newArrayList(int64f, doublef, dec)); - Projector eval = Projector.make(schema, exprs); - - List output = null; - ArrowRecordBatch batch = null; - try { - int numRows = 4; - String[] aValues = new String[] {"1.23", "1.58", "-1.23", "-1.58"}; - DecimalVector valuesa = - decimalVector(aValues, decimalType.getPrecision(), decimalType.getScale()); - batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList( - new ArrowFieldNode(numRows, 0), - new 
ArrowFieldNode(numRows, 0), - new ArrowFieldNode(numRows, 0)), - Lists.newArrayList( - arrowBufWithAllValid(4), - longBuf(new long[] {123, 158, -123, -158}), - arrowBufWithAllValid(4), - doubleBuf(new double[] {1.23, 1.58, -1.23, -1.58}), - valuesa.getValidityBuffer(), - valuesa.getDataBuffer())); - - // Allocate output vectors. - output = - new ArrayList<>( - Arrays.asList( - new DecimalVector( - "int64_to_dec", - allocator, - decimalType.getPrecision(), - decimalType.getScale()), - new DecimalVector( - "float64_to_dec", - allocator, - decimalType.getPrecision(), - decimalType.getScale()), - new DecimalVector( - "dec_to_dec", - allocator, - decimalWithScaleOne.getPrecision(), - decimalWithScaleOne.getScale()))); - for (ValueVector v : output) { - v.allocateNew(); - } - - // evaluate expressions. - eval.evaluate(batch, output); - - // compare the outputs. - BigDecimal[][] expected = { - { - BigDecimal.valueOf(123), - BigDecimal.valueOf(158), - BigDecimal.valueOf(-123), - BigDecimal.valueOf(-158) - }, - { - BigDecimal.valueOf(1.23), - BigDecimal.valueOf(1.58), - BigDecimal.valueOf(-1.23), - BigDecimal.valueOf(-1.58) - }, - { - BigDecimal.valueOf(1.2), - BigDecimal.valueOf(1.6), - BigDecimal.valueOf(-1.2), - BigDecimal.valueOf(-1.6) - } - }; - for (int idx = 0; idx < output.size(); ++idx) { - BigDecimal[] expectedArray = expected[idx]; - DecimalVector resultVector = (DecimalVector) output.get(idx); - for (int i = 0; i < numRows; i++) { - assertFalse(resultVector.isNull(i)); - assertTrue( - expectedArray[i].compareTo(resultVector.getObject(i)) == 0, - "mismatch in result for " - + "field " - + resultVector.getField().getName() - + " for row " - + i - + " expected " - + expectedArray[i] - + ", got " - + resultVector.getObject(i)); - } - } - } finally { - // free buffers - if (batch != null) { - releaseRecordBatch(batch); - } - if (output != null) { - releaseValueVectors(output); - } - eval.close(); - } - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - 
public void testCastToLong() throws GandivaException { - Decimal decimalType = new Decimal(38, 2, 128); - Field dec = Field.nullable("dec", decimalType); - - Schema schema = new Schema(Lists.newArrayList(dec)); - Projector eval = - Projector.make( - schema, - Lists.newArrayList( - TreeBuilder.makeExpression( - "castBIGINT", Lists.newArrayList(dec), Field.nullable("dec_to_int64", int64)))); - - List output = null; - ArrowRecordBatch batch = null; - try { - int numRows = 5; - String[] aValues = new String[] {"1.23", "1.50", "98765.78", "-1.23", "-1.58"}; - DecimalVector valuesa = - decimalVector(aValues, decimalType.getPrecision(), decimalType.getScale()); - batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0)), - Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer())); - - // Allocate output vectors. - BigIntVector resultVector = new BigIntVector("dec_to_int64", allocator); - resultVector.allocateNew(); - output = new ArrayList<>(Arrays.asList(resultVector)); - - // evaluate expressions. - eval.evaluate(batch, output); - - // compare the outputs. 
- long[] expected = {1, 2, 98766, -1, -2}; - for (int i = 0; i < numRows; i++) { - assertFalse(resultVector.isNull(i)); - assertEquals(expected[i], resultVector.get(i)); - } - } finally { - // free buffers - if (batch != null) { - releaseRecordBatch(batch); - } - if (output != null) { - releaseValueVectors(output); - } - eval.close(); - } - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void testCastToDouble() throws GandivaException { - Decimal decimalType = new Decimal(38, 2, 128); - Field dec = Field.nullable("dec", decimalType); - - Schema schema = new Schema(Lists.newArrayList(dec)); - Projector eval = - Projector.make( - schema, - Lists.newArrayList( - TreeBuilder.makeExpression( - "castFLOAT8", - Lists.newArrayList(dec), - Field.nullable("dec_to_float64", float64)))); - - List output = null; - ArrowRecordBatch batch = null; - try { - int numRows = 4; - String[] aValues = new String[] {"1.23", "1.58", "-1.23", "-1.58"}; - DecimalVector valuesa = - decimalVector(aValues, decimalType.getPrecision(), decimalType.getScale()); - batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0)), - Lists.newArrayList(valuesa.getValidityBuffer(), valuesa.getDataBuffer())); - - // Allocate output vectors. - Float8Vector resultVector = new Float8Vector("dec_to_float64", allocator); - resultVector.allocateNew(); - output = new ArrayList<>(Arrays.asList(resultVector)); - - // evaluate expressions. - eval.evaluate(batch, output); - - // compare the outputs. 
- double[] expected = {1.23, 1.58, -1.23, -1.58}; - for (int i = 0; i < numRows; i++) { - assertFalse(resultVector.isNull(i)); - assertEquals(expected[i], resultVector.get(i), 0); - } - } finally { - // free buffers - if (batch != null) { - releaseRecordBatch(batch); - } - if (output != null) { - releaseValueVectors(output); - } - eval.close(); - } - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void testCastToString() throws GandivaException { - Decimal decimalType = new Decimal(38, 2, 128); - Field dec = Field.nullable("dec", decimalType); - Field str = Field.nullable("str", new ArrowType.Utf8()); - TreeNode field = TreeBuilder.makeField(dec); - TreeNode literal = TreeBuilder.makeLiteral(5L); - List args = Lists.newArrayList(field, literal); - TreeNode cast = TreeBuilder.makeFunction("castVARCHAR", args, new ArrowType.Utf8()); - TreeNode root = - TreeBuilder.makeFunction( - "equal", Lists.newArrayList(cast, TreeBuilder.makeField(str)), new ArrowType.Bool()); - ExpressionTree tree = - TreeBuilder.makeExpression(root, Field.nullable("are_equal", new ArrowType.Bool())); - - Schema schema = new Schema(Lists.newArrayList(dec, str)); - Projector eval = Projector.make(schema, Lists.newArrayList(tree)); - - List output = null; - ArrowRecordBatch batch = null; - try { - int numRows = 4; - String[] aValues = new String[] {"10.51", "100.23", "-1000.23", "-0000.10"}; - String[] expected = {"10.51", "100.2", "-1000", "-0.10"}; - DecimalVector valuesa = - decimalVector(aValues, decimalType.getPrecision(), decimalType.getScale()); - VarCharVector result = varcharVector(expected); - batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0)), - Lists.newArrayList( - valuesa.getValidityBuffer(), - valuesa.getDataBuffer(), - result.getValidityBuffer(), - result.getOffsetBuffer(), - result.getDataBuffer())); - - BitVector resultVector = new BitVector("res", allocator); - resultVector.allocateNew(); - output = new 
ArrayList<>(Arrays.asList(resultVector)); - - // evaluate expressions. - eval.evaluate(batch, output); - - // compare the outputs. - for (int i = 0; i < numRows; i++) { - assertTrue(resultVector.getObject(i).booleanValue()); - } - } finally { - // free buffers - if (batch != null) { - releaseRecordBatch(batch); - } - if (output != null) { - releaseValueVectors(output); - } - eval.close(); - } - } - - @Test - @Disabled("GH-43576 - Fix and enable this test") - public void testCastStringToDecimal() throws GandivaException { - Decimal decimalType = new Decimal(4, 2, 128); - Field dec = Field.nullable("dec", decimalType); - - Field str = Field.nullable("str", new ArrowType.Utf8()); - TreeNode field = TreeBuilder.makeField(str); - List args = Lists.newArrayList(field); - TreeNode cast = TreeBuilder.makeFunction("castDECIMAL", args, decimalType); - ExpressionTree tree = TreeBuilder.makeExpression(cast, Field.nullable("dec_str", decimalType)); - - Schema schema = new Schema(Lists.newArrayList(str)); - Projector eval = Projector.make(schema, Lists.newArrayList(tree)); - - List output = null; - ArrowRecordBatch batch = null; - try { - int numRows = 4; - String[] aValues = new String[] {"10.5134", "-0.1", "10.516", "-1000"}; - VarCharVector valuesa = varcharVector(aValues); - batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0)), - Lists.newArrayList( - valuesa.getValidityBuffer(), valuesa.getOffsetBuffer(), valuesa.getDataBuffer())); - - DecimalVector resultVector = - new DecimalVector("res", allocator, decimalType.getPrecision(), decimalType.getScale()); - resultVector.allocateNew(); - output = new ArrayList<>(Arrays.asList(resultVector)); - - BigDecimal[] expected = { - BigDecimal.valueOf(10.51), - BigDecimal.valueOf(-0.10), - BigDecimal.valueOf(10.52), - BigDecimal.valueOf(0.00) - }; - // evaluate expressions. - eval.evaluate(batch, output); - - // compare the outputs. 
- for (int i = 0; i < numRows; i++) { - assertTrue( - expected[i].compareTo(resultVector.getObject(i)) == 0, - "mismatch in result for " - + "field " - + resultVector.getField().getName() - + " for row " - + i - + " expected " - + expected[i] - + ", got " - + resultVector.getObject(i)); - } - } finally { - // free buffers - if (batch != null) { - releaseRecordBatch(batch); - } - if (output != null) { - releaseValueVectors(output); - } - eval.close(); - } - } - - @Test - public void testInvalidDecimal() throws GandivaException { - Decimal decimalType = new Decimal(0, 0, 128); - Field int64f = Field.nullable("int64", int64); - - Schema schema = new Schema(Lists.newArrayList(int64f)); - IllegalArgumentException exception = - assertThrows( - IllegalArgumentException.class, - () -> { - Projector eval = - Projector.make( - schema, - Lists.newArrayList( - TreeBuilder.makeExpression( - "castDECIMAL", - Lists.newArrayList(int64f), - Field.nullable("invalid_dec", decimalType)))); - }); - assertEquals( - "Gandiva only supports decimals of upto 38 precision. Input precision : 0", - exception.getMessage()); - } - - @Test - public void testInvalidDecimalGt38() throws GandivaException { - Decimal decimalType = new Decimal(42, 0, 128); - Field int64f = Field.nullable("int64", int64); - - Schema schema = new Schema(Lists.newArrayList(int64f)); - IllegalArgumentException exception = - assertThrows( - IllegalArgumentException.class, - () -> { - Projector eval = - Projector.make( - schema, - Lists.newArrayList( - TreeBuilder.makeExpression( - "castDECIMAL", - Lists.newArrayList(int64f), - Field.nullable("invalid_dec", decimalType)))); - }); - assertEquals( - "Gandiva only supports decimals of upto 38 precision. 
Input precision : 42", - exception.getMessage()); - } -} diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java deleted file mode 100644 index 0d86bd9e72923..0000000000000 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java +++ /dev/null @@ -1,2609 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.gandiva.evaluator; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import java.math.BigDecimal; -import java.nio.charset.Charset; -import java.time.Instant; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Set; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.IntStream; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.expression.ExpressionTree; -import org.apache.arrow.gandiva.expression.TreeBuilder; -import org.apache.arrow.gandiva.expression.TreeNode; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.holders.NullableIntervalDayHolder; -import org.apache.arrow.vector.holders.NullableIntervalYearHolder; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.IntervalUnit; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; -import 
org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -@Disabled("Disabled until GH-43981 is solved") -public class ProjectorTest extends BaseEvaluatorTest { - - private Charset utf8Charset = Charset.forName("UTF-8"); - private Charset utf16Charset = Charset.forName("UTF-16"); - - List varBufs(String[] strings, Charset charset) { - ArrowBuf offsetsBuffer = allocator.buffer((strings.length + 1) * 4); - - long dataBufferSize = 0L; - for (String string : strings) { - dataBufferSize += string.getBytes(charset).length; - } - - ArrowBuf dataBuffer = allocator.buffer(dataBufferSize); - - int startOffset = 0; - for (int i = 0; i < strings.length; i++) { - offsetsBuffer.writeInt(startOffset); - - final byte[] bytes = strings[i].getBytes(charset); - dataBuffer = dataBuffer.reallocIfNeeded(dataBuffer.writerIndex() + bytes.length); - dataBuffer.setBytes(startOffset, bytes, 0, bytes.length); - startOffset += bytes.length; - } - offsetsBuffer.writeInt(startOffset); // offset for the last element - - return Arrays.asList(offsetsBuffer, dataBuffer); - } - - List stringBufs(String[] strings) { - return varBufs(strings, utf8Charset); - } - - List binaryBufs(String[] strings) { - return varBufs(strings, utf16Charset); - } - - private void testMakeProjectorParallel(ConfigurationBuilder.ConfigOptions configOptions) - throws InterruptedException { - List schemas = Lists.newArrayList(); - Field a = Field.nullable("a", int64); - Field b = Field.nullable("b", int64); - IntStream.range(0, 1000) - .forEach( - i -> { - Field c = Field.nullable("" + i, int64); - List cols = Lists.newArrayList(a, b, c); - schemas.add(new Schema(cols)); - }); - - TreeNode aNode = TreeBuilder.makeField(a); - TreeNode bNode = TreeBuilder.makeField(b); - List args = Lists.newArrayList(aNode, bNode); - - TreeNode cond = TreeBuilder.makeFunction("greater_than", args, boolType); - TreeNode ifNode = 
TreeBuilder.makeIf(cond, aNode, bNode, int64); - - ExpressionTree expr = TreeBuilder.makeExpression(ifNode, Field.nullable("c", int64)); - List exprs = Lists.newArrayList(expr); - - // build projectors in parallel choosing schema at random - // this should hit the same cache entry thus exposing - // any threading issues. - ExecutorService executors = Executors.newFixedThreadPool(16); - - IntStream.range(0, 1000) - .forEach( - i -> { - executors.submit( - () -> { - try { - Projector evaluator = - configOptions == null - ? Projector.make(schemas.get((int) (Math.random() * 100)), exprs) - : Projector.make( - schemas.get((int) (Math.random() * 100)), exprs, configOptions); - evaluator.close(); - } catch (GandivaException e) { - e.printStackTrace(); - } - }); - }); - executors.shutdown(); - executors.awaitTermination(100, java.util.concurrent.TimeUnit.SECONDS); - } - - @Test - public void testMakeProjectorParallel() throws Exception { - testMakeProjectorParallel(null); - testMakeProjectorParallel(new ConfigurationBuilder.ConfigOptions().withTargetCPU(false)); - testMakeProjectorParallel( - new ConfigurationBuilder.ConfigOptions().withTargetCPU(false).withOptimize(false)); - } - - // Will be fixed by https://issues.apache.org/jira/browse/ARROW-4371 - @Disabled - @Test - public void testMakeProjector() throws GandivaException { - Field a = Field.nullable("a", int64); - Field b = Field.nullable("b", int64); - TreeNode aNode = TreeBuilder.makeField(a); - TreeNode bNode = TreeBuilder.makeField(b); - List args = Lists.newArrayList(aNode, bNode); - - List cols = Lists.newArrayList(a, b); - Schema schema = new Schema(cols); - - TreeNode cond = TreeBuilder.makeFunction("greater_than", args, boolType); - TreeNode ifNode = TreeBuilder.makeIf(cond, aNode, bNode, int64); - - ExpressionTree expr = TreeBuilder.makeExpression(ifNode, Field.nullable("c", int64)); - List exprs = Lists.newArrayList(expr); - - long startTime = System.currentTimeMillis(); - Projector evaluator1 = 
Projector.make(schema, exprs); - System.out.println( - "Projector build: iteration 1 took " + (System.currentTimeMillis() - startTime) + " ms"); - startTime = System.currentTimeMillis(); - Projector evaluator2 = Projector.make(schema, exprs); - System.out.println( - "Projector build: iteration 2 took " + (System.currentTimeMillis() - startTime) + " ms"); - startTime = System.currentTimeMillis(); - Projector evaluator3 = Projector.make(schema, exprs); - long timeToMakeProjector = (System.currentTimeMillis() - startTime); - // should be getting the projector from the cache; - // giving 5ms for varying system load. - assertTrue(timeToMakeProjector < 5L); - - evaluator1.close(); - evaluator2.close(); - evaluator3.close(); - } - - @Test - public void testMakeProjectorValidationError() throws InterruptedException { - - Field a = Field.nullable("a", int64); - TreeNode aNode = TreeBuilder.makeField(a); - List args = Lists.newArrayList(aNode); - - List cols = Lists.newArrayList(a); - Schema schema = new Schema(cols); - - TreeNode cond = TreeBuilder.makeFunction("non_existent_fn", args, boolType); - - ExpressionTree expr = TreeBuilder.makeExpression(cond, Field.nullable("c", int64)); - List exprs = Lists.newArrayList(expr); - - boolean exceptionThrown = false; - try { - Projector evaluator1 = Projector.make(schema, exprs); - } catch (GandivaException e) { - exceptionThrown = true; - } - - assertTrue(exceptionThrown); - - // allow GC to collect any temp resources. - Thread.sleep(1000); - - // try again to ensure no temporary resources. 
- exceptionThrown = false; - try { - Projector evaluator1 = Projector.make(schema, exprs); - } catch (GandivaException e) { - exceptionThrown = true; - } - - assertTrue(exceptionThrown); - } - - @Test - public void testEvaluate() throws GandivaException, Exception { - Field a = Field.nullable("a", int32); - Field b = Field.nullable("b", int32); - List args = Lists.newArrayList(a, b); - - Field retType = Field.nullable("c", int32); - ExpressionTree root = TreeBuilder.makeExpression("add", args, retType); - - List exprs = Lists.newArrayList(root); - - Schema schema = new Schema(args); - Projector eval = Projector.make(schema, exprs); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - // second half is "undefined" - int[] aValues = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - int[] bValues = new int[] {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; - - ArrowBuf validitya = buf(validity); - ArrowBuf valuesa = intBuf(aValues); - ArrowBuf validityb = buf(validity); - ArrowBuf valuesb = intBuf(bValues); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 8), new ArrowFieldNode(numRows, 8)), - Lists.newArrayList(validitya, valuesa, validityb, valuesb)); - - IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator); - intVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(intVector); - eval.evaluate(batch, output); - - for (int i = 0; i < 8; i++) { - assertFalse(intVector.isNull(i)); - assertEquals(17, intVector.get(i)); - } - for (int i = 8; i < 16; i++) { - assertTrue(intVector.isNull(i)); - } - - // free buffers - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testEvaluateDivZero() throws GandivaException, Exception { - Field a = Field.nullable("a", int32); - Field b = Field.nullable("b", int32); - List args = Lists.newArrayList(a, b); - - Field retType = 
Field.nullable("c", int32); - ExpressionTree root = TreeBuilder.makeExpression("divide", args, retType); - - List exprs = Lists.newArrayList(root); - - Schema schema = new Schema(args); - Projector eval = Projector.make(schema, exprs); - - int numRows = 2; - byte[] validity = new byte[] {(byte) 255}; - // second half is "undefined" - int[] aValues = new int[] {2, 2}; - int[] bValues = new int[] {1, 0}; - - ArrowBuf validitya = buf(validity); - ArrowBuf valuesa = intBuf(aValues); - ArrowBuf validityb = buf(validity); - ArrowBuf valuesb = intBuf(bValues); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)), - Lists.newArrayList(validitya, valuesa, validityb, valuesb)); - - IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator); - intVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(intVector); - boolean exceptionThrown = false; - try { - eval.evaluate(batch, output); - } catch (GandivaException e) { - assertTrue(e.getMessage().contains("divide by zero")); - exceptionThrown = true; - } - assertTrue(exceptionThrown); - - // free buffers - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testDivZeroParallel() throws GandivaException, InterruptedException { - Field a = Field.nullable("a", int32); - Field b = Field.nullable("b", int32); - Field c = Field.nullable("c", int32); - List cols = Lists.newArrayList(a, b); - Schema s = new Schema(cols); - - List args = Lists.newArrayList(a, b); - - ExpressionTree expr = TreeBuilder.makeExpression("divide", args, c); - List exprs = Lists.newArrayList(expr); - - ExecutorService executors = Executors.newFixedThreadPool(16); - - AtomicInteger errorCount = new AtomicInteger(0); - AtomicInteger errorCountExp = new AtomicInteger(0); - // pre-build the projector so that same projector is used for all executions. 
- Projector test = Projector.make(s, exprs); - - IntStream.range(0, 1000) - .forEach( - i -> { - executors.submit( - () -> { - try { - Projector evaluator = Projector.make(s, exprs); - int numRows = 2; - byte[] validity = new byte[] {(byte) 255}; - int[] aValues = new int[] {2, 2}; - int[] bValues; - if (i % 2 == 0) { - errorCountExp.incrementAndGet(); - bValues = new int[] {1, 0}; - } else { - bValues = new int[] {1, 1}; - } - - ArrowBuf validitya = buf(validity); - ArrowBuf valuesa = intBuf(aValues); - ArrowBuf validityb = buf(validity); - ArrowBuf valuesb = intBuf(bValues); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList( - new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)), - Lists.newArrayList(validitya, valuesa, validityb, valuesb)); - - IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator); - intVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(intVector); - try { - evaluator.evaluate(batch, output); - } catch (GandivaException e) { - errorCount.incrementAndGet(); - } - // free buffers - releaseRecordBatch(batch); - releaseValueVectors(output); - evaluator.close(); - } catch (GandivaException ignore) { - } - }); - }); - executors.shutdown(); - executors.awaitTermination(100, java.util.concurrent.TimeUnit.SECONDS); - test.close(); - assertEquals(errorCountExp.intValue(), errorCount.intValue()); - } - - @Test - public void testAdd3() throws GandivaException, Exception { - Field x = Field.nullable("x", int32); - Field n2x = Field.nullable("n2x", int32); - Field n3x = Field.nullable("n3x", int32); - - List args = new ArrayList(); - - // x + n2x + n3x - TreeNode add1 = - TreeBuilder.makeFunction( - "add", Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeField(n2x)), int32); - TreeNode add = - TreeBuilder.makeFunction( - "add", Lists.newArrayList(add1, TreeBuilder.makeField(n3x)), int32); - ExpressionTree expr = TreeBuilder.makeExpression(add, x); - - List cols = 
Lists.newArrayList(x, n2x, n3x); - Schema schema = new Schema(cols); - - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - // second half is "undefined" - int[] xValues = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - int[] n2xValues = new int[] {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; - int[] n3xValues = new int[] {1, 2, 3, 4, 4, 3, 2, 1, 5, 6, 7, 8, 8, 7, 6, 5}; - - int[] expected = new int[] {18, 19, 20, 21, 21, 20, 19, 18, 18, 19, 20, 21, 21, 20, 19, 18}; - - ArrowBuf xValidity = buf(validity); - ArrowBuf xData = intBuf(xValues); - ArrowBuf n2xValidity = buf(validity); - ArrowBuf n2xData = intBuf(n2xValues); - ArrowBuf n3xValidity = buf(validity); - ArrowBuf n3xData = intBuf(n3xValues); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 8); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode, fieldNode), - Lists.newArrayList(xValidity, xData, n2xValidity, n2xData, n3xValidity, n3xData)); - - IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator); - intVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(intVector); - eval.evaluate(batch, output); - - for (int i = 0; i < 8; i++) { - assertFalse(intVector.isNull(i)); - assertEquals(expected[i], intVector.get(i)); - } - for (int i = 8; i < 16; i++) { - assertTrue(intVector.isNull(i)); - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testStringFields() throws GandivaException { - /* - * when x < "hello" then octet_length(x) + a - * else octet_length(x) + b - */ - - Field x = Field.nullable("x", new ArrowType.Utf8()); - Field a = Field.nullable("a", new ArrowType.Int(32, true)); - Field b = Field.nullable("b", new ArrowType.Int(32, true)); - - ArrowType retType = new ArrowType.Int(32, true); - - TreeNode cond = - TreeBuilder.makeFunction( 
- "less_than", - Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeStringLiteral("hello")), - boolType); - TreeNode octetLenFuncNode = - TreeBuilder.makeFunction( - "octet_length", Lists.newArrayList(TreeBuilder.makeField(x)), retType); - TreeNode octetLenPlusANode = - TreeBuilder.makeFunction( - "add", Lists.newArrayList(TreeBuilder.makeField(a), octetLenFuncNode), retType); - TreeNode octetLenPlusBNode = - TreeBuilder.makeFunction( - "add", Lists.newArrayList(TreeBuilder.makeField(b), octetLenFuncNode), retType); - - TreeNode ifHello = TreeBuilder.makeIf(cond, octetLenPlusANode, octetLenPlusBNode, retType); - - ExpressionTree expr = TreeBuilder.makeExpression(ifHello, Field.nullable("res", retType)); - Schema schema = new Schema(Lists.newArrayList(a, x, b)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 5; - byte[] validity = new byte[] {(byte) 255, 0}; - // "A função" means "The function" in portugese - String[] valuesX = new String[] {"hell", "abc", "hellox", "ijk", "A função"}; - int[] valuesA = new int[] {10, 20, 30, 40, 50}; - int[] valuesB = new int[] {110, 120, 130, 140, 150}; - int[] expected = new int[] {14, 23, 136, 143, 60}; - - ArrowBuf validityX = buf(validity); - List dataBufsX = stringBufs(valuesX); - ArrowBuf validityA = buf(validity); - ArrowBuf dataA = intBuf(valuesA); - ArrowBuf validityB = buf(validity); - ArrowBuf dataB = intBuf(valuesB); - - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0), new ArrowFieldNode(numRows, 0)), - Lists.newArrayList( - validityA, dataA, validityX, dataBufsX.get(0), dataBufsX.get(1), validityB, dataB)); - - IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator); - intVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(intVector); - eval.evaluate(batch, output); - - for (int i = 0; i < numRows; i++) { - assertFalse(intVector.isNull(i)); - assertEquals(expected[i], 
intVector.get(i)); - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testStringOutput() throws GandivaException { - /* - * if (x >= 0) "hi" else "bye" - */ - - Field x = Field.nullable("x", new ArrowType.Int(32, true)); - - ArrowType retType = new ArrowType.Utf8(); - - TreeNode ifHiBye = - TreeBuilder.makeIf( - TreeBuilder.makeFunction( - "greater_than_or_equal_to", - Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeLiteral(0)), - boolType), - TreeBuilder.makeStringLiteral("hi"), - TreeBuilder.makeStringLiteral("bye"), - retType); - - ExpressionTree expr = TreeBuilder.makeExpression(ifHiBye, Field.nullable("res", retType)); - Schema schema = new Schema(Lists.newArrayList(x)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - // fill up input record batch - int numRows = 4; - byte[] validity = new byte[] {(byte) 255, 0}; - int[] xValues = new int[] {10, -10, 20, -20}; - String[] expected = new String[] {"hi", "bye", "hi", "bye"}; - ArrowBuf validityX = buf(validity); - ArrowBuf dataX = intBuf(xValues); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0)), - Lists.newArrayList(validityX, dataX)); - - // allocate data for output vector. - VarCharVector outVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator); - outVector.allocateNew(64, numRows); - - // evaluate expression - List output = new ArrayList<>(); - output.add(outVector); - eval.evaluate(batch, output); - - // match expected output. - for (int i = 0; i < numRows; i++) { - assertFalse(outVector.isNull(i)); - assertEquals(expected[i], new String(outVector.get(i))); - } - - // test with insufficient data buffer. 
- try { - outVector.allocateNew(4, numRows); - eval.evaluate(batch, output); - } finally { - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - } - - @Test - public void testRegex() throws GandivaException { - /* - * like "%map%" - */ - - Field x = Field.nullable("x", new ArrowType.Utf8()); - - TreeNode cond = - TreeBuilder.makeFunction( - "like", - Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeStringLiteral("%map%")), - boolType); - ExpressionTree expr = TreeBuilder.makeExpression(cond, Field.nullable("res", boolType)); - Schema schema = new Schema(Lists.newArrayList(x)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 5; - byte[] validity = new byte[] {(byte) 255, 0}; - String[] valuesX = new String[] {"mapD", "maps", "google maps", "map", "MapR"}; - boolean[] expected = new boolean[] {true, true, true, true, false}; - - ArrowBuf validityX = buf(validity); - List dataBufsX = stringBufs(valuesX); - - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0)), - Lists.newArrayList(validityX, dataBufsX.get(0), dataBufsX.get(1))); - - BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - bitVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(bitVector); - eval.evaluate(batch, output); - - for (int i = 0; i < numRows; i++) { - assertFalse(bitVector.isNull(i)); - assertEquals(expected[i], bitVector.getObject(i).booleanValue()); - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testRegexpReplace() throws GandivaException { - - Field x = Field.nullable("x", new ArrowType.Utf8()); - Field replaceString = Field.nullable("replaceString", new ArrowType.Utf8()); - - Field retType = Field.nullable("c", new ArrowType.Utf8()); - - TreeNode cond = - TreeBuilder.makeFunction( - "regexp_replace", - Lists.newArrayList( - TreeBuilder.makeField(x), - 
TreeBuilder.makeStringLiteral("ana"), - TreeBuilder.makeField(replaceString)), - new ArrowType.Utf8()); - ExpressionTree expr = TreeBuilder.makeExpression(cond, retType); - Schema schema = new Schema(Lists.newArrayList(x, replaceString)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 5; - byte[] validity = new byte[] {(byte) 15, 0}; - String[] valuesX = new String[] {"banana", "bananaana", "bananana", "anaana", "anaana"}; - String[] valuesReplace = new String[] {"ue", "", "", "c", ""}; - String[] expected = new String[] {"buena", "bna", "bn", "cc", null}; - - ArrowBuf validityX = buf(validity); - ArrowBuf validityReplace = buf(validity); - List dataBufsX = stringBufs(valuesX); - List dataBufsReplace = stringBufs(valuesReplace); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode), - Lists.newArrayList( - validityX, - dataBufsX.get(0), - dataBufsX.get(1), - validityReplace, - dataBufsReplace.get(0), - dataBufsReplace.get(1))); - - // allocate data for output vector. - VarCharVector outVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator); - outVector.allocateNew(numRows * 15, numRows); - - // evaluate expression - List output = new ArrayList<>(); - output.add(outVector); - eval.evaluate(batch, output); - eval.close(); - - // match expected output. 
- for (int i = 0; i < numRows - 1; i++) { - assertFalse(outVector.isNull(i), "Expect none value equals null"); - assertEquals(expected[i], new String(outVector.get(i))); - } - - assertTrue(outVector.isNull(numRows - 1), "Last value must be null"); - - releaseRecordBatch(batch); - releaseValueVectors(output); - } - - @Test - public void testCastIntervalDay() throws GandivaException { - - Field x = Field.nullable("x", new ArrowType.Utf8()); - - Field retType = Field.nullable("c", new ArrowType.Interval(IntervalUnit.DAY_TIME)); - - TreeNode cond = - TreeBuilder.makeFunction( - "castintervalday", - Lists.newArrayList(TreeBuilder.makeField(x)), - new ArrowType.Interval(IntervalUnit.DAY_TIME)); - ExpressionTree expr = TreeBuilder.makeExpression(cond, retType); - Schema schema = new Schema(Lists.newArrayList(x)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 4; - byte[] validity = new byte[] {(byte) 7, 0}; - String[] valuesX = new String[] {"1742461111", "P1Y1M1DT1H1M1S", "PT48H1M1S", "test"}; - int[][] expected = - new int[][] { // day and millis - {20, 14461111}, {1, 3661000}, {2, 61000}, null - }; - - ArrowBuf validityX = buf(validity); - List dataBufsX = stringBufs(valuesX); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode), - Lists.newArrayList(validityX, dataBufsX.get(0), dataBufsX.get(1))); - - // allocate data for output vector. - IntervalDayVector outVector = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator); - outVector.allocateNew(); - - // evaluate expression - List output = new ArrayList<>(); - output.add(outVector); - eval.evaluate(batch, output); - eval.close(); - - // match expected output. 
- NullableIntervalDayHolder holder = new NullableIntervalDayHolder(); - for (int i = 0; i < numRows - 1; i++) { - assertFalse(outVector.isNull(i), "Expect none value equals null"); - outVector.get(i, holder); - - assertEquals(expected[i][0], holder.days); - assertEquals(expected[i][1], holder.milliseconds); - } - - assertTrue(outVector.isNull(numRows - 1), "Last value must be null"); - - releaseRecordBatch(batch); - releaseValueVectors(output); - } - - @Test - public void testCastIntervalYear() throws GandivaException { - - Field x = Field.nullable("x", new ArrowType.Utf8()); - - Field retType = Field.nullable("c", new ArrowType.Interval(IntervalUnit.YEAR_MONTH)); - - TreeNode cond = - TreeBuilder.makeFunction( - "castintervalyear", - Lists.newArrayList(TreeBuilder.makeField(x)), - new ArrowType.Interval(IntervalUnit.YEAR_MONTH)); - ExpressionTree expr = TreeBuilder.makeExpression(cond, retType); - Schema schema = new Schema(Lists.newArrayList(x)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 4; - byte[] validity = new byte[] {(byte) 7, 0}; - String[] valuesX = new String[] {"65851111", "P1Y1M1DT1H1M1S", "P1Y", "test"}; - int[][] expected = - new int[][] { // year and month - {0, 65851111}, {1, 1}, {1, 0}, null - }; - - ArrowBuf validityX = buf(validity); - List dataBufsX = stringBufs(valuesX); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode), - Lists.newArrayList(validityX, dataBufsX.get(0), dataBufsX.get(1))); - - // allocate data for output vector. - IntervalYearVector outVector = new IntervalYearVector(EMPTY_SCHEMA_PATH, allocator); - outVector.allocateNew(); - - // evaluate expression - List output = new ArrayList<>(); - output.add(outVector); - eval.evaluate(batch, output); - eval.close(); - - // match expected output. 
- NullableIntervalYearHolder holder = new NullableIntervalYearHolder(); - for (int i = 0; i < numRows - 1; i++) { - assertFalse(outVector.isNull(i), "Expect none value equals null"); - outVector.get(i, holder); - - int numberMonths = - expected[i][0] * 12 - + // number of years - expected[i][1]; // number of months - - assertEquals(numberMonths, holder.value); - } - - assertTrue(outVector.isNull(numRows - 1), "Last value must be null"); - - releaseRecordBatch(batch); - releaseValueVectors(output); - } - - @Test - public void testRand() throws GandivaException { - - TreeNode randWithSeed = - TreeBuilder.makeFunction("rand", Lists.newArrayList(TreeBuilder.makeLiteral(12)), float64); - TreeNode rand = TreeBuilder.makeFunction("rand", Lists.newArrayList(), float64); - ExpressionTree exprWithSeed = - TreeBuilder.makeExpression(randWithSeed, Field.nullable("res", float64)); - ExpressionTree expr = TreeBuilder.makeExpression(rand, Field.nullable("res2", float64)); - Field x = Field.nullable("x", new ArrowType.Utf8()); - Schema schema = new Schema(Lists.newArrayList(x)); - Projector evalWithSeed = Projector.make(schema, Lists.newArrayList(exprWithSeed)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 5; - byte[] validity = new byte[] {(byte) 255, 0}; - String[] valuesX = new String[] {"mapD", "maps", "google maps", "map", "MapR"}; - double[] expected = - new double[] { - 0.1597116001879662D, - 0.7347813877263527D, - 0.6069965050584282D, - 0.7240285696335824D, - 0.09975540272957834D - }; - - ArrowBuf validityX = buf(validity); - List dataBufsX = stringBufs(valuesX); - - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0)), - Lists.newArrayList(validityX, dataBufsX.get(0), dataBufsX.get(1))); - - Float8Vector float8Vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator); - float8Vector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(float8Vector); - 
evalWithSeed.evaluate(batch, output); - - for (int i = 0; i < numRows; i++) { - assertFalse(float8Vector.isNull(i)); - assertEquals(expected[i], float8Vector.getObject(i), 0.000000001); - } - - eval.evaluate(batch, output); // without seed - assertNotEquals(float8Vector.getObject(0), float8Vector.getObject(1), 0.000000001); - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - evalWithSeed.close(); - } - - @Test - public void testBinaryFields() throws GandivaException { - Field a = Field.nullable("a", new ArrowType.Binary()); - Field b = Field.nullable("b", new ArrowType.Binary()); - List args = Lists.newArrayList(a, b); - - ArrowType retType = new ArrowType.Bool(); - ExpressionTree expr = TreeBuilder.makeExpression("equal", args, Field.nullable("res", retType)); - - Schema schema = new Schema(Lists.newArrayList(args)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 5; - byte[] validity = new byte[] {(byte) 255, 0}; - String[] valuesA = new String[] {"a", "aa", "aaa", "aaaa", "A função"}; - String[] valuesB = new String[] {"a", "bb", "aaa", "bbbbb", "A função"}; - boolean[] expected = new boolean[] {true, false, true, false, true}; - - ArrowBuf validitya = buf(validity); - ArrowBuf validityb = buf(validity); - List inBufsA = binaryBufs(valuesA); - List inBufsB = binaryBufs(valuesB); - - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 8), new ArrowFieldNode(numRows, 8)), - Lists.newArrayList( - validitya, - inBufsA.get(0), - inBufsA.get(1), - validityb, - inBufsB.get(0), - inBufsB.get(1))); - - BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - bitVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(bitVector); - eval.evaluate(batch, output); - - for (int i = 0; i < numRows; i++) { - assertFalse(bitVector.isNull(i)); - assertEquals(expected[i], bitVector.getObject(i).booleanValue()); - } - - 
releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - private TreeNode makeLongLessThanCond(TreeNode arg, long value) { - return TreeBuilder.makeFunction( - "less_than", Lists.newArrayList(arg, TreeBuilder.makeLiteral(value)), boolType); - } - - private TreeNode makeLongGreaterThanCond(TreeNode arg, long value) { - return TreeBuilder.makeFunction( - "greater_than", Lists.newArrayList(arg, TreeBuilder.makeLiteral(value)), boolType); - } - - private TreeNode ifLongLessThanElse( - TreeNode arg, long value, long thenValue, TreeNode elseNode, ArrowType type) { - return TreeBuilder.makeIf( - makeLongLessThanCond(arg, value), TreeBuilder.makeLiteral(thenValue), elseNode, type); - } - - @Test - public void testIf() throws GandivaException, Exception { - /* - * when x < 10 then 0 - * when x < 20 then 1 - * when x < 30 then 2 - * when x < 40 then 3 - * when x < 50 then 4 - * when x < 60 then 5 - * when x < 70 then 6 - * when x < 80 then 7 - * when x < 90 then 8 - * when x < 100 then 9 - * else 10 - */ - Field x = Field.nullable("x", int64); - TreeNode xNode = TreeBuilder.makeField(x); - - // if (x < 100) then 9 else 10 - TreeNode ifLess100 = ifLongLessThanElse(xNode, 100L, 9L, TreeBuilder.makeLiteral(10L), int64); - // if (x < 90) then 8 else ifLess100 - TreeNode ifLess90 = ifLongLessThanElse(xNode, 90L, 8L, ifLess100, int64); - // if (x < 80) then 7 else ifLess90 - TreeNode ifLess80 = ifLongLessThanElse(xNode, 80L, 7L, ifLess90, int64); - // if (x < 70) then 6 else ifLess80 - TreeNode ifLess70 = ifLongLessThanElse(xNode, 70L, 6L, ifLess80, int64); - // if (x < 60) then 5 else ifLess70 - TreeNode ifLess60 = ifLongLessThanElse(xNode, 60L, 5L, ifLess70, int64); - // if (x < 50) then 4 else ifLess60 - TreeNode ifLess50 = ifLongLessThanElse(xNode, 50L, 4L, ifLess60, int64); - // if (x < 40) then 3 else ifLess50 - TreeNode ifLess40 = ifLongLessThanElse(xNode, 40L, 3L, ifLess50, int64); - // if (x < 30) then 2 else ifLess40 - TreeNode ifLess30 = 
ifLongLessThanElse(xNode, 30L, 2L, ifLess40, int64); - // if (x < 20) then 1 else ifLess30 - TreeNode ifLess20 = ifLongLessThanElse(xNode, 20L, 1L, ifLess30, int64); - // if (x < 10) then 0 else ifLess20 - TreeNode ifLess10 = ifLongLessThanElse(xNode, 10L, 0L, ifLess20, int64); - - ExpressionTree expr = TreeBuilder.makeExpression(ifLess10, x); - Schema schema = new Schema(Lists.newArrayList(x)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, (byte) 255}; - long[] xValues = new long[] {9, 15, 21, 32, 43, 54, 65, 76, 87, 98, 109, 200, -10, 60, 77, 80}; - long[] expected = new long[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 0, 6, 7, 8}; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf xData = longBuf(xValues); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, xData)); - - BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator); - bigIntVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(bigIntVector); - eval.evaluate(batch, output); - - for (int i = 0; i < numRows; i++) { - assertFalse(bigIntVector.isNull(i)); - assertEquals(expected[i], bigIntVector.get(i)); - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testAnd() throws GandivaException, Exception { - /* - * x > 10 AND x < 20 - */ - ArrowType int64 = new ArrowType.Int(64, true); - - Field x = Field.nullable("x", int64); - TreeNode xNode = TreeBuilder.makeField(x); - TreeNode gt10 = makeLongGreaterThanCond(xNode, 10); - TreeNode lt20 = makeLongLessThanCond(xNode, 20); - TreeNode and = TreeBuilder.makeAnd(Lists.newArrayList(gt10, lt20)); - - Field res = Field.nullable("res", boolType); - - ExpressionTree expr = TreeBuilder.makeExpression(and, res); - Schema schema = new 
Schema(Lists.newArrayList(x)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 4; - byte[] validity = new byte[] {(byte) 255}; - long[] xValues = new long[] {9, 15, 17, 25}; - boolean[] expected = new boolean[] {false, true, true, false}; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf xData = longBuf(xValues); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, xData)); - - BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - bitVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(bitVector); - eval.evaluate(batch, output); - - for (int i = 0; i < numRows; i++) { - assertFalse(bitVector.isNull(i)); - assertEquals(expected[i], bitVector.getObject(i).booleanValue()); - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testOr() throws GandivaException, Exception { - /* - * x > 10 OR x < 5 - */ - ArrowType int64 = new ArrowType.Int(64, true); - - Field x = Field.nullable("x", int64); - TreeNode xNode = TreeBuilder.makeField(x); - TreeNode gt10 = makeLongGreaterThanCond(xNode, 10); - TreeNode lt5 = makeLongLessThanCond(xNode, 5); - TreeNode or = TreeBuilder.makeOr(Lists.newArrayList(gt10, lt5)); - - Field res = Field.nullable("res", boolType); - - ExpressionTree expr = TreeBuilder.makeExpression(or, res); - Schema schema = new Schema(Lists.newArrayList(x)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 4; - byte[] validity = new byte[] {(byte) 255}; - long[] xValues = new long[] {4, 9, 15, 17}; - boolean[] expected = new boolean[] {true, false, true, true}; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf xData = longBuf(xValues); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - 
numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, xData)); - - BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - bitVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(bitVector); - eval.evaluate(batch, output); - - for (int i = 0; i < numRows; i++) { - assertFalse(bitVector.isNull(i)); - assertEquals(expected[i], bitVector.getObject(i).booleanValue()); - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testNull() throws GandivaException, Exception { - /* - * when x < 10 then 1 - * else null - */ - ArrowType int64 = new ArrowType.Int(64, true); - - Field x = Field.nullable("x", int64); - TreeNode xNode = TreeBuilder.makeField(x); - - // if (x < 10) then 1 else null - TreeNode ifLess10 = ifLongLessThanElse(xNode, 10L, 1L, TreeBuilder.makeNull(int64), int64); - - ExpressionTree expr = TreeBuilder.makeExpression(ifLess10, x); - Schema schema = new Schema(Lists.newArrayList(x)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 2; - byte[] validity = new byte[] {(byte) 255}; - long[] xValues = new long[] {5, 32}; - long[] expected = new long[] {1, 0}; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf xData = longBuf(xValues); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, xData)); - - BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator); - bigIntVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(bigIntVector); - eval.evaluate(batch, output); - - // first element should be 1 - assertFalse(bigIntVector.isNull(0)); - assertEquals(expected[0], bigIntVector.get(0)); - - // second element should be null - assertTrue(bigIntVector.isNull(1)); - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - 
@Test - public void testTimeNull() throws GandivaException, Exception { - - ArrowType time64 = new ArrowType.Time(TimeUnit.MICROSECOND, 64); - - Field x = Field.nullable("x", time64); - TreeNode xNode = TreeBuilder.makeNull(time64); - - ExpressionTree expr = TreeBuilder.makeExpression(xNode, x); - Schema schema = new Schema(Lists.newArrayList(x)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 2; - byte[] validity = new byte[] {(byte) 255}; - int[] xValues = new int[] {5, 32}; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf xData = intBuf(xValues); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, xData)); - - BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator); - bigIntVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(bigIntVector); - eval.evaluate(batch, output); - - assertTrue(bigIntVector.isNull(0)); - assertTrue(bigIntVector.isNull(1)); - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testTimeEquals() throws GandivaException, Exception { - /* - * when isnotnull(x) then x - * else y - */ - Field x = Field.nullable("x", new ArrowType.Time(TimeUnit.MILLISECOND, 32)); - TreeNode xNode = TreeBuilder.makeField(x); - - Field y = Field.nullable("y", new ArrowType.Time(TimeUnit.MILLISECOND, 32)); - TreeNode yNode = TreeBuilder.makeField(y); - - // if isnotnull(x) then x else y - TreeNode condition = TreeBuilder.makeFunction("isnotnull", Lists.newArrayList(xNode), boolType); - TreeNode ifCoalesce = - TreeBuilder.makeIf(condition, xNode, yNode, new ArrowType.Time(TimeUnit.MILLISECOND, 32)); - - ExpressionTree expr = TreeBuilder.makeExpression(ifCoalesce, x); - Schema schema = new Schema(Lists.newArrayList(x, y)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - 
int numRows = 2; - byte[] validity = new byte[] {(byte) 1}; - byte[] yValidity = new byte[] {(byte) 3}; - int[] xValues = new int[] {5, 1}; - int[] yValues = new int[] {10, 2}; - int[] expected = new int[] {5, 2}; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf xData = intBuf(xValues); - - ArrowBuf yBufValidity = buf(yValidity); - ArrowBuf yData = intBuf(yValues); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode), - Lists.newArrayList(bufValidity, xData, yBufValidity, yData)); - - IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator); - intVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(intVector); - eval.evaluate(batch, output); - - // output should be 5 and 2 - assertFalse(intVector.isNull(0)); - assertEquals(expected[0], intVector.get(0)); - assertEquals(expected[1], intVector.get(1)); - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testIsNull() throws GandivaException, Exception { - Field x = Field.nullable("x", float64); - - TreeNode xNode = TreeBuilder.makeField(x); - TreeNode isNull = TreeBuilder.makeFunction("isnull", Lists.newArrayList(xNode), boolType); - ExpressionTree expr = TreeBuilder.makeExpression(isNull, Field.nullable("result", boolType)); - Schema schema = new Schema(Lists.newArrayList(x)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - double[] xValues = - new double[] { - 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 - }; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf xData = doubleBuf(xValues); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, xData)); - - 
BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - bitVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(bitVector); - eval.evaluate(batch, output); - - for (int i = 0; i < 8; i++) { - assertFalse(bitVector.getObject(i).booleanValue()); - } - for (int i = 8; i < numRows; i++) { - assertTrue(bitVector.getObject(i).booleanValue()); - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testEquals() throws GandivaException, Exception { - Field c1 = Field.nullable("c1", int32); - Field c2 = Field.nullable("c2", int32); - - TreeNode c1Node = TreeBuilder.makeField(c1); - TreeNode c2Node = TreeBuilder.makeField(c2); - TreeNode equals = - TreeBuilder.makeFunction("equal", Lists.newArrayList(c1Node, c2Node), boolType); - ExpressionTree expr = TreeBuilder.makeExpression(equals, Field.nullable("result", boolType)); - Schema schema = new Schema(Lists.newArrayList(c1, c2)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - int[] c1Values = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - int[] c2Values = new int[] {1, 2, 3, 4, 8, 7, 6, 5, 16, 15, 14, 13, 12, 11, 10, 9}; - - ArrowBuf c1Validity = buf(validity); - ArrowBuf c1Data = intBuf(c1Values); - ArrowBuf c2Validity = buf(validity); - ArrowBuf c2Data = intBuf(c2Values); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode), - Lists.newArrayList(c1Validity, c1Data, c2Validity, c2Data)); - - BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - bitVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(bitVector); - eval.evaluate(batch, output); - - for (int i = 0; i < 4; i++) { - assertTrue(bitVector.getObject(i).booleanValue()); - } - for (int i = 4; i < 8; i++) { - 
assertFalse(bitVector.getObject(i).booleanValue()); - } - for (int i = 8; i < 16; i++) { - assertTrue(bitVector.isNull(i)); - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testInExpr() throws GandivaException, Exception { - Field c1 = Field.nullable("c1", int32); - - TreeNode inExpr = - TreeBuilder.makeInExpressionInt32( - TreeBuilder.makeField(c1), Sets.newHashSet(1, 2, 3, 4, 5, 15, 16)); - ExpressionTree expr = TreeBuilder.makeExpression(inExpr, Field.nullable("result", boolType)); - Schema schema = new Schema(Lists.newArrayList(c1)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - int[] c1Values = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - - ArrowBuf c1Validity = buf(validity); - ArrowBuf c1Data = intBuf(c1Values); - ArrowBuf c2Validity = buf(validity); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode), - Lists.newArrayList(c1Validity, c1Data, c2Validity)); - - BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - bitVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(bitVector); - eval.evaluate(batch, output); - - for (int i = 0; i < 5; i++) { - assertTrue(bitVector.getObject(i).booleanValue()); - } - for (int i = 5; i < 16; i++) { - assertFalse(bitVector.getObject(i).booleanValue()); - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testInExprDecimal() throws GandivaException, Exception { - Integer precision = 26; - Integer scale = 5; - ArrowType.Decimal decimal = new ArrowType.Decimal(precision, scale, 128); - Field c1 = Field.nullable("c1", decimal); - - String[] values = new String[] {"1", "2", "3", "4"}; - Set decimalSet = decimalSet(values, scale); - 
decimalSet.add(new BigDecimal(-0.0)); - decimalSet.add(new BigDecimal(Long.MAX_VALUE)); - decimalSet.add(new BigDecimal(Long.MIN_VALUE)); - TreeNode inExpr = - TreeBuilder.makeInExpressionDecimal( - TreeBuilder.makeField(c1), decimalSet, precision, scale); - ExpressionTree expr = TreeBuilder.makeExpression(inExpr, Field.nullable("result", boolType)); - Schema schema = new Schema(Lists.newArrayList(c1)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - String[] c1Values = - new String[] { - "1", - "2", - "3", - "4", - "-0.0", - "6", - "7", - "8", - "9", - "10", - "11", - "12", - "13", - "14", - String.valueOf(Long.MAX_VALUE), - String.valueOf(Long.MIN_VALUE) - }; - - DecimalVector c1Data = decimalVector(c1Values, precision, scale); - ArrowBuf c1Validity = buf(validity); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode), - Lists.newArrayList(c1Validity, c1Data.getDataBuffer(), c1Data.getValidityBuffer())); - - BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - bitVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(bitVector); - eval.evaluate(batch, output); - - for (int i = 0; i < 5; i++) { - assertTrue(bitVector.getObject(i).booleanValue()); - } - for (int i = 5; i < 16; i++) { - assertFalse(bitVector.getObject(i).booleanValue()); - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testInExprDouble() throws GandivaException, Exception { - Field c1 = Field.nullable("c1", float64); - - TreeNode inExpr = - TreeBuilder.makeInExpressionDouble( - TreeBuilder.makeField(c1), - Sets.newHashSet( - 1.0, - -0.0, - 3.0, - 4.0, - Double.NaN, - Double.POSITIVE_INFINITY, - Double.NEGATIVE_INFINITY)); - ExpressionTree expr = TreeBuilder.makeExpression(inExpr, 
Field.nullable("result", boolType)); - Schema schema = new Schema(Lists.newArrayList(c1)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - // Create a row-batch with some sample data to look for - int numRows = 16; - // Only the first 8 values will be valid. - byte[] validity = new byte[] {(byte) 255, 0}; - double[] c1Values = - new double[] { - 1, - -0.0, - Double.NEGATIVE_INFINITY, - Double.POSITIVE_INFINITY, - Double.NaN, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 4, - 3 - }; - - ArrowBuf c1Validity = buf(validity); - ArrowBuf c1Data = doubleBuf(c1Values); - ArrowBuf c2Validity = buf(validity); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode), - Lists.newArrayList(c1Validity, c1Data, c2Validity)); - - BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - bitVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(bitVector); - eval.evaluate(batch, output); - - // The first four values in the vector must match the expression, but not the other ones. 
- for (int i = 0; i < 4; i++) { - assertTrue(bitVector.getObject(i).booleanValue()); - } - for (int i = 4; i < 16; i++) { - assertFalse(bitVector.getObject(i).booleanValue()); - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testInExprStrings() throws GandivaException, Exception { - Field c1 = Field.nullable("c1", new ArrowType.Utf8()); - - TreeNode l1 = TreeBuilder.makeLiteral(1L); - TreeNode l2 = TreeBuilder.makeLiteral(3L); - List args = Lists.newArrayList(TreeBuilder.makeField(c1), l1, l2); - TreeNode substr = TreeBuilder.makeFunction("substr", args, new ArrowType.Utf8()); - TreeNode inExpr = - TreeBuilder.makeInExpressionString(substr, Sets.newHashSet("one", "two", "thr", "fou")); - ExpressionTree expr = TreeBuilder.makeExpression(inExpr, Field.nullable("result", boolType)); - Schema schema = new Schema(Lists.newArrayList(c1)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - String[] c1Values = - new String[] { - "one", - "two", - "three", - "four", - "five", - "six", - "seven", - "eight", - "nine", - "ten", - "eleven", - "twelve", - "thirteen", - "fourteen", - "fifteen", - "sixteen" - }; - - ArrowBuf c1Validity = buf(validity); - List dataBufsX = stringBufs(c1Values); - ArrowBuf c2Validity = buf(validity); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode), - Lists.newArrayList(c1Validity, dataBufsX.get(0), dataBufsX.get(1), c2Validity)); - - BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - bitVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(bitVector); - eval.evaluate(batch, output); - - for (int i = 0; i < 4; i++) { - assertTrue(bitVector.getObject(i).booleanValue()); - } - for (int i = 5; i < 16; i++) { - 
assertFalse(bitVector.getObject(i).booleanValue()); - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testSmallOutputVectors() throws GandivaException, Exception { - Field a = Field.nullable("a", int32); - Field b = Field.nullable("b", int32); - List args = Lists.newArrayList(a, b); - - Field retType = Field.nullable("c", int32); - ExpressionTree root = TreeBuilder.makeExpression("add", args, retType); - - List exprs = Lists.newArrayList(root); - - Schema schema = new Schema(args); - Projector eval = Projector.make(schema, exprs); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - // second half is "undefined" - int[] aValues = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - int[] bValues = new int[] {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; - - ArrowBuf aValidity = buf(validity); - ArrowBuf aData = intBuf(aValues); - ArrowBuf bValidity = buf(validity); - ArrowBuf b2Validity = buf(validity); - ArrowBuf bData = intBuf(bValues); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 8), new ArrowFieldNode(numRows, 8)), - Lists.newArrayList(aValidity, aData, bValidity, bData, b2Validity)); - - IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator); - - List output = new ArrayList(); - output.add(intVector); - try { - eval.evaluate(batch, output); - } catch (Throwable t) { - intVector.allocateNew(numRows); - eval.evaluate(batch, output); - } - - for (int i = 0; i < 8; i++) { - assertFalse(intVector.isNull(i)); - assertEquals(17, intVector.get(i)); - } - for (int i = 8; i < 16; i++) { - assertTrue(intVector.isNull(i)); - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testDateTime() throws GandivaException, Exception { - ArrowType date64 = new ArrowType.Date(DateUnit.MILLISECOND); - // ArrowType time32 = new 
ArrowType.Time(TimeUnit.MILLISECOND, 32); - ArrowType timeStamp = new ArrowType.Timestamp(TimeUnit.MILLISECOND, "TZ"); - - Field dateField = Field.nullable("date", date64); - // Field timeField = Field.nullable("time", time32); - Field tsField = Field.nullable("timestamp", timeStamp); - - TreeNode dateNode = TreeBuilder.makeField(dateField); - TreeNode tsNode = TreeBuilder.makeField(tsField); - - List dateArgs = Lists.newArrayList(dateNode); - TreeNode dateToYear = TreeBuilder.makeFunction("extractYear", dateArgs, int64); - TreeNode dateToMonth = TreeBuilder.makeFunction("extractMonth", dateArgs, int64); - TreeNode dateToDay = TreeBuilder.makeFunction("extractDay", dateArgs, int64); - TreeNode dateToHour = TreeBuilder.makeFunction("extractHour", dateArgs, int64); - TreeNode dateToMin = TreeBuilder.makeFunction("extractMinute", dateArgs, int64); - - List tsArgs = Lists.newArrayList(tsNode); - TreeNode tsToYear = TreeBuilder.makeFunction("extractYear", tsArgs, int64); - TreeNode tsToMonth = TreeBuilder.makeFunction("extractMonth", tsArgs, int64); - TreeNode tsToDay = TreeBuilder.makeFunction("extractDay", tsArgs, int64); - TreeNode tsToHour = TreeBuilder.makeFunction("extractHour", tsArgs, int64); - TreeNode tsToMin = TreeBuilder.makeFunction("extractMinute", tsArgs, int64); - - Field resultField = Field.nullable("result", int64); - List exprs = - Lists.newArrayList( - TreeBuilder.makeExpression(dateToYear, resultField), - TreeBuilder.makeExpression(dateToMonth, resultField), - TreeBuilder.makeExpression(dateToDay, resultField), - TreeBuilder.makeExpression(dateToHour, resultField), - TreeBuilder.makeExpression(dateToMin, resultField), - TreeBuilder.makeExpression(tsToYear, resultField), - TreeBuilder.makeExpression(tsToMonth, resultField), - TreeBuilder.makeExpression(tsToDay, resultField), - TreeBuilder.makeExpression(tsToHour, resultField), - TreeBuilder.makeExpression(tsToMin, resultField)); - - Schema schema = new Schema(Lists.newArrayList(dateField, tsField)); 
- Projector eval = Projector.make(schema, exprs); - - int numRows = 8; - byte[] validity = new byte[] {(byte) 255}; - String[] values = - new String[] { - "2007-01-01T01:00:00.00Z", - "2007-03-05T03:40:00.00Z", - "2008-05-31T13:55:00.00Z", - "2000-06-30T23:20:00.00Z", - "2000-07-10T20:30:00.00Z", - "2000-08-20T00:14:00.00Z", - "2000-09-30T02:29:00.00Z", - "2000-10-31T05:33:00.00Z" - }; - long[] expYearFromDate = new long[] {2007, 2007, 2008, 2000, 2000, 2000, 2000, 2000}; - long[] expMonthFromDate = new long[] {1, 3, 5, 6, 7, 8, 9, 10}; - long[] expDayFromDate = new long[] {1, 5, 31, 30, 10, 20, 30, 31}; - long[] expHourFromDate = new long[] {1, 3, 13, 23, 20, 0, 2, 5}; - long[] expMinFromDate = new long[] {0, 40, 55, 20, 30, 14, 29, 33}; - - long[][] expValues = - new long[][] { - expYearFromDate, expMonthFromDate, expDayFromDate, expHourFromDate, expMinFromDate - }; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf millisData = stringToMillis(values); - ArrowBuf buf2Validity = buf(validity); - ArrowBuf millis2Data = stringToMillis(values); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode), - Lists.newArrayList(bufValidity, millisData, buf2Validity, millis2Data)); - - List output = new ArrayList(); - for (int i = 0; i < exprs.size(); i++) { - BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator); - bigIntVector.allocateNew(numRows); - output.add(bigIntVector); - } - eval.evaluate(batch, output); - eval.close(); - - for (int i = 0; i < output.size(); i++) { - long[] expected = expValues[i % 5]; - BigIntVector bigIntVector = (BigIntVector) output.get(i); - - for (int j = 0; j < numRows; j++) { - assertFalse(bigIntVector.isNull(j)); - assertEquals(expected[j], bigIntVector.get(j)); - } - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - } - - @Test - public void testDateTrunc() throws Exception { - ArrowType 
date64 = new ArrowType.Date(DateUnit.MILLISECOND); - Field dateField = Field.nullable("date", date64); - - TreeNode dateNode = TreeBuilder.makeField(dateField); - - List dateArgs = Lists.newArrayList(dateNode); - TreeNode dateToYear = TreeBuilder.makeFunction("date_trunc_Year", dateArgs, date64); - TreeNode dateToMonth = TreeBuilder.makeFunction("date_trunc_Month", dateArgs, date64); - - Field resultField = Field.nullable("result", date64); - List exprs = - Lists.newArrayList( - TreeBuilder.makeExpression(dateToYear, resultField), - TreeBuilder.makeExpression(dateToMonth, resultField)); - - Schema schema = new Schema(Lists.newArrayList(dateField)); - Projector eval = Projector.make(schema, exprs); - - int numRows = 4; - byte[] validity = new byte[] {(byte) 255}; - String[] values = - new String[] { - "2007-01-01T01:00:00.00Z", - "2007-03-05T03:40:00.00Z", - "2008-05-31T13:55:00.00Z", - "2000-06-30T23:20:00.00Z", - }; - String[] expYearFromDate = - new String[] { - "2007-01-01T00:00:00.00Z", - "2007-01-01T00:00:00.00Z", - "2008-01-01T00:00:00.00Z", - "2000-01-01T00:00:00.00Z", - }; - String[] expMonthFromDate = - new String[] { - "2007-01-01T00:00:00.00Z", - "2007-03-01T00:00:00.00Z", - "2008-05-01T00:00:00.00Z", - "2000-06-01T00:00:00.00Z", - }; - - String[][] expValues = new String[][] {expYearFromDate, expMonthFromDate}; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf millisData = stringToMillis(values); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, millisData)); - - List output = new ArrayList(); - for (int i = 0; i < exprs.size(); i++) { - BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator); - bigIntVector.allocateNew(numRows); - output.add(bigIntVector); - } - eval.evaluate(batch, output); - eval.close(); - - for (int i = 0; i < output.size(); i++) { - String[] expected = expValues[i]; - 
BigIntVector bigIntVector = (BigIntVector) output.get(i); - - for (int j = 0; j < numRows; j++) { - assertFalse(bigIntVector.isNull(j)); - assertEquals(Instant.parse(expected[j]).toEpochMilli(), bigIntVector.get(j)); - } - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - } - - @Test - public void testUnknownFunction() { - Field c1 = Field.nullable("c1", int8); - Field c2 = Field.nullable("c2", int8); - - TreeNode c1Node = TreeBuilder.makeField(c1); - TreeNode c2Node = TreeBuilder.makeField(c2); - - TreeNode unknown = - TreeBuilder.makeFunction("xxx_yyy", Lists.newArrayList(c1Node, c2Node), int8); - ExpressionTree expr = TreeBuilder.makeExpression(unknown, Field.nullable("result", int8)); - Schema schema = new Schema(Lists.newArrayList(c1, c2)); - boolean caughtException = false; - try { - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - } catch (GandivaException ge) { - caughtException = true; - } - - assertTrue(caughtException); - } - - @Test - public void testCastTimestampToString() throws Exception { - ArrowType timeStamp = new ArrowType.Timestamp(TimeUnit.MILLISECOND, "TZ"); - - Field tsField = Field.nullable("timestamp", timeStamp); - Field lenField = Field.nullable("outLength", int64); - - TreeNode tsNode = TreeBuilder.makeField(tsField); - TreeNode lenNode = TreeBuilder.makeField(lenField); - - TreeNode tsToString = - TreeBuilder.makeFunction( - "castVARCHAR", Lists.newArrayList(tsNode, lenNode), new ArrowType.Utf8()); - - Field resultField = Field.nullable("result", new ArrowType.Utf8()); - List exprs = - Lists.newArrayList(TreeBuilder.makeExpression(tsToString, resultField)); - - Schema schema = new Schema(Lists.newArrayList(tsField, lenField)); - Projector eval = Projector.make(schema, exprs); - - int numRows = 5; - byte[] validity = new byte[] {(byte) 255}; - String[] values = - new String[] { - "0007-01-01T01:00:00Z", - "2007-03-05T03:40:00Z", - "2008-05-31T13:55:00Z", - "2000-06-30T23:20:00Z", - 
"2000-07-10T20:30:00Z", - }; - long[] lenValues = new long[] {23L, 24L, 22L, 0L, 4L}; - - String[] expValues = - new String[] { - "0007-01-01 01:00:00.000", - "2007-03-05 03:40:00.000", - "2008-05-31 13:55:00.00", - "", - "2000", - }; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf millisData = stringToMillis(values); - ArrowBuf lenValidity = buf(validity); - ArrowBuf lenData = longBuf(lenValues); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode), - Lists.newArrayList(bufValidity, millisData, lenValidity, lenData)); - - List output = new ArrayList<>(); - for (int i = 0; i < exprs.size(); i++) { - VarCharVector charVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator); - - charVector.allocateNew(numRows * 23, numRows); - output.add(charVector); - } - eval.evaluate(batch, output); - eval.close(); - - for (ValueVector valueVector : output) { - VarCharVector charVector = (VarCharVector) valueVector; - - for (int j = 0; j < numRows; j++) { - assertFalse(charVector.isNull(j)); - assertEquals(expValues[j], new String(charVector.get(j))); - } - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - } - - @Test - public void testCastDayIntervalToBigInt() throws Exception { - ArrowType dayIntervalType = new ArrowType.Interval(IntervalUnit.DAY_TIME); - - Field dayIntervalField = Field.nullable("dayInterval", dayIntervalType); - - TreeNode intervalNode = TreeBuilder.makeField(dayIntervalField); - - TreeNode intervalToBigint = - TreeBuilder.makeFunction("castBIGINT", Lists.newArrayList(intervalNode), int64); - - Field resultField = Field.nullable("result", int64); - List exprs = - Lists.newArrayList(TreeBuilder.makeExpression(intervalToBigint, resultField)); - - Schema schema = new Schema(Lists.newArrayList(dayIntervalField)); - Projector eval = Projector.make(schema, exprs); - - int numRows = 5; - byte[] validity = new byte[] {(byte) 255}; - 
String[] values = - new String[] { - "1 0", // "days millis" - "2 0", - "1 1", - "10 5000", - "11 86400001", - }; - - Long[] expValues = - new Long[] { - 86400000L, - 2 * 86400000L, - 86400000L + 1L, - 10 * 86400000L + 5000L, - 11 * 86400000L + 86400001L - }; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf intervalsData = stringToDayInterval(values); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode), - Lists.newArrayList(bufValidity, intervalsData)); - - List output = new ArrayList<>(); - for (int i = 0; i < exprs.size(); i++) { - BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator); - bigIntVector.allocateNew(numRows); - output.add(bigIntVector); - } - eval.evaluate(batch, output); - eval.close(); - - for (ValueVector valueVector : output) { - BigIntVector bigintVector = (BigIntVector) valueVector; - - for (int j = 0; j < numRows; j++) { - assertFalse(bigintVector.isNull(j)); - assertEquals(expValues[j], Long.valueOf(bigintVector.get(j))); - } - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - } - - @Test - public void testCaseInsensitiveFunctions() throws Exception { - ArrowType timeStamp = new ArrowType.Timestamp(TimeUnit.MILLISECOND, "TZ"); - - Field tsField = Field.nullable("timestamp", timeStamp); - - TreeNode tsNode = TreeBuilder.makeField(tsField); - - TreeNode extractday = TreeBuilder.makeFunction("extractday", Lists.newArrayList(tsNode), int64); - - ExpressionTree expr = TreeBuilder.makeExpression(extractday, Field.nullable("result", int64)); - Schema schema = new Schema(Lists.newArrayList(tsField)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 5; - byte[] validity = new byte[] {(byte) 255}; - String[] values = - new String[] { - "0007-01-01T01:00:00Z", - "2007-03-05T03:40:00Z", - "2008-05-31T13:55:00Z", - "2000-06-30T23:20:00Z", - "2000-07-10T20:30:00Z", 
- }; - - long[] expValues = new long[] {1, 5, 31, 30, 10}; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf millisData = stringToMillis(values); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, Lists.newArrayList(fieldNode), Lists.newArrayList(bufValidity, millisData)); - - List output = new ArrayList<>(); - BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, allocator); - bigIntVector.allocateNew(numRows); - output.add(bigIntVector); - - eval.evaluate(batch, output); - eval.close(); - - for (ValueVector valueVector : output) { - BigIntVector vector = (BigIntVector) valueVector; - - for (int j = 0; j < numRows; j++) { - assertFalse(vector.isNull(j)); - assertEquals(expValues[j], vector.get(j)); - } - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - } - - @Test - public void testCastInt() throws Exception { - Field inField = Field.nullable("input", new ArrowType.Utf8()); - TreeNode inNode = TreeBuilder.makeField(inField); - TreeNode castINTFn = TreeBuilder.makeFunction("castINT", Lists.newArrayList(inNode), int32); - Field resultField = Field.nullable("result", int32); - List exprs = - Lists.newArrayList(TreeBuilder.makeExpression(castINTFn, resultField)); - Schema schema = new Schema(Lists.newArrayList(inField)); - Projector eval = Projector.make(schema, exprs); - int numRows = 5; - byte[] validity = new byte[] {(byte) 255}; - String[] values = new String[] {"0", "123", "-123", "-1", "1"}; - int[] expValues = new int[] {0, 123, -123, -1, 1}; - ArrowBuf bufValidity = buf(validity); - List bufData = stringBufs(values); - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode), - Lists.newArrayList(bufValidity, bufData.get(0), bufData.get(1))); - List output = new ArrayList<>(); - for (int i = 0; i < exprs.size(); i++) { - IntVector intVector = new 
IntVector(EMPTY_SCHEMA_PATH, allocator); - intVector.allocateNew(numRows); - output.add(intVector); - } - eval.evaluate(batch, output); - eval.close(); - for (ValueVector valueVector : output) { - IntVector intVector = (IntVector) valueVector; - for (int j = 0; j < numRows; j++) { - assertFalse(intVector.isNull(j)); - assertTrue(expValues[j] == intVector.get(j)); - } - } - releaseRecordBatch(batch); - releaseValueVectors(output); - } - - @Test - public void testCastIntInvalidValue() throws Exception { - Field inField = Field.nullable("input", new ArrowType.Utf8()); - TreeNode inNode = TreeBuilder.makeField(inField); - TreeNode castINTFn = TreeBuilder.makeFunction("castINT", Lists.newArrayList(inNode), int32); - Field resultField = Field.nullable("result", int32); - List exprs = - Lists.newArrayList(TreeBuilder.makeExpression(castINTFn, resultField)); - Schema schema = new Schema(Lists.newArrayList(inField)); - Projector eval = Projector.make(schema, exprs); - int numRows = 1; - byte[] validity = new byte[] {(byte) 255}; - String[] values = new String[] {"abc"}; - ArrowBuf bufValidity = buf(validity); - List bufData = stringBufs(values); - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode), - Lists.newArrayList(bufValidity, bufData.get(0), bufData.get(1))); - List output = new ArrayList<>(); - for (int i = 0; i < exprs.size(); i++) { - IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator); - intVector.allocateNew(numRows); - output.add(intVector); - } - - assertThrows( - GandivaException.class, - () -> { - try { - eval.evaluate(batch, output); - } finally { - eval.close(); - releaseRecordBatch(batch); - releaseValueVectors(output); - } - }); - } - - @Test - public void testCastFloat() throws Exception { - Field inField = Field.nullable("input", new ArrowType.Utf8()); - TreeNode inNode = TreeBuilder.makeField(inField); - TreeNode castFLOAT8Fn = - 
TreeBuilder.makeFunction("castFLOAT8", Lists.newArrayList(inNode), float64); - Field resultField = Field.nullable("result", float64); - List exprs = - Lists.newArrayList(TreeBuilder.makeExpression(castFLOAT8Fn, resultField)); - Schema schema = new Schema(Lists.newArrayList(inField)); - Projector eval = Projector.make(schema, exprs); - int numRows = 5; - byte[] validity = new byte[] {(byte) 255}; - String[] values = new String[] {"2.3", "-11.11", "0", "111", "12345.67"}; - double[] expValues = new double[] {2.3, -11.11, 0, 111, 12345.67}; - ArrowBuf bufValidity = buf(validity); - List bufData = stringBufs(values); - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode), - Lists.newArrayList(bufValidity, bufData.get(0), bufData.get(1))); - List output = new ArrayList<>(); - for (int i = 0; i < exprs.size(); i++) { - Float8Vector float8Vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator); - float8Vector.allocateNew(numRows); - output.add(float8Vector); - } - eval.evaluate(batch, output); - eval.close(); - for (ValueVector valueVector : output) { - Float8Vector float8Vector = (Float8Vector) valueVector; - for (int j = 0; j < numRows; j++) { - assertFalse(float8Vector.isNull(j)); - assertTrue(expValues[j] == float8Vector.get(j)); - } - } - releaseRecordBatch(batch); - releaseValueVectors(output); - } - - @Test - public void testCastFloatVarbinary() throws Exception { - Field inField = Field.nullable("input", new ArrowType.Binary()); - TreeNode inNode = TreeBuilder.makeField(inField); - TreeNode castFLOAT8Fn = - TreeBuilder.makeFunction("castFLOAT8", Lists.newArrayList(inNode), float64); - Field resultField = Field.nullable("result", float64); - List exprs = - Lists.newArrayList(TreeBuilder.makeExpression(castFLOAT8Fn, resultField)); - Schema schema = new Schema(Lists.newArrayList(inField)); - Projector eval = Projector.make(schema, exprs); - int numRows = 5; - byte[] 
validity = new byte[] {(byte) 255}; - String[] values = new String[] {"2.3", "-11.11", "0", "111", "12345.67"}; - double[] expValues = new double[] {2.3, -11.11, 0, 111, 12345.67}; - ArrowBuf bufValidity = buf(validity); - List bufData = stringBufs(values); - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode), - Lists.newArrayList(bufValidity, bufData.get(0), bufData.get(1))); - List output = new ArrayList<>(); - for (int i = 0; i < exprs.size(); i++) { - Float8Vector float8Vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator); - float8Vector.allocateNew(numRows); - output.add(float8Vector); - } - eval.evaluate(batch, output); - eval.close(); - for (ValueVector valueVector : output) { - Float8Vector float8Vector = (Float8Vector) valueVector; - for (int j = 0; j < numRows; j++) { - assertFalse(float8Vector.isNull(j)); - assertTrue(expValues[j] == float8Vector.get(j)); - } - } - releaseRecordBatch(batch); - releaseValueVectors(output); - } - - @Test - public void testCastFloatInvalidValue() throws Exception { - Field inField = Field.nullable("input", new ArrowType.Utf8()); - TreeNode inNode = TreeBuilder.makeField(inField); - TreeNode castFLOAT8Fn = - TreeBuilder.makeFunction("castFLOAT8", Lists.newArrayList(inNode), float64); - Field resultField = Field.nullable("result", float64); - List exprs = - Lists.newArrayList(TreeBuilder.makeExpression(castFLOAT8Fn, resultField)); - Schema schema = new Schema(Lists.newArrayList(inField)); - Projector eval = Projector.make(schema, exprs); - int numRows = 5; - byte[] validity = new byte[] {(byte) 255}; - String[] values = new String[] {"2.3", "-11.11", "abc", "111", "12345.67"}; - ArrowBuf bufValidity = buf(validity); - List bufData = stringBufs(values); - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode), - 
Lists.newArrayList(bufValidity, bufData.get(0), bufData.get(1))); - List output = new ArrayList<>(); - for (int i = 0; i < exprs.size(); i++) { - Float8Vector float8Vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator); - float8Vector.allocateNew(numRows); - output.add(float8Vector); - } - - assertThrows( - GandivaException.class, - () -> { - try { - eval.evaluate(batch, output); - } finally { - eval.close(); - releaseRecordBatch(batch); - releaseValueVectors(output); - } - }); - } - - @Test - public void testEvaluateWithUnsetTargetHostCPU() throws Exception { - Field a = Field.nullable("a", int32); - Field b = Field.nullable("b", int32); - List args = Lists.newArrayList(a, b); - - Field retType = Field.nullable("c", int32); - ExpressionTree root = TreeBuilder.makeExpression("add", args, retType); - - List exprs = Lists.newArrayList(root); - - Schema schema = new Schema(args); - Projector eval = - Projector.make( - schema, exprs, new ConfigurationBuilder.ConfigOptions().withTargetCPU(false)); - - int numRows = 16; - byte[] validity = new byte[] {(byte) 255, 0}; - // second half is "undefined" - int[] aValues = new int[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - int[] bValues = new int[] {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; - - ArrowBuf validitya = buf(validity); - ArrowBuf valuesa = intBuf(aValues); - ArrowBuf validityb = buf(validity); - ArrowBuf valuesb = intBuf(bValues); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 8), new ArrowFieldNode(numRows, 8)), - Lists.newArrayList(validitya, valuesa, validityb, valuesb)); - - IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator); - intVector.allocateNew(numRows); - - List output = new ArrayList(); - output.add(intVector); - eval.evaluate(batch, output); - - for (int i = 0; i < 8; i++) { - assertFalse(intVector.isNull(i)); - assertEquals(17, intVector.get(i)); - } - for (int i = 8; i < 16; i++) { - 
assertTrue(intVector.isNull(i)); - } - - // free buffers - releaseRecordBatch(batch); - releaseValueVectors(output); - eval.close(); - } - - @Test - public void testCastVarcharFromInteger() throws Exception { - Field inField = Field.nullable("input", int32); - Field lenField = Field.nullable("outLength", int64); - - TreeNode inNode = TreeBuilder.makeField(inField); - TreeNode lenNode = TreeBuilder.makeField(lenField); - - TreeNode tsToString = - TreeBuilder.makeFunction( - "castVARCHAR", Lists.newArrayList(inNode, lenNode), new ArrowType.Utf8()); - - Field resultField = Field.nullable("result", new ArrowType.Utf8()); - List exprs = - Lists.newArrayList(TreeBuilder.makeExpression(tsToString, resultField)); - - Schema schema = new Schema(Lists.newArrayList(inField, lenField)); - Projector eval = Projector.make(schema, exprs); - - int numRows = 5; - byte[] validity = new byte[] {(byte) 255}; - int[] values = - new int[] { - 2345, 2345, 2345, 2345, -2345, - }; - long[] lenValues = new long[] {0L, 4L, 2L, 6L, 5L}; - - String[] expValues = - new String[] { - "", - Integer.toString(2345).substring(0, 4), - Integer.toString(2345).substring(0, 2), - Integer.toString(2345), - Integer.toString(-2345) - }; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf bufData = intBuf(values); - ArrowBuf lenValidity = buf(validity); - ArrowBuf lenData = longBuf(lenValues); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode), - Lists.newArrayList(bufValidity, bufData, lenValidity, lenData)); - - List output = new ArrayList<>(); - for (int i = 0; i < exprs.size(); i++) { - VarCharVector charVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator); - - charVector.allocateNew(numRows * 5, numRows); - output.add(charVector); - } - eval.evaluate(batch, output); - eval.close(); - - for (ValueVector valueVector : output) { - VarCharVector charVector = (VarCharVector) valueVector; 
- - for (int j = 0; j < numRows; j++) { - assertFalse(charVector.isNull(j)); - assertEquals(expValues[j], new String(charVector.get(j))); - } - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - } - - @Test - public void testCastVarcharFromFloat() throws Exception { - Field inField = Field.nullable("input", float64); - Field lenField = Field.nullable("outLength", int64); - - TreeNode inNode = TreeBuilder.makeField(inField); - TreeNode lenNode = TreeBuilder.makeField(lenField); - - TreeNode tsToString = - TreeBuilder.makeFunction( - "castVARCHAR", Lists.newArrayList(inNode, lenNode), new ArrowType.Utf8()); - - Field resultField = Field.nullable("result", new ArrowType.Utf8()); - List exprs = - Lists.newArrayList(TreeBuilder.makeExpression(tsToString, resultField)); - - Schema schema = new Schema(Lists.newArrayList(inField, lenField)); - Projector eval = Projector.make(schema, exprs); - - int numRows = 5; - byte[] validity = new byte[] {(byte) 255}; - double[] values = - new double[] { - 0.0, - -0.0, - 1.0, - 0.001, - 0.0009, - 0.00099893, - 999999.9999, - 10000000.0, - 23943410000000.343434, - Double.POSITIVE_INFINITY, - Double.NEGATIVE_INFINITY, - Double.NaN, - 23.45, - 23.45, - -23.45, - }; - long[] lenValues = - new long[] {6L, 6L, 6L, 6L, 10L, 15L, 15L, 15L, 30L, 15L, 15L, 15L, 0L, 6L, 6L}; - - /* The Java real numbers are represented in two ways and Gandiva must - * follow the same rules: - * - If the number is greater or equals than 10^7 and less than 10^(-3) - * it will be represented using scientific notation, e.g: - * - 0.000012 -> 1.2E-5 - * - 10000002.3 -> 1.00000023E7 - * - If the numbers are between that interval above, they are showed as is. - * - * The test checks if the Gandiva function casts the number with the same notation of the - * Java. 
- * */ - String[] expValues = - new String[] { - Double.toString(0.0), // must be cast to -> "0.0" - Double.toString(-0.0), // must be cast to -> "-0.0" - Double.toString(1.0), // must be cast to -> "1.0" - Double.toString(0.001), // must be cast to -> "0.001" - Double.toString(0.0009), // must be cast to -> "9E-4" - Double.toString(0.00099893), // must be cast to -> "9E-4" - Double.toString(999999.9999), // must be cast to -> "999999.9999" - Double.toString(10000000.0), // must be cast to 1E7 - Double.toString(23943410000000.343434), - Double.toString(Double.POSITIVE_INFINITY), - Double.toString(Double.NEGATIVE_INFINITY), - Double.toString(Double.NaN), - "", - Double.toString(23.45), - Double.toString(-23.45) - }; - - ArrowBuf bufValidity = buf(validity); - ArrowBuf bufData = doubleBuf(values); - ArrowBuf lenValidity = buf(validity); - ArrowBuf lenData = longBuf(lenValues); - - ArrowFieldNode fieldNode = new ArrowFieldNode(numRows, 0); - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(fieldNode, fieldNode), - Lists.newArrayList(bufValidity, bufData, lenValidity, lenData)); - - List output = new ArrayList<>(); - for (int i = 0; i < exprs.size(); i++) { - VarCharVector charVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator); - - charVector.allocateNew(numRows * 5, numRows); - output.add(charVector); - } - eval.evaluate(batch, output); - eval.close(); - - for (ValueVector valueVector : output) { - VarCharVector charVector = (VarCharVector) valueVector; - - for (int j = 0; j < numRows; j++) { - assertFalse(charVector.isNull(j)); - assertEquals(expValues[j], new String(charVector.get(j))); - } - } - - releaseRecordBatch(batch); - releaseValueVectors(output); - } - - @Test - public void testInitCap() throws Exception { - - Field x = Field.nullable("x", new ArrowType.Utf8()); - - Field retType = Field.nullable("c", new ArrowType.Utf8()); - - TreeNode cond = - TreeBuilder.makeFunction( - "initcap", 
Lists.newArrayList(TreeBuilder.makeField(x)), new ArrowType.Utf8()); - ExpressionTree expr = TreeBuilder.makeExpression(cond, retType); - Schema schema = new Schema(Lists.newArrayList(x)); - Projector eval = Projector.make(schema, Lists.newArrayList(expr)); - - int numRows = 5; - byte[] validity = new byte[] {(byte) 15, 0}; - String[] valuesX = - new String[] { - " øhpqršvñ \n\n", - "möbelträger1füße \nmöbelträge'rfüße", - "ÂbĆDËFgh\néll", - "citroën CaR", - "kjk" - }; - - String[] expected = - new String[] { - " Øhpqršvñ \n\n", - "Möbelträger1füße \nMöbelträge'Rfüße", - "Âbćdëfgh\nÉll", - "Citroën Car", - null - }; - - ArrowBuf validityX = buf(validity); - List dataBufsX = stringBufs(valuesX); - - ArrowRecordBatch batch = - new ArrowRecordBatch( - numRows, - Lists.newArrayList(new ArrowFieldNode(numRows, 0)), - Lists.newArrayList(validityX, dataBufsX.get(0), dataBufsX.get(1))); - - // allocate data for output vector. - VarCharVector outVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator); - outVector.allocateNew(numRows * 100, numRows); - - // evaluate expression - List output = new ArrayList<>(); - output.add(outVector); - eval.evaluate(batch, output); - eval.close(); - - // match expected output. 
- for (int i = 0; i < numRows - 1; i++) { - assertFalse(outVector.isNull(i), "Expect none value equals null"); - assertEquals(expected[i], new String(outVector.get(i))); - } - - assertTrue(outVector.isNull(numRows - 1), "Last value must be null"); - - releaseRecordBatch(batch); - releaseValueVectors(output); - } -} diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/TestJniLoader.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/TestJniLoader.java deleted file mode 100644 index 4541dba92c928..0000000000000 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/TestJniLoader.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.gandiva.evaluator; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.junit.jupiter.api.Test; - -public class TestJniLoader { - - @Test - public void testDefaultConfiguration() throws Exception { - long configId = JniLoader.getConfiguration(ConfigurationBuilder.ConfigOptions.getDefault()); - assertEquals(configId, JniLoader.getDefaultConfiguration()); - assertEquals( - configId, JniLoader.getConfiguration(ConfigurationBuilder.ConfigOptions.getDefault())); - - long configId2 = - JniLoader.getConfiguration(new ConfigurationBuilder.ConfigOptions().withOptimize(false)); - long configId3 = - JniLoader.getConfiguration(new ConfigurationBuilder.ConfigOptions().withTargetCPU(false)); - long configId4 = - JniLoader.getConfiguration( - new ConfigurationBuilder.ConfigOptions().withOptimize(false).withTargetCPU(false)); - - assertTrue(configId != configId2 && configId2 != configId3 && configId3 != configId4); - - assertEquals( - configId2, - JniLoader.getConfiguration(new ConfigurationBuilder.ConfigOptions().withOptimize(false))); - assertEquals( - configId3, - JniLoader.getConfiguration(new ConfigurationBuilder.ConfigOptions().withTargetCPU(false))); - assertEquals( - configId4, - JniLoader.getConfiguration( - new ConfigurationBuilder.ConfigOptions().withOptimize(false).withTargetCPU(false))); - - JniLoader.removeConfiguration(new ConfigurationBuilder.ConfigOptions().withOptimize(false)); - // configids are monotonically updated. 
after a config is removed, new one is assigned with - // higher id - assertNotEquals( - configId2, - JniLoader.getConfiguration(new ConfigurationBuilder.ConfigOptions().withOptimize(false))); - - JniLoader.removeConfiguration(new ConfigurationBuilder.ConfigOptions()); - assertNotEquals( - configId, JniLoader.getConfiguration(ConfigurationBuilder.ConfigOptions.getDefault())); - } -} diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/ArrowTypeHelperTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/ArrowTypeHelperTest.java deleted file mode 100644 index 33cc4ec382fb9..0000000000000 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/ArrowTypeHelperTest.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.gandiva.expression; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; - -public class ArrowTypeHelperTest { - - private void testInt(int width, boolean isSigned, int expected) throws GandivaException { - ArrowType arrowType = new ArrowType.Int(width, isSigned); - GandivaTypes.ExtGandivaType gandivaType = ArrowTypeHelper.arrowTypeToProtobuf(arrowType); - assertEquals(expected, gandivaType.getType().getNumber()); - } - - @Test - public void testAllInts() throws GandivaException { - testInt(8, false, GandivaTypes.GandivaType.UINT8_VALUE); - testInt(8, true, GandivaTypes.GandivaType.INT8_VALUE); - testInt(16, false, GandivaTypes.GandivaType.UINT16_VALUE); - testInt(16, true, GandivaTypes.GandivaType.INT16_VALUE); - testInt(32, false, GandivaTypes.GandivaType.UINT32_VALUE); - testInt(32, true, GandivaTypes.GandivaType.INT32_VALUE); - testInt(64, false, GandivaTypes.GandivaType.UINT64_VALUE); - testInt(64, true, GandivaTypes.GandivaType.INT64_VALUE); - } - - private void testFloat(FloatingPointPrecision precision, int expected) throws GandivaException { - ArrowType arrowType = new ArrowType.FloatingPoint(precision); - GandivaTypes.ExtGandivaType gandivaType = ArrowTypeHelper.arrowTypeToProtobuf(arrowType); - assertEquals(expected, gandivaType.getType().getNumber()); - } - - @Test - public void testAllFloats() throws GandivaException { - testFloat(FloatingPointPrecision.HALF, GandivaTypes.GandivaType.HALF_FLOAT_VALUE); - testFloat(FloatingPointPrecision.SINGLE, GandivaTypes.GandivaType.FLOAT_VALUE); - 
testFloat(FloatingPointPrecision.DOUBLE, GandivaTypes.GandivaType.DOUBLE_VALUE); - } - - private void testBasic(ArrowType arrowType, int expected) throws GandivaException { - GandivaTypes.ExtGandivaType gandivaType = ArrowTypeHelper.arrowTypeToProtobuf(arrowType); - assertEquals(expected, gandivaType.getType().getNumber()); - } - - @Test - public void testSimpleTypes() throws GandivaException { - testBasic(new ArrowType.Bool(), GandivaTypes.GandivaType.BOOL_VALUE); - testBasic(new ArrowType.Binary(), GandivaTypes.GandivaType.BINARY_VALUE); - testBasic(new ArrowType.Utf8(), GandivaTypes.GandivaType.UTF8_VALUE); - } - - @Test - public void testField() throws GandivaException { - Field field = Field.nullable("col1", new ArrowType.Bool()); - GandivaTypes.Field f = ArrowTypeHelper.arrowFieldToProtobuf(field); - assertEquals(field.getName(), f.getName()); - assertEquals(true, f.getNullable()); - assertEquals(GandivaTypes.GandivaType.BOOL_VALUE, f.getType().getType().getNumber()); - } - - @Test - public void testSchema() throws GandivaException { - Field a = Field.nullable("a", new ArrowType.Int(16, false)); - Field b = Field.nullable("b", new ArrowType.Int(32, true)); - Field c = Field.nullable("c", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)); - - List fields = new ArrayList(); - fields.add(a); - fields.add(b); - fields.add(c); - - GandivaTypes.Schema schema = ArrowTypeHelper.arrowSchemaToProtobuf(new Schema(fields)); - int idx = 0; - for (GandivaTypes.Field f : schema.getColumnsList()) { - assertEquals(fields.get(idx).getName(), f.getName()); - idx++; - } - } -} diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/TreeBuilderTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/TreeBuilderTest.java deleted file mode 100644 index 63b31f1fb30d8..0000000000000 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/expression/TreeBuilderTest.java +++ /dev/null @@ -1,350 +0,0 @@ -/* - * Licensed to the Apache 
Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.gandiva.expression; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.gandiva.exceptions.GandivaException; -import org.apache.arrow.gandiva.ipc.GandivaTypes; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.junit.jupiter.api.Test; - -public class TreeBuilderTest { - - @Test - public void testMakeLiteral() throws GandivaException { - TreeNode n = TreeBuilder.makeLiteral(Boolean.TRUE); - GandivaTypes.TreeNode node = n.toProtobuf(); - - assertEquals(true, node.getBooleanNode().getValue()); - - n = TreeBuilder.makeLiteral(new Integer(10)); - node = n.toProtobuf(); - assertEquals(10, node.getIntNode().getValue()); - - n = TreeBuilder.makeLiteral(new Long(50)); - node = n.toProtobuf(); - assertEquals(50, node.getLongNode().getValue()); - - Float f = new Float(2.5); - n = TreeBuilder.makeLiteral(f); - node = 
n.toProtobuf(); - assertEquals(f.floatValue(), node.getFloatNode().getValue(), 0.1); - - Double d = new Double(3.3); - n = TreeBuilder.makeLiteral(d); - node = n.toProtobuf(); - assertEquals(d.doubleValue(), node.getDoubleNode().getValue(), 0.1); - - String s = new String("hello"); - n = TreeBuilder.makeStringLiteral(s); - node = n.toProtobuf(); - assertArrayEquals(s.getBytes(), node.getStringNode().getValue().toByteArray()); - - byte[] b = new String("hello").getBytes(); - n = TreeBuilder.makeBinaryLiteral(b); - node = n.toProtobuf(); - assertArrayEquals(b, node.getBinaryNode().getValue().toByteArray()); - } - - @Test - public void testMakeNull() throws GandivaException { - TreeNode n = TreeBuilder.makeNull(new ArrowType.Bool()); - GandivaTypes.TreeNode node = n.toProtobuf(); - assertEquals( - GandivaTypes.GandivaType.BOOL_VALUE, node.getNullNode().getType().getType().getNumber()); - - n = TreeBuilder.makeNull(new ArrowType.Int(32, true)); - node = n.toProtobuf(); - assertEquals( - GandivaTypes.GandivaType.INT32_VALUE, node.getNullNode().getType().getType().getNumber()); - - n = TreeBuilder.makeNull(new ArrowType.Int(64, false)); - node = n.toProtobuf(); - assertEquals( - GandivaTypes.GandivaType.UINT64_VALUE, node.getNullNode().getType().getType().getNumber()); - - n = TreeBuilder.makeNull(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)); - node = n.toProtobuf(); - assertEquals( - GandivaTypes.GandivaType.FLOAT_VALUE, node.getNullNode().getType().getType().getNumber()); - } - - @Test - public void testMakeField() throws GandivaException { - TreeNode n = TreeBuilder.makeField(Field.nullable("a", new ArrowType.Int(32, true))); - GandivaTypes.TreeNode node = n.toProtobuf(); - - assertEquals("a", node.getFieldNode().getField().getName()); - assertEquals( - GandivaTypes.GandivaType.INT32_VALUE, - node.getFieldNode().getField().getType().getType().getNumber()); - } - - @Test - public void testMakeFunction() throws GandivaException { - TreeNode a = 
TreeBuilder.makeField(Field.nullable("a", new ArrowType.Int(64, false))); - TreeNode b = TreeBuilder.makeField(Field.nullable("b", new ArrowType.Int(64, false))); - List args = new ArrayList(2); - args.add(a); - args.add(b); - - TreeNode addNode = TreeBuilder.makeFunction("add", args, new ArrowType.Int(64, false)); - GandivaTypes.TreeNode node = addNode.toProtobuf(); - - assertTrue(node.hasFnNode()); - assertEquals("add", node.getFnNode().getFunctionName()); - assertEquals("a", node.getFnNode().getInArgsList().get(0).getFieldNode().getField().getName()); - assertEquals("b", node.getFnNode().getInArgsList().get(1).getFieldNode().getField().getName()); - assertEquals( - GandivaTypes.GandivaType.UINT64_VALUE, - node.getFnNode().getReturnType().getType().getNumber()); - } - - @Test - public void testMakeIf() throws GandivaException { - Field a = Field.nullable("a", new ArrowType.Int(64, false)); - Field b = Field.nullable("b", new ArrowType.Int(64, false)); - TreeNode aNode = TreeBuilder.makeField(a); - TreeNode bNode = TreeBuilder.makeField(b); - List args = new ArrayList(2); - args.add(aNode); - args.add(bNode); - - ArrowType retType = new ArrowType.Bool(); - TreeNode cond = TreeBuilder.makeFunction("greater_than", args, retType); - TreeNode ifNode = TreeBuilder.makeIf(cond, aNode, bNode, retType); - - GandivaTypes.TreeNode node = ifNode.toProtobuf(); - - assertTrue(node.hasIfNode()); - assertEquals("greater_than", node.getIfNode().getCond().getFnNode().getFunctionName()); - assertEquals(a.getName(), node.getIfNode().getThenNode().getFieldNode().getField().getName()); - assertEquals(b.getName(), node.getIfNode().getElseNode().getFieldNode().getField().getName()); - assertEquals( - GandivaTypes.GandivaType.BOOL_VALUE, - node.getIfNode().getReturnType().getType().getNumber()); - } - - @Test - public void testMakeAnd() throws GandivaException { - TreeNode a = TreeBuilder.makeField(Field.nullable("a", new ArrowType.Bool())); - TreeNode b = 
TreeBuilder.makeField(Field.nullable("b", new ArrowType.Bool())); - List args = new ArrayList(2); - args.add(a); - args.add(b); - - TreeNode andNode = TreeBuilder.makeAnd(args); - GandivaTypes.TreeNode node = andNode.toProtobuf(); - - assertTrue(node.hasAndNode()); - assertEquals(2, node.getAndNode().getArgsList().size()); - assertEquals("a", node.getAndNode().getArgsList().get(0).getFieldNode().getField().getName()); - assertEquals("b", node.getAndNode().getArgsList().get(1).getFieldNode().getField().getName()); - } - - @Test - public void testMakeOr() throws GandivaException { - TreeNode a = TreeBuilder.makeField(Field.nullable("a", new ArrowType.Bool())); - TreeNode b = TreeBuilder.makeField(Field.nullable("b", new ArrowType.Bool())); - List args = new ArrayList(2); - args.add(a); - args.add(b); - - TreeNode orNode = TreeBuilder.makeOr(args); - GandivaTypes.TreeNode node = orNode.toProtobuf(); - - assertTrue(node.hasOrNode()); - assertEquals(2, node.getOrNode().getArgsList().size()); - assertEquals("a", node.getOrNode().getArgsList().get(0).getFieldNode().getField().getName()); - assertEquals("b", node.getOrNode().getArgsList().get(1).getFieldNode().getField().getName()); - } - - @Test - public void testExpression() throws GandivaException { - Field a = Field.nullable("a", new ArrowType.Int(64, false)); - Field b = Field.nullable("b", new ArrowType.Int(64, false)); - TreeNode aNode = TreeBuilder.makeField(a); - TreeNode bNode = TreeBuilder.makeField(b); - List args = new ArrayList(2); - args.add(aNode); - args.add(bNode); - - ArrowType retType = new ArrowType.Bool(); - TreeNode cond = TreeBuilder.makeFunction("greater_than", args, retType); - TreeNode ifNode = TreeBuilder.makeIf(cond, aNode, bNode, retType); - - ExpressionTree expr = TreeBuilder.makeExpression(ifNode, Field.nullable("c", retType)); - - GandivaTypes.ExpressionRoot root = expr.toProtobuf(); - - assertTrue(root.getRoot().hasIfNode()); - assertEquals( - "greater_than", 
root.getRoot().getIfNode().getCond().getFnNode().getFunctionName()); - assertEquals("c", root.getResultType().getName()); - assertEquals( - GandivaTypes.GandivaType.BOOL_VALUE, root.getResultType().getType().getType().getNumber()); - } - - @Test - public void testExpression2() throws GandivaException { - Field a = Field.nullable("a", new ArrowType.Int(64, false)); - Field b = Field.nullable("b", new ArrowType.Int(64, false)); - List args = new ArrayList(2); - args.add(a); - args.add(b); - - Field c = Field.nullable("c", new ArrowType.Int(64, false)); - ExpressionTree expr = TreeBuilder.makeExpression("add", args, c); - GandivaTypes.ExpressionRoot root = expr.toProtobuf(); - - GandivaTypes.TreeNode node = root.getRoot(); - - assertEquals("c", root.getResultType().getName()); - assertTrue(node.hasFnNode()); - assertEquals("add", node.getFnNode().getFunctionName()); - assertEquals("a", node.getFnNode().getInArgsList().get(0).getFieldNode().getField().getName()); - assertEquals("b", node.getFnNode().getInArgsList().get(1).getFieldNode().getField().getName()); - assertEquals( - GandivaTypes.GandivaType.UINT64_VALUE, - node.getFnNode().getReturnType().getType().getNumber()); - } - - @Test - public void testExpressionWithAnd() throws GandivaException { - TreeNode a = TreeBuilder.makeField(Field.nullable("a", new ArrowType.Bool())); - TreeNode b = TreeBuilder.makeField(Field.nullable("b", new ArrowType.Bool())); - List args = new ArrayList(2); - args.add(a); - args.add(b); - - TreeNode andNode = TreeBuilder.makeAnd(args); - ExpressionTree expr = - TreeBuilder.makeExpression(andNode, Field.nullable("c", new ArrowType.Bool())); - GandivaTypes.ExpressionRoot root = expr.toProtobuf(); - - assertTrue(root.getRoot().hasAndNode()); - assertEquals( - "a", root.getRoot().getAndNode().getArgsList().get(0).getFieldNode().getField().getName()); - assertEquals( - "b", root.getRoot().getAndNode().getArgsList().get(1).getFieldNode().getField().getName()); - assertEquals("c", 
root.getResultType().getName()); - assertEquals( - GandivaTypes.GandivaType.BOOL_VALUE, root.getResultType().getType().getType().getNumber()); - } - - @Test - public void testExpressionWithOr() throws GandivaException { - TreeNode a = TreeBuilder.makeField(Field.nullable("a", new ArrowType.Bool())); - TreeNode b = TreeBuilder.makeField(Field.nullable("b", new ArrowType.Bool())); - List args = new ArrayList(2); - args.add(a); - args.add(b); - - TreeNode orNode = TreeBuilder.makeOr(args); - ExpressionTree expr = - TreeBuilder.makeExpression(orNode, Field.nullable("c", new ArrowType.Bool())); - GandivaTypes.ExpressionRoot root = expr.toProtobuf(); - - assertTrue(root.getRoot().hasOrNode()); - assertEquals( - "a", root.getRoot().getOrNode().getArgsList().get(0).getFieldNode().getField().getName()); - assertEquals( - "b", root.getRoot().getOrNode().getArgsList().get(1).getFieldNode().getField().getName()); - assertEquals("c", root.getResultType().getName()); - assertEquals( - GandivaTypes.GandivaType.BOOL_VALUE, root.getResultType().getType().getType().getNumber()); - } - - @Test - public void testCondition() throws GandivaException { - Field a = Field.nullable("a", new ArrowType.Int(64, false)); - Field b = Field.nullable("b", new ArrowType.Int(64, false)); - - TreeNode aNode = TreeBuilder.makeField(a); - TreeNode bNode = TreeBuilder.makeField(b); - List args = new ArrayList(2); - args.add(aNode); - args.add(bNode); - - TreeNode root = TreeBuilder.makeFunction("greater_than", args, new ArrowType.Bool()); - Condition condition = TreeBuilder.makeCondition(root); - - GandivaTypes.Condition conditionProto = condition.toProtobuf(); - assertTrue(conditionProto.getRoot().hasFnNode()); - assertEquals("greater_than", conditionProto.getRoot().getFnNode().getFunctionName()); - assertEquals( - "a", - conditionProto - .getRoot() - .getFnNode() - .getInArgsList() - .get(0) - .getFieldNode() - .getField() - .getName()); - assertEquals( - "b", - conditionProto - .getRoot() - 
.getFnNode() - .getInArgsList() - .get(1) - .getFieldNode() - .getField() - .getName()); - } - - @Test - public void testCondition2() throws GandivaException { - Field a = Field.nullable("a", new ArrowType.Int(64, false)); - Field b = Field.nullable("b", new ArrowType.Int(64, false)); - - Condition condition = TreeBuilder.makeCondition("greater_than", Arrays.asList(a, b)); - - GandivaTypes.Condition conditionProto = condition.toProtobuf(); - assertTrue(conditionProto.getRoot().hasFnNode()); - assertEquals("greater_than", conditionProto.getRoot().getFnNode().getFunctionName()); - assertEquals( - "a", - conditionProto - .getRoot() - .getFnNode() - .getInArgsList() - .get(0) - .getFieldNode() - .getField() - .getName()); - assertEquals( - "b", - conditionProto - .getRoot() - .getFnNode() - .getInArgsList() - .get(1) - .getFieldNode() - .getField() - .getName()); - } -} diff --git a/java/gandiva/src/test/resources/logback.xml b/java/gandiva/src/test/resources/logback.xml deleted file mode 100644 index f9e449fa67b2e..0000000000000 --- a/java/gandiva/src/test/resources/logback.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - - - diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml deleted file mode 100644 index 72ee69d60a998..0000000000000 --- a/java/memory/memory-core/pom.xml +++ /dev/null @@ -1,114 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-memory - 19.0.0-SNAPSHOT - - - arrow-memory-core - - Arrow Memory - Core - Core off-heap memory management libraries for Arrow ValueVectors. 
- - - - --add-reads=org.apache.arrow.memory.core=ch.qos.logback.classic --add-opens=java.base/java.lang.reflect=org.apache.arrow.memory.core --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - - - - org.slf4j - slf4j-api - - - org.immutables - value-annotations - - - org.checkerframework - checker-qual - - - com.google.errorprone - error_prone_annotations - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - -Xmaxerrs - - 10000 - -Xmaxwarns - 10000 - -AskipDefs=.*Test - - -AatfDoNotCache - - - - - org.checkerframework - checker - ${checker.framework.version} - - - - - - org.apache.maven.plugins - maven-surefire-plugin - - --add-reads=org.apache.arrow.memory.core=ch.qos.logback.classic --add-opens=java.base/java.lang.reflect=org.apache.arrow.memory.core --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - - **/TestOpens.java - - - - - - opens-tests - - test - - test - - - - - - **/TestOpens.java - - - - - - - - diff --git a/java/memory/memory-core/src/main/java/module-info.java b/java/memory/memory-core/src/main/java/module-info.java deleted file mode 100644 index 0a607bdf2f43a..0000000000000 --- a/java/memory/memory-core/src/main/java/module-info.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -module org.apache.arrow.memory.core { - exports org.apache.arrow.memory; - exports org.apache.arrow.memory.rounding; - exports org.apache.arrow.memory.util; - exports org.apache.arrow.memory.util.hash; - exports org.apache.arrow.util; - - requires java.compiler; - requires transitive jdk.unsupported; - requires static org.checkerframework.checker.qual; - requires static org.immutables.value.annotations; - requires static com.google.errorprone.annotations; - requires org.slf4j; -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/Accountant.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/Accountant.java deleted file mode 100644 index 5d052c2cdeeec..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/Accountant.java +++ /dev/null @@ -1,309 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory; - -import java.util.concurrent.atomic.AtomicLong; -import org.apache.arrow.util.Preconditions; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** - * Provides a concurrent way to manage account for memory usage without locking. Used as basis for - * Allocators. All operations are threadsafe (except for close). - */ -class Accountant implements AutoCloseable { - - /** The parent allocator. */ - protected final @Nullable Accountant parent; - - private final String name; - - /** - * The amount of memory reserved for this allocator. Releases below this amount of memory will not - * be returned to the parent Accountant until this Accountant is closed. - */ - protected final long reservation; - - private final AtomicLong peakAllocation = new AtomicLong(); - - /** - * Maximum local memory that can be held. This can be externally updated. Changing it won't cause - * past memory to change but will change responses to future allocation efforts - */ - private final AtomicLong allocationLimit = new AtomicLong(); - - /** Currently allocated amount of memory. 
*/ - private final AtomicLong locallyHeldMemory = new AtomicLong(); - - public Accountant( - @Nullable Accountant parent, String name, long reservation, long maxAllocation) { - Preconditions.checkNotNull(name, "name must not be null"); - Preconditions.checkArgument( - reservation >= 0, "The initial reservation size must be non-negative."); - Preconditions.checkArgument( - maxAllocation >= 0, "The maximum allocation limit must be non-negative."); - Preconditions.checkArgument( - reservation <= maxAllocation, - "The initial reservation size must be <= the maximum allocation."); - Preconditions.checkArgument( - reservation == 0 || parent != null, "The root accountant can't reserve memory."); - - this.parent = parent; - this.name = name; - this.reservation = reservation; - this.allocationLimit.set(maxAllocation); - - if (reservation != 0) { - Preconditions.checkArgument(parent != null, "parent must not be null"); - // we will allocate a reservation from our parent. - final AllocationOutcome outcome = parent.allocateBytes(reservation); - if (!outcome.isOk()) { - throw new OutOfMemoryException( - String.format( - "Failure trying to allocate initial reservation for Allocator. " - + "Attempted to allocate %d bytes.", - reservation), - outcome.getDetails()); - } - } - } - - /** - * Attempt to allocate the requested amount of memory. Either completely succeeds or completely - * fails. If it fails, no changes are made to accounting. - * - * @param size The amount of memory to reserve in bytes. - * @return the status and details of allocation at each allocator in the chain. - */ - AllocationOutcome allocateBytes(long size) { - AllocationOutcome.Status status = allocateBytesInternal(size); - if (status.isOk()) { - return AllocationOutcome.SUCCESS_INSTANCE; - } else { - // Try again, but with details this time. - // Populating details only on failures avoids performance overhead in the common case (success - // case). 
- AllocationOutcomeDetails details = new AllocationOutcomeDetails(); - status = allocateBytesInternal(size, details); - return new AllocationOutcome(status, details); - } - } - - private AllocationOutcome.Status allocateBytesInternal( - long size, @Nullable AllocationOutcomeDetails details) { - final AllocationOutcome.Status status = - allocate(size, true /*incomingUpdatePeek*/, false /*forceAllocation*/, details); - if (!status.isOk()) { - releaseBytes(size); - } - return status; - } - - private AllocationOutcome.Status allocateBytesInternal(long size) { - return allocateBytesInternal(size, null /*details*/); - } - - private void updatePeak() { - final long currentMemory = locallyHeldMemory.get(); - while (true) { - - final long previousPeak = peakAllocation.get(); - if (currentMemory > previousPeak) { - if (!peakAllocation.compareAndSet(previousPeak, currentMemory)) { - // peak allocation changed underneath us. try again. - continue; - } - } - - // we either succeeded to set peak allocation or we weren't above the previous peak, exit. - return; - } - } - - /** - * Increase the accounting. Returns whether the allocation fit within limits. - * - * @param size to increase - * @return Whether the allocation fit within limits. - */ - public boolean forceAllocate(long size) { - final AllocationOutcome.Status outcome = allocate(size, true, true, null); - return outcome.isOk(); - } - - /** - * Internal method for allocation. This takes a forced approach to allocation to ensure that we - * manage reservation boundary issues consistently. Allocation is always done through the entire - * tree. The two options that we influence are whether the allocation should be forced and whether - * or not the peak memory allocation should be updated. If at some point during allocation - * escalation we determine that the allocation is no longer possible, we will continue to do a - * complete and consistent allocation but we will stop updating the peak allocation. 
We do this - * because we know that we will be directly unwinding this allocation (and thus never actually - * making the allocation). If force allocation is passed, then we continue to update the peak - * limits since we now know that this allocation will occur despite our moving past one or more - * limits. - * - * @param size The size of the allocation. - * @param incomingUpdatePeak Whether we should update the local peak for this allocation. - * @param forceAllocation Whether we should force the allocation. - * @return The outcome of the allocation. - */ - private AllocationOutcome.Status allocate( - final long size, - final boolean incomingUpdatePeak, - final boolean forceAllocation, - @Nullable AllocationOutcomeDetails details) { - final long oldLocal = locallyHeldMemory.getAndAdd(size); - final long newLocal = oldLocal + size; - // Borrowed from Math.addExact (but avoid exception here) - // Overflow if result has opposite sign of both arguments - // No need to reset locallyHeldMemory on overflow; allocateBytesInternal will releaseBytes on - // failure - final boolean overflow = ((oldLocal ^ newLocal) & (size ^ newLocal)) < 0; - final long beyondReservation = newLocal - reservation; - final boolean beyondLimit = overflow || newLocal > allocationLimit.get(); - final boolean updatePeak = forceAllocation || (incomingUpdatePeak && !beyondLimit); - - if (details != null) { - // Add details if required (used in exceptions and debugging). - boolean allocationFailed = true; - long allocatedLocal = 0; - if (!beyondLimit) { - allocatedLocal = size - Math.min(beyondReservation, size); - allocationFailed = false; - } - details.pushEntry(this, newLocal - size, size, allocatedLocal, allocationFailed); - } - - AllocationOutcome.Status parentOutcome = AllocationOutcome.Status.SUCCESS; - if (beyondReservation > 0 && parent != null) { - // we need to get memory from our parent. 
- final long parentRequest = Math.min(beyondReservation, size); - parentOutcome = parent.allocate(parentRequest, updatePeak, forceAllocation, details); - } - - final AllocationOutcome.Status finalOutcome; - if (beyondLimit) { - finalOutcome = AllocationOutcome.Status.FAILED_LOCAL; - } else { - finalOutcome = - parentOutcome.isOk() - ? AllocationOutcome.Status.SUCCESS - : AllocationOutcome.Status.FAILED_PARENT; - } - - if (updatePeak) { - updatePeak(); - } - - return finalOutcome; - } - - public void releaseBytes(long size) { - // reduce local memory. all memory released above reservation should be released up the tree. - final long newSize = locallyHeldMemory.addAndGet(-size); - - Preconditions.checkArgument(newSize >= 0, "Accounted size went negative."); - - final long originalSize = newSize + size; - if (originalSize > reservation && parent != null) { - // we deallocated memory that we should release to our parent. - final long possibleAmountToReleaseToParent = originalSize - reservation; - final long actualToReleaseToParent = Math.min(size, possibleAmountToReleaseToParent); - parent.releaseBytes(actualToReleaseToParent); - } - } - - public boolean isOverLimit() { - return getAllocatedMemory() > getLimit() || (parent != null && parent.isOverLimit()); - } - - /** Close this Accountant. This will release any reservation bytes back to a parent Accountant. */ - @Override - public void close() { - // return memory reservation to parent allocator. - if (parent != null) { - parent.releaseBytes(reservation); - } - } - - /** - * Return the name of the accountant. - * - * @return name of accountant - */ - public String getName() { - return name; - } - - /** - * Return the current limit of this Accountant. - * - * @return Limit in bytes. - */ - public long getLimit() { - return allocationLimit.get(); - } - - /** - * Return the initial reservation. - * - * @return reservation in bytes. 
- */ - public long getInitReservation() { - return reservation; - } - - /** - * Set the maximum amount of memory that can be allocated in the this Accountant before failing an - * allocation. - * - * @param newLimit The limit in bytes. - */ - public void setLimit(long newLimit) { - allocationLimit.set(newLimit); - } - - /** - * Return the current amount of allocated memory that this Accountant is managing accounting for. - * Note this does not include reservation memory that hasn't been allocated. - * - * @return Currently allocate memory in bytes. - */ - public long getAllocatedMemory() { - return locallyHeldMemory.get(); - } - - /** - * The peak memory allocated by this Accountant. - * - * @return The peak allocated memory in bytes. - */ - public long getPeakMemoryAllocation() { - return peakAllocation.get(); - } - - public long getHeadroom() { - long localHeadroom = allocationLimit.get() - locallyHeldMemory.get(); - if (parent == null) { - return localHeadroom; - } - - // Amount of reserved memory left on top of what parent has - long reservedHeadroom = Math.max(0, reservation - locallyHeldMemory.get()); - return Math.min(localHeadroom, parent.getHeadroom() + reservedHeadroom); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java deleted file mode 100644 index b82099ee3a428..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationListener.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -/** - * An allocation listener being notified for allocation/deallocation - * - *

    It might be called from multiple threads if the allocator hierarchy shares a listener, in - * which case, the provider should take care of making the implementation thread-safe. - */ -public interface AllocationListener { - - AllocationListener NOOP = new AllocationListener() {}; - - /** - * Called each time a new buffer has been requested. - * - *

    An exception can be safely thrown by this method to terminate the allocation. - * - * @param size the buffer size being allocated - */ - default void onPreAllocation(long size) {} - - /** - * Called each time a new buffer has been allocated. - * - *

    An exception cannot be thrown by this method. - * - * @param size the buffer size being allocated - */ - default void onAllocation(long size) {} - - /** - * Informed each time a buffer is released from allocation. - * - *

    An exception cannot be thrown by this method. - * - * @param size The size of the buffer being released. - */ - default void onRelease(long size) {} - - /** - * Called whenever an allocation failed, giving the caller a chance to create some space in the - * allocator (either by freeing some resource, or by changing the limit), and, if successful, - * allowing the allocator to retry the allocation. - * - * @param size the buffer size that was being allocated - * @param outcome the outcome of the failed allocation. Carries information of what failed - * @return true, if the allocation can be retried; false if the allocation should fail - */ - default boolean onFailedAllocation(long size, AllocationOutcome outcome) { - return false; - } - - /** - * Called immediately after a child allocator was added to the parent allocator. - * - * @param parentAllocator The parent allocator to which a child was added - * @param childAllocator The child allocator that was just added - */ - default void onChildAdded(BufferAllocator parentAllocator, BufferAllocator childAllocator) {} - - /** - * Called immediately after a child allocator was removed from the parent allocator. - * - * @param parentAllocator The parent allocator from which a child was removed - * @param childAllocator The child allocator that was just removed - */ - default void onChildRemoved(BufferAllocator parentAllocator, BufferAllocator childAllocator) {} -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationManager.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationManager.java deleted file mode 100644 index e9dd8cb9d28f4..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationManager.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import org.apache.arrow.util.Preconditions; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** - * An AllocationManager is the implementation of a physical memory allocation. - * - *

    Manages the relationship between the allocators and a particular memory allocation. Ensures - * that one allocator owns the memory that multiple allocators may be referencing. Manages a - * BufferLedger between each of its associated allocators. It does not track the reference count; - * that is the role of {@link BufferLedger} (aka {@link ReferenceManager}). - * - *

    This is a public interface implemented by concrete allocator implementations (e.g. Netty or - * Unsafe). - * - *

    Threading: AllocationManager manages thread-safety internally. Operations within the context - * of a single BufferLedger are lockless in nature and can be leveraged by multiple threads. - * Operations that cross the context of two ledgers will acquire a lock on the AllocationManager - * instance. Important note, there is one AllocationManager per physical buffer allocation. As such, - * there will be thousands of these in a typical query. The contention of acquiring a lock on - * AllocationManager should be very low. - */ -public abstract class AllocationManager { - // The RootAllocator we are associated with. An allocation can only ever be associated with a - // single RootAllocator. - private final BufferAllocator root; - // An allocation can be tracked by multiple allocators. (This is because an allocator is more like - // a ledger.) - // All such allocators track reference counts individually, via BufferLedger instances. When an - // individual - // reference count reaches zero, the allocator will be dissociated from this allocation. If that - // was via the - // owningLedger, then no more allocators should be tracking this allocation, and the allocation - // will be freed. - // ARROW-1627: Trying to minimize memory overhead caused by previously used IdentityHashMap - private final LowCostIdentityHashMap map = - new LowCostIdentityHashMap<>(); - // The primary BufferLedger (i.e. reference count) tracking this allocation. - // This is mostly a semantic constraint on the API user: if the reference count reaches 0 in the - // owningLedger, then - // there are not supposed to be any references through other allocators. In practice, this doesn't - // do anything - // as the implementation just forces ownership to be transferred to one of the other extant - // references. 
- private volatile @Nullable BufferLedger owningLedger; - - @SuppressWarnings( - "nullness:method.invocation") // call to associate(a, b) not allowed on the given receiver - protected AllocationManager(BufferAllocator accountingAllocator) { - Preconditions.checkNotNull(accountingAllocator); - accountingAllocator.assertOpen(); - - this.root = accountingAllocator.getRoot(); - - // we do a no retain association since our creator will want to retrieve the newly created - // ledger and will create a reference count at that point - this.owningLedger = associate(accountingAllocator, false); - } - - @Nullable - BufferLedger getOwningLedger() { - return owningLedger; - } - - void setOwningLedger(final BufferLedger ledger) { - this.owningLedger = ledger; - } - - /** - * Associate the existing underlying buffer with a new allocator. This will increase the reference - * count on the corresponding buffer ledger by 1. - * - * @param allocator The target allocator to associate this buffer with. - * @return The reference manager (new or existing) that associates the underlying buffer to this - * new ledger. 
- */ - BufferLedger associate(final BufferAllocator allocator) { - return associate(allocator, true); - } - - private BufferLedger associate(final BufferAllocator allocator, final boolean retain) { - allocator.assertOpen(); - Preconditions.checkState( - root == allocator.getRoot(), - "A buffer can only be associated between two allocators that share the same root"); - - synchronized (this) { - BufferLedger ledger = map.get(allocator); - if (ledger != null) { - // We were already being tracked by the given allocator, just return it - if (retain) { - // bump the ref count for the ledger - ledger.increment(); - } - return ledger; - } - - // We weren't previously being tracked by the given allocator; create a new ledger - ledger = new BufferLedger(allocator, this); - - if (retain) { - // the new reference manager will have a ref count of 1 - ledger.increment(); - } - - // store the mapping for - BufferLedger oldLedger = map.put(ledger); - Preconditions.checkState( - oldLedger == null, - "Detected inconsistent state: A reference manager already exists for this allocator"); - - if (allocator instanceof BaseAllocator) { - // needed for debugging only: keep a pointer to reference manager inside allocator - // to dump state, verify allocator state etc - ((BaseAllocator) allocator).associateLedger(ledger); - } - return ledger; - } - } - - /** - * The way that a particular ReferenceManager (BufferLedger) communicates back to the - * AllocationManager that it no longer needs to hold a reference to a particular piece of memory. - * Reference manager needs to hold a lock to invoke this method It is called when the shared - * refcount of all the ArrowBufs managed by the calling ReferenceManager drops to 0. 
- */ - void release(final BufferLedger ledger) { - final BufferAllocator allocator = ledger.getAllocator(); - allocator.assertOpen(); - - // remove the mapping for the allocator - // of calling BufferLedger - Preconditions.checkState( - map.containsKey(allocator), "Expecting a mapping for allocator and reference manager"); - final BufferLedger oldLedger = map.remove(allocator); - Preconditions.checkState( - oldLedger != null, "Expecting a mapping for allocator and reference manager"); - BufferAllocator oldAllocator = oldLedger.getAllocator(); - if (oldAllocator instanceof BaseAllocator) { - // needed for debug only: tell the allocator that AllocationManager is removing a - // reference manager associated with this particular allocator - ((BaseAllocator) oldAllocator).dissociateLedger(oldLedger); - } - - if (oldLedger == owningLedger) { - // the release call was made by the owning reference manager - if (map.isEmpty()) { - // the only mapping was for the owner - // which now has been removed, it implies we can safely destroy the - // underlying memory chunk as it is no longer being referenced - oldAllocator.releaseBytes(getSize()); - // free the memory chunk associated with the allocation manager - release0(); - oldAllocator.getListener().onRelease(getSize()); - owningLedger = null; - } else { - // since the refcount dropped to 0 for the owning reference manager and allocation - // manager will no longer keep a mapping for it, we need to change the owning - // reference manager to whatever the next available - // mapping exists. - BufferLedger newOwningLedger = map.getNextValue(); - // we'll forcefully transfer the ownership and not worry about whether we - // exceeded the limit since this consumer can't do anything with this. 
- oldLedger.transferBalance(newOwningLedger); - } - } else { - // the release call was made by a non-owning reference manager, so after remove there have - // to be 1 or more mappings - Preconditions.checkState( - map.size() > 0, - "The final removal of reference manager should be connected to owning reference manager"); - } - } - - /** - * Return the size of underlying chunk of memory managed by this Allocation Manager. - * - *

    The underlying memory chunk managed can be different from the original requested size. - * - * @return size of underlying memory chunk - */ - public abstract long getSize(); - - /** Return the absolute memory address pointing to the fist byte of underlying memory chunk. */ - protected abstract long memoryAddress(); - - /** Release the underlying memory chunk. */ - protected abstract void release0(); - - /** - * A factory interface for creating {@link AllocationManager}. One may extend this interface to - * use a user-defined AllocationManager implementation. - */ - public interface Factory { - - /** - * Create an {@link AllocationManager}. - * - * @param accountingAllocator The allocator that are expected to be associated with newly - * created AllocationManager. Currently it is always equivalent to "this" - * @param size Size (in bytes) of memory managed by the AllocationManager - * @return The created AllocationManager used by this allocator - */ - AllocationManager create(BufferAllocator accountingAllocator, long size); - - ArrowBuf empty(); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcome.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcome.java deleted file mode 100644 index d6361f0461bc1..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcome.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import java.util.Optional; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** Describes the type of outcome that occurred when trying to account for allocation of memory. */ -public class AllocationOutcome { - private final Status status; - private final @Nullable AllocationOutcomeDetails details; - static final AllocationOutcome SUCCESS_INSTANCE = new AllocationOutcome(Status.SUCCESS); - - AllocationOutcome(Status status, @Nullable AllocationOutcomeDetails details) { - this.status = status; - this.details = details; - } - - AllocationOutcome(Status status) { - this(status, null); - } - - /** - * Get the status of the allocation. - * - * @return status code. - */ - public Status getStatus() { - return status; - } - - /** - * Get additional details of the allocation (like the status at each allocator in the hierarchy). - * - * @return details of allocation - */ - public Optional getDetails() { - return Optional.ofNullable(details); - } - - /** - * Returns true if the allocation was a success. - * - * @return true if allocation was successful, false otherwise. - */ - public boolean isOk() { - return status.isOk(); - } - - /** Allocation status code. */ - public enum Status { - /** Allocation succeeded. */ - SUCCESS(true), - - /** Allocation succeeded but only because the allocator was forced to move beyond a limit. */ - FORCED_SUCCESS(true), - - /** Allocation failed because the local allocator's limits were exceeded. 
*/ - FAILED_LOCAL(false), - - /** Allocation failed because a parent allocator's limits were exceeded. */ - FAILED_PARENT(false); - - private final boolean ok; - - Status(boolean ok) { - this.ok = ok; - } - - public boolean isOk() { - return ok; - } - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java deleted file mode 100644 index 61db6898127c4..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import java.util.ArrayDeque; -import java.util.Deque; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** Captures details of allocation for each accountant in the hierarchical chain. 
*/ -public class AllocationOutcomeDetails { - Deque allocEntries; - - AllocationOutcomeDetails() { - allocEntries = new ArrayDeque<>(); - } - - void pushEntry( - Accountant accountant, - long totalUsedBeforeAllocation, - long requestedSize, - long allocatedSize, - boolean allocationFailed) { - - Entry top = allocEntries.peekLast(); - if (top != null && top.allocationFailed) { - // if the allocation has already failed, stop saving the entries. - return; - } - - allocEntries.addLast( - new Entry( - accountant, totalUsedBeforeAllocation, requestedSize, allocatedSize, allocationFailed)); - } - - /** - * Get the allocator that caused the failure. - * - * @return the allocator that caused failure, null if there was no failure. - */ - public @Nullable BufferAllocator getFailedAllocator() { - Entry top = allocEntries.peekLast(); - if (top != null && top.allocationFailed && (top.accountant instanceof BufferAllocator)) { - return (BufferAllocator) top.accountant; - } else { - return null; - } - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("Allocation outcome details:\n"); - allocEntries.forEach(sb::append); - return sb.toString(); - } - - /** Outcome of the allocation request at one accountant in the hierarchy. */ - public static class Entry { - private final Accountant accountant; - - // Remember allocator attributes at the time of the request. 
- private final long limit; - private final long used; - - // allocation outcome - private final long requestedSize; - private final long allocatedSize; - private final boolean allocationFailed; - - Entry( - Accountant accountant, - long totalUsedBeforeAllocation, - long requestedSize, - long allocatedSize, - boolean allocationFailed) { - this.accountant = accountant; - this.limit = accountant.getLimit(); - this.used = totalUsedBeforeAllocation; - - this.requestedSize = requestedSize; - this.allocatedSize = allocatedSize; - this.allocationFailed = allocationFailed; - } - - public Accountant getAccountant() { - return accountant; - } - - public long getLimit() { - return limit; - } - - public long getUsed() { - return used; - } - - public long getRequestedSize() { - return requestedSize; - } - - public long getAllocatedSize() { - return allocatedSize; - } - - public boolean isAllocationFailed() { - return allocationFailed; - } - - @Override - public String toString() { - return "allocator[" - + accountant.getName() - + "]" - + " reservation: " - + accountant.getInitReservation() - + " limit: " - + limit - + " used: " - + used - + " requestedSize: " - + requestedSize - + " allocatedSize: " - + allocatedSize - + " localAllocationStatus: " - + (allocationFailed ? "fail" : "success") - + "\n"; - } - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java deleted file mode 100644 index 856cc88ab9c39..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -/** - * Supports cumulative allocation reservation. Clients may increase the size of the reservation - * repeatedly until they call for an allocation of the current total size. The reservation can only - * be used once, and will throw an exception if it is used more than once. - * - *

    For the purposes of airtight memory accounting, the reservation must be close()d whether it is - * used or not. This is not threadsafe. - */ -public interface AllocationReservation extends AutoCloseable { - - /** - * Add to the current reservation. - * - *

    Adding may fail if the allocator is not allowed to consume any more space. - * - * @param nBytes the number of bytes to add - * @return true if the addition is possible, false otherwise - * @throws IllegalStateException if called after buffer() is used to allocate the reservation - * @deprecated use {@link #add(long)} instead - */ - @Deprecated(forRemoval = true) - boolean add(int nBytes); - - /** - * Add to the current reservation. - * - *

    Adding may fail if the allocator is not allowed to consume any more space. - * - * @param nBytes the number of bytes to add - * @return true if the addition is possible, false otherwise - * @throws IllegalStateException if called after buffer() is used to allocate the reservation - */ - boolean add(long nBytes); - - /** - * Requests a reservation of additional space. - * - *

    The implementation of the allocator's inner class provides this. - * - * @param nBytes the amount to reserve - * @return true if the reservation can be satisfied, false otherwise - * @deprecated use {@link #reserve(long)} instead - */ - @Deprecated(forRemoval = true) - boolean reserve(int nBytes); - - /** - * Requests a reservation of additional space. - * - *

    The implementation of the allocator's inner class provides this. - * - * @param nBytes the amount to reserve - * @return true if the reservation can be satisfied, false otherwise - */ - boolean reserve(long nBytes); - - /** - * Allocate a buffer whose size is the total of all the add()s made. - * - *

    The allocation request can still fail, even if the amount of space requested is available, - * if the allocation cannot be made contiguously. - * - * @return the buffer, or null, if the request cannot be satisfied - * @throws IllegalStateException if called more than once - */ - ArrowBuf allocateBuffer(); - - /** - * Get the current size of the reservation (the sum of all the add()s). - * - * @return size of the current reservation - */ - int getSize(); - - /** - * Get the current size of the reservation (the sum of all the add()s) as a long value. - * - * @return size of the current reservation - */ - long getSizeLong(); - - /** - * Return whether or not the reservation has been used. - * - * @return whether or not the reservation has been used - */ - boolean isUsed(); - - /** - * Return whether or not the reservation has been closed. - * - * @return whether or not the reservation has been closed - */ - boolean isClosed(); - - @Override - void close(); -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocatorClosedException.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocatorClosedException.java deleted file mode 100644 index eba71f24c880d..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocatorClosedException.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -/** Exception thrown when a closed BufferAllocator is used. Note this is an unchecked exception. */ -@SuppressWarnings("serial") -public class AllocatorClosedException extends RuntimeException { - - /** - * Constructs a new allocator closed exception with a given message. - * - * @param message string associated with the cause - */ - public AllocatorClosedException(String message) { - super(message); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java deleted file mode 100644 index 775a8925ad1a9..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java +++ /dev/null @@ -1,1228 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.ReadOnlyBufferException; -import java.util.concurrent.atomic.AtomicLong; -import org.apache.arrow.memory.BaseAllocator.Verbosity; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.HistoricalLog; -import org.apache.arrow.memory.util.MemoryUtil; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.util.VisibleForTesting; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** - * ArrowBuf serves as a facade over underlying memory by providing several access APIs to read/write - * data into a chunk of direct memory. All the accounting, ownership and reference management is - * done by {@link ReferenceManager} and ArrowBuf can work with a custom user provided implementation - * of ReferenceManager - * - *

    Two important instance variables of an ArrowBuf: (1) address - starting virtual address in the - * underlying memory chunk that this ArrowBuf has access to (2) length - length (in bytes) in the - * underlying memory chunk that this ArrowBuf has access to - * - *

    The management (allocation, deallocation, reference counting etc) for the memory chunk is not - * done by ArrowBuf. Default implementation of ReferenceManager, allocation is in {@link - * BaseAllocator}, {@link BufferLedger} and {@link AllocationManager} - */ -public final class ArrowBuf implements AutoCloseable { - - private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ArrowBuf.class); - - private static final int SHORT_SIZE = Short.BYTES; - private static final int INT_SIZE = Integer.BYTES; - private static final int FLOAT_SIZE = Float.BYTES; - private static final int DOUBLE_SIZE = Double.BYTES; - private static final int LONG_SIZE = Long.BYTES; - - private static final AtomicLong idGenerator = new AtomicLong(0); - private static final int LOG_BYTES_PER_ROW = 10; - private final long id = idGenerator.incrementAndGet(); - private final ReferenceManager referenceManager; - private final @Nullable BufferManager bufferManager; - private final long addr; - private long readerIndex; - private long writerIndex; - private final @Nullable HistoricalLog historicalLog = - BaseAllocator.DEBUG - ? new HistoricalLog(BaseAllocator.DEBUG_LOG_LENGTH, "ArrowBuf[%d]", id) - : null; - private volatile long capacity; - - /** - * Constructs a new ArrowBuf. 
- * - * @param referenceManager The memory manager to track memory usage and reference count of this - * buffer - * @param capacity The capacity in bytes of this buffer - */ - public ArrowBuf( - final ReferenceManager referenceManager, - final @Nullable BufferManager bufferManager, - final long capacity, - final long memoryAddress) { - this.referenceManager = referenceManager; - this.bufferManager = bufferManager; - this.addr = memoryAddress; - this.capacity = capacity; - this.readerIndex = 0; - this.writerIndex = 0; - if (historicalLog != null) { - historicalLog.recordEvent("create()"); - } - } - - public int refCnt() { - return referenceManager.getRefCount(); - } - - /** - * Allows a function to determine whether not reading a particular string of bytes is valid. - * - *

    Will throw an exception if the memory is not readable for some reason. Only doesn't - * something in the case that AssertionUtil.BOUNDS_CHECKING_ENABLED is true. - * - * @param start The starting position of the bytes to be read. - * @param end The exclusive endpoint of the bytes to be read. - */ - public void checkBytes(long start, long end) { - if (BoundsChecking.BOUNDS_CHECKING_ENABLED) { - checkIndexD(start, end - start); - } - } - - /** For get/set operations, reference count should be >= 1. */ - private void ensureAccessible() { - if (this.refCnt() == 0) { - throw new IllegalStateException("Ref count should be >= 1 for accessing the ArrowBuf"); - } - } - - /** - * Get reference manager for this ArrowBuf. - * - * @return user provided implementation of {@link ReferenceManager} - */ - public ReferenceManager getReferenceManager() { - return referenceManager; - } - - public long capacity() { - return capacity; - } - - /** - * Adjusts the capacity of this buffer. Size increases are NOT supported. - * - * @param newCapacity Must be in in the range [0, length). - */ - public synchronized ArrowBuf capacity(long newCapacity) { - - if (newCapacity == capacity) { - return this; - } - - Preconditions.checkArgument(newCapacity >= 0); - - if (newCapacity < capacity) { - capacity = newCapacity; - return this; - } - - throw new UnsupportedOperationException( - "Buffers don't support resizing that increases the size."); - } - - /** Returns the byte order of elements in this buffer. */ - public ByteOrder order() { - return ByteOrder.nativeOrder(); - } - - /** Returns the number of bytes still available to read in this buffer. */ - public long readableBytes() { - Preconditions.checkState( - writerIndex >= readerIndex, "Writer index cannot be less than reader index"); - return writerIndex - readerIndex; - } - - /** - * Returns the number of bytes still available to write into this buffer before capacity is - * reached. 
- */ - public long writableBytes() { - return capacity() - writerIndex; - } - - /** Returns a slice of only the readable bytes in the buffer. */ - public ArrowBuf slice() { - return slice(readerIndex, readableBytes()); - } - - /** Returns a slice (view) starting at index with the given length. */ - public ArrowBuf slice(long index, long length) { - - Preconditions.checkPositionIndex(index, this.capacity); - Preconditions.checkPositionIndex(index + length, this.capacity); - - /* - * Re the behavior of reference counting, see http://netty.io/wiki/reference-counted-objects - * .html#wiki-h3-5, which - * explains that derived buffers share their reference count with their parent - */ - final ArrowBuf newBuf = referenceManager.deriveBuffer(this, index, length); - newBuf.writerIndex(length); - return newBuf; - } - - /** Make a nio byte buffer from this arrowbuf. */ - public ByteBuffer nioBuffer() { - return nioBuffer(readerIndex, checkedCastToInt(readableBytes())); - } - - /** Make a nio byte buffer from this ArrowBuf. */ - public ByteBuffer nioBuffer(long index, int length) { - chk(index, length); - return getDirectBuffer(index, length); - } - - private ByteBuffer getDirectBuffer(long index, int length) { - long address = addr(index); - return MemoryUtil.directBuffer(address, length); - } - - public long memoryAddress() { - return this.addr; - } - - @Override - public String toString() { - return String.format("ArrowBuf[%d], address:%d, capacity:%d", id, memoryAddress(), capacity); - } - - @Override - public int hashCode() { - return System.identityHashCode(this); - } - - @Override - public boolean equals(@Nullable Object obj) { - // identity equals only. - return this == obj; - } - - /* - * IMPORTANT NOTE - * The data getters and setters work with a caller provided - * index. 
This index is 0 based and since ArrowBuf has access - * to a portion of underlying chunk of memory starting at - * some address, we convert the given relative index into - * absolute index as memory address + index. - * - * Example: - * - * Let's say we have an underlying chunk of memory of length 64 bytes - * Now let's say we have an ArrowBuf that has access to the chunk - * from offset 4 for length of 16 bytes. - * - * If the starting virtual address of chunk is MAR, then memory - * address of this ArrowBuf is MAR + offset -- this is what is stored - * in variable addr. See the BufferLedger and AllocationManager code - * for the implementation of ReferenceManager that manages a - * chunk of memory and creates ArrowBuf with access to a range of - * bytes within the chunk (or the entire chunk) - * - * So now to get/set data, we will do => addr + index - * This logic is put in method addr(index) and is frequently - * used in get/set data methods to compute the absolute - * byte address for get/set operation in the underlying chunk - * - * @param index the index at which we the user wants to read/write - * @return the absolute address within the memory - */ - private long addr(long index) { - return addr + index; - } - - /*-------------------------------------------------* - | Following are a set of fast path data set and | - | get APIs to write/read data from ArrowBuf | - | at a given index (0 based relative to this | - | ArrowBuf and not relative to the underlying | - | memory chunk). | - | | - *-------------------------------------------------*/ - - /** - * Helper function to do bounds checking at a particular index for particular length of data. 
- * - * @param index index (0 based relative to this ArrowBuf) - * @param length provided length of data for get/set - */ - private void chk(long index, long length) { - if (BoundsChecking.BOUNDS_CHECKING_ENABLED) { - checkIndexD(index, length); - } - } - - private void checkIndexD(long index, long fieldLength) { - // check reference count - ensureAccessible(); - // check bounds - Preconditions.checkArgument(fieldLength >= 0, "expecting non-negative data length"); - if (index < 0 || index > capacity() - fieldLength) { - if (historicalLog != null) { - historicalLog.logHistory(logger); - } - throw new IndexOutOfBoundsException( - String.format( - "index: %d, length: %d (expected: range(0, %d))", index, fieldLength, capacity())); - } - } - - /** - * Get long value stored at a particular index in the underlying memory chunk this ArrowBuf has - * access to. - * - * @param index index (0 based relative to this ArrowBuf) where the value will be read from - * @return 8 byte long value - */ - public long getLong(long index) { - chk(index, LONG_SIZE); - return MemoryUtil.getLong(addr(index)); - } - - /** - * Set long value at a particular index in the underlying memory chunk this ArrowBuf has access - * to. - * - * @param index index (0 based relative to this ArrowBuf) where the value will be written - * @param value value to write - */ - public void setLong(long index, long value) { - chk(index, LONG_SIZE); - MemoryUtil.putLong(addr(index), value); - } - - /** - * Get float value stored at a particular index in the underlying memory chunk this ArrowBuf has - * access to. - * - * @param index index (0 based relative to this ArrowBuf) where the value will be read from - * @return 4 byte float value - */ - public float getFloat(long index) { - return Float.intBitsToFloat(getInt(index)); - } - - /** - * Set float value at a particular index in the underlying memory chunk this ArrowBuf has access - * to. 
- * - * @param index index (0 based relative to this ArrowBuf) where the value will be written - * @param value value to write - */ - public void setFloat(long index, float value) { - chk(index, FLOAT_SIZE); - MemoryUtil.putInt(addr(index), Float.floatToRawIntBits(value)); - } - - /** - * Get double value stored at a particular index in the underlying memory chunk this ArrowBuf has - * access to. - * - * @param index index (0 based relative to this ArrowBuf) where the value will be read from - * @return 8 byte double value - */ - public double getDouble(long index) { - return Double.longBitsToDouble(getLong(index)); - } - - /** - * Set double value at a particular index in the underlying memory chunk this ArrowBuf has access - * to. - * - * @param index index (0 based relative to this ArrowBuf) where the value will be written - * @param value value to write - */ - public void setDouble(long index, double value) { - chk(index, DOUBLE_SIZE); - MemoryUtil.putLong(addr(index), Double.doubleToRawLongBits(value)); - } - - /** - * Get char value stored at a particular index in the underlying memory chunk this ArrowBuf has - * access to. - * - * @param index index (0 based relative to this ArrowBuf) where the value will be read from - * @return 2 byte char value - */ - public char getChar(long index) { - return (char) getShort(index); - } - - /** - * Set char value at a particular index in the underlying memory chunk this ArrowBuf has access - * to. - * - * @param index index (0 based relative to this ArrowBuf) where the value will be written - * @param value value to write - */ - public void setChar(long index, int value) { - chk(index, SHORT_SIZE); - MemoryUtil.putShort(addr(index), (short) value); - } - - /** - * Get int value stored at a particular index in the underlying memory chunk this ArrowBuf has - * access to. 
- * - * @param index index (0 based relative to this ArrowBuf) where the value will be read from - * @return 4 byte int value - */ - public int getInt(long index) { - chk(index, INT_SIZE); - return MemoryUtil.getInt(addr(index)); - } - - /** - * Set int value at a particular index in the underlying memory chunk this ArrowBuf has access to. - * - * @param index index (0 based relative to this ArrowBuf) where the value will be written - * @param value value to write - */ - public void setInt(long index, int value) { - chk(index, INT_SIZE); - MemoryUtil.putInt(addr(index), value); - } - - /** - * Get short value stored at a particular index in the underlying memory chunk this ArrowBuf has - * access to. - * - * @param index index (0 based relative to this ArrowBuf) where the value will be read from - * @return 2 byte short value - */ - public short getShort(long index) { - chk(index, SHORT_SIZE); - return MemoryUtil.getShort(addr(index)); - } - - /** - * Set short value at a particular index in the underlying memory chunk this ArrowBuf has access - * to. - * - * @param index index (0 based relative to this ArrowBuf) where the value will be written - * @param value value to write - */ - public void setShort(long index, int value) { - setShort(index, (short) value); - } - - /** - * Set short value at a particular index in the underlying memory chunk this ArrowBuf has access - * to. - * - * @param index index (0 based relative to this ArrowBuf) where the value will be written - * @param value value to write - */ - public void setShort(long index, short value) { - chk(index, SHORT_SIZE); - MemoryUtil.putShort(addr(index), value); - } - - /** - * Set byte value at a particular index in the underlying memory chunk this ArrowBuf has access - * to. 
- * - * @param index index (0 based relative to this ArrowBuf) where the value will be written - * @param value value to write - */ - public void setByte(long index, int value) { - chk(index, 1); - MemoryUtil.putByte(addr(index), (byte) value); - } - - /** - * Set byte value at a particular index in the underlying memory chunk this ArrowBuf has access - * to. - * - * @param index index (0 based relative to this ArrowBuf) where the value will be written - * @param value value to write - */ - public void setByte(long index, byte value) { - chk(index, 1); - MemoryUtil.putByte(addr(index), value); - } - - /** - * Get byte value stored at a particular index in the underlying memory chunk this ArrowBuf has - * access to. - * - * @param index index (0 based relative to this ArrowBuf) where the value will be read from - * @return byte value - */ - public byte getByte(long index) { - chk(index, 1); - return MemoryUtil.getByte(addr(index)); - } - - /*--------------------------------------------------* - | Following are another set of data set APIs | - | that directly work with writerIndex | - | | - *--------------------------------------------------*/ - - /** - * Helper function to do bound checking w.r.t writerIndex by checking if we can set "length" bytes - * of data at the writerIndex in this ArrowBuf. - * - * @param length provided length of data for set - */ - private void ensureWritable(final int length) { - if (BoundsChecking.BOUNDS_CHECKING_ENABLED) { - Preconditions.checkArgument(length >= 0, "expecting non-negative length"); - // check reference count - this.ensureAccessible(); - // check bounds - if (length > writableBytes()) { - throw new IndexOutOfBoundsException( - String.format( - "writerIndex(%d) + length(%d) exceeds capacity(%d)", - writerIndex, length, capacity())); - } - } - } - - /** - * Helper function to do bound checking w.r.t readerIndex by checking if we can read "length" - * bytes of data at the readerIndex in this ArrowBuf. 
- * - * @param length provided length of data for get - */ - private void ensureReadable(final int length) { - if (BoundsChecking.BOUNDS_CHECKING_ENABLED) { - Preconditions.checkArgument(length >= 0, "expecting non-negative length"); - // check reference count - this.ensureAccessible(); - // check bounds - if (length > readableBytes()) { - throw new IndexOutOfBoundsException( - String.format( - "readerIndex(%d) + length(%d) exceeds writerIndex(%d)", - readerIndex, length, writerIndex)); - } - } - } - - /** - * Read the byte at readerIndex. - * - * @return byte value - */ - public byte readByte() { - ensureReadable(1); - final byte b = getByte(readerIndex); - ++readerIndex; - return b; - } - - /** - * Read dst.length bytes at readerIndex into dst byte array. - * - * @param dst byte array where the data will be written - */ - public void readBytes(byte[] dst) { - Preconditions.checkArgument(dst != null, "expecting valid dst bytearray"); - ensureReadable(dst.length); - getBytes(readerIndex, dst, 0, checkedCastToInt(dst.length)); - } - - /** - * Set the provided byte value at the writerIndex. - * - * @param value value to set - */ - public void writeByte(byte value) { - ensureWritable(1); - MemoryUtil.putByte(addr(writerIndex), value); - ++writerIndex; - } - - /** - * Set the lower order byte for the provided value at the writerIndex. - * - * @param value value to be set - */ - public void writeByte(int value) { - ensureWritable(1); - MemoryUtil.putByte(addr(writerIndex), (byte) value); - ++writerIndex; - } - - /** - * Write the bytes from given byte array into this ArrowBuf starting at writerIndex. - * - * @param src src byte array - */ - public void writeBytes(byte[] src) { - Preconditions.checkArgument(src != null, "expecting valid src array"); - writeBytes(src, 0, src.length); - } - - /** - * Write the bytes from given byte array starting at srcIndex into this ArrowBuf starting at - * writerIndex. 
- * - * @param src src byte array - * @param srcIndex index in the byte array where the copy will being from - * @param length length of data to copy - */ - public void writeBytes(byte[] src, int srcIndex, int length) { - ensureWritable(length); - setBytes(writerIndex, src, srcIndex, length); - writerIndex += length; - } - - /** - * Set the provided int value as short at the writerIndex. - * - * @param value value to set - */ - public void writeShort(int value) { - ensureWritable(SHORT_SIZE); - MemoryUtil.putShort(addr(writerIndex), (short) value); - writerIndex += SHORT_SIZE; - } - - /** - * Set the provided int value at the writerIndex. - * - * @param value value to set - */ - public void writeInt(int value) { - ensureWritable(INT_SIZE); - MemoryUtil.putInt(addr(writerIndex), value); - writerIndex += INT_SIZE; - } - - /** - * Set the provided long value at the writerIndex. - * - * @param value value to set - */ - public void writeLong(long value) { - ensureWritable(LONG_SIZE); - MemoryUtil.putLong(addr(writerIndex), value); - writerIndex += LONG_SIZE; - } - - /** - * Set the provided float value at the writerIndex. - * - * @param value value to set - */ - public void writeFloat(float value) { - ensureWritable(FLOAT_SIZE); - MemoryUtil.putInt(addr(writerIndex), Float.floatToRawIntBits(value)); - writerIndex += FLOAT_SIZE; - } - - /** - * Set the provided double value at the writerIndex. 
- * - * @param value value to set - */ - public void writeDouble(double value) { - ensureWritable(DOUBLE_SIZE); - MemoryUtil.putLong(addr(writerIndex), Double.doubleToRawLongBits(value)); - writerIndex += DOUBLE_SIZE; - } - - /*--------------------------------------------------* - | Following are another set of data set/get APIs | - | that read and write stream of bytes from/to byte | - | arrays, ByteBuffer, ArrowBuf etc | - | | - *--------------------------------------------------*/ - - /** - * Determine if the requested {@code index} and {@code length} will fit within {@code capacity}. - * - * @param index The starting index. - * @param length The length which will be utilized (starting from {@code index}). - * @param capacity The capacity that {@code index + length} is allowed to be within. - * @return {@code true} if the requested {@code index} and {@code length} will fit within {@code - * capacity}. {@code false} if this would result in an index out of bounds exception. - */ - private static boolean isOutOfBounds(long index, long length, long capacity) { - return (index | length | (index + length) | (capacity - (index + length))) < 0; - } - - private void checkIndex(long index, long fieldLength) { - if (BoundsChecking.BOUNDS_CHECKING_ENABLED) { - // check reference count - this.ensureAccessible(); - // check bounds - if (isOutOfBounds(index, fieldLength, this.capacity())) { - throw new IndexOutOfBoundsException( - String.format( - "index: %d, length: %d (expected: range(0, %d))", - index, fieldLength, this.capacity())); - } - } - } - - /** - * Copy data from this ArrowBuf at a given index in into destination byte array. - * - * @param index starting index (0 based relative to the portion of memory) this ArrowBuf has - * access to - * @param dst byte array to copy the data into - */ - public void getBytes(long index, byte[] dst) { - getBytes(index, dst, 0, dst.length); - } - - /** - * Copy data from this ArrowBuf at a given index into destination byte array. 
- * - * @param index index (0 based relative to the portion of memory this ArrowBuf has access to) - * @param dst byte array to copy the data into - * @param dstIndex starting index in dst byte array to copy into - * @param length length of data to copy from this ArrowBuf - */ - public void getBytes(long index, byte[] dst, int dstIndex, int length) { - // bound check for this ArrowBuf where the data will be copied from - checkIndex(index, length); - // null check - Preconditions.checkArgument(dst != null, "expecting a valid dst byte array"); - // bound check for dst byte array where the data will be copied to - if (isOutOfBounds(dstIndex, length, dst.length)) { - // not enough space to copy "length" bytes into dst array from dstIndex onwards - throw new IndexOutOfBoundsException( - "Not enough space to copy data into destination" + dstIndex); - } - if (length != 0) { - // copy "length" bytes from this ArrowBuf starting at addr(index) address - // into dst byte array at dstIndex onwards - MemoryUtil.copyFromMemory(addr(index), dst, dstIndex, length); - } - } - - /** - * Copy data from a given byte array into this ArrowBuf starting at a given index. - * - * @param index starting index (0 based relative to the portion of memory) this ArrowBuf has - * access to - * @param src byte array to copy the data from - */ - public void setBytes(long index, byte[] src) { - setBytes(index, src, 0, src.length); - } - - /** - * Copy data from a given byte array starting at the given source index into this ArrowBuf at a - * given index. 
- * - * @param index index (0 based relative to the portion of memory this ArrowBuf has access to) - * @param src src byte array to copy the data from - * @param srcIndex index in the byte array where the copy will start from - * @param length length of data to copy from byte array - */ - public void setBytes(long index, byte[] src, int srcIndex, long length) { - // bound check for this ArrowBuf where the data will be copied into - checkIndex(index, length); - // null check - Preconditions.checkArgument(src != null, "expecting a valid src byte array"); - // bound check for src byte array where the data will be copied from - if (isOutOfBounds(srcIndex, length, src.length)) { - // not enough space to copy "length" bytes into dst array from dstIndex onwards - throw new IndexOutOfBoundsException( - "Not enough space to copy data from byte array" + srcIndex); - } - if (length > 0) { - // copy "length" bytes from src byte array at the starting index (srcIndex) - // into this ArrowBuf starting at address "addr(index)" - MemoryUtil.copyToMemory(src, srcIndex, addr(index), length); - } - } - - /** - * Copy data from this ArrowBuf at a given index into the destination ByteBuffer. 
- * - * @param index index (0 based relative to the portion of memory this ArrowBuf has access to) - * @param dst dst ByteBuffer where the data will be copied into - */ - public void getBytes(long index, ByteBuffer dst) { - // bound check for this ArrowBuf where the data will be copied from - checkIndex(index, dst.remaining()); - // dst.remaining() bytes of data will be copied into dst ByteBuffer - if (dst.remaining() != 0) { - // address in this ArrowBuf where the copy will begin from - final long srcAddress = addr(index); - if (dst.isDirect()) { - if (dst.isReadOnly()) { - throw new ReadOnlyBufferException(); - } - // copy dst.remaining() bytes of data from this ArrowBuf starting - // at address srcAddress into the dst ByteBuffer starting at - // address dstAddress - final long dstAddress = MemoryUtil.getByteBufferAddress(dst) + dst.position(); - MemoryUtil.copyMemory(srcAddress, dstAddress, dst.remaining()); - // after copy, bump the next write position for the dst ByteBuffer - dst.position(dst.position() + dst.remaining()); - } else if (dst.hasArray()) { - // copy dst.remaining() bytes of data from this ArrowBuf starting - // at address srcAddress into the dst ByteBuffer starting at - // index dstIndex - final int dstIndex = dst.arrayOffset() + dst.position(); - MemoryUtil.copyFromMemory(srcAddress, dst.array(), dstIndex, dst.remaining()); - // after copy, bump the next write position for the dst ByteBuffer - dst.position(dst.position() + dst.remaining()); - } else { - throw new UnsupportedOperationException( - "Copy from this ArrowBuf to ByteBuffer is not supported"); - } - } - } - - /** - * Copy data into this ArrowBuf at a given index onwards from a source ByteBuffer. 
- * - * @param index index index (0 based relative to the portion of memory this ArrowBuf has access - * to) - * @param src src ByteBuffer where the data will be copied from - */ - public void setBytes(long index, ByteBuffer src) { - // bound check for this ArrowBuf where the data will be copied into - checkIndex(index, src.remaining()); - // length of data to copy - int length = src.remaining(); - // address in this ArrowBuf where the data will be copied to - long dstAddress = addr(index); - if (length != 0) { - if (src.isDirect()) { - // copy src.remaining() bytes of data from src ByteBuffer starting at - // address srcAddress into this ArrowBuf starting at address dstAddress - final long srcAddress = MemoryUtil.getByteBufferAddress(src) + src.position(); - MemoryUtil.copyMemory(srcAddress, dstAddress, length); - // after copy, bump the next read position for the src ByteBuffer - src.position(src.position() + length); - } else if (src.hasArray()) { - // copy src.remaining() bytes of data from src ByteBuffer starting at - // index srcIndex into this ArrowBuf starting at address dstAddress - final int srcIndex = src.arrayOffset() + src.position(); - MemoryUtil.copyToMemory(src.array(), srcIndex, dstAddress, length); - // after copy, bump the next read position for the src ByteBuffer - src.position(src.position() + length); - } else { - final ByteOrder originalByteOrder = src.order(); - src.order(order()); - try { - // copy word at a time - while (length - 128 >= LONG_SIZE) { - for (int x = 0; x < 16; x++) { - MemoryUtil.putLong(dstAddress, src.getLong()); - length -= LONG_SIZE; - dstAddress += LONG_SIZE; - } - } - while (length >= LONG_SIZE) { - MemoryUtil.putLong(dstAddress, src.getLong()); - length -= LONG_SIZE; - dstAddress += LONG_SIZE; - } - // copy last byte - while (length > 0) { - MemoryUtil.putByte(dstAddress, src.get()); - --length; - ++dstAddress; - } - } finally { - src.order(originalByteOrder); - } - } - } - } - - /** - * Copy data into this ArrowBuf 
at a given index onwards from a source ByteBuffer starting at a - * given srcIndex for a certain length. - * - * @param index index (0 based relative to the portion of memory this ArrowBuf has access to) - * @param src src ByteBuffer where the data will be copied from - * @param srcIndex starting index in the src ByteBuffer where the data copy will start from - * @param length length of data to copy from src ByteBuffer - */ - public void setBytes(long index, ByteBuffer src, int srcIndex, int length) { - // bound check for this ArrowBuf where the data will be copied into - checkIndex(index, length); - if (src.isDirect()) { - // copy length bytes of data from src ByteBuffer starting at address - // srcAddress into this ArrowBuf at address dstAddress - final long srcAddress = MemoryUtil.getByteBufferAddress(src) + srcIndex; - final long dstAddress = addr(index); - MemoryUtil.copyMemory(srcAddress, dstAddress, length); - } else { - if (srcIndex == 0 && src.capacity() == length) { - // copy the entire ByteBuffer from start to end of length - setBytes(index, src); - } else { - ByteBuffer newBuf = src.duplicate(); - newBuf.position(srcIndex); - newBuf.limit(srcIndex + length); - setBytes(index, newBuf); - } - } - } - - /** - * Copy a given length of data from this ArrowBuf starting at a given index into a dst ArrowBuf at - * dstIndex. 
- * - * @param index index (0 based relative to the portion of memory this ArrowBuf has access to) - * @param dst dst ArrowBuf where the data will be copied into - * @param dstIndex index (0 based relative to the portion of memory dst ArrowBuf has access to) - * @param length length of data to copy - */ - public void getBytes(long index, ArrowBuf dst, long dstIndex, int length) { - // bound check for this ArrowBuf where the data will be copied from - checkIndex(index, length); - // bound check for this ArrowBuf where the data will be copied into - Preconditions.checkArgument(dst != null, "expecting a valid ArrowBuf"); - // bound check for dst ArrowBuf - if (isOutOfBounds(dstIndex, length, dst.capacity())) { - throw new IndexOutOfBoundsException( - String.format( - "index: %d, length: %d (expected: range(0, %d))", dstIndex, length, dst.capacity())); - } - if (length != 0) { - // copy length bytes of data from this ArrowBuf starting at - // address srcAddress into dst ArrowBuf starting at address - // dstAddress - final long srcAddress = addr(index); - final long dstAddress = dst.memoryAddress() + (long) dstIndex; - MemoryUtil.copyMemory(srcAddress, dstAddress, length); - } - } - - /** - * Copy data from src ArrowBuf starting at index srcIndex into this ArrowBuf at given index. 
- * - * @param index index index (0 based relative to the portion of memory this ArrowBuf has access - * to) - * @param src src ArrowBuf where the data will be copied from - * @param srcIndex starting index in the src ArrowBuf where the copy will begin from - * @param length length of data to copy from src ArrowBuf - */ - public void setBytes(long index, ArrowBuf src, long srcIndex, long length) { - // bound check for this ArrowBuf where the data will be copied into - checkIndex(index, length); - // null check - Preconditions.checkArgument(src != null, "expecting a valid ArrowBuf"); - // bound check for src ArrowBuf - if (isOutOfBounds(srcIndex, length, src.capacity())) { - throw new IndexOutOfBoundsException( - String.format( - "index: %d, length: %d (expected: range(0, %d))", srcIndex, length, src.capacity())); - } - if (length != 0) { - // copy length bytes of data from src ArrowBuf starting at - // address srcAddress into this ArrowBuf starting at address - // dstAddress - final long srcAddress = src.memoryAddress() + srcIndex; - final long dstAddress = addr(index); - MemoryUtil.copyMemory(srcAddress, dstAddress, length); - } - } - - /** - * Copy readableBytes() number of bytes from src ArrowBuf starting from its readerIndex into this - * ArrowBuf starting at the given index. 
- * - * @param index index index (0 based relative to the portion of memory this ArrowBuf has access - * to) - * @param src src ArrowBuf where the data will be copied from - */ - public void setBytes(long index, ArrowBuf src) { - // null check - Preconditions.checkArgument(src != null, "expecting valid ArrowBuf"); - final long length = src.readableBytes(); - // bound check for this ArrowBuf where the data will be copied into - checkIndex(index, length); - final long srcAddress = src.memoryAddress() + src.readerIndex; - final long dstAddress = addr(index); - MemoryUtil.copyMemory(srcAddress, dstAddress, length); - src.readerIndex(src.readerIndex + length); - } - - /** - * Copy a certain length of bytes from given InputStream into this ArrowBuf at the provided index. - * - * @param index index index (0 based relative to the portion of memory this ArrowBuf has access - * to) - * @param in src stream to copy from - * @param length length of data to copy - * @return number of bytes copied from stream into ArrowBuf - * @throws IOException on failing to read from stream - */ - public int setBytes(long index, InputStream in, int length) throws IOException { - Preconditions.checkArgument(in != null, "expecting valid input stream"); - checkIndex(index, length); - int readBytes = 0; - if (length > 0) { - byte[] tmp = new byte[length]; - // read the data from input stream into tmp byte array - readBytes = in.read(tmp); - if (readBytes > 0) { - // copy readBytes length of data from the tmp byte array starting - // at srcIndex 0 into this ArrowBuf starting at address addr(index) - MemoryUtil.copyToMemory(tmp, 0, addr(index), readBytes); - } - } - return readBytes; - } - - /** - * Copy a certain length of bytes from this ArrowBuf at a given index into the given OutputStream. 
- * - * @param index index index (0 based relative to the portion of memory this ArrowBuf has access - * to) - * @param out dst stream to copy data into - * @param length length of data to copy - * @throws IOException on failing to write to stream - */ - public void getBytes(long index, OutputStream out, int length) throws IOException { - Preconditions.checkArgument(out != null, "expecting valid output stream"); - checkIndex(index, length); - if (length > 0) { - // copy length bytes of data from this ArrowBuf starting at - // address addr(index) into the tmp byte array starting at index 0 - byte[] tmp = new byte[length]; - MemoryUtil.copyFromMemory(addr(index), tmp, 0, length); - // write the copied data to output stream - out.write(tmp); - } - } - - @Override - public void close() { - referenceManager.release(); - } - - /** - * Returns the possible memory consumed by this ArrowBuf in the worse case scenario. (not shared, - * connected to larger underlying buffer of allocated memory) - * - * @return Size in bytes. - */ - public long getPossibleMemoryConsumed() { - return referenceManager.getSize(); - } - - /** - * Return that is Accounted for by this buffer (and its potentially shared siblings within the - * context of the associated allocator). - * - * @return Size in bytes. - */ - public long getActualMemoryConsumed() { - return referenceManager.getAccountedSize(); - } - - /** - * Return the buffer's byte contents in the form of a hex dump. - * - * @param start the starting byte index - * @param length how many bytes to log - * @return A hex dump in a String. 
- */ - public String toHexString(final long start, final int length) { - final long roundedStart = (start / LOG_BYTES_PER_ROW) * LOG_BYTES_PER_ROW; - - final StringBuilder sb = new StringBuilder("buffer byte dump\n"); - long index = roundedStart; - for (long nLogged = 0; nLogged < length; nLogged += LOG_BYTES_PER_ROW) { - sb.append(String.format(" [%05d-%05d]", index, index + LOG_BYTES_PER_ROW - 1)); - for (int i = 0; i < LOG_BYTES_PER_ROW; ++i) { - try { - final byte b = getByte(index++); - sb.append(String.format(" 0x%02x", b)); - } catch (IndexOutOfBoundsException ioob) { - sb.append(" "); - } - } - sb.append('\n'); - } - return sb.toString(); - } - - /** - * Get the integer id assigned to this ArrowBuf for debugging purposes. - * - * @return integer id - */ - public long getId() { - return id; - } - - /** - * Print information of this buffer into sb at the given indentation and verbosity - * level. - * - *

    It will include history if BaseAllocator.DEBUG is true and the - * verbosity.includeHistoricalLog are true. - */ - @VisibleForTesting - public void print(StringBuilder sb, int indent, Verbosity verbosity) { - CommonUtil.indent(sb, indent).append(toString()); - - if (historicalLog != null && verbosity.includeHistoricalLog) { - sb.append("\n"); - historicalLog.buildHistory(sb, indent + 1, verbosity.includeStackTraces); - } - } - - /** - * Print detailed information of this buffer into sb. - * - *

    Most information will only be present if BaseAllocator.DEBUG is true. - */ - public void print(StringBuilder sb, int indent) { - print(sb, indent, Verbosity.LOG_WITH_STACKTRACE); - } - - /** - * Get the index at which the next byte will be read from. - * - * @return reader index - */ - public long readerIndex() { - return readerIndex; - } - - /** - * Get the index at which next byte will be written to. - * - * @return writer index - */ - public long writerIndex() { - return writerIndex; - } - - /** - * Set the reader index for this ArrowBuf. - * - * @param readerIndex new reader index - * @return this ArrowBuf - */ - public ArrowBuf readerIndex(long readerIndex) { - this.readerIndex = readerIndex; - return this; - } - - /** - * Set the writer index for this ArrowBuf. - * - * @param writerIndex new writer index - * @return this ArrowBuf - */ - public ArrowBuf writerIndex(long writerIndex) { - this.writerIndex = writerIndex; - return this; - } - - /** - * Zero-out the bytes in this ArrowBuf starting at the given index for the given length. - * - * @param index index index (0 based relative to the portion of memory this ArrowBuf has access - * to) - * @param length length of bytes to zero-out - * @return this ArrowBuf - */ - public ArrowBuf setZero(long index, long length) { - if (length != 0) { - this.checkIndex(index, length); - MemoryUtil.setMemory(this.addr + index, length, (byte) 0); - } - return this; - } - - /** - * Sets all bits to one in the specified range. - * - * @param index index index (0 based relative to the portion of memory this ArrowBuf has access - * to) - * @param length length of bytes to set. - * @return this ArrowBuf - * @deprecated use {@link ArrowBuf#setOne(long, long)} instead. 
- */ - @Deprecated - public ArrowBuf setOne(int index, int length) { - if (length != 0) { - this.checkIndex(index, length); - MemoryUtil.setMemory(this.addr + index, length, (byte) 0xff); - } - return this; - } - - /** - * Sets all bits to one in the specified range. - * - * @param index index index (0 based relative to the portion of memory this ArrowBuf has access - * to) - * @param length length of bytes to set. - * @return this ArrowBuf - */ - public ArrowBuf setOne(long index, long length) { - if (length != 0) { - this.checkIndex(index, length); - MemoryUtil.setMemory(this.addr + index, length, (byte) 0xff); - } - return this; - } - - /** - * Returns this if size is less than {@link #capacity()}, otherwise delegates to - * {@link BufferManager#replace(ArrowBuf, long)} to get a new buffer. - */ - public ArrowBuf reallocIfNeeded(final long size) { - Preconditions.checkArgument(size >= 0, "reallocation size must be non-negative"); - if (this.capacity() >= size) { - return this; - } - if (bufferManager != null) { - return bufferManager.replace(this, size); - } else { - throw new UnsupportedOperationException( - "Realloc is only available in the context of operator's UDFs"); - } - } - - public ArrowBuf clear() { - this.readerIndex = this.writerIndex = 0; - return this; - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java deleted file mode 100644 index 20a89d0b7bf18..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java +++ /dev/null @@ -1,1066 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import com.google.errorprone.annotations.FormatMethod; -import com.google.errorprone.annotations.FormatString; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.IdentityHashMap; -import java.util.Map; -import java.util.Set; -import org.apache.arrow.memory.rounding.DefaultRoundingPolicy; -import org.apache.arrow.memory.rounding.RoundingPolicy; -import org.apache.arrow.memory.util.AssertionUtil; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.HistoricalLog; -import org.apache.arrow.memory.util.LargeMemoryUtil; -import org.apache.arrow.util.Preconditions; -import org.checkerframework.checker.initialization.qual.Initialized; -import org.checkerframework.checker.nullness.qual.KeyFor; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.immutables.value.Value; - -/** - * A base-class that implements all functionality of {@linkplain BufferAllocator}s. - * - *

    The class is abstract to enforce usage of {@linkplain RootAllocator}/{@linkplain - * ChildAllocator} facades. - */ -abstract class BaseAllocator extends Accountant implements BufferAllocator { - - public static final String DEBUG_ALLOCATOR = "arrow.memory.debug.allocator"; - public static final int DEBUG_LOG_LENGTH = 6; - public static final boolean DEBUG; - private static final org.slf4j.Logger logger = - org.slf4j.LoggerFactory.getLogger(BaseAllocator.class); - - // Initialize this before DEFAULT_CONFIG as DEFAULT_CONFIG will eventually initialize the - // allocation manager, - // which in turn allocates an ArrowBuf, which requires DEBUG to have been properly initialized - static { - // the system property takes precedence. - String propValue = System.getProperty(DEBUG_ALLOCATOR); - if (propValue != null) { - DEBUG = Boolean.parseBoolean(propValue); - } else { - DEBUG = false; - } - logger.info( - "Debug mode " - + (DEBUG - ? "enabled." - : "disabled. Enable with the VM option -Darrow.memory.debug.allocator=true.")); - } - - public static final Config DEFAULT_CONFIG = ImmutableConfig.builder().build(); - - // Package exposed for sharing between AllocatorManger and BaseAllocator objects - private final String name; - private final RootAllocator root; - private final Object DEBUG_LOCK = new Object(); - private final AllocationListener listener; - private final @Nullable BaseAllocator parentAllocator; - private final Map childAllocators; - private final ArrowBuf empty; - // members used purely for debugging - private final @Nullable IdentityHashMap childLedgers; - private final @Nullable IdentityHashMap reservations; - private final @Nullable HistoricalLog historicalLog; - private final RoundingPolicy roundingPolicy; - private final AllocationManager.@NonNull Factory allocationManagerFactory; - - private volatile boolean isClosed = false; // the allocator has been closed - - /** - * Initialize an allocator. - * - * @param parentAllocator parent allocator. 
null if defining a root allocator - * @param name name of this allocator - * @param config configuration including other options of this allocator - * @see Config - */ - @SuppressWarnings({"nullness:method.invocation", "nullness:cast.unsafe"}) - // {"call to hist(,...) not allowed on the given receiver.", "cast cannot be statically verified"} - protected BaseAllocator( - final @Nullable BaseAllocator parentAllocator, final String name, final Config config) - throws OutOfMemoryException { - super(parentAllocator, name, config.getInitReservation(), config.getMaxAllocation()); - - this.listener = config.getListener(); - this.allocationManagerFactory = config.getAllocationManagerFactory(); - - if (parentAllocator != null) { - this.root = parentAllocator.root; - empty = parentAllocator.empty; - } else if (this instanceof RootAllocator) { - this.root = (@Initialized RootAllocator) this; - empty = createEmpty(); - } else { - throw new IllegalStateException( - "An parent allocator must either carry a root or be the " + "root."); - } - - this.parentAllocator = parentAllocator; - this.name = name; - - this.childAllocators = Collections.synchronizedMap(new IdentityHashMap<>()); - - if (DEBUG) { - reservations = new IdentityHashMap<>(); - childLedgers = new IdentityHashMap<>(); - historicalLog = new HistoricalLog(DEBUG_LOG_LENGTH, "allocator[%s]", name); - hist("created by \"%s\", owned = %d", name, this.getAllocatedMemory()); - } else { - reservations = null; - historicalLog = null; - childLedgers = null; - } - this.roundingPolicy = config.getRoundingPolicy(); - } - - @Override - public AllocationListener getListener() { - return listener; - } - - @Override - public @Nullable BaseAllocator getParentAllocator() { - return parentAllocator; - } - - @Override - public Collection getChildAllocators() { - synchronized (childAllocators) { - return new HashSet<>(childAllocators.keySet()); - } - } - - private static String createErrorMsg( - final BufferAllocator allocator, final long 
rounded, final long requested) { - if (rounded != requested) { - return String.format( - "Unable to allocate buffer of size %d (rounded from %d) due to memory limit. Current " - + "allocation: %d", - rounded, requested, allocator.getAllocatedMemory()); - } else { - return String.format( - "Unable to allocate buffer of size %d due to memory limit. Current " + "allocation: %d", - rounded, allocator.getAllocatedMemory()); - } - } - - public static boolean isDebug() { - return DEBUG; - } - - @Override - public void assertOpen() { - if (AssertionUtil.ASSERT_ENABLED) { - if (isClosed) { - throw new IllegalStateException( - "Attempting operation on allocator when allocator is closed.\n" + toVerboseString()); - } - } - } - - @Override - public String getName() { - return name; - } - - @Override - public ArrowBuf getEmpty() { - return empty; - } - - /** - * For debug/verification purposes only. Allows an AllocationManager to tell the allocator that we - * have a new ledger associated with this allocator. - */ - void associateLedger(BufferLedger ledger) { - assertOpen(); - if (DEBUG) { - synchronized (DEBUG_LOCK) { - if (childLedgers != null) { - childLedgers.put(ledger, null); - } - } - } - } - - /** - * For debug/verification purposes only. Allows an AllocationManager to tell the allocator that we - * are removing a ledger associated with this allocator - */ - void dissociateLedger(BufferLedger ledger) { - assertOpen(); - if (DEBUG) { - synchronized (DEBUG_LOCK) { - Preconditions.checkState(childLedgers != null, "childLedgers must not be null"); - if (!childLedgers.containsKey(ledger)) { - throw new IllegalStateException("Trying to remove a child ledger that doesn't exist."); - } - childLedgers.remove(ledger); - } - } - } - - /** - * Track when a ChildAllocator of this BaseAllocator is closed. Used for debugging purposes. - * - * @param childAllocator The child allocator that has been closed. 
- */ - private void childClosed(final BaseAllocator childAllocator) { - assertOpen(); - - if (DEBUG) { - Preconditions.checkArgument(childAllocator != null, "child allocator can't be null"); - - synchronized (DEBUG_LOCK) { - final Object object = childAllocators.remove(childAllocator); - if (object == null) { - if (childAllocator.historicalLog != null) { - childAllocator.historicalLog.logHistory(logger); - } - throw new IllegalStateException( - "Child allocator[" - + childAllocator.name - + "] not found in parent allocator[" - + name - + "]'s childAllocators"); - } - } - } else { - childAllocators.remove(childAllocator); - } - listener.onChildRemoved(this, childAllocator); - } - - @Override - public ArrowBuf wrapForeignAllocation(ForeignAllocation allocation) { - assertOpen(); - final long size = allocation.getSize(); - listener.onPreAllocation(size); - AllocationOutcome outcome = this.allocateBytes(size); - if (!outcome.isOk()) { - if (listener.onFailedAllocation(size, outcome)) { - // Second try, in case the listener can do something about it - outcome = this.allocateBytes(size); - } - if (!outcome.isOk()) { - throw new OutOfMemoryException(createErrorMsg(this, size, size), outcome.getDetails()); - } - } - try { - final AllocationManager manager = new ForeignAllocationManager(this, allocation); - final BufferLedger ledger = manager.associate(this); - final ArrowBuf buf = - new ArrowBuf(ledger, /*bufferManager=*/ null, size, allocation.memoryAddress()); - buf.writerIndex(size); - listener.onAllocation(size); - return buf; - } catch (Throwable t) { - try { - releaseBytes(size); - } catch (Throwable e) { - t.addSuppressed(e); - } - try { - allocation.release0(); - } catch (Throwable e) { - t.addSuppressed(e); - } - throw t; - } - } - - @Override - public ArrowBuf buffer(final long initialRequestSize) { - assertOpen(); - - return buffer(initialRequestSize, null); - } - - @SuppressWarnings("nullness:dereference.of.nullable") // dereference of possibly-null reference - 
// allocationManagerFactory - private ArrowBuf createEmpty() { - return allocationManagerFactory.empty(); - } - - @Override - public ArrowBuf buffer(final long initialRequestSize, @Nullable BufferManager manager) { - assertOpen(); - - Preconditions.checkArgument(initialRequestSize >= 0, "the requested size must be non-negative"); - - if (initialRequestSize == 0) { - return getEmpty(); - } - - // round the request size according to the rounding policy - final long actualRequestSize = roundingPolicy.getRoundedSize(initialRequestSize); - - listener.onPreAllocation(actualRequestSize); - - AllocationOutcome outcome = this.allocateBytes(actualRequestSize); - if (!outcome.isOk()) { - if (listener.onFailedAllocation(actualRequestSize, outcome)) { - // Second try, in case the listener can do something about it - outcome = this.allocateBytes(actualRequestSize); - } - if (!outcome.isOk()) { - throw new OutOfMemoryException( - createErrorMsg(this, actualRequestSize, initialRequestSize), outcome.getDetails()); - } - } - - boolean success = false; - try { - ArrowBuf buffer = bufferWithoutReservation(actualRequestSize, manager); - success = true; - listener.onAllocation(actualRequestSize); - return buffer; - } catch (OutOfMemoryError e) { - throw e; - } finally { - if (!success) { - releaseBytes(actualRequestSize); - } - } - } - - /** - * Used by usual allocation as well as for allocating a pre-reserved buffer. Skips the typical - * accounting associated with creating a new buffer. - */ - private ArrowBuf bufferWithoutReservation(final long size, @Nullable BufferManager bufferManager) - throws OutOfMemoryException { - assertOpen(); - - final AllocationManager manager = newAllocationManager(size); - final BufferLedger ledger = manager.associate(this); // +1 ref cnt (required) - final ArrowBuf buffer = ledger.newArrowBuf(size, bufferManager); - - // make sure that our allocation is equal to what we expected. 
- Preconditions.checkArgument( - buffer.capacity() == size, - "Allocated capacity %d was not equal to requested capacity %d.", - buffer.capacity(), - size); - - return buffer; - } - - private AllocationManager newAllocationManager(long size) { - return newAllocationManager(this, size); - } - - private AllocationManager newAllocationManager(BaseAllocator accountingAllocator, long size) { - return allocationManagerFactory.create(accountingAllocator, size); - } - - @Override - public BufferAllocator getRoot() { - return root; - } - - @Override - public BufferAllocator newChildAllocator( - final String name, final long initReservation, final long maxAllocation) { - return newChildAllocator(name, this.listener, initReservation, maxAllocation); - } - - @Override - public BufferAllocator newChildAllocator( - final String name, - final AllocationListener listener, - final long initReservation, - final long maxAllocation) { - assertOpen(); - - final ChildAllocator childAllocator = - new ChildAllocator( - this, - name, - configBuilder() - .listener(listener) - .initReservation(initReservation) - .maxAllocation(maxAllocation) - .roundingPolicy(roundingPolicy) - .allocationManagerFactory(allocationManagerFactory) - .build()); - - if (DEBUG) { - synchronized (DEBUG_LOCK) { - childAllocators.put(childAllocator, childAllocator); - if (historicalLog != null) { - historicalLog.recordEvent( - "allocator[%s] created new child allocator[%s]", name, childAllocator.getName()); - } - } - } else { - childAllocators.put(childAllocator, childAllocator); - } - this.listener.onChildAdded(this, childAllocator); - - return childAllocator; - } - - @Override - public AllocationReservation newReservation() { - assertOpen(); - - return new Reservation(); - } - - @Override - public synchronized void close() { - /* - * Some owners may close more than once because of complex cleanup and shutdown - * procedures. 
- */ - if (isClosed) { - return; - } - - isClosed = true; - - StringBuilder outstandingChildAllocators = new StringBuilder(); - if (DEBUG) { - synchronized (DEBUG_LOCK) { - verifyAllocator(); - - // are there outstanding child allocators? - if (!childAllocators.isEmpty()) { - for (final BaseAllocator childAllocator : childAllocators.keySet()) { - if (childAllocator.isClosed) { - logger.warn( - String.format( - "Closed child allocator[%s] on parent allocator[%s]'s child list.\n%s", - childAllocator.name, name, toString())); - } - } - - throw new IllegalStateException( - String.format( - "Allocator[%s] closed with outstanding child allocators.\n%s", name, toString())); - } - - // are there outstanding buffers? - final int allocatedCount = childLedgers != null ? childLedgers.size() : 0; - if (allocatedCount > 0) { - throw new IllegalStateException( - String.format( - "Allocator[%s] closed with outstanding buffers allocated (%d).\n%s", - name, allocatedCount, toString())); - } - - if (reservations != null && reservations.size() != 0) { - throw new IllegalStateException( - String.format( - "Allocator[%s] closed with outstanding reservations (%d).\n%s", - name, reservations.size(), toString())); - } - } - } else { - if (!childAllocators.isEmpty()) { - outstandingChildAllocators.append("Outstanding child allocators : \n"); - synchronized (childAllocators) { - for (final BaseAllocator childAllocator : childAllocators.keySet()) { - outstandingChildAllocators.append(String.format(" %s", childAllocator.toString())); - } - } - } - } - - // Is there unaccounted-for outstanding allocation? - final long allocated = getAllocatedMemory(); - if (allocated > 0) { - if (parent != null && reservation > allocated) { - parent.releaseBytes(reservation - allocated); - } - String msg = - String.format( - "Memory was leaked by query. 
Memory leaked: (%d)\n%s%s", - allocated, outstandingChildAllocators.toString(), toString()); - logger.error(msg); - throw new IllegalStateException(msg); - } - - // we need to release our memory to our parent before we tell it we've closed. - super.close(); - - // Inform our parent allocator that we've closed - if (parentAllocator != null) { - parentAllocator.childClosed(this); - } - - if (DEBUG) { - if (historicalLog != null) { - historicalLog.recordEvent("closed"); - } - logger.debug(String.format("closed allocator[%s].", name)); - } - } - - @Override - public String toString() { - final Verbosity verbosity = - logger.isTraceEnabled() ? Verbosity.LOG_WITH_STACKTRACE : Verbosity.BASIC; - final StringBuilder sb = new StringBuilder(); - print(sb, 0, verbosity); - return sb.toString(); - } - - /** - * Provide a verbose string of the current allocator state. Includes the state of all child - * allocators, along with historical logs of each object and including stacktraces. - * - * @return A Verbose string of current allocator state. - */ - @Override - public String toVerboseString() { - final StringBuilder sb = new StringBuilder(); - print(sb, 0, Verbosity.LOG_WITH_STACKTRACE); - return sb.toString(); - } - - @FormatMethod - private void hist(@FormatString String noteFormat, Object... args) { - if (historicalLog != null) { - historicalLog.recordEvent(noteFormat, args); - } - } - - /** - * Verifies the accounting state of the allocator. Only works for DEBUG. - * - * @throws IllegalStateException when any problems are found - */ - void verifyAllocator() { - final IdentityHashMap seen = new IdentityHashMap<>(); - verifyAllocator(seen); - } - - /** - * Verifies the accounting state of the allocator (Only works for DEBUG) This overload is used for - * recursive calls, allowing for checking that ArrowBufs are unique across all allocators that are - * checked. 
- * - * @param buffersSeen a map of buffers that have already been seen when walking a tree of - * allocators - * @throws IllegalStateException when any problems are found - */ - private void verifyAllocator( - final IdentityHashMap buffersSeen) { - // The remaining tests can only be performed if we're in debug mode. - if (!DEBUG) { - return; - } - - synchronized (DEBUG_LOCK) { - final long allocated = getAllocatedMemory(); - - // verify my direct descendants - final Set childSet = childAllocators.keySet(); - for (final BaseAllocator childAllocator : childSet) { - childAllocator.verifyAllocator(buffersSeen); - } - - /* - * Verify my relationships with my descendants. - * - * The sum of direct child allocators' owned memory must be <= my allocated memory; my - * allocated memory also - * includes ArrowBuf's directly allocated by me. - */ - long childTotal = 0; - for (final BaseAllocator childAllocator : childSet) { - childTotal += Math.max(childAllocator.getAllocatedMemory(), childAllocator.reservation); - } - if (childTotal > getAllocatedMemory()) { - if (historicalLog != null) { - historicalLog.logHistory(logger); - } - logger.debug("allocator[" + name + "] child event logs BEGIN"); - for (final BaseAllocator childAllocator : childSet) { - if (childAllocator.historicalLog != null) { - childAllocator.historicalLog.logHistory(logger); - } - } - logger.debug("allocator[" + name + "] child event logs END"); - throw new IllegalStateException( - "Child allocators own more memory (" - + childTotal - + ") than their parent (name = " - + name - + " ) has allocated (" - + getAllocatedMemory() - + ')'); - } - - // Furthermore, the amount I've allocated should be that plus buffers I've allocated. - long bufferTotal = 0; - - final Set<@KeyFor("this.childLedgers") BufferLedger> ledgerSet = - childLedgers != null ? 
childLedgers.keySet() : null; - if (ledgerSet != null) { - for (final BufferLedger ledger : ledgerSet) { - if (!ledger.isOwningLedger()) { - continue; - } - - final AllocationManager am = ledger.getAllocationManager(); - /* - * Even when shared, ArrowBufs are rewrapped, so we should never see the same instance - * twice. - */ - final BaseAllocator otherOwner = buffersSeen.get(am); - if (otherOwner != null) { - throw new IllegalStateException( - "This allocator's ArrowBuf already owned by another " + "allocator"); - } - buffersSeen.put(am, this); - - bufferTotal += am.getSize(); - } - } - - // Preallocated space has to be accounted for - final Set<@KeyFor("this.reservations") Reservation> reservationSet = - reservations != null ? reservations.keySet() : null; - long reservedTotal = 0; - if (reservationSet != null) { - for (final Reservation reservation : reservationSet) { - if (!reservation.isUsed()) { - reservedTotal += reservation.getSize(); - } - } - } - - if (bufferTotal + reservedTotal + childTotal != getAllocatedMemory()) { - final StringBuilder sb = new StringBuilder(); - sb.append("allocator["); - sb.append(name); - sb.append("]\nallocated: "); - sb.append(Long.toString(allocated)); - sb.append(" allocated - (bufferTotal + reservedTotal + childTotal): "); - sb.append(Long.toString(allocated - (bufferTotal + reservedTotal + childTotal))); - sb.append('\n'); - - if (bufferTotal != 0) { - sb.append("buffer total: "); - sb.append(Long.toString(bufferTotal)); - sb.append('\n'); - dumpBuffers(sb, ledgerSet); - } - - if (childTotal != 0) { - sb.append("child total: "); - sb.append(Long.toString(childTotal)); - sb.append('\n'); - - for (final BaseAllocator childAllocator : childSet) { - sb.append("child allocator["); - sb.append(childAllocator.name); - sb.append("] owned "); - sb.append(Long.toString(childAllocator.getAllocatedMemory())); - sb.append('\n'); - } - } - - if (reservedTotal != 0) { - sb.append(String.format("reserved total : %d bytes.", reservedTotal)); 
- if (reservationSet != null) { - for (final Reservation reservation : reservationSet) { - if (reservation.historicalLog != null) { - reservation.historicalLog.buildHistory(sb, 0, true); - } - sb.append('\n'); - } - } - } - - logger.debug(sb.toString()); - - final long allocated2 = getAllocatedMemory(); - - if (allocated2 != allocated) { - throw new IllegalStateException( - String.format( - "allocator[%s]: allocated t1 (%d) + allocated t2 (%d). Someone released memory while in verification.", - name, allocated, allocated2)); - } - throw new IllegalStateException( - String.format( - "allocator[%s]: buffer space (%d) + prealloc space (%d) + child space (%d) != allocated (%d)", - name, bufferTotal, reservedTotal, childTotal, allocated)); - } - } - } - - void print(StringBuilder sb, int level, Verbosity verbosity) { - - CommonUtil.indent(sb, level) - .append("Allocator(") - .append(name) - .append(") ") - .append(reservation) - .append('/') - .append(getAllocatedMemory()) - .append('/') - .append(getPeakMemoryAllocation()) - .append('/') - .append(getLimit()) - .append(" (res/actual/peak/limit)") - .append('\n'); - - if (DEBUG) { - CommonUtil.indent(sb, level + 1) - .append(String.format("child allocators: %d\n", childAllocators.size())); - for (BaseAllocator child : childAllocators.keySet()) { - child.print(sb, level + 2, verbosity); - } - - CommonUtil.indent(sb, level + 1) - .append(String.format("ledgers: %d\n", childLedgers != null ? childLedgers.size() : 0)); - if (childLedgers != null) { - for (BufferLedger ledger : childLedgers.keySet()) { - ledger.print(sb, level + 2, verbosity); - } - } - - final Set<@KeyFor("this.reservations") Reservation> reservations = - this.reservations != null ? this.reservations.keySet() : null; - CommonUtil.indent(sb, level + 1) - .append( - String.format("reservations: %d\n", reservations != null ? 
reservations.size() : 0)); - if (reservations != null) { - for (final Reservation reservation : reservations) { - if (verbosity.includeHistoricalLog) { - if (reservation.historicalLog != null) { - reservation.historicalLog.buildHistory(sb, level + 3, true); - } - } - } - } - } - } - - private void dumpBuffers( - final StringBuilder sb, - final @Nullable Set<@KeyFor("this.childLedgers") BufferLedger> ledgerSet) { - if (ledgerSet != null) { - for (final BufferLedger ledger : ledgerSet) { - if (!ledger.isOwningLedger()) { - continue; - } - final AllocationManager am = ledger.getAllocationManager(); - sb.append("UnsafeDirectLittleEndian[identityHashCode == "); - sb.append(Integer.toString(System.identityHashCode(am))); - sb.append("] size "); - sb.append(Long.toString(am.getSize())); - sb.append('\n'); - } - } - } - - /** Enum for logging verbosity. */ - public enum Verbosity { - BASIC(false, false), // only include basic information - LOG(true, false), // include basic - LOG_WITH_STACKTRACE(true, true) // - ; - - public final boolean includeHistoricalLog; - public final boolean includeStackTraces; - - Verbosity(boolean includeHistoricalLog, boolean includeStackTraces) { - this.includeHistoricalLog = includeHistoricalLog; - this.includeStackTraces = includeStackTraces; - } - } - - /** - * Returns a default {@link Config} instance. - * - * @see ImmutableConfig.Builder - */ - public static Config defaultConfig() { - return DEFAULT_CONFIG; - } - - /** Returns a builder class for configuring BaseAllocator's options. */ - public static ImmutableConfig.Builder configBuilder() { - return ImmutableConfig.builder(); - } - - @Override - public RoundingPolicy getRoundingPolicy() { - return roundingPolicy; - } - - /** Config class of {@link BaseAllocator}. */ - @Value.Immutable - abstract static class Config { - /** Factory for creating {@link AllocationManager} instances. 
*/ - @Value.Default - AllocationManager.Factory getAllocationManagerFactory() { - return DefaultAllocationManagerOption.getDefaultAllocationManagerFactory(); - } - - /** Listener callback. Must be non-null. */ - @Value.Default - AllocationListener getListener() { - return AllocationListener.NOOP; - } - - /** Initial reservation size (in bytes) for this allocator. */ - @Value.Default - long getInitReservation() { - return 0; - } - - /** - * Max allocation size (in bytes) for this allocator, allocations past this limit fail. Can be - * modified after construction. - */ - @Value.Default - long getMaxAllocation() { - return Long.MAX_VALUE; - } - - /** The policy for rounding the buffer size. */ - @Value.Default - RoundingPolicy getRoundingPolicy() { - return DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY; - } - } - - /** - * Implementation of {@link AllocationReservation} that supports history tracking under - * {@linkplain #DEBUG} is true. - */ - public class Reservation implements AllocationReservation { - - private final @Nullable HistoricalLog historicalLog; - private long nBytes = 0; - private boolean used = false; - private boolean closed = false; - - /** - * Creates a new reservation. - * - *

    If {@linkplain #DEBUG} is true this will capture a historical log of events relevant to - * this Reservation. - */ - @SuppressWarnings("nullness:argument") // to handle null assignment on third party dependency: - // System.identityHashCode - public Reservation() { - if (DEBUG) { - historicalLog = - new HistoricalLog( - "Reservation[allocator[%s], %d]", name, System.identityHashCode(this)); - historicalLog.recordEvent("created"); - synchronized (DEBUG_LOCK) { - if (reservations != null) { - reservations.put(this, this); - } - } - } else { - historicalLog = null; - } - } - - @SuppressWarnings({"removal", "InlineMeSuggester"}) - @Deprecated(forRemoval = true) - @Override - public boolean add(final int nBytes) { - return add((long) nBytes); - } - - @Override - public boolean add(final long nBytes) { - assertOpen(); - - Preconditions.checkArgument(nBytes >= 0, "nBytes(%d) < 0", nBytes); - Preconditions.checkState( - !closed, "Attempt to increase reservation after reservation has been closed"); - Preconditions.checkState( - !used, "Attempt to increase reservation after reservation has been used"); - - // we round up to next power of two since all reservations are done in powers of two. This - // may overestimate the - // preallocation since someone may perceive additions to be power of two. If this becomes a - // problem, we can look - // at - // modifying this behavior so that we maintain what we reserve and what the user asked for - // and make sure to only - // round to power of two as necessary. 
- final long nBytesTwo = CommonUtil.nextPowerOfTwo(nBytes); - if (!reserve(nBytesTwo)) { - return false; - } - - this.nBytes += nBytesTwo; - return true; - } - - @Override - public ArrowBuf allocateBuffer() { - assertOpen(); - - Preconditions.checkState(!closed, "Attempt to allocate after closed"); - Preconditions.checkState(!used, "Attempt to allocate more than once"); - - final ArrowBuf arrowBuf = allocate(nBytes); - used = true; - return arrowBuf; - } - - @Override - public int getSize() { - return LargeMemoryUtil.checkedCastToInt(nBytes); - } - - @Override - public long getSizeLong() { - return nBytes; - } - - @Override - public boolean isUsed() { - return used; - } - - @Override - public boolean isClosed() { - return closed; - } - - @Override - public void close() { - assertOpen(); - - if (closed) { - return; - } - - if (DEBUG) { - if (!isClosed()) { - final Object object; - synchronized (DEBUG_LOCK) { - object = reservations != null ? reservations.remove(this) : null; - } - if (object == null) { - final StringBuilder sb = new StringBuilder(); - print(sb, 0, Verbosity.LOG_WITH_STACKTRACE); - logger.debug(sb.toString()); - throw new IllegalStateException( - String.format( - "Didn't find closing reservation[%d]", System.identityHashCode(this))); - } - - if (historicalLog != null) { - historicalLog.recordEvent("closed"); - } - } - } - - if (!used) { - releaseReservation(nBytes); - } - - closed = true; - } - - @SuppressWarnings({"removal", "InlineMeSuggester"}) - @Deprecated(forRemoval = true) - @Override - public boolean reserve(int nBytes) { - return reserve((long) nBytes); - } - - @Override - public boolean reserve(long nBytes) { - assertOpen(); - - final AllocationOutcome outcome = BaseAllocator.this.allocateBytes(nBytes); - - if (historicalLog != null) { - historicalLog.recordEvent("reserve(%d) => %s", nBytes, Boolean.toString(outcome.isOk())); - } - - return outcome.isOk(); - } - - /** - * Allocate a buffer of the requested size. - * - *

    The implementation of the allocator's inner class provides this. - * - * @param nBytes the size of the buffer requested - * @return the buffer, or null, if the request cannot be satisfied - */ - private ArrowBuf allocate(long nBytes) { - assertOpen(); - - boolean success = false; - - /* - * The reservation already added the requested bytes to the allocators owned and allocated - * bytes via reserve(). - * This ensures that they can't go away. But when we ask for the buffer here, that will add - * to the allocated bytes - * as well, so we need to return the same number back to avoid double-counting them. - */ - try { - final ArrowBuf arrowBuf = BaseAllocator.this.bufferWithoutReservation(nBytes, null); - - listener.onAllocation(nBytes); - if (historicalLog != null) { - historicalLog.recordEvent( - "allocate() => %s", String.format("ArrowBuf[%d]", arrowBuf.getId())); - } - success = true; - return arrowBuf; - } finally { - if (!success) { - releaseBytes(nBytes); - } - } - } - - /** - * Return the reservation back to the allocator without having used it. - * - * @param nBytes the size of the reservation - */ - private void releaseReservation(long nBytes) { - assertOpen(); - - releaseBytes(nBytes); - - if (historicalLog != null) { - historicalLog.recordEvent("releaseReservation(%d)", nBytes); - } - } - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BoundsChecking.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BoundsChecking.java deleted file mode 100644 index 50be9ad1fbc32..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BoundsChecking.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -/** - * Configuration class to determine if bounds checking should be turned on or off. - * - *

    Bounds checking is on by default. You can disable it by setting either the system property or - * the environmental variable to "true". The system property can be - * "arrow.enable_unsafe_memory_access" or "drill.enable_unsafe_memory_access". The latter is - * deprecated. The environmental variable is named "ARROW_ENABLE_UNSAFE_MEMORY_ACCESS". When both - * the system property and the environmental variable are set, the system property takes precedence. - */ -public class BoundsChecking { - - public static final boolean BOUNDS_CHECKING_ENABLED; - static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(BoundsChecking.class); - - static { - String envProperty = System.getenv("ARROW_ENABLE_UNSAFE_MEMORY_ACCESS"); - String oldProperty = System.getProperty("drill.enable_unsafe_memory_access"); - if (oldProperty != null) { - logger.warn( - "\"drill.enable_unsafe_memory_access\" has been renamed to \"arrow.enable_unsafe_memory_access\""); - logger.warn( - "\"arrow.enable_unsafe_memory_access\" can be set to: " - + " true (to not check) or false (to check, default)"); - } - String newProperty = System.getProperty("arrow.enable_unsafe_memory_access"); - - // The priority of determining the unsafe flag: - // 1. The system properties take precedence over the environmental variable. - // 2. The new system property takes precedence over the new system property. 
- String unsafeFlagValue = newProperty; - if (unsafeFlagValue == null) { - unsafeFlagValue = oldProperty; - } - if (unsafeFlagValue == null) { - unsafeFlagValue = envProperty; - } - - BOUNDS_CHECKING_ENABLED = !"true".equals(unsafeFlagValue); - } - - private BoundsChecking() {} -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferAllocator.java deleted file mode 100644 index a4db99f619323..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferAllocator.java +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import java.util.Collection; -import org.apache.arrow.memory.rounding.DefaultRoundingPolicy; -import org.apache.arrow.memory.rounding.RoundingPolicy; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** Wrapper class to deal with byte buffer allocation. Ensures users only use designated methods. */ -public interface BufferAllocator extends AutoCloseable { - - /** - * Allocate a new or reused buffer of the provided size. 
Note that the buffer may technically be - * larger than the requested size for rounding purposes. However, the buffer's capacity will be - * set to the configured size. - * - * @param size The size in bytes. - * @return a new ArrowBuf, or null if the request can't be satisfied - * @throws OutOfMemoryException if buffer cannot be allocated - */ - ArrowBuf buffer(long size); - - /** - * Allocate a new or reused buffer of the provided size. Note that the buffer may technically be - * larger than the requested size for rounding purposes. However, the buffer's capacity will be - * set to the configured size. - * - * @param size The size in bytes. - * @param manager A buffer manager to manage reallocation. - * @return a new ArrowBuf, or null if the request can't be satisfied - * @throws OutOfMemoryException if buffer cannot be allocated - */ - ArrowBuf buffer(long size, BufferManager manager); - - /** - * Get the root allocator of this allocator. If this allocator is already a root, return this - * directly. - * - * @return The root allocator - */ - BufferAllocator getRoot(); - - /** - * Create a new child allocator. - * - * @param name the name of the allocator. - * @param initReservation the initial space reservation (obtained from this allocator) - * @param maxAllocation maximum amount of space the new allocator can allocate - * @return the new allocator, or null if it can't be created - */ - BufferAllocator newChildAllocator(String name, long initReservation, long maxAllocation); - - /** - * Create a new child allocator. - * - * @param name the name of the allocator. 
- * @param listener allocation listener for the newly created child - * @param initReservation the initial space reservation (obtained from this allocator) - * @param maxAllocation maximum amount of space the new allocator can allocate - * @return the new allocator, or null if it can't be created - */ - BufferAllocator newChildAllocator( - String name, AllocationListener listener, long initReservation, long maxAllocation); - - /** - * Close and release all buffers generated from this buffer pool. - * - *

    When assertions are on, complains if there are any outstanding buffers; to avoid that, - * release all buffers before the allocator is closed. - */ - @Override - void close(); - - /** - * Returns the amount of memory currently allocated from this allocator. - * - * @return the amount of memory currently allocated - */ - long getAllocatedMemory(); - - /** - * Return the current maximum limit this allocator imposes. - * - * @return Limit in number of bytes. - */ - long getLimit(); - - /** - * Return the initial reservation. - * - * @return reservation in bytes. - */ - long getInitReservation(); - - /** - * Set the maximum amount of memory this allocator is allowed to allocate. - * - * @param newLimit The new Limit to apply to allocations - */ - void setLimit(long newLimit); - - /** - * Returns the peak amount of memory allocated from this allocator. - * - * @return the peak amount of memory allocated - */ - long getPeakMemoryAllocation(); - - /** - * Returns the amount of memory that can probably be allocated at this moment without exceeding - * this or any parents allocation maximum. - * - * @return Headroom in bytes - */ - long getHeadroom(); - - /** - * Forcibly allocate bytes. Returns whether the allocation fit within limits. - * - * @param size to increase - * @return Whether the allocation fit within limits. - */ - boolean forceAllocate(long size); - - /** - * Release bytes from this allocator. - * - * @param size to release - */ - void releaseBytes(long size); - - /** - * Returns the allocation listener used by this allocator. - * - * @return the {@link AllocationListener} instance. Or {@link AllocationListener#NOOP} by default - * if no listener is configured when this allocator was created. - */ - AllocationListener getListener(); - - /** - * Returns the parent allocator. - * - * @return parent allocator - */ - @Nullable - BufferAllocator getParentAllocator(); - - /** - * Returns the set of child allocators. 
- * - * @return set of child allocators - */ - Collection getChildAllocators(); - - /** - * Create an allocation reservation. A reservation is a way of building up a request for a buffer - * whose size is not known in advance. See - * - * @return the newly created reservation - * @see AllocationReservation - */ - AllocationReservation newReservation(); - - /** - * Get a reference to the empty buffer associated with this allocator. Empty buffers are special - * because we don't worry about them leaking or managing reference counts on them since they don't - * actually point to any memory. - * - * @return the empty buffer - */ - ArrowBuf getEmpty(); - - /** - * Return the name of this allocator. This is a human readable name that can help debugging. - * Typically provides coordinates about where this allocator was created - * - * @return the name of the allocator - */ - String getName(); - - /** - * Return whether or not this allocator (or one if its parents) is over its limits. In the case - * that an allocator is over its limit, all consumers of that allocator should aggressively try to - * address the overlimit situation. - * - * @return whether or not this allocator (or one if its parents) is over its limits - */ - boolean isOverLimit(); - - /** - * Return a verbose string describing this allocator. If in DEBUG mode, this will also include - * relevant stacktraces and historical logs for underlying objects - * - * @return A very verbose description of the allocator hierarchy. - */ - String toVerboseString(); - - /** - * Asserts (using java assertions) that the provided allocator is currently open. If assertions - * are disabled, this is a no-op. - */ - void assertOpen(); - - /** Gets the rounding policy of the allocator. */ - default RoundingPolicy getRoundingPolicy() { - return DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY; - } - - /** - * EXPERIMENTAL: Wrap an allocation created outside this BufferAllocator. - * - *

    This is useful to integrate allocations from native code into the same memory management - * framework as Java-allocated buffers, presenting users a consistent API. The created buffer will - * be tracked by this allocator and can be transferred like Java-allocated buffers. - * - *

    The underlying allocation will be closed when all references to the buffer are released. If - * this method throws, the underlying allocation will also be closed. - * - * @param allocation The underlying allocation. - */ - default ArrowBuf wrapForeignAllocation(ForeignAllocation allocation) { - throw new UnsupportedOperationException(); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java deleted file mode 100644 index b562a421e7f89..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java +++ /dev/null @@ -1,511 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory; - -import java.util.IdentityHashMap; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.HistoricalLog; -import org.apache.arrow.util.Preconditions; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** - * The reference manager that binds an {@link AllocationManager} to {@link BufferAllocator} and a - * set of {@link ArrowBuf}. The set of ArrowBufs managed by this reference manager share a common - * fate (same reference count). - */ -public class BufferLedger implements ValueWithKeyIncluded, ReferenceManager { - private final @Nullable IdentityHashMap buffers = - BaseAllocator.DEBUG ? new IdentityHashMap<>() : null; - private static final AtomicLong LEDGER_ID_GENERATOR = new AtomicLong(0); - // unique ID assigned to each ledger - private final long ledgerId = LEDGER_ID_GENERATOR.incrementAndGet(); - private final AtomicInteger bufRefCnt = new AtomicInteger(0); // start at zero so we can - // manage request for retain - // correctly - private final long lCreationTime = System.nanoTime(); - private final BufferAllocator allocator; - private final AllocationManager allocationManager; - private final @Nullable HistoricalLog historicalLog = - BaseAllocator.DEBUG - ? new HistoricalLog(BaseAllocator.DEBUG_LOG_LENGTH, "BufferLedger[%d]", 1) - : null; - private volatile long lDestructionTime = 0; - - BufferLedger(final BufferAllocator allocator, final AllocationManager allocationManager) { - this.allocator = allocator; - this.allocationManager = allocationManager; - } - - boolean isOwningLedger() { - return this == allocationManager.getOwningLedger(); - } - - @Override - public BufferAllocator getKey() { - return allocator; - } - - /** - * Get the buffer allocator associated with this reference manager. 
- * - * @return buffer allocator - */ - @Override - public BufferAllocator getAllocator() { - return allocator; - } - - /** - * Get this ledger's reference count. - * - * @return reference count - */ - @Override - public int getRefCount() { - return bufRefCnt.get(); - } - - /** - * Increment the ledger's reference count for the associated underlying memory chunk. All - * ArrowBufs managed by this ledger will share the ref count. - */ - void increment() { - bufRefCnt.incrementAndGet(); - } - - /** - * Decrement the ledger's reference count by 1 for the associated underlying memory chunk. If the - * reference count drops to 0, it implies that no ArrowBufs managed by this reference manager need - * access to the memory chunk. In that case, the ledger should inform the allocation manager about - * releasing its ownership for the chunk. Whether or not the memory chunk will be released is - * something that {@link AllocationManager} will decide since tracks the usage of memory chunk - * across multiple reference managers and allocators. - * - * @return true if the new ref count has dropped to 0, false otherwise - */ - @Override - public boolean release() { - return release(1); - } - - /** - * Decrement the ledger's reference count for the associated underlying memory chunk. If the - * reference count drops to 0, it implies that no ArrowBufs managed by this reference manager need - * access to the memory chunk. In that case, the ledger should inform the allocation manager about - * releasing its ownership for the chunk. Whether or not the memory chunk will be released is - * something that {@link AllocationManager} will decide since tracks the usage of memory chunk - * across multiple reference managers and allocators. 
- * - * @param decrement amount to decrease the reference count by - * @return true if the new ref count has dropped to 0, false otherwise - */ - @Override - public boolean release(int decrement) { - Preconditions.checkState( - decrement >= 1, "ref count decrement should be greater than or equal to 1"); - // decrement the ref count - final int refCnt = decrement(decrement); - if (historicalLog != null) { - historicalLog.recordEvent("release(%d). original value: %d", decrement, refCnt + decrement); - } - // the new ref count should be >= 0 - Preconditions.checkState(refCnt >= 0, "RefCnt has gone negative"); - return refCnt == 0; - } - - /** - * Decrement the ledger's reference count for the associated underlying memory chunk. If the - * reference count drops to 0, it implies that no ArrowBufs managed by this reference manager need - * access to the memory chunk. In that case, the ledger should inform the allocation manager about - * releasing its ownership for the chunk. Whether or not the memory chunk will be released is - * something that {@link AllocationManager} will decide since tracks the usage of memory chunk - * across multiple reference managers and allocators. - * - * @param decrement amount to decrease the reference count by - * @return the new reference count - */ - private int decrement(int decrement) { - allocator.assertOpen(); - final int outcome; - synchronized (allocationManager) { - outcome = bufRefCnt.addAndGet(-decrement); - if (outcome == 0) { - lDestructionTime = System.nanoTime(); - // refcount of this reference manager has dropped to 0 - // inform the allocation manager that this reference manager - // no longer holds references to underlying memory - allocationManager.release(this); - } - } - return outcome; - } - - /** Increment the ledger's reference count for associated underlying memory chunk by 1. 
*/ - @Override - public void retain() { - retain(1); - } - - /** - * Increment the ledger's reference count for associated underlying memory chunk by the given - * amount. - * - * @param increment amount to increase the reference count by - */ - @Override - public void retain(int increment) { - Preconditions.checkArgument(increment > 0, "retain(%s) argument is not positive", increment); - if (historicalLog != null) { - historicalLog.recordEvent("retain(%d)", increment); - } - final int originalReferenceCount = bufRefCnt.getAndAdd(increment); - Preconditions.checkArgument(originalReferenceCount > 0); - } - - /** - * Derive a new ArrowBuf from a given source ArrowBuf. The new derived ArrowBuf will share the - * same reference count as rest of the ArrowBufs associated with this ledger. This operation is - * typically used for slicing -- creating new ArrowBufs from a compound ArrowBuf starting at a - * particular index in the underlying memory and having access to a particular length (in bytes) - * of data in memory chunk. - * - *

    This method is also used as a helper for transferring ownership and retain to target - * allocator. - * - * @param sourceBuffer source ArrowBuf - * @param index index (relative to source ArrowBuf) new ArrowBuf should be derived from - * @param length length (bytes) of data in underlying memory that derived buffer will have access - * to in underlying memory - * @return derived buffer - */ - @Override - public ArrowBuf deriveBuffer(final ArrowBuf sourceBuffer, long index, long length) { - /* - * Usage type 1 for deriveBuffer(): - * Used for slicing where index represents a relative index in the source ArrowBuf - * as the slice start point. This is why we need to add the source buffer offset - * to compute the start virtual address of derived buffer within the - * underlying chunk. - * - * Usage type 2 for deriveBuffer(): - * Used for retain(target allocator) and transferOwnership(target allocator) - * where index is 0 since these operations simply create a new ArrowBuf associated - * with another combination of allocator buffer ledger for the same underlying memory - */ - - // the memory address stored inside ArrowBuf is its starting virtual - // address in the underlying memory chunk from the point it has - // access. so it is already accounting for the offset of the source buffer - // we can simply add the index to get the starting address of new buffer. - final long derivedBufferAddress = sourceBuffer.memoryAddress() + index; - - // create new ArrowBuf - final ArrowBuf derivedBuf = - new ArrowBuf( - this, - null, - length, // length (in bytes) in the underlying memory chunk for this new ArrowBuf - derivedBufferAddress // starting byte address in the underlying memory for this new - // ArrowBuf - ); - - // logging - return loggingArrowBufHistoricalLog(derivedBuf); - } - - /** - * Used by an allocator to create a new ArrowBuf. 
This is provided as a helper method for the - * allocator when it allocates a new memory chunk using a new instance of allocation manager and - * creates a new reference manager too. - * - * @param length The length in bytes that this ArrowBuf will provide access to. - * @param manager An optional BufferManager argument that can be used to manage expansion of this - * ArrowBuf - * @return A new ArrowBuf that shares references with all ArrowBufs associated with this - * BufferLedger - */ - ArrowBuf newArrowBuf(final long length, final @Nullable BufferManager manager) { - allocator.assertOpen(); - - // the start virtual address of the ArrowBuf will be same as address of memory chunk - final long startAddress = allocationManager.memoryAddress(); - - // create ArrowBuf - final ArrowBuf buf = new ArrowBuf(this, manager, length, startAddress); - - // logging - return loggingArrowBufHistoricalLog(buf); - } - - private ArrowBuf loggingArrowBufHistoricalLog(ArrowBuf buf) { - if (historicalLog != null) { - historicalLog.recordEvent( - "ArrowBuf(BufferLedger, BufferAllocator[%s], " - + "UnsafeDirectLittleEndian[identityHashCode == " - + "%d](%s)) => ledger hc == %d", - allocator.getName(), - System.identityHashCode(buf), - buf.toString(), - System.identityHashCode(this)); - Preconditions.checkState(buffers != null, "IdentityHashMap of buffers must not be null"); - synchronized (buffers) { - buffers.put(buf, null); - } - } - - return buf; - } - - /** - * Create a new ArrowBuf that is associated with an alternative allocator for the purposes of - * memory ownership and accounting. This has no impact on the reference counting for the current - * ArrowBuf except in the situation where the passed in Allocator is the same as the current - * buffer. - * - *

    This operation has no impact on the reference count of this ArrowBuf. The newly created - * ArrowBuf with either have a reference count of 1 (in the case that this is the first time this - * memory is being associated with the target allocator or in other words allocation manager - * currently doesn't hold a mapping for the target allocator) or the current value of the - * reference count for the target allocator-reference manager combination + 1 in the case that the - * provided allocator already had an association to this underlying memory. - * - * @param srcBuffer source ArrowBuf - * @param target The target allocator to create an association with. - * @return A new ArrowBuf which shares the same underlying memory as the provided ArrowBuf. - */ - @Override - public ArrowBuf retain(final ArrowBuf srcBuffer, BufferAllocator target) { - - if (historicalLog != null) { - historicalLog.recordEvent("retain(%s)", target.getName()); - } - - // the call to associate will return the corresponding reference manager (buffer ledger) for - // the target allocator. if the allocation manager didn't already have a mapping - // for the target allocator, it will create one and return the new reference manager with a - // reference count of 1. Thus the newly created buffer in this case will have a ref count of 1. 
- // alternatively, if there was already a mapping for in - // allocation manager, the ref count of the new buffer will be targetrefmanager.refcount() + 1 - // and this will be true for all the existing buffers currently managed by targetrefmanager - final BufferLedger targetRefManager = allocationManager.associate(target); - // create a new ArrowBuf to associate with new allocator and target ref manager - final long targetBufLength = srcBuffer.capacity(); - ArrowBuf targetArrowBuf = targetRefManager.deriveBuffer(srcBuffer, 0, targetBufLength); - targetArrowBuf.readerIndex(srcBuffer.readerIndex()); - targetArrowBuf.writerIndex(srcBuffer.writerIndex()); - return targetArrowBuf; - } - - /** - * Transfer any balance the current ledger has to the target ledger. In the case that the current - * ledger holds no memory, no transfer is made to the new ledger. - * - * @param targetReferenceManager The ledger to transfer ownership account to. - * @return Whether transfer fit within target ledgers limits. - */ - boolean transferBalance(final @Nullable ReferenceManager targetReferenceManager) { - Preconditions.checkArgument( - targetReferenceManager != null, "Expecting valid target reference manager"); - boolean overlimit = false; - if (targetReferenceManager != null) { - final BufferAllocator targetAllocator = targetReferenceManager.getAllocator(); - Preconditions.checkArgument( - allocator.getRoot() == targetAllocator.getRoot(), - "You can only transfer between two allocators that share the same root."); - - allocator.assertOpen(); - targetReferenceManager.getAllocator().assertOpen(); - - // if we're transferring to ourself, just return. - if (targetReferenceManager == this) { - return true; - } - - // since two balance transfers out from the allocation manager could cause incorrect - // accounting, we need to ensure - // that this won't happen by synchronizing on the allocation manager instance. 
- synchronized (allocationManager) { - if (allocationManager.getOwningLedger() != this) { - // since the calling reference manager is not the owning - // reference manager for the underlying memory, transfer is - // a NO-OP - return true; - } - - if (BaseAllocator.DEBUG && this.historicalLog != null) { - this.historicalLog.recordEvent( - "transferBalance(%s)", targetReferenceManager.getAllocator().getName()); - } - - overlimit = targetAllocator.forceAllocate(allocationManager.getSize()); - allocator.releaseBytes(allocationManager.getSize()); - // since the transfer can only happen from the owning reference manager, - // we need to set the target ref manager as the new owning ref manager - // for the chunk of memory in allocation manager - allocationManager.setOwningLedger((BufferLedger) targetReferenceManager); - } - } - return overlimit; - } - - /** - * Transfer the memory accounting ownership of this ArrowBuf to another allocator. This will - * generate a new ArrowBuf that carries an association with the underlying memory of this - * ArrowBuf. If this ArrowBuf is connected to the owning BufferLedger of this memory, that memory - * ownership/accounting will be transferred to the target allocator. If this ArrowBuf does not - * currently own the memory underlying it (and is only associated with it), this does not transfer - * any ownership to the newly created ArrowBuf. - * - *

    This operation has no impact on the reference count of this ArrowBuf. The newly created - * ArrowBuf with either have a reference count of 1 (in the case that this is the first time this - * memory is being associated with the new allocator) or the current value of the reference count - * for the other AllocationManager/BufferLedger combination + 1 in the case that the provided - * allocator already had an association to this underlying memory. - * - *

    Transfers will always succeed, even if that puts the other allocator into an overlimit - * situation. This is possible due to the fact that the original owning allocator may have - * allocated this memory out of a local reservation whereas the target allocator may need to - * allocate new memory from a parent or RootAllocator. This operation is done n a mostly-lockless - * but consistent manner. As such, the overlimit==true situation could occur slightly prematurely - * to an actual overlimit==true condition. This is simply conservative behavior which means we may - * return overlimit slightly sooner than is necessary. - * - * @param target The allocator to transfer ownership to. - * @return A new transfer result with the impact of the transfer (whether it was overlimit) as - * well as the newly created ArrowBuf. - */ - @Override - public TransferResult transferOwnership(final ArrowBuf srcBuffer, final BufferAllocator target) { - // the call to associate will return the corresponding reference manager (buffer ledger) for - // the target allocator. if the allocation manager didn't already have a mapping - // for the target allocator, it will create one and return the new reference manager with a - // reference count of 1. Thus the newly created buffer in this case will have a ref count of 1. 
- // alternatively, if there was already a mapping for in - // allocation manager, the ref count of the new buffer will be targetrefmanager.refcount() + 1 - // and this will be true for all the existing buffers currently managed by targetrefmanager - final BufferLedger targetRefManager = allocationManager.associate(target); - // create a new ArrowBuf to associate with new allocator and target ref manager - final long targetBufLength = srcBuffer.capacity(); - final ArrowBuf targetArrowBuf = targetRefManager.deriveBuffer(srcBuffer, 0, targetBufLength); - targetArrowBuf.readerIndex(srcBuffer.readerIndex()); - targetArrowBuf.writerIndex(srcBuffer.writerIndex()); - final boolean allocationFit = transferBalance(targetRefManager); - return new TransferResult(allocationFit, targetArrowBuf); - } - - /** The outcome of a Transfer. */ - public static class TransferResult implements OwnershipTransferResult { - - // Whether this transfer fit within the target allocator's capacity. - final boolean allocationFit; - - // The newly created buffer associated with the target allocator - public final ArrowBuf buffer; - - private TransferResult(boolean allocationFit, ArrowBuf buffer) { - this.allocationFit = allocationFit; - this.buffer = buffer; - } - - @Override - public ArrowBuf getTransferredBuffer() { - return buffer; - } - - @Override - public boolean getAllocationFit() { - return allocationFit; - } - } - - /** - * Total size (in bytes) of memory underlying this reference manager. - * - * @return Size (in bytes) of the memory chunk - */ - @Override - public long getSize() { - return allocationManager.getSize(); - } - - /** - * How much memory is accounted for by this ledger. This is either getSize() if this is the owning - * ledger for the memory or zero in the case that this is not the owning ledger associated with - * this memory. - * - * @return Amount of accounted(owned) memory associated with this ledger. 
- */ - @Override - public long getAccountedSize() { - synchronized (allocationManager) { - if (allocationManager.getOwningLedger() == this) { - return allocationManager.getSize(); - } else { - return 0; - } - } - } - - /** - * Print the current ledger state to the provided StringBuilder. - * - * @param sb The StringBuilder to populate. - * @param indent The level of indentation to position the data. - * @param verbosity The level of verbosity to print. - */ - void print(StringBuilder sb, int indent, BaseAllocator.Verbosity verbosity) { - CommonUtil.indent(sb, indent) - .append("ledger[") - .append(ledgerId) - .append("] allocator: ") - .append(allocator.getName()) - .append("), isOwning: ") - .append(", size: ") - .append(", references: ") - .append(bufRefCnt.get()) - .append(", life: ") - .append(lCreationTime) - .append("..") - .append(lDestructionTime) - .append(", allocatorManager: [") - .append(", life: "); - - if (!BaseAllocator.DEBUG) { - sb.append("]\n"); - } else { - Preconditions.checkArgument(buffers != null, "IdentityHashMap of buffers must not be null"); - synchronized (buffers) { - sb.append("] holds ").append(buffers.size()).append(" buffers. \n"); - for (ArrowBuf buf : buffers.keySet()) { - buf.print(sb, indent + 2, verbosity); - sb.append('\n'); - } - } - } - } - - /** - * Get the {@link AllocationManager} used by this BufferLedger. - * - * @return The AllocationManager used by this BufferLedger. - */ - public AllocationManager getAllocationManager() { - return allocationManager; - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java deleted file mode 100644 index e73f6c119e7ea..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -/** - * Manages a list of {@link ArrowBuf}s that can be reallocated as needed. Upon re-allocation the old - * buffer will be freed. Managing a list of these buffers prevents some parts of the system from - * needing to define a correct location to place the final call to free them. - */ -public interface BufferManager extends AutoCloseable { - - /** - * Replace an old buffer with a new version at least of the provided size. Does not copy data. - * - * @param old Old Buffer that the user is no longer going to use. - * @param newSize Size of new replacement buffer. - * @return A new version of the buffer. - */ - ArrowBuf replace(ArrowBuf old, long newSize); - - /** - * Get a managed buffer of indeterminate size. - * - * @return A buffer. - */ - ArrowBuf getManagedBuffer(); - - /** - * Get a managed buffer of at least a certain size. 
- * - * @param size The desired size - * @return A buffer - */ - ArrowBuf getManagedBuffer(long size); - - @Override - void close(); -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java deleted file mode 100644 index f595858ebf531..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import java.io.IOException; -import java.net.URL; -import java.util.Enumeration; -import java.util.LinkedHashSet; -import java.util.Set; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Static method to ensure we have a RootAllocator on the classpath and report which one is used. 
- */ -final class CheckAllocator { - private static final Logger logger = LoggerFactory.getLogger(CheckAllocator.class); - // unique package names needed by JPMS module naming - private static final String ALLOCATOR_PATH_CORE = - "org/apache/arrow/memory/DefaultAllocationManagerFactory.class"; - private static final String ALLOCATOR_PATH_UNSAFE = - "org/apache/arrow/memory/unsafe/DefaultAllocationManagerFactory.class"; - private static final String ALLOCATOR_PATH_NETTY = - "org/apache/arrow/memory/netty/DefaultAllocationManagerFactory.class"; - - private CheckAllocator() {} - - static String check() { - Set urls = scanClasspath(); - URL rootAllocator = assertOnlyOne(urls); - reportResult(rootAllocator); - if (rootAllocator.getPath().contains("memory-core") - || rootAllocator.getPath().contains("/org/apache/arrow/memory/core/")) { - return "org.apache.arrow.memory.DefaultAllocationManagerFactory"; - } else if (rootAllocator.getPath().contains("memory-unsafe") - || rootAllocator.getPath().contains("/org/apache/arrow/memory/unsafe/")) { - return "org.apache.arrow.memory.unsafe.DefaultAllocationManagerFactory"; - } else if (rootAllocator.getPath().contains("memory-netty") - || rootAllocator.getPath().contains("/org/apache/arrow/memory/netty/")) { - return "org.apache.arrow.memory.netty.DefaultAllocationManagerFactory"; - } else { - throw new IllegalStateException( - "Unknown allocation manager type to infer. 
Current: " + rootAllocator.getPath()); - } - } - - @SuppressWarnings("URLEqualsHashCode") - private static Set scanClasspath() { - // LinkedHashSet appropriate here because it preserves insertion order - // during iteration - Set allocatorPathSet = new LinkedHashSet<>(); - try { - ClassLoader allocatorClassLoader = CheckAllocator.class.getClassLoader(); - Enumeration paths; - if (allocatorClassLoader == null) { - paths = ClassLoader.getSystemResources(ALLOCATOR_PATH_CORE); - if (!paths.hasMoreElements()) { - paths = ClassLoader.getSystemResources(ALLOCATOR_PATH_UNSAFE); - } - if (!paths.hasMoreElements()) { - paths = ClassLoader.getSystemResources(ALLOCATOR_PATH_NETTY); - } - } else { - paths = allocatorClassLoader.getResources(ALLOCATOR_PATH_CORE); - if (!paths.hasMoreElements()) { - paths = allocatorClassLoader.getResources(ALLOCATOR_PATH_UNSAFE); - } - if (!paths.hasMoreElements()) { - paths = allocatorClassLoader.getResources(ALLOCATOR_PATH_NETTY); - } - } - while (paths.hasMoreElements()) { - URL path = paths.nextElement(); - allocatorPathSet.add(path); - } - } catch (IOException ioe) { - logger.error("Error getting resources from path", ioe); - } - return allocatorPathSet; - } - - private static void reportResult(URL rootAllocator) { - String path = rootAllocator.getPath(); - String subPath = path.substring(path.indexOf("memory")); - logger.info("Using DefaultAllocationManager at {}", subPath); - } - - private static URL assertOnlyOne(Set urls) { - if (urls.size() > 1) { - logger.warn("More than one DefaultAllocationManager on classpath. Choosing first found"); - } - if (urls.isEmpty()) { - throw new RuntimeException( - "No DefaultAllocationManager found on classpath. Can't allocate Arrow buffers." 
- + " Please consider adding arrow-memory-netty or arrow-memory-unsafe as a dependency."); - } - return urls.iterator().next(); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java deleted file mode 100644 index 50f33d3f021c7..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -/** - * Child allocator class. Only slightly different from the {@link RootAllocator}, in that these - * can't be created directly, but must be obtained from {@link - * BufferAllocator#newChildAllocator(String, AllocationListener, long, long)}. - * - *

    Child allocators can only be created by the root, or other children, so this class is package - * private. - */ -class ChildAllocator extends BaseAllocator { - - /** - * Constructor. - * - * @param parentAllocator parent allocator -- the one creating this child - * @param name the name of this child allocator - * @param config configuration of this child allocator - */ - ChildAllocator(BaseAllocator parentAllocator, String name, Config config) { - super(parentAllocator, name, config); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerOption.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerOption.java deleted file mode 100644 index b5e508017155a..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/DefaultAllocationManagerOption.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import java.lang.reflect.Field; -import org.apache.arrow.util.VisibleForTesting; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** A class for choosing the default allocation manager. 
*/ -public class DefaultAllocationManagerOption { - - /** The environmental variable to set the default allocation manager type. */ - public static final String ALLOCATION_MANAGER_TYPE_ENV_NAME = "ARROW_ALLOCATION_MANAGER_TYPE"; - - /** The system property to set the default allocation manager type. */ - public static final String ALLOCATION_MANAGER_TYPE_PROPERTY_NAME = - "arrow.allocation.manager.type"; - - static final org.slf4j.Logger LOGGER = - org.slf4j.LoggerFactory.getLogger(DefaultAllocationManagerOption.class); - - /** The default allocation manager factory. */ - private static AllocationManager.@Nullable Factory DEFAULT_ALLOCATION_MANAGER_FACTORY = null; - - /** The allocation manager type. */ - public enum AllocationManagerType { - /** Netty based allocation manager. */ - Netty, - - /** Unsafe based allocation manager. */ - Unsafe, - - /** Unknown type. */ - Unknown, - } - - /** - * Returns the default allocation manager type. - * - * @return the default allocation manager type. 
- */ - @SuppressWarnings("nullness:argument") // enum types valueOf are implicitly non-null - @VisibleForTesting - public static AllocationManagerType getDefaultAllocationManagerType() { - AllocationManagerType ret = AllocationManagerType.Unknown; - - try { - String envValue = System.getenv(ALLOCATION_MANAGER_TYPE_ENV_NAME); - ret = AllocationManagerType.valueOf(envValue); - } catch (IllegalArgumentException | NullPointerException e) { - // ignore the exception, and make the allocation manager type remain unchanged - } - - // system property takes precedence - try { - String propValue = System.getProperty(ALLOCATION_MANAGER_TYPE_PROPERTY_NAME); - ret = AllocationManagerType.valueOf(propValue); - } catch (IllegalArgumentException | NullPointerException e) { - // ignore the exception, and make the allocation manager type remain unchanged - } - return ret; - } - - static AllocationManager.Factory getDefaultAllocationManagerFactory() { - if (DEFAULT_ALLOCATION_MANAGER_FACTORY != null) { - return DEFAULT_ALLOCATION_MANAGER_FACTORY; - } - AllocationManagerType type = getDefaultAllocationManagerType(); - switch (type) { - case Netty: - DEFAULT_ALLOCATION_MANAGER_FACTORY = getNettyFactory(); - break; - case Unsafe: - DEFAULT_ALLOCATION_MANAGER_FACTORY = getUnsafeFactory(); - break; - case Unknown: - LOGGER.info("allocation manager type not specified, using netty as the default type"); - DEFAULT_ALLOCATION_MANAGER_FACTORY = getFactory(CheckAllocator.check()); - break; - default: - throw new IllegalStateException("Unknown allocation manager type: " + type); - } - return DEFAULT_ALLOCATION_MANAGER_FACTORY; - } - - @SuppressWarnings({"nullness:argument", "nullness:return"}) - // incompatible argument for parameter obj of Field.get - // Static member qualifying type may not be annotated - private static AllocationManager.Factory getFactory(String clazzName) { - try { - Field field = Class.forName(clazzName).getDeclaredField("FACTORY"); - field.setAccessible(true); - return 
(AllocationManager.Factory) field.get(null); - } catch (Exception e) { - throw new RuntimeException("Unable to instantiate Allocation Manager for " + clazzName, e); - } - } - - private static AllocationManager.Factory getUnsafeFactory() { - try { - return getFactory("org.apache.arrow.memory.unsafe.UnsafeAllocationManager"); - } catch (RuntimeException e) { - throw new RuntimeException( - "Please add arrow-memory-unsafe to your classpath," - + " No DefaultAllocationManager found to instantiate an UnsafeAllocationManager", - e); - } - } - - private static AllocationManager.Factory getNettyFactory() { - try { - return getFactory("org.apache.arrow.memory.netty.NettyAllocationManager"); - } catch (RuntimeException e) { - throw new RuntimeException( - "Please add arrow-memory-netty to your classpath," - + " No DefaultAllocationManager found to instantiate an NettyAllocationManager", - e); - } - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ForeignAllocation.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ForeignAllocation.java deleted file mode 100644 index 536275b724070..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ForeignAllocation.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -/** - * EXPERIMENTAL: a memory allocation that does not come from a BufferAllocator, but rather an - * outside source (like JNI). - * - *

    To use this, subclass this class and implement {@link #release0()} to free the allocation. - */ -public abstract class ForeignAllocation { - private final long memoryAddress; - private final long size; - - /** - * Create a new AllocationManager representing an imported buffer. - * - * @param size The buffer size. - * @param memoryAddress The buffer address. - */ - protected ForeignAllocation(long size, long memoryAddress) { - this.memoryAddress = memoryAddress; - this.size = size; - } - - /** Get the size of this allocation. */ - public long getSize() { - return size; - } - - /** Get the address of this allocation. */ - protected long memoryAddress() { - return memoryAddress; - } - - /** Free this allocation. Will only be called once. */ - protected abstract void release0(); -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ForeignAllocationManager.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ForeignAllocationManager.java deleted file mode 100644 index 7b66e0c8c2fd2..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ForeignAllocationManager.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory; - -/** An AllocationManager wrapping a ForeignAllocation. */ -class ForeignAllocationManager extends AllocationManager { - private final ForeignAllocation allocation; - - protected ForeignAllocationManager( - BufferAllocator accountingAllocator, ForeignAllocation allocation) { - super(accountingAllocator); - this.allocation = allocation; - } - - @Override - public long getSize() { - return allocation.getSize(); - } - - @Override - protected long memoryAddress() { - return allocation.memoryAddress(); - } - - @Override - protected void release0() { - allocation.release0(); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java deleted file mode 100644 index d40b7ea7c524f..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory; - -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.util.VisibleForTesting; -import org.checkerframework.checker.initialization.qual.Initialized; -import org.checkerframework.checker.initialization.qual.UnderInitialization; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** - * Highly specialized IdentityHashMap that implements only partial Map APIs. It incurs low initial - * cost (just two elements by default). It assumes Value includes the Key - - * Implements @ValueWithKeyIncluded iface that provides "getKey" method. - * - * @param Key type - * @param Value type - */ -public class LowCostIdentityHashMap> { - - /* - * The internal data structure to hold values. - */ - private @Nullable Object[] elementData; // elementData[index] = null; - - /* Actual number of values. */ - private int size; - - /* - * maximum number of elements that can be put in this map before having to - * rehash. - */ - private int threshold; - - private static final int DEFAULT_MIN_SIZE = 1; - - /* Default load factor of 0.75; */ - private static final int LOAD_FACTOR = 7500; - - /** Creates a Map with default expected maximum size. */ - public LowCostIdentityHashMap() { - this(DEFAULT_MIN_SIZE); - } - - /** - * Creates a Map with the specified maximum size parameter. - * - * @param maxSize The estimated maximum number of entries that will be put in this map. - */ - public LowCostIdentityHashMap(int maxSize) { - if (maxSize >= 0) { - this.size = 0; - threshold = getThreshold(maxSize); - elementData = newElementArrayUnderInitialized(computeElementArraySize()); - } else { - throw new IllegalArgumentException(); - } - } - - private int getThreshold(@UnderInitialization LowCostIdentityHashMap this, int maxSize) { - // assign the threshold to maxSize initially, this will change to a - // higher value if rehashing occurs. - return maxSize > 2 ? 
maxSize : 2; - } - - private int computeElementArraySize(@UnderInitialization LowCostIdentityHashMap this) { - int arraySize = (int) (((long) threshold * 10000) / LOAD_FACTOR); - // ensure arraySize is positive, the above cast from long to int type - // leads to overflow and negative arraySize if threshold is too big - return arraySize < 0 ? -arraySize : arraySize; - } - - /** - * Create a new element array. - * - * @param s the number of elements - * @return Reference to the element array - */ - private Object[] newElementArrayInitialized( - @Initialized LowCostIdentityHashMap this, int s) { - return new Object[s]; - } - - /** - * Create a new element array. - * - * @param s the number of elements - * @return Reference to the element array - */ - private Object[] newElementArrayUnderInitialized( - @UnderInitialization LowCostIdentityHashMap this, int s) { - return new Object[s]; - } - - /** - * Removes all elements from this map, leaving it empty. - * - * @see #isEmpty() - * @see #size() - */ - public void clear() { - size = 0; - for (int i = 0; i < elementData.length; i++) { - elementData[i] = null; - } - } - - /** - * Returns whether this map contains the specified key. - * - * @param key the key to search for. - * @return {@code true} if this map contains the specified key, {@code false} otherwise. - */ - public boolean containsKey(K key) { - Preconditions.checkNotNull(key); - - int index = findIndex(key, elementData); - return (elementData[index] == null) ? false : ((V) elementData[index]).getKey() == key; - } - - /** - * Returns whether this map contains the specified value. - * - * @param value the value to search for. - * @return {@code true} if this map contains the specified value, {@code false} otherwise. 
- */ - public boolean containsValue(V value) { - Preconditions.checkNotNull(value); - - for (int i = 0; i < elementData.length; i++) { - if (elementData[i] == value) { - return true; - } - } - return false; - } - - /** - * Returns the value of the mapping with the specified key. - * - * @param key the key. - * @return the value of the mapping with the specified key. - */ - public @Nullable V get(K key) { - Preconditions.checkNotNull(key); - - int index = findIndex(key, elementData); - - return (elementData[index] == null) - ? null - : (((V) elementData[index]).getKey() == key) ? (V) elementData[index] : null; - } - - /** - * Returns the index where the key is found at, or the index of the next empty spot if the key is - * not found in this table. - */ - @VisibleForTesting - int findIndex(@Nullable Object key, @Nullable Object[] array) { - int length = array.length; - int index = getModuloHash(key, length); - int last = (index + length - 1) % length; - while (index != last) { - if ((array[index] == null) || ((V) array[index]).getKey() == key) { - /* - * Found the key, or the next empty spot (which means key is not - * in the table) - */ - break; - } - index = (index + 1) % length; - } - return index; - } - - @VisibleForTesting - static int getModuloHash(@Nullable Object key, int length) { - return ((System.identityHashCode(key) & 0x7FFFFFFF) % length); - } - - /** - * Maps the specified key to the specified value. - * - * @param value the value. - * @return the value of any previous mapping with the specified key or {@code null} if there was - * no such mapping. 
- */ - public V put(V value) { - Preconditions.checkNotNull(value); - K key = value.getKey(); - Preconditions.checkNotNull(key); - - int index = findIndex(key, elementData); - - // if the key doesn't exist in the table - if (elementData[index] == null || ((V) elementData[index]).getKey() != key) { - if (++size > threshold) { - rehash(); - index = findIndex(key, elementData); - } - - // insert the key and assign the value to null initially - elementData[index] = null; - } - - // insert value to where it needs to go, return the old value - Object result = elementData[index]; - elementData[index] = value; - - return (V) result; - } - - @VisibleForTesting - void rehash() { - int newlength = elementData.length * 15 / 10; - if (newlength == 0) { - newlength = 1; - } - @Nullable Object[] newData = newElementArrayInitialized(newlength); - for (int i = 0; i < elementData.length; i++) { - Object key = (elementData[i] == null) ? null : ((V) elementData[i]).getKey(); - if (key != null) { - // if not empty - int index = findIndex(key, newData); - newData[index] = elementData[i]; - } - } - elementData = newData; - computeMaxSize(); - } - - private void computeMaxSize() { - threshold = (int) ((long) elementData.length * LOAD_FACTOR / 10000); - } - - /** - * Removes the mapping with the specified key from this map. - * - * @param key the key of the mapping to remove. - * @return the value of the removed mapping, or {@code null} if no mapping for the specified key - * was found. 
- */ - public @Nullable V remove(K key) { - Preconditions.checkNotNull(key); - - boolean hashedOk; - int index; - int next; - int hash; - Object result; - Object object; - index = next = findIndex(key, elementData); - - if (elementData[index] == null || ((V) elementData[index]).getKey() != key) { - return null; - } - - // store the value for this key - result = elementData[index]; - // clear value to allow movement of the rest of the elements - elementData[index] = null; - size--; - - // shift the following elements up if needed - // until we reach an empty spot - int length = elementData.length; - while (true) { - next = (next + 1) % length; - object = elementData[next]; - if (object == null) { - break; - } - - hash = getModuloHash(((V) object).getKey(), length); - hashedOk = hash > index; - if (next < index) { - hashedOk = hashedOk || (hash <= next); - } else { - hashedOk = hashedOk && (hash <= next); - } - if (!hashedOk) { - elementData[index] = object; - index = next; - elementData[index] = null; - } - } - return (V) result; - } - - /** - * Returns whether this Map has no elements. - * - * @return {@code true} if this Map has no elements, {@code false} otherwise. - * @see #size() - */ - public boolean isEmpty() { - return size == 0; - } - - /** - * Returns the number of mappings in this Map. - * - * @return the number of mappings in this Map. - */ - public int size() { - return size; - } - - /** - * Special API to return next value - substitute of regular Map.values.iterator().next(). 
- * - * @return next available value or null if none available - */ - public @Nullable V getNextValue() { - for (int i = 0; i < elementData.length; i++) { - if (elementData[i] != null) { - return (V) elementData[i]; - } - } - return null; - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OutOfMemoryException.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OutOfMemoryException.java deleted file mode 100644 index 7a0f7f32107e2..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OutOfMemoryException.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import java.util.Optional; - -/** - * Indicates memory could not be allocated for Arrow buffers. - * - *

    This is different from {@linkplain OutOfMemoryError} which indicates the JVM is out of memory. - * This error indicates that static limit of one of Arrow's allocators (e.g. {@linkplain - * BaseAllocator}) has been exceeded. - */ -public class OutOfMemoryException extends RuntimeException { - - static final org.slf4j.Logger logger = - org.slf4j.LoggerFactory.getLogger(OutOfMemoryException.class); - private static final long serialVersionUID = -6858052345185793382L; - private Optional outcomeDetails = Optional.empty(); - - public OutOfMemoryException() { - super(); - } - - public OutOfMemoryException( - String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { - super(message, cause, enableSuppression, writableStackTrace); - } - - public OutOfMemoryException(String message, Throwable cause) { - super(message, cause); - } - - public OutOfMemoryException(String message) { - super(message); - } - - public OutOfMemoryException(String message, Optional details) { - super(message); - this.outcomeDetails = details; - } - - public OutOfMemoryException(Throwable cause) { - super(cause); - } - - public Optional getOutcomeDetails() { - return outcomeDetails; - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferNOOP.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferNOOP.java deleted file mode 100644 index 6b9e029719d0c..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferNOOP.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -/** An {@link OwnershipTransferResult} indicating no transfer needed. */ -public class OwnershipTransferNOOP implements OwnershipTransferResult { - private final ArrowBuf buffer; - - OwnershipTransferNOOP(final ArrowBuf buf) { - this.buffer = buf; - } - - @Override - public ArrowBuf getTransferredBuffer() { - return buffer; - } - - @Override - public boolean getAllocationFit() { - return true; - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferResult.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferResult.java deleted file mode 100644 index 04c223aa18f83..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/OwnershipTransferResult.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -/** The result of transferring an {@link ArrowBuf} between {@linkplain BufferAllocator}s. */ -public interface OwnershipTransferResult { - - boolean getAllocationFit(); - - ArrowBuf getTransferredBuffer(); -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java deleted file mode 100644 index 48b937453f227..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReferenceManager.java +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -/** - * ReferenceManager is the reference count for one or more allocations. - * - *

    In order to integrate with the core {@link BufferAllocator} implementation, the allocation - * itself should be represented by an {@link AllocationManager}, though this is not required by the - * API. - */ -public interface ReferenceManager { - - /** - * Return the reference count. - * - * @return reference count - */ - int getRefCount(); - - /** - * Decrement this reference manager's reference count by 1 for the associated underlying memory. - * If the reference count drops to 0, it implies that ArrowBufs managed by this reference manager - * no longer need access to the underlying memory - * - * @return true if ref count has dropped to 0, false otherwise - */ - boolean release(); - - /** - * Decrement this reference manager's reference count for the associated underlying memory. If the - * reference count drops to 0, it implies that ArrowBufs managed by this reference manager no - * longer need access to the underlying memory - * - * @param decrement the count to decrease the reference count by - * @return the new reference count - */ - boolean release(int decrement); - - /** - * Increment this reference manager's reference count by 1 for the associated underlying memory. - */ - void retain(); - - /** - * Increment this reference manager's reference count by a given amount for the associated - * underlying memory. - * - * @param increment the count to increase the reference count by - */ - void retain(int increment); - - /** - * Create a new ArrowBuf that is associated with an alternative allocator for the purposes of - * memory ownership and accounting. This has no impact on the reference counting for the current - * ArrowBuf except in the situation where the passed in Allocator is the same as the current - * buffer. This operation has no impact on the reference count of this ArrowBuf. 
The newly created - * ArrowBuf with either have a reference count of 1 (in the case that this is the first time this - * memory is being associated with the target allocator or in other words allocation manager - * currently doesn't hold a mapping for the target allocator) or the current value of the - * reference count for the target allocator-reference manager combination + 1 in the case that the - * provided allocator already had an association to this underlying memory. - * - *

    The underlying allocation ({@link AllocationManager}) will not be copied. - * - * @param srcBuffer source ArrowBuf - * @param targetAllocator The target allocator to create an association with. - * @return A new ArrowBuf which shares the same underlying memory as this ArrowBuf. - */ - ArrowBuf retain(ArrowBuf srcBuffer, BufferAllocator targetAllocator); - - /** - * Derive a new ArrowBuf from a given source ArrowBuf. The new derived ArrowBuf will share the - * same reference count as rest of the ArrowBufs associated with this reference manager. - * - * @param sourceBuffer source ArrowBuf - * @param index index (relative to source ArrowBuf) new ArrowBuf should be derived from - * @param length length (bytes) of data in underlying memory that derived buffer will have access - * to in underlying memory - * @return derived buffer - */ - ArrowBuf deriveBuffer(ArrowBuf sourceBuffer, long index, long length); - - /** - * Duplicate the memory accounting ownership of the backing allocation of the given ArrowBuf in - * another allocator. This will generate a new ArrowBuf that carries an association with the same - * underlying memory ({@link AllocationManager}s) as the given ArrowBuf. - * - * @param sourceBuffer source ArrowBuf - * @param targetAllocator The target allocator to create an association with - * @return {@link OwnershipTransferResult} with info on transfer result and new buffer - */ - OwnershipTransferResult transferOwnership(ArrowBuf sourceBuffer, BufferAllocator targetAllocator); - - /** - * Get the buffer allocator associated with this reference manager. - * - * @return buffer allocator. - */ - BufferAllocator getAllocator(); - - /** - * Total size (in bytes) of memory underlying this reference manager. - * - * @return Size (in bytes) of the memory chunk. - */ - long getSize(); - - /** - * Get the total accounted size (in bytes). - * - * @return accounted size. 
- */ - long getAccountedSize(); - - String NO_OP_ERROR_MESSAGE = "Operation not supported on NO_OP Reference Manager"; - - // currently used for empty ArrowBufs - ReferenceManager NO_OP = - new ReferenceManager() { - @Override - public int getRefCount() { - return 1; - } - - @Override - public boolean release() { - return false; - } - - @Override - public boolean release(int decrement) { - return false; - } - - @Override - public void retain() {} - - @Override - public void retain(int increment) {} - - @Override - public ArrowBuf retain(ArrowBuf srcBuffer, BufferAllocator targetAllocator) { - return srcBuffer; - } - - @Override - public ArrowBuf deriveBuffer(ArrowBuf sourceBuffer, long index, long length) { - return sourceBuffer; - } - - @Override - public OwnershipTransferResult transferOwnership( - ArrowBuf sourceBuffer, BufferAllocator targetAllocator) { - return new OwnershipTransferNOOP(sourceBuffer); - } - - @Override - public BufferAllocator getAllocator() { - return new RootAllocator(0); - } - - @Override - public long getSize() { - return 0L; - } - - @Override - public long getAccountedSize() { - return 0L; - } - }; -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java deleted file mode 100644 index 85359a0febba5..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -/** - * A lightweight, automatically expanding container for holding byte data. - * - * @param The type of the underlying buffer. - */ -public interface ReusableBuffer { - /** - * Get the number of valid bytes in the data. - * - * @return the number of valid bytes in the data - */ - long getLength(); - - /** Get the buffer backing this ReusableBuffer. */ - T getBuffer(); - - /** - * Set the buffer to the contents of the given ArrowBuf. The internal buffer must resize if it - * cannot fit the contents of the data. - * - * @param srcBytes the data to copy from - * @param start the first position of the new data - * @param len the number of bytes of the new data - */ - void set(ArrowBuf srcBytes, long start, long len); - - void set(byte[] srcBytes, long start, long len); -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/RootAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/RootAllocator.java deleted file mode 100644 index fcd1be6856937..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/RootAllocator.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import org.apache.arrow.memory.rounding.DefaultRoundingPolicy; -import org.apache.arrow.memory.rounding.RoundingPolicy; -import org.apache.arrow.util.VisibleForTesting; - -/** - * A root allocator for using direct memory for Arrow Vectors/Arrays. Supports creating a tree of - * descendant child allocators to facilitate better instrumentation of memory allocations. - */ -public class RootAllocator extends BaseAllocator { - - public RootAllocator() { - this(AllocationListener.NOOP, Long.MAX_VALUE); - } - - public RootAllocator(final long limit) { - this(AllocationListener.NOOP, limit); - } - - public RootAllocator(final AllocationListener listener, final long limit) { - // todo fix DefaultRoundingPolicy when using Netty - this(listener, limit, DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY); - } - - /** - * Constructor. - * - * @param listener the allocation listener - * @param limit max allocation size in bytes - * @param roundingPolicy the policy for rounding the buffer size - */ - public RootAllocator( - final AllocationListener listener, final long limit, RoundingPolicy roundingPolicy) { - this( - configBuilder() - .listener(listener) - .maxAllocation(limit) - .roundingPolicy(roundingPolicy) - .build()); - } - - public RootAllocator(Config config) { - super(null, "ROOT", config); - } - - /** Verify the accounting state of the allocation system. 
*/ - @VisibleForTesting - public void verify() { - verifyAllocator(); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java deleted file mode 100644 index 0d480575ba81d..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -/** - * Helper interface to generify a value to be included in the map where key is part of the value. - * - * @param The type of the key. - */ -public interface ValueWithKeyIncluded { - K getKey(); -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/package-info.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/package-info.java deleted file mode 100644 index de9c82ae56309..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/package-info.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Memory Allocation, Accounting and Management. See the Arrow Java documentation for details: Memory Management - */ -package org.apache.arrow.memory; diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java deleted file mode 100644 index 90e8a1d5eca77..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.rounding; - -import org.apache.arrow.memory.util.CommonUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * The default rounding policy. That is, if the requested size is within the chunk size, the rounded - * size will be the next power of two. Otherwise, the rounded size will be identical to the - * requested size. - */ -public class DefaultRoundingPolicy implements RoundingPolicy { - private static final Logger logger = LoggerFactory.getLogger(DefaultRoundingPolicy.class); - public final long chunkSize; - - /** - * The variables here and the static block calculates the DEFAULT_CHUNK_SIZE. - * - *

    It was copied from {@link io.netty.buffer.PooledByteBufAllocator}. - */ - private static final long MIN_PAGE_SIZE = 4096; - - private static final long MAX_CHUNK_SIZE = ((long) Integer.MAX_VALUE + 1) / 2; - private static final long DEFAULT_CHUNK_SIZE; - - static { - long defaultPageSize = Long.getLong("org.apache.memory.allocator.pageSize", 8192); - try { - validateAndCalculatePageShifts(defaultPageSize); - } catch (Throwable t) { - defaultPageSize = 8192; - } - - int defaultMaxOrder = Integer.getInteger("org.apache.memory.allocator.maxOrder", 11); - try { - validateAndCalculateChunkSize(defaultPageSize, defaultMaxOrder); - } catch (Throwable t) { - defaultMaxOrder = 11; - } - DEFAULT_CHUNK_SIZE = validateAndCalculateChunkSize(defaultPageSize, defaultMaxOrder); - if (logger.isDebugEnabled()) { - logger.debug("-Dorg.apache.memory.allocator.pageSize: {}", defaultPageSize); - logger.debug("-Dorg.apache.memory.allocator.maxOrder: {}", defaultMaxOrder); - } - } - - private static long validateAndCalculatePageShifts(long pageSize) { - if (pageSize < MIN_PAGE_SIZE) { - throw new IllegalArgumentException( - "pageSize: " + pageSize + " (expected: " + MIN_PAGE_SIZE + ")"); - } - - if ((pageSize & pageSize - 1) != 0) { - throw new IllegalArgumentException("pageSize: " + pageSize + " (expected: power of 2)"); - } - - // Logarithm base 2. At this point we know that pageSize is a power of two. - return Long.SIZE - 1L - Long.numberOfLeadingZeros(pageSize); - } - - private static long validateAndCalculateChunkSize(long pageSize, int maxOrder) { - if (maxOrder > 14) { - throw new IllegalArgumentException("maxOrder: " + maxOrder + " (expected: 0-14)"); - } - - // Ensure the resulting chunkSize does not overflow. 
- long chunkSize = pageSize; - for (long i = maxOrder; i > 0; i--) { - if (chunkSize > MAX_CHUNK_SIZE / 2) { - throw new IllegalArgumentException( - String.format( - "pageSize (%d) << maxOrder (%d) must not exceed %d", - pageSize, maxOrder, MAX_CHUNK_SIZE)); - } - chunkSize <<= 1; - } - return chunkSize; - } - - /** The singleton instance. */ - public static final DefaultRoundingPolicy DEFAULT_ROUNDING_POLICY = - new DefaultRoundingPolicy(DEFAULT_CHUNK_SIZE); - - private DefaultRoundingPolicy(long chunkSize) { - this.chunkSize = chunkSize; - } - - @Override - public long getRoundedSize(long requestSize) { - return requestSize < chunkSize ? CommonUtil.nextPowerOfTwo(requestSize) : requestSize; - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/RoundingPolicy.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/RoundingPolicy.java deleted file mode 100644 index 5270bce99a8da..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/RoundingPolicy.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory.rounding; - -/** - * The policy for rounding the buffer size, to improve performance and avoid memory fragmentation. - * In particular, given a requested buffer size, the policy will determine the rounded buffer size. - */ -public interface RoundingPolicy { - long getRoundedSize(long requestSize); -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/SegmentRoundingPolicy.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/SegmentRoundingPolicy.java deleted file mode 100644 index 89db736e6a0f9..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/SegmentRoundingPolicy.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.rounding; - -import com.google.errorprone.annotations.InlineMe; -import org.apache.arrow.memory.util.LargeMemoryUtil; -import org.apache.arrow.util.Preconditions; - -/** The rounding policy that each buffer size must a multiple of the segment size. */ -public class SegmentRoundingPolicy implements RoundingPolicy { - - /** The minimal segment size. 
*/ - public static final long MIN_SEGMENT_SIZE = 1024L; - - /** - * The segment size. It must be at least {@link SegmentRoundingPolicy#MIN_SEGMENT_SIZE}, and be a - * power of 2. - */ - private long segmentSize; - - /** - * Constructor for the segment rounding policy. - * - * @param segmentSize the segment size. - * @throws IllegalArgumentException if the segment size is smaller than {@link - * SegmentRoundingPolicy#MIN_SEGMENT_SIZE}, or is not a power of 2. - * @deprecated use {@link SegmentRoundingPolicy#SegmentRoundingPolicy(long)} instead. - */ - @Deprecated(forRemoval = true) - @InlineMe(replacement = "this((long) segmentSize)") - public SegmentRoundingPolicy(int segmentSize) { - this((long) segmentSize); - } - - /** - * Constructor for the segment rounding policy. - * - * @param segmentSize the segment size. - * @throws IllegalArgumentException if the segment size is smaller than {@link - * SegmentRoundingPolicy#MIN_SEGMENT_SIZE}, or is not a power of 2. - */ - public SegmentRoundingPolicy(long segmentSize) { - Preconditions.checkArgument( - segmentSize >= MIN_SEGMENT_SIZE, - "The segment size cannot be smaller than %s", - MIN_SEGMENT_SIZE); - Preconditions.checkArgument( - (segmentSize & (segmentSize - 1)) == 0, "The segment size must be a power of 2"); - this.segmentSize = segmentSize; - } - - @Override - public long getRoundedSize(long requestSize) { - return (requestSize + (segmentSize - 1)) / segmentSize * segmentSize; - } - - @Deprecated(forRemoval = true) - public int getSegmentSize() { - return LargeMemoryUtil.checkedCastToInt(segmentSize); - } - - public long getSegmentSizeAsLong() { - return segmentSize; - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java deleted file mode 100644 index 610ce04647ec7..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java 
+++ /dev/null @@ -1,193 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.util; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.memory.util.hash.SimpleHasher; -import org.apache.arrow.util.Preconditions; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** - * Pointer to a memory region within an {@link ArrowBuf}. It will be used as the basis for - * calculating hash code within a vector, and equality determination. - */ -public final class ArrowBufPointer implements Comparable { - - /** The hash code when the arrow buffer is null. */ - public static final int NULL_HASH_CODE = 0; - - private @Nullable ArrowBuf buf; - - private long offset; - - private long length; - - private int hashCode = NULL_HASH_CODE; - - private final ArrowBufHasher hasher; - - /** A flag indicating if the underlying memory region has changed. */ - private boolean hashCodeChanged = false; - - /** The default constructor. */ - public ArrowBufPointer() { - this(SimpleHasher.INSTANCE); - } - - /** - * Constructs an arrow buffer pointer with the specified hasher. - * - * @param hasher the hasher to use. 
- */ - public ArrowBufPointer(ArrowBufHasher hasher) { - Preconditions.checkNotNull(hasher); - this.hasher = hasher; - this.buf = null; - } - - /** - * Constructs an Arrow buffer pointer. - * - * @param buf the underlying {@link ArrowBuf}, which can be null. - * @param offset the start off set of the memory region pointed to. - * @param length the length off set of the memory region pointed to. - */ - public ArrowBufPointer(ArrowBuf buf, long offset, long length) { - this(buf, offset, length, SimpleHasher.INSTANCE); - } - - /** - * Constructs an Arrow buffer pointer. - * - * @param buf the underlying {@link ArrowBuf}, which can be null. - * @param offset the start off set of the memory region pointed to. - * @param length the length off set of the memory region pointed to. - * @param hasher the hasher used to calculate the hash code. - */ - public ArrowBufPointer(ArrowBuf buf, long offset, long length, ArrowBufHasher hasher) { - Preconditions.checkNotNull(hasher); - this.hasher = hasher; - set(buf, offset, length); - } - - /** - * Sets this pointer. - * - * @param buf the underlying {@link ArrowBuf}, which can be null. - * @param offset the start off set of the memory region pointed to. - * @param length the length off set of the memory region pointed to. - */ - public void set(ArrowBuf buf, long offset, long length) { - this.buf = buf; - this.offset = offset; - this.length = length; - - hashCodeChanged = true; - } - - /** - * Gets the underlying buffer, or null if the underlying data is invalid or null. - * - * @return the underlying buffer, if any, or null if the underlying data is invalid or null. 
- */ - public @Nullable ArrowBuf getBuf() { - return buf; - } - - public long getOffset() { - return offset; - } - - public long getLength() { - return length; - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - if (!hasher.equals(((ArrowBufPointer) o).hasher)) { - // note that the hasher is incorporated in equality determination - // this is to avoid problems in cases where two Arrow buffer pointers are not equal - // while having different hashers and equal hash codes. - return false; - } - - ArrowBufPointer other = (ArrowBufPointer) o; - if (buf == null || other.buf == null) { - if (buf == null && other.buf == null) { - return true; - } else { - return false; - } - } - - return ByteFunctionHelpers.equal( - buf, offset, offset + length, other.buf, other.offset, other.offset + other.length) - != 0; - } - - @Override - public int hashCode() { - if (!hashCodeChanged) { - return hashCode; - } - - // re-compute the hash code - if (buf == null) { - hashCode = NULL_HASH_CODE; - } else { - hashCode = hasher.hashCode(buf, offset, length); - } - - hashCodeChanged = false; - return hashCode; - } - - /** - * Compare two arrow buffer pointers. The comparison is based on lexicographic order. - * - * @param that the other pointer to compare. - * @return 0 if the two pointers are equal; a positive integer if this pointer is larger; a - * negative integer if this pointer is smaller. - */ - @Override - public int compareTo(ArrowBufPointer that) { - if (this.buf == null || that.buf == null) { - if (this.buf == null && that.buf == null) { - return 0; - } else { - // null is smaller - return this.buf == null ? 
-1 : 1; - } - } - - return ByteFunctionHelpers.compare( - this.buf, - this.offset, - this.offset + this.length, - that.buf, - that.offset, - that.offset + that.length); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AssertionUtil.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AssertionUtil.java deleted file mode 100644 index c2712eb4776cd..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AssertionUtil.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.util; - -/** - * Utility class to that provides {@link #ASSERT_ENABLED} constant to determine if assertions are - * enabled. 
- */ -public class AssertionUtil { - - public static final boolean ASSERT_ENABLED; - static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AssertionUtil.class); - - static { - boolean isAssertEnabled = false; - assert isAssertEnabled = true; - ASSERT_ENABLED = isAssertEnabled; - } - - private AssertionUtil() {} - - public static boolean isAssertionsEnabled() { - return ASSERT_ENABLED; - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AutoCloseableLock.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AutoCloseableLock.java deleted file mode 100644 index 961e772ac4896..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/AutoCloseableLock.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.util; - -import java.util.concurrent.locks.Lock; - -/** Simple wrapper class that allows Locks to be released via a try-with-resources block. 
*/ -public class AutoCloseableLock implements AutoCloseable { - - private final Lock lock; - - public AutoCloseableLock(Lock lock) { - this.lock = lock; - } - - public AutoCloseableLock open() { - lock.lock(); - return this; - } - - @Override - public void close() { - lock.unlock(); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java deleted file mode 100644 index 9243be399b6db..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpers.java +++ /dev/null @@ -1,319 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.util; - -import java.nio.ByteOrder; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BoundsChecking; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.memory.util.hash.SimpleHasher; - -/** Utility methods for memory comparison at a byte level. 
*/ -public class ByteFunctionHelpers { - static final org.slf4j.Logger logger = - org.slf4j.LoggerFactory.getLogger(ByteFunctionHelpers.class); - - private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN; - - private ByteFunctionHelpers() {} - - /** - * Helper function to check for equality of bytes in two ArrowBufs. - * - * @param left Left ArrowBuf for comparison - * @param lStart start offset in the buffer - * @param lEnd end offset in the buffer - * @param right Right ArrowBuf for comparison - * @param rStart start offset in the buffer - * @param rEnd end offset in the buffer - * @return 1 if equals, 0 otherwise - */ - public static int equal( - final ArrowBuf left, long lStart, long lEnd, final ArrowBuf right, long rStart, long rEnd) { - if (BoundsChecking.BOUNDS_CHECKING_ENABLED) { - left.checkBytes(lStart, lEnd); - right.checkBytes(rStart, rEnd); - } - return memEqual(left.memoryAddress(), lStart, lEnd, right.memoryAddress(), rStart, rEnd); - } - - private static int memEqual( - final long laddr, long lStart, long lEnd, final long raddr, long rStart, final long rEnd) { - - long n = lEnd - lStart; - if (n == rEnd - rStart) { - long lPos = laddr + lStart; - long rPos = raddr + rStart; - - while (n > 63) { - for (int x = 0; x < 8; x++) { - long leftLong = MemoryUtil.getLong(lPos); - long rightLong = MemoryUtil.getLong(rPos); - if (leftLong != rightLong) { - return 0; - } - lPos += 8; - rPos += 8; - } - n -= 64; - } - - while (n > 7) { - long leftLong = MemoryUtil.getLong(lPos); - long rightLong = MemoryUtil.getLong(rPos); - if (leftLong != rightLong) { - return 0; - } - lPos += 8; - rPos += 8; - n -= 8; - } - - if (n > 3) { - int leftInt = MemoryUtil.getInt(lPos); - int rightInt = MemoryUtil.getInt(rPos); - if (leftInt != rightInt) { - return 0; - } - lPos += 4; - rPos += 4; - n -= 4; - } - - while (n-- != 0) { - byte leftByte = MemoryUtil.getByte(lPos); - byte rightByte = MemoryUtil.getByte(rPos); - if (leftByte != 
rightByte) { - return 0; - } - lPos++; - rPos++; - } - return 1; - } else { - return 0; - } - } - - /** - * Helper function to compare a set of bytes in two ArrowBufs. - * - *

    Function will check data before completing in the case that - * - * @param left Left ArrowBuf to compare - * @param lStart start offset in the buffer - * @param lEnd end offset in the buffer - * @param right Right ArrowBuf to compare - * @param rStart start offset in the buffer - * @param rEnd end offset in the buffer - * @return 1 if left input is greater, -1 if left input is smaller, 0 otherwise - */ - public static int compare( - final ArrowBuf left, long lStart, long lEnd, final ArrowBuf right, long rStart, long rEnd) { - if (BoundsChecking.BOUNDS_CHECKING_ENABLED) { - left.checkBytes(lStart, lEnd); - right.checkBytes(rStart, rEnd); - } - return memcmp(left.memoryAddress(), lStart, lEnd, right.memoryAddress(), rStart, rEnd); - } - - private static int memcmp( - final long laddr, long lStart, long lEnd, final long raddr, long rStart, final long rEnd) { - long lLen = lEnd - lStart; - long rLen = rEnd - rStart; - long n = Math.min(rLen, lLen); - long lPos = laddr + lStart; - long rPos = raddr + rStart; - - while (n > 63) { - for (int x = 0; x < 8; x++) { - long leftLong = MemoryUtil.getLong(lPos); - long rightLong = MemoryUtil.getLong(rPos); - if (leftLong != rightLong) { - if (LITTLE_ENDIAN) { - return unsignedLongCompare(Long.reverseBytes(leftLong), Long.reverseBytes(rightLong)); - } else { - return unsignedLongCompare(leftLong, rightLong); - } - } - lPos += 8; - rPos += 8; - } - n -= 64; - } - - while (n > 7) { - long leftLong = MemoryUtil.getLong(lPos); - long rightLong = MemoryUtil.getLong(rPos); - if (leftLong != rightLong) { - if (LITTLE_ENDIAN) { - return unsignedLongCompare(Long.reverseBytes(leftLong), Long.reverseBytes(rightLong)); - } else { - return unsignedLongCompare(leftLong, rightLong); - } - } - lPos += 8; - rPos += 8; - n -= 8; - } - - if (n > 3) { - int leftInt = MemoryUtil.getInt(lPos); - int rightInt = MemoryUtil.getInt(rPos); - if (leftInt != rightInt) { - if (LITTLE_ENDIAN) { - return unsignedIntCompare(Integer.reverseBytes(leftInt), 
Integer.reverseBytes(rightInt)); - } else { - return unsignedIntCompare(leftInt, rightInt); - } - } - lPos += 4; - rPos += 4; - n -= 4; - } - - while (n-- != 0) { - byte leftByte = MemoryUtil.getByte(lPos); - byte rightByte = MemoryUtil.getByte(rPos); - if (leftByte != rightByte) { - return ((leftByte & 0xFF) - (rightByte & 0xFF)) > 0 ? 1 : -1; - } - lPos++; - rPos++; - } - - if (lLen == rLen) { - return 0; - } - - return lLen > rLen ? 1 : -1; - } - - /** - * Helper function to compare a set of bytes in ArrowBuf to a ByteArray. - * - * @param left Left ArrowBuf for comparison purposes - * @param lStart start offset in the buffer - * @param lEnd end offset in the buffer - * @param right second input to be compared - * @param rStart start offset in the byte array - * @param rEnd end offset in the byte array - * @return 1 if left input is greater, -1 if left input is smaller, 0 otherwise - */ - public static int compare( - final ArrowBuf left, int lStart, int lEnd, final byte[] right, int rStart, final int rEnd) { - if (BoundsChecking.BOUNDS_CHECKING_ENABLED) { - left.checkBytes(lStart, lEnd); - } - return memcmp(left.memoryAddress(), lStart, lEnd, right, rStart, rEnd); - } - - /** - * Compares the two specified {@code long} values, treating them as unsigned values between {@code - * 0} and {@code 2^64 - 1} inclusive. 
- * - * @param a the first unsigned {@code long} to compare - * @param b the second unsigned {@code long} to compare - * @return a negative value if {@code a} is less than {@code b}; a positive value if {@code a} is - * greater than {@code b}; or zero if they are equal - */ - public static int unsignedLongCompare(long a, long b) { - return Long.compare(a ^ Long.MIN_VALUE, b ^ Long.MIN_VALUE); - } - - public static int unsignedIntCompare(int a, int b) { - return Integer.compare(a ^ Integer.MIN_VALUE, b ^ Integer.MIN_VALUE); - } - - private static int memcmp( - final long laddr, int lStart, int lEnd, final byte[] right, int rStart, final int rEnd) { - int lLen = lEnd - lStart; - int rLen = rEnd - rStart; - int n = Math.min(rLen, lLen); - long lPos = laddr + lStart; - int rPos = rStart; - - while (n > 7) { - long leftLong = MemoryUtil.getLong(lPos); - long rightLong = MemoryUtil.getLong(right, rPos); - if (leftLong != rightLong) { - if (LITTLE_ENDIAN) { - return unsignedLongCompare(Long.reverseBytes(leftLong), Long.reverseBytes(rightLong)); - } else { - return unsignedLongCompare(leftLong, rightLong); - } - } - lPos += 8; - rPos += 8; - n -= 8; - } - - if (n > 3) { - int leftInt = MemoryUtil.getInt(lPos); - int rightInt = MemoryUtil.getInt(right, rPos); - if (leftInt != rightInt) { - if (LITTLE_ENDIAN) { - return unsignedIntCompare(Integer.reverseBytes(leftInt), Integer.reverseBytes(rightInt)); - } else { - return unsignedIntCompare(leftInt, rightInt); - } - } - lPos += 4; - rPos += 4; - n -= 4; - } - - while (n-- != 0) { - byte leftByte = MemoryUtil.getByte(lPos); - byte rightByte = right[rPos]; - if (leftByte != rightByte) { - return ((leftByte & 0xFF) - (rightByte & 0xFF)) > 0 ? 1 : -1; - } - lPos++; - rPos++; - } - - if (lLen == rLen) { - return 0; - } - - return lLen > rLen ? 1 : -1; - } - - /** Compute hashCode with the given {@link ArrowBuf} and start/end index. 
*/ - public static int hash(final ArrowBuf buf, long start, long end) { - - return hash(SimpleHasher.INSTANCE, buf, start, end); - } - - /** - * Compute hashCode with the given {@link ArrowBufHasher}, {@link ArrowBuf} and start/end index. - */ - public static final int hash(ArrowBufHasher hasher, final ArrowBuf buf, long start, long end) { - - if (hasher == null) { - hasher = SimpleHasher.INSTANCE; - } - - return hasher.hashCode(buf, start, end - start); - } - - /** Generate a new hashCode with the given current hashCode and new hashCode. */ - public static int combineHash(int currentHash, int newHash) { - return currentHash * 31 + newHash; - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java deleted file mode 100644 index d8e7be7a95125..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/CommonUtil.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.util; - -import java.util.Arrays; - -/** Utilities and static methods needed for arrow-memory. 
*/ -public final class CommonUtil { - - private CommonUtil() {} - - /** - * Rounds up the provided value to the nearest power of two. - * - * @param val An integer value. - * @return The closest power of two of that value. - */ - public static int nextPowerOfTwo(int val) { - if (val == 0 || val == 1) { - return val + 1; - } - int highestBit = Integer.highestOneBit(val); - if (highestBit == val) { - return val; - } else { - return highestBit << 1; - } - } - - /** - * Rounds up the provided value to the nearest power of two. - * - * @param val A long value. - * @return The closest power of two of that value. - */ - public static long nextPowerOfTwo(long val) { - if (val == 0 || val == 1) { - return val + 1; - } - long highestBit = Long.highestOneBit(val); - if (highestBit == val) { - return val; - } else { - return highestBit << 1; - } - } - - /** - * Specify an indentation amount when using a StringBuilder. - * - * @param sb StringBuilder to use - * @param indent Indentation amount - * @return the StringBuilder object with indentation applied - */ - public static StringBuilder indent(StringBuilder sb, int indent) { - final char[] indentation = new char[indent * 2]; - Arrays.fill(indentation, ' '); - sb.append(indentation); - return sb; - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java deleted file mode 100644 index f19f50956eb16..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.util; - -import org.apache.arrow.util.VisibleForTesting; - -/** - * Lifted from Apache Parquet MR project: - * https://github.com/apache/parquet-mr/blob/e87b80308869b77f914fcfd04364686e11158950/parquet-column/src/main/java/org/apache/parquet/schema/Float16.java - * - *

      - * Changes made: - *
    • Modify the data type input from Parquet-MR Binary (toFloat(Binary b)) to Arrow Java short - * (toFloat(short b)) - *
    • Expose NAN and POSITIVE_INFINITY variables - *
    - * - * The class is a utility class to manipulate half-precision 16-bit IEEE 754 floating - * point data types (also called fp16 or binary16). A half-precision float can be created from or - * converted to single-precision floats, and is stored in a short data type. The IEEE 754 standard - * specifies an float16 as having the following format: - * - *
      - *
    • Sign bit: 1 bit - *
    • Exponent width: 5 bits - *
    • Significand: 10 bits - *
    - * - *

    The format is laid out as follows: - * - *

    - * 1   11111   1111111111
    - * ^   --^--   -----^----
    - * sign  |          |_______ significand
    - *       |
    - *      -- exponent
    - * 
    - * - * Half-precision floating points can be useful to save memory and/or bandwidth at the expense of - * range and precision when compared to single-precision floating points (float32). Ref: - * https://android.googlesource.com/platform/libcore/+/master/luni/src/main/java/libcore/util/FP16.java - */ -public class Float16 { - // Positive infinity of type half-precision float. - public static final short POSITIVE_INFINITY = (short) 0x7c00; - // A Not-a-Number representation of a half-precision float. - public static final short NaN = (short) 0x7e00; - // The bitmask to and a number with to obtain the sign bit. - private static final int SIGN_MASK = 0x8000; - // The offset to shift by to obtain the exponent bits. - private static final int EXPONENT_SHIFT = 10; - // The bitmask to and a number shifted by EXPONENT_SHIFT right, to obtain exponent bits. - private static final int SHIFTED_EXPONENT_MASK = 0x1f; - // The bitmask to and a number with to obtain significand bits. - private static final int SIGNIFICAND_MASK = 0x3ff; - // The offset of the exponent from the actual value. - private static final int EXPONENT_BIAS = 15; - // The offset to shift by to obtain the sign bit. - private static final int SIGN_SHIFT = 15; - // The bitmask to AND with to obtain exponent and significand bits. - private static final int EXPONENT_SIGNIFICAND_MASK = 0x7fff; - - private static final int FP32_SIGN_SHIFT = 31; - private static final int FP32_EXPONENT_SHIFT = 23; - private static final int FP32_SHIFTED_EXPONENT_MASK = 0xff; - private static final int FP32_SIGNIFICAND_MASK = 0x7fffff; - private static final int FP32_EXPONENT_BIAS = 127; - private static final int FP32_QNAN_MASK = 0x400000; - private static final int FP32_DENORMAL_MAGIC = 126 << 23; - private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC); - - /** - * Returns true if the specified half-precision float value represents a Not-a-Number, false - * otherwise. 
- * - * @param h A half-precision float value - * @return True if the value is a NaN, false otherwise - */ - @VisibleForTesting - public static boolean isNaN(short h) { - return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY; - } - - /** - * Compares the two specified half-precision float values. The following conditions apply during - * the comparison: - * - *
      - *
    • NaN is considered by this method to be equal to itself and greater than all other - * half-precision float values (including {@code #POSITIVE_INFINITY}) - *
    • POSITIVE_ZERO is considered by this method to be greater than NEGATIVE_ZERO. - *
    - * - * @param x The first half-precision float value to compare. - * @param y The second half-precision float value to compare - * @return The value {@code 0} if {@code x} is numerically equal to {@code y}, a value less than - * {@code 0} if {@code x} is numerically less than {@code y}, and a value greater than {@code - * 0} if {@code x} is numerically greater than {@code y} - */ - @VisibleForTesting - public static int compare(short x, short y) { - boolean xIsNaN = isNaN(x); - boolean yIsNaN = isNaN(y); - - if (!xIsNaN && !yIsNaN) { - int first = ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff); - int second = ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); - // Returns true if the first half-precision float value is less - // (smaller toward negative infinity) than the second half-precision float value. - if (first < second) { - return -1; - } - - // Returns true if the first half-precision float value is greater - // (larger toward positive infinity) than the second half-precision float value. - if (first > second) { - return 1; - } - } - - // Collapse NaNs, akin to halfToIntBits(), but we want to keep - // (signed) short value types to preserve the ordering of -0.0 - // and +0.0 - short xBits = xIsNaN ? NaN : x; - short yBits = yIsNaN ? NaN : y; - return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1)); - } - - /** - * Converts the specified half-precision float value into a single-precision float value. The - * following special cases are handled: If the input is NaN, the returned value is Float NaN. If - * the input is POSITIVE_INFINITY or NEGATIVE_INFINITY, the returned value is respectively Float - * POSITIVE_INFINITY or Float NEGATIVE_INFINITY. If the input is 0 (positive or negative), the - * returned value is +/-0.0f. Otherwise, the returned value is a normalized single-precision float - * value. 
- * - * @param b The half-precision float value to convert to single-precision - * @return A normalized single-precision float value - */ - @VisibleForTesting - public static float toFloat(short b) { - int bits = b & 0xffff; - int s = bits & SIGN_MASK; - int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK; - int m = bits & SIGNIFICAND_MASK; - int outE = 0; - int outM = 0; - if (e == 0) { // Denormal or 0 - if (m != 0) { - // Convert denorm fp16 into normalized fp32 - float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m); - o -= FP32_DENORMAL_FLOAT; - return s == 0 ? o : -o; - } - } else { - outM = m << 13; - if (e == 0x1f) { // Infinite or NaN - outE = 0xff; - if (outM != 0) { // SNaNs are quieted - outM |= FP32_QNAN_MASK; - } - } else { - outE = e - EXPONENT_BIAS + FP32_EXPONENT_BIAS; - } - } - int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM; - return Float.intBitsToFloat(out); - } - - /** - * Converts the specified single-precision float value into a half-precision float value. The - * following special cases are handled: - * - *

    If the input is NaN, the returned value is NaN. If the input is Float POSITIVE_INFINITY or - * Float NEGATIVE_INFINITY, the returned value is respectively POSITIVE_INFINITY or - * NEGATIVE_INFINITY. If the input is 0 (positive or negative), the returned value is - * POSITIVE_ZERO or NEGATIVE_ZERO. If the input is a less than MIN_VALUE, the returned value is - * flushed to POSITIVE_ZERO or NEGATIVE_ZERO. If the input is a less than MIN_NORMAL, the returned - * value is a denorm half-precision float. Otherwise, the returned value is rounded to the nearest - * representable half-precision float value. - * - * @param f The single-precision float value to convert to half-precision - * @return A half-precision float value - */ - public static short toFloat16(float f) { - int bits = Float.floatToRawIntBits(f); - int s = (bits >>> FP32_SIGN_SHIFT); - int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK; - int m = bits & FP32_SIGNIFICAND_MASK; - int outE = 0; - int outM = 0; - if (e == 0xff) { // Infinite or NaN - outE = 0x1f; - outM = m != 0 ? 0x200 : 0; - } else { - e = e - FP32_EXPONENT_BIAS + EXPONENT_BIAS; - if (e >= 0x1f) { // Overflow - outE = 0x1f; - } else if (e <= 0) { // Underflow - if (e < -10) { - // The absolute fp32 value is less than MIN_VALUE, flush to +/-0 - } else { - // The fp32 value is a normalized float less than MIN_NORMAL, - // we convert to a denorm fp16 - m = m | 0x800000; - int shift = 14 - e; - outM = m >> shift; - int lowm = m & ((1 << shift) - 1); - int hway = 1 << (shift - 1); - // if above halfway or exactly halfway and outM is odd - if (lowm + (outM & 1) > hway) { - // Round to nearest even - // Can overflow into exponent bit, which surprisingly is OK. 
- // This increment relies on the +outM in the return statement below - outM++; - } - } - } else { - outE = e; - outM = m >> 13; - // if above halfway or exactly halfway and outM is odd - if ((m & 0x1fff) + (outM & 0x1) > 0x1000) { - // Round to nearest even - // Can overflow into exponent bit, which surprisingly is OK. - // This increment relies on the +outM in the return statement below - outM++; - } - } - } - // The outM is added here as the +1 increments for outM above can - // cause an overflow in the exponent bit which is OK. - return (short) ((s << SIGN_SHIFT) | (outE << EXPONENT_SHIFT) + outM); - } - - /** - * Returns a string representation of the specified half-precision float value. Calling this - * method is equivalent to calling Float.toString(toFloat(h)). See {@link - * Float#toString(float)} for more information on the format of the string representation. - * - * @param h A half-precision float value in binary little-endian format - * @return A string representation of the specified value - */ - @VisibleForTesting - public static String toFloatString(short h) { - return Float.toString(Float16.toFloat(h)); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java deleted file mode 100644 index 5b1bdd8b7244c..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.util; - -import com.google.errorprone.annotations.FormatMethod; -import com.google.errorprone.annotations.FormatString; -import java.util.ArrayDeque; -import java.util.Arrays; -import java.util.Deque; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.slf4j.Logger; - -/** - * Utility class that can be used to log activity within a class for later logging and debugging. - * Supports recording events and recording the stack at the time they occur. - */ -public class HistoricalLog { - - private final Deque history = new ArrayDeque<>(); - private final String idString; // the formatted id string - private final int limit; // the limit on the number of events kept - private @Nullable Event firstEvent; // the first stack trace recorded - - /** - * Constructor. The format string will be formatted and have its arguments substituted at the time - * this is called. - * - * @param idStringFormat {@link String#format} format string that can be used to identify this - * object in a log. Including some kind of unique identifier that can be associated with the - * object instance is best. - * @param args for the format string, or nothing if none are required - */ - @FormatMethod - public HistoricalLog(@FormatString final String idStringFormat, Object... args) { - this(Integer.MAX_VALUE, idStringFormat, args); - } - - /** - * Constructor. The format string will be formatted and have its arguments substituted at the time - * this is called. - * - *

    This form supports the specification of a limit that will limit the number of historical - * entries kept (which keeps down the amount of memory used). With the limit, the first entry made - * is always kept (under the assumption that this is the creation site of the object, which is - * usually interesting), and then up to the limit number of entries are kept after that. Each time - * a new entry is made, the oldest that is not the first is dropped. - * - * @param limit the maximum number of historical entries that will be kept, not including the - * first entry made - * @param idStringFormat {@link String#format} format string that can be used to identify this - * object in a log. Including some kind of unique identifier that can be associated with the - * object instance is best. - * @param args for the format string, or nothing if none are required - */ - @FormatMethod - public HistoricalLog(final int limit, @FormatString final String idStringFormat, Object... args) { - this.limit = limit; - this.idString = String.format(idStringFormat, args); - this.firstEvent = null; - } - - /** - * Record an event. Automatically captures the stack trace at the time this is called. The format - * string will be formatted and have its arguments substituted at the time this is called. - * - * @param noteFormat {@link String#format} format string that describes the event - * @param args for the format string, or nothing if none are required - */ - @FormatMethod - public synchronized void recordEvent(@FormatString final String noteFormat, Object... args) { - final String note = String.format(noteFormat, args); - final Event event = new Event(note); - if (firstEvent == null) { - firstEvent = event; - } - if (history.size() == limit) { - history.removeFirst(); - } - history.add(event); - } - - /** - * Write the history of this object to the given {@link StringBuilder}. 
The history includes the - * identifying string provided at construction time, and all the recorded events with their stack - * traces. - * - * @param sb {@link StringBuilder} to write to - * @param includeStackTrace whether to include the stacktrace of each event in the history - */ - public void buildHistory(final StringBuilder sb, boolean includeStackTrace) { - buildHistory(sb, 0, includeStackTrace); - } - - /** - * Build the history and write it to sb. - * - * @param sb output - * @param indent starting indent (usually "") - * @param includeStackTrace whether to include the stacktrace of each event. - */ - public synchronized void buildHistory( - final StringBuilder sb, int indent, boolean includeStackTrace) { - final char[] indentation = new char[indent]; - final char[] innerIndentation = new char[indent + 2]; - Arrays.fill(indentation, ' '); - Arrays.fill(innerIndentation, ' '); - - sb.append(indentation).append("event log for: ").append(idString).append('\n'); - - if (firstEvent != null) { - long time = firstEvent.time; - String note = firstEvent.note; - final StackTrace stackTrace = firstEvent.stackTrace; - sb.append(innerIndentation).append(time).append(' ').append(note).append('\n'); - if (includeStackTrace) { - stackTrace.writeToBuilder(sb, indent + 2); - } - - for (final Event event : history) { - if (event == firstEvent) { - continue; - } - sb.append(innerIndentation) - .append(" ") - .append(event.time) - .append(' ') - .append(event.note) - .append('\n'); - - if (includeStackTrace) { - event.stackTrace.writeToBuilder(sb, indent + 2); - sb.append('\n'); - } - } - } - } - - /** - * Write the history of this object to the given {@link Logger}. The history includes the - * identifying string provided at construction time, and all the recorded events with their stack - * traces. 
- * - * @param logger {@link Logger} to write to - */ - public void logHistory(final Logger logger) { - final StringBuilder sb = new StringBuilder(); - buildHistory(sb, 0, true); - logger.debug(sb.toString()); - } - - private static class Event { - - private final String note; // the event text - private final StackTrace stackTrace; // where the event occurred - private final long time; - - public Event(final String note) { - this.note = note; - this.time = System.nanoTime(); - stackTrace = new StackTrace(); - } - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java deleted file mode 100644 index 5d6f11b1fa41f..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/LargeMemoryUtil.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.util; - -import org.apache.arrow.memory.BoundsChecking; - -/** Contains utilities for dealing with a 64-bit address base. 
*/ -public final class LargeMemoryUtil { - - private LargeMemoryUtil() {} - - /** Casts length to an int, but raises an exception the value is outside the range of an int. */ - public static int checkedCastToInt(long length) { - if (BoundsChecking.BOUNDS_CHECKING_ENABLED) { - return Math.toIntExact(length); - } - return (int) length; - } - - /** Returns a min(Integer.MAX_VALUE, length). */ - public static int capAtMaxInt(long length) { - return (int) Math.min(length, Integer.MAX_VALUE); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java deleted file mode 100644 index acf77547fbcdd..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory.util; - -import java.lang.reflect.Constructor; -import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.security.AccessController; -import java.security.PrivilegedAction; -import org.checkerframework.checker.nullness.qual.Nullable; -import sun.misc.Unsafe; - -/** Utilities for memory related operations. */ -public class MemoryUtil { - private static final org.slf4j.Logger logger = - org.slf4j.LoggerFactory.getLogger(MemoryUtil.class); - - private static final @Nullable Constructor DIRECT_BUFFER_CONSTRUCTOR; - /** The unsafe object from which to access the off-heap memory. */ - private static final Unsafe UNSAFE; - - /** The start offset of array data relative to the start address of the array object. */ - private static final long BYTE_ARRAY_BASE_OFFSET; - - /** The offset of the address field with the {@link java.nio.ByteBuffer} object. */ - private static final long BYTE_BUFFER_ADDRESS_OFFSET; - - /** If the native byte order is little-endian. */ - public static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN; - - // Java 1.8, 9, 11, 17, 21 becomes 1, 9, 11, 17, and 21. 
- @SuppressWarnings("StringSplitter") - private static final int majorVersion = - Integer.parseInt(System.getProperty("java.specification.version").split("\\D+")[0]); - - static { - try { - // try to get the unsafe object - final Object maybeUnsafe = - AccessController.doPrivileged( - new PrivilegedAction() { - @Override - @SuppressWarnings({"nullness:argument", "nullness:return"}) - // incompatible argument for parameter obj of Field.get - // incompatible types in return - public Object run() { - try { - final Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe"); - unsafeField.setAccessible(true); - return unsafeField.get(null); - } catch (Throwable e) { - return e; - } - } - }); - - if (maybeUnsafe instanceof Throwable) { - throw (Throwable) maybeUnsafe; - } - - UNSAFE = (Unsafe) maybeUnsafe; - - // get the offset of the data inside a byte array object - BYTE_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(byte[].class); - - // get the offset of the address field in a java.nio.Buffer object - Field addressField = java.nio.Buffer.class.getDeclaredField("address"); - addressField.setAccessible(true); - BYTE_BUFFER_ADDRESS_OFFSET = UNSAFE.objectFieldOffset(addressField); - - Constructor directBufferConstructor; - long address = -1; - final ByteBuffer direct = ByteBuffer.allocateDirect(1); - try { - - final Object maybeDirectBufferConstructor = - AccessController.doPrivileged( - new PrivilegedAction() { - @Override - public Object run() { - try { - final Constructor constructor = - (majorVersion >= 21) - ? 
direct.getClass().getDeclaredConstructor(long.class, long.class) - : direct.getClass().getDeclaredConstructor(long.class, int.class); - constructor.setAccessible(true); - logger.debug("Constructor for direct buffer found and made accessible"); - return constructor; - } catch (NoSuchMethodException e) { - logger.debug("Cannot get constructor for direct buffer allocation", e); - return e; - } catch (SecurityException e) { - logger.debug("Cannot get constructor for direct buffer allocation", e); - return e; - } - } - }); - - if (maybeDirectBufferConstructor instanceof Constructor) { - address = UNSAFE.allocateMemory(1); - // try to use the constructor now - try { - ((Constructor) maybeDirectBufferConstructor).newInstance(address, 1); - directBufferConstructor = (Constructor) maybeDirectBufferConstructor; - logger.debug("direct buffer constructor: available"); - } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) { - logger.warn("unable to instantiate a direct buffer via constructor", e); - directBufferConstructor = null; - } - } else { - logger.debug( - "direct buffer constructor: unavailable", (Throwable) maybeDirectBufferConstructor); - directBufferConstructor = null; - } - } finally { - if (address != -1) { - UNSAFE.freeMemory(address); - } - } - DIRECT_BUFFER_CONSTRUCTOR = directBufferConstructor; - } catch (Throwable e) { - // This exception will get swallowed, but it's necessary for the static analysis that ensures - // the static fields above get initialized - final RuntimeException failure = - new RuntimeException( - "Failed to initialize MemoryUtil. You must start Java with " - + "`--add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED` " - + "(See https://arrow.apache.org/docs/java/install.html)", - e); - failure.printStackTrace(); - throw failure; - } - } - - /** - * Given a {@link ByteBuffer}, gets the address the underlying memory space. - * - * @param buf the byte buffer. 
- * @return address of the underlying memory. - */ - public static long getByteBufferAddress(ByteBuffer buf) { - return UNSAFE.getLong(buf, BYTE_BUFFER_ADDRESS_OFFSET); - } - - private MemoryUtil() {} - - /** Create nio byte buffer. */ - public static ByteBuffer directBuffer(long address, int capacity) { - if (DIRECT_BUFFER_CONSTRUCTOR != null) { - if (capacity < 0) { - throw new IllegalArgumentException("Capacity is negative, has to be positive or 0"); - } - try { - return (ByteBuffer) DIRECT_BUFFER_CONSTRUCTOR.newInstance(address, capacity); - } catch (Throwable cause) { - throw new Error(cause); - } - } - throw new UnsupportedOperationException( - "sun.misc.Unsafe or java.nio.DirectByteBuffer.(long, int) not available"); - } - - @SuppressWarnings( - "nullness:argument") // to handle null assignment on third party dependency: Unsafe - private static void copyMemory( - @Nullable Object srcBase, - long srcOffset, - @Nullable Object destBase, - long destOffset, - long bytes) { - UNSAFE.copyMemory(srcBase, srcOffset, destBase, destOffset, bytes); - } - - public static void copyMemory(long srcAddress, long destAddress, long bytes) { - UNSAFE.copyMemory(srcAddress, destAddress, bytes); - } - - public static void copyToMemory(byte[] src, long srcIndex, long destAddress, long bytes) { - copyMemory(src, BYTE_ARRAY_BASE_OFFSET + srcIndex, null, destAddress, bytes); - } - - public static void copyFromMemory(long srcAddress, byte[] dest, long destIndex, long bytes) { - copyMemory(null, srcAddress, dest, BYTE_ARRAY_BASE_OFFSET + destIndex, bytes); - } - - public static byte getByte(long address) { - return UNSAFE.getByte(address); - } - - public static void putByte(long address, byte value) { - UNSAFE.putByte(address, value); - } - - public static short getShort(long address) { - return UNSAFE.getShort(address); - } - - public static void putShort(long address, short value) { - UNSAFE.putShort(address, value); - } - - public static int getInt(long address) { - return 
UNSAFE.getInt(address); - } - - public static void putInt(long address, int value) { - UNSAFE.putInt(address, value); - } - - public static long getLong(long address) { - return UNSAFE.getLong(address); - } - - public static void putLong(long address, long value) { - UNSAFE.putLong(address, value); - } - - public static void setMemory(long address, long bytes, byte value) { - UNSAFE.setMemory(address, bytes, value); - } - - public static int getInt(byte[] bytes, int index) { - return UNSAFE.getInt(bytes, BYTE_ARRAY_BASE_OFFSET + index); - } - - public static long getLong(byte[] bytes, int index) { - return UNSAFE.getLong(bytes, BYTE_ARRAY_BASE_OFFSET + index); - } - - public static long allocateMemory(long bytes) { - return UNSAFE.allocateMemory(bytes); - } - - public static void freeMemory(long address) { - UNSAFE.freeMemory(address); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/StackTrace.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/StackTrace.java deleted file mode 100644 index 6edf7e3787ec8..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/StackTrace.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.util; - -import java.util.Arrays; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** Convenient way of obtaining and manipulating stack traces for debugging. */ -public class StackTrace { - - private final @Nullable StackTraceElement[] stackTraceElements; - - /** Constructor. Captures the current stack trace. */ - public StackTrace() { - final StackTraceElement[] stack = Thread.currentThread().getStackTrace(); - // Skip first two elements to remove getStackTrace/StackTrace. - stackTraceElements = Arrays.copyOfRange(stack, 2, stack.length); - } - - /** - * Write the stack trace to a StringBuilder. - * - * @param sb where to write it - * @param indent how many double spaces to indent each line - */ - public void writeToBuilder(final StringBuilder sb, final int indent) { - // create the indentation string - final char[] indentation = new char[indent * 2]; - Arrays.fill(indentation, ' '); - - // write the stack trace in standard Java format - for (StackTraceElement ste : stackTraceElements) { - if (ste != null) { - sb.append(indentation) - .append("at ") - .append(ste.getClassName()) - .append('.') - .append(ste.getMethodName()) - .append('(') - .append(ste.getFileName()) - .append(':') - .append(Integer.toString(ste.getLineNumber())) - .append(")\n"); - } - } - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - writeToBuilder(sb, 0); - return sb.toString(); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/ArrowBufHasher.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/ArrowBufHasher.java deleted file mode 100644 index 883381b48558a..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/ArrowBufHasher.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to 
the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.util.hash; - -import org.apache.arrow.memory.ArrowBuf; - -/** - * Utility for calculating the hash code for a consecutive memory region. This class provides the - * basic framework for efficiently calculating the hash code. - * - *

    A default light-weight implementation is given in {@link SimpleHasher}. - */ -public interface ArrowBufHasher { - - /** - * Calculates the hash code for a memory region. - * - * @param address start address of the memory region. - * @param length length of the memory region. - * @return the hash code. - */ - int hashCode(long address, long length); - - /** - * Calculates the hash code for a memory region. - * - * @param buf the buffer for the memory region. - * @param offset offset within the buffer for the memory region. - * @param length length of the memory region. - * @return the hash code. - */ - int hashCode(ArrowBuf buf, long offset, long length); -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java deleted file mode 100644 index 7907018d0a815..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory.util.hash; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.util.MemoryUtil; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** - * Implementation of the Murmur hashing algorithm. Details of the algorithm can be found in - * https://en.wikipedia.org/wiki/MurmurHash - * - *

    Murmur hashing is computationally expensive, as it involves several integer multiplications. - * However, the produced hash codes have good quality in the sense that they are uniformly - * distributed in the universe. - * - *

    Therefore, this algorithm is suitable for scenarios where uniform hashing is desired (e.g. in - * an open addressing hash table/hash set). - */ -public class MurmurHasher implements ArrowBufHasher { - - private final int seed; - - /** Creates a default Murmur hasher, with seed 0. */ - public MurmurHasher() { - this(0); - } - - /** - * Creates a Murmur hasher. - * - * @param seed the seed for the hasher. - */ - public MurmurHasher(int seed) { - this.seed = seed; - } - - @Override - public int hashCode(long address, long length) { - return hashCode(address, length, seed); - } - - @Override - public int hashCode(ArrowBuf buf, long offset, long length) { - buf.checkBytes(offset, offset + length); - return hashCode(buf.memoryAddress() + offset, length); - } - - /** - * Calculates the hash code for a memory region. - * - * @param buf the buffer for the memory region. - * @param offset offset within the buffer for the memory region. - * @param length length of the memory region. - * @param seed the seed. - * @return the hash code. - */ - public static int hashCode(ArrowBuf buf, long offset, long length, int seed) { - buf.checkBytes(offset, offset + length); - return hashCode(buf.memoryAddress() + offset, length, seed); - } - - /** - * Calculates the hash code for a memory region. - * - * @param address start address of the memory region. - * @param length length of the memory region. - * @param seed the seed. - * @return the hash code. 
- */ - public static int hashCode(long address, long length, int seed) { - int index = 0; - int hash = seed; - while (index + 4 <= length) { - int intValue = MemoryUtil.getInt(address + index); - hash = combineHashCode(hash, intValue); - index += 4; - } - - if (index < length) { - // process remaining data as a integer in little endian - int intValue = 0; - for (long i = length - 1; i >= index; i--) { - intValue <<= 8; - intValue |= (MemoryUtil.getByte(address + i) & 0x000000ff); - index += 1; - } - hash = combineHashCode(hash, intValue); - } - return finalizeHashCode(hash, length); - } - - /** - * Combine the current hash code and a new int value to calculate a new hash code. - * - * @param currentHashCode the current hash code. - * @param intValue the new int value. - * @return the new hah code. - */ - public static int combineHashCode(int currentHashCode, int intValue) { - int c1 = 0xcc9e2d51; - int c2 = 0x1b873593; - int r1 = 15; - int r2 = 13; - int m = 5; - int n = 0xe6546b64; - - int k = intValue; - k = k * c1; - k = rotateLeft(k, r1); - k = k * c2; - - int hash = currentHashCode; - hash = hash ^ k; - hash = rotateLeft(hash, r2); - hash = hash * m + n; - - return hash; - } - - /** - * Finalizing the hash code. - * - * @param hashCode the current hash code. - * @param length the length of the memory region. - * @return the finalized hash code. 
- */ - public static int finalizeHashCode(int hashCode, long length) { - hashCode = hashCode ^ (int) length; - - hashCode = hashCode ^ (hashCode >>> 16); - hashCode = hashCode * 0x85ebca6b; - hashCode = hashCode ^ (hashCode >>> 13); - hashCode = hashCode * 0xc2b2ae35; - hashCode = hashCode ^ (hashCode >>> 16); - - return hashCode; - } - - private static int rotateLeft(int value, int count) { - return (value << count) | (value >>> (32 - count)); - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) { - return true; - } - if (!(o instanceof MurmurHasher)) { - return false; - } - MurmurHasher that = (MurmurHasher) o; - return seed == that.seed; - } - - @Override - public int hashCode() { - return seed; - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java deleted file mode 100644 index 5c1384163e81e..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory.util.hash; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.util.MemoryUtil; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** - * A simple hasher that calculates the hash code of integers as is, and does not perform any - * finalization. So the computation is extremely efficient. - * - *

    This algorithm only provides the most basic semantics for the hash code. That is, if two - * objects are equal, they must have equal hash code. However, the quality of the produced hash code - * may not be good. In other words, the generated hash codes are far from being uniformly - * distributed in the universe. - * - *

    Therefore, this algorithm is suitable only for scenarios where the most basic semantics of the - * hash code is required (e.g. in scenarios that require fast and proactive data pruning) - * - *

    An object of this class is stateless, so it can be shared between threads. - */ -public class SimpleHasher implements ArrowBufHasher { - - public static SimpleHasher INSTANCE = new SimpleHasher(); - - protected SimpleHasher() {} - - /** - * Calculates the hash code for a memory region. - * - * @param address start address of the memory region. - * @param length length of the memory region. - * @return the hash code. - */ - @Override - public int hashCode(long address, long length) { - int hashValue = 0; - int index = 0; - while (index + 8 <= length) { - long longValue = MemoryUtil.getLong(address + index); - int longHash = getLongHashCode(longValue); - hashValue = combineHashCode(hashValue, longHash); - index += 8; - } - - if (index + 4 <= length) { - int intValue = MemoryUtil.getInt(address + index); - int intHash = intValue; - hashValue = combineHashCode(hashValue, intHash); - index += 4; - } - - while (index < length) { - byte byteValue = MemoryUtil.getByte(address + index); - int byteHash = byteValue; - hashValue = combineHashCode(hashValue, byteHash); - index += 1; - } - - return finalizeHashCode(hashValue); - } - - /** - * Calculates the hash code for a memory region. - * - * @param buf the buffer for the memory region. - * @param offset offset within the buffer for the memory region. - * @param length length of the memory region. - * @return the hash code. 
- */ - @Override - public int hashCode(ArrowBuf buf, long offset, long length) { - buf.checkBytes(offset, offset + length); - return hashCode(buf.memoryAddress() + offset, length); - } - - protected int combineHashCode(int currentHashCode, int newHashCode) { - return currentHashCode * 37 + newHashCode; - } - - protected int getLongHashCode(long longValue) { - return Long.hashCode(longValue); - } - - protected int finalizeHashCode(int hashCode) { - return hashCode; - } - - @Override - public int hashCode() { - return 123; - } - - @Override - public boolean equals(@Nullable Object obj) { - return obj != null && (obj instanceof SimpleHasher); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/util/AutoCloseables.java b/java/memory/memory-core/src/main/java/org/apache/arrow/util/AutoCloseables.java deleted file mode 100644 index 3796fb94bcfd6..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/util/AutoCloseables.java +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.util; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.stream.StreamSupport; - -/** Utilities for AutoCloseable classes. */ -public final class AutoCloseables { - // Utility class. Should not be instantiated - private AutoCloseables() {} - - /** - * Returns a new {@link AutoCloseable} that calls {@link #close(Iterable)} on autoCloseables - * when close is called. - */ - public static AutoCloseable all(final Collection autoCloseables) { - return new AutoCloseable() { - @Override - public void close() throws Exception { - AutoCloseables.close(autoCloseables); - } - }; - } - - /** - * Closes all autoCloseables if not null and suppresses exceptions by adding them to t. - * - * @param t the throwable to add suppressed exception to - * @param autoCloseables the closeables to close - */ - public static void close(Throwable t, AutoCloseable... autoCloseables) { - close(t, Arrays.asList(autoCloseables)); - } - - /** - * Closes all autoCloseables if not null and suppresses exceptions by adding them to t. - * - * @param t the throwable to add suppressed exception to - * @param autoCloseables the closeables to close - */ - public static void close(Throwable t, Iterable autoCloseables) { - try { - close(autoCloseables); - } catch (Exception e) { - t.addSuppressed(e); - } - } - - /** - * Closes all autoCloseables if not null and suppresses subsequent exceptions if more than one. - * - * @param autoCloseables the closeables to close - */ - public static void close(AutoCloseable... autoCloseables) throws Exception { - close(Arrays.asList(autoCloseables)); - } - - /** - * Closes all autoCloseables if not null and suppresses subsequent exceptions if more than one. 
- * - * @param ac the closeables to close - */ - public static void close(Iterable ac) throws Exception { - // this method can be called on a single object if it implements Iterable - // like for example VectorContainer make sure we handle that properly - if (ac == null) { - return; - } else if (ac instanceof AutoCloseable) { - ((AutoCloseable) ac).close(); - return; - } - - Exception topLevelException = null; - for (AutoCloseable closeable : ac) { - try { - if (closeable != null) { - closeable.close(); - } - } catch (Exception e) { - if (topLevelException == null) { - topLevelException = e; - } else if (e != topLevelException) { - topLevelException.addSuppressed(e); - } - } - } - if (topLevelException != null) { - throw topLevelException; - } - } - - /** Calls {@link #close(Iterable)} on the flattened list of closeables. */ - @SafeVarargs - public static void close(Iterable... closeables) throws Exception { - close(flatten(closeables)); - } - - @SafeVarargs - private static Iterable flatten(Iterable... closeables) { - return new Iterable() { - // Cast from Iterable to Iterable is safe in this - // context - // since there's no modification of the original collection - @SuppressWarnings("unchecked") - @Override - public Iterator iterator() { - return Arrays.stream(closeables) - .flatMap( - (Iterable i) -> - StreamSupport.stream( - ((Iterable) i).spliterator(), /*parallel=*/ false)) - .iterator(); - } - }; - } - - /** Converts ac to a {@link Iterable} filtering out any null values. */ - public static Iterable iter(AutoCloseable... ac) { - if (ac.length == 0) { - return Collections.emptyList(); - } else { - final List nonNullAc = new ArrayList<>(); - for (AutoCloseable autoCloseable : ac) { - if (autoCloseable != null) { - nonNullAc.add(autoCloseable); - } - } - return nonNullAc; - } - } - - /** A closeable wrapper that will close the underlying closeables if a commit does not occur. 
*/ - public static class RollbackCloseable implements AutoCloseable { - - private boolean commit = false; - private List closeables; - - public RollbackCloseable(AutoCloseable... closeables) { - this.closeables = new ArrayList<>(Arrays.asList(closeables)); - } - - public T add(T t) { - closeables.add(t); - return t; - } - - /** Add all of list to the rollback list. */ - public void addAll(AutoCloseable... list) { - closeables.addAll(Arrays.asList(list)); - } - - /** Add all of list to the rollback list. */ - public void addAll(Iterable list) { - for (AutoCloseable ac : list) { - closeables.add(ac); - } - } - - public void commit() { - commit = true; - } - - @Override - public void close() throws Exception { - if (!commit) { - AutoCloseables.close(closeables); - } - } - } - - /** Creates an {@link RollbackCloseable} from the given closeables. */ - public static RollbackCloseable rollbackable(AutoCloseable... closeables) { - return new RollbackCloseable(closeables); - } - - /** - * close() an {@link java.lang.AutoCloseable} without throwing a (checked) {@link - * java.lang.Exception}. This wraps the close() call with a try-catch that will rethrow an - * Exception wrapped with a {@link java.lang.RuntimeException}, providing a way to call close() - * without having to do the try-catch everywhere or propagate the Exception. - * - * @param autoCloseable the AutoCloseable to close; may be null - * @throws RuntimeException if an Exception occurs; the Exception is wrapped by the - * RuntimeException - */ - public static void closeNoChecked(final AutoCloseable autoCloseable) { - if (autoCloseable != null) { - try { - autoCloseable.close(); - } catch (final Exception e) { - throw new RuntimeException("Exception while closing: " + e.getMessage(), e); - } - } - } - - private static final AutoCloseable noOpAutocloseable = - new AutoCloseable() { - @Override - public void close() {} - }; - - /** - * Get an AutoCloseable that does nothing. 
- * - * @return A do-nothing autocloseable - */ - public static AutoCloseable noop() { - return noOpAutocloseable; - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java b/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java deleted file mode 100644 index 9293d87397f42..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Collections2.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.util; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Spliterator; -import java.util.Spliterators; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; - -/** - * Utility methods for manipulating {@link java.util.Collections} and their - * subclasses/implementations. - */ -public final class Collections2 { - private Collections2() {} - - /** Creates a {@link List} from the elements remaining in iterator. 
*/ - public static List toList(Iterator iterator) { - List target = new ArrayList<>(); - iterator.forEachRemaining(target::add); - return target; - } - - /** Converts the iterable into a new {@link List}. */ - public static List toList(Iterable iterable) { - if (iterable instanceof Collection) { - // If iterable is a collection, take advantage of it for a more efficient copy - return new ArrayList((Collection) iterable); - } - return toList(iterable.iterator()); - } - - /** Converts the iterable into a new immutable {@link List}. */ - public static List toImmutableList(Iterable iterable) { - return Collections.unmodifiableList(toList(iterable)); - } - - /** Copies the elements of map to a new unmodifiable map. */ - public static Map immutableMapCopy(Map map) { - return Collections.unmodifiableMap(new HashMap<>(map)); - } - - /** Copies the elements of list to a new unmodifiable list. */ - public static List immutableListCopy(List list) { - return Collections.unmodifiableList(new ArrayList<>(list)); - } - - /** Copies the values to a new unmodifiable list. */ - public static List asImmutableList(V... values) { - return Collections.unmodifiableList(Arrays.asList(values)); - } - - /** - * Creates a human readable string from the remaining elements in iterator. - * - *

    The output should be similar to {@code Arrays#toString(Object[])} - */ - public static String toString(Iterator iterator) { - return StreamSupport.stream( - Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) - .map(String::valueOf) - .collect(Collectors.joining(", ", "[", "]")); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java b/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java deleted file mode 100644 index 71e622b45f7a5..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/util/Preconditions.java +++ /dev/null @@ -1,1218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.util; - -import org.checkerframework.dataflow.qual.AssertMethod; - -/** - * Static convenience methods that help a method or constructor check whether it was invoked - * correctly (whether its preconditions have been met). These methods generally accept a - * {@code boolean} expression which is expected to be {@code true} (or in the case of {@code - * checkNotNull}, an object reference which is expected to be non-null). 
When {@code false} (or - * {@code null}) is passed instead, the {@code Preconditions} method throws an unchecked exception, - * which helps the calling method communicate to its caller that that caller has made - * a mistake. Example: - * - *

    {@code
    - * /**
    - *  * Returns the positive square root of the given value.
    - *  *
    - *  * @throws IllegalArgumentException if the value is negative
    - *  *}{@code /
    - * public static double sqrt(double value) {
    - *   Preconditions.checkArgument(value >= 0.0, "negative value: %s", value);
    - *   // calculate the square root
    - * }
    - *
    - * void exampleBadCaller() {
    - *   double d = sqrt(-1.0);
    - * }
    - * }
    - * - *

    In this example, {@code checkArgument} throws an {@code IllegalArgumentException} to indicate - * that {@code exampleBadCaller} made an error in its call to {@code sqrt}. - * - *

    Warning about performance

    - * - *

    The goal of this class is to improve readability of code, but in some circumstances this may - * come at a significant performance cost. Remember that parameter values for message construction - * must all be computed eagerly, and autoboxing and varargs array creation may happen as well, even - * when the precondition check then succeeds (as it should almost always do in production). In some - * circumstances these wasted CPU cycles and allocations can add up to a real problem. - * Performance-sensitive precondition checks can always be converted to the customary form: - * - *

    {@code
    - * if (value < 0.0) {
    - *   throw new IllegalArgumentException("negative value: " + value);
    - * }
    - * }
    - * - *

    Other types of preconditions

    - * - *

    Not every type of precondition failure is supported by these methods. Continue to throw - * standard JDK exceptions such as {@link java.util.NoSuchElementException} or {@link - * UnsupportedOperationException} in the situations they are intended for. - * - *

    Non-preconditions

    - * - *

    It is of course possible to use the methods of this class to check for invalid conditions - * which are not the caller's fault. Doing so is not recommended because it is - * misleading to future readers of the code and of stack traces. See Conditional failures - * explained in the Guava User Guide for more advice. - * - *

    {@code java.util.Objects.requireNonNull()}

    - * - *

    Projects which use {@code com.google.common} should generally avoid the use of {@link - * java.util.Objects#requireNonNull(Object)}. Instead, use whichever of {@link - * #checkNotNull(Object)} or {@link Verify#verifyNotNull(Object)} is appropriate to the situation. - * (The same goes for the message-accepting overloads.) - * - *

    Only {@code %s} is supported

    - * - *

    In {@code Preconditions} error message template strings, only the {@code "%s"} specifier is - * supported, not the full range of {@link java.util.Formatter} specifiers. - * - *

    More information

    - * - *

    See the Guava User Guide on using {@code - * Preconditions}. - * - * @author Kevin Bourrillion - * @since 2.0 - */ -public final class Preconditions { - private Preconditions() {} - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - * @param expression a boolean expression - * @throws IllegalArgumentException if {@code expression} is false - */ - @AssertMethod - public static void checkArgument(boolean expression) { - if (!expression) { - throw new IllegalArgumentException(); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - * @param expression a boolean expression - * @param errorMessage the exception message to use if the check fails; will be converted to a - * string using {@link String#valueOf(Object)} - * @throws IllegalArgumentException if {@code expression} is false - */ - @AssertMethod - public static void checkArgument(boolean expression, Object errorMessage) { - if (!expression) { - throw new IllegalArgumentException(String.valueOf(errorMessage)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - * @param expression a boolean expression - * @param errorMessageTemplate a template for the exception message should the check fail. The - * message is formed by replacing each {@code %s} placeholder in the template with an - * argument. These are matched by position - the first {@code %s} gets {@code - * errorMessageArgs[0]}, etc. Unmatched arguments will be appended to the formatted message in - * square braces. Unmatched placeholders will be left as-is. - * @param errorMessageArgs the arguments to be substituted into the message template. Arguments - * are converted to strings using {@link String#valueOf(Object)}. 
- * @throws IllegalArgumentException if {@code expression} is false - * @throws NullPointerException if the check fails and either {@code errorMessageTemplate} or - * {@code errorMessageArgs} is null (don't let this happen) - */ - public static void checkArgument( - boolean expression, String errorMessageTemplate, Object... errorMessageArgs) { - if (!expression) { - throw new IllegalArgumentException(format(errorMessageTemplate, errorMessageArgs)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, char p1) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, int p1) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, long p1) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, Object p1) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, char p1, char p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, char p1, int p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, char p1, long p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, char p1, Object p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, int p1, char p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, int p1, int p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, int p1, long p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, int p1, Object p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, long p1, char p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, long p1, int p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, long p1, long p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, long p1, Object p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, Object p1, char p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, Object p1, int p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, Object p1, long p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument(boolean b, String errorMessageTemplate, Object p1, Object p2) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument( - boolean b, String errorMessageTemplate, Object p1, Object p2, Object p3) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2, p3)); - } - } - - /** - * Ensures the truth of an expression involving one or more parameters to the calling method. - * - *

    See {@link #checkArgument(boolean, String, Object...)} for details. - */ - public static void checkArgument( - boolean b, String errorMessageTemplate, Object p1, Object p2, Object p3, Object p4) { - if (!b) { - throw new IllegalArgumentException(format(errorMessageTemplate, p1, p2, p3, p4)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - * @param expression a boolean expression - * @throws IllegalStateException if {@code expression} is false - */ - @AssertMethod - public static void checkState(boolean expression) { - if (!expression) { - throw new IllegalStateException(); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - * @param expression a boolean expression - * @param errorMessage the exception message to use if the check fails; will be converted to a - * string using {@link String#valueOf(Object)} - * @throws IllegalStateException if {@code expression} is false - */ - @AssertMethod - public static void checkState(boolean expression, Object errorMessage) { - if (!expression) { - throw new IllegalStateException(String.valueOf(errorMessage)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - * @param expression a boolean expression - * @param errorMessageTemplate a template for the exception message should the check fail. The - * message is formed by replacing each {@code %s} placeholder in the template with an - * argument. These are matched by position - the first {@code %s} gets {@code - * errorMessageArgs[0]}, etc. Unmatched arguments will be appended to the formatted message in - * square braces. Unmatched placeholders will be left as-is. 
- * @param errorMessageArgs the arguments to be substituted into the message template. Arguments - * are converted to strings using {@link String#valueOf(Object)}. - * @throws IllegalStateException if {@code expression} is false - * @throws NullPointerException if the check fails and either {@code errorMessageTemplate} or - * {@code errorMessageArgs} is null (don't let this happen) - */ - public static void checkState( - boolean expression, String errorMessageTemplate, Object... errorMessageArgs) { - if (!expression) { - throw new IllegalStateException(format(errorMessageTemplate, errorMessageArgs)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, char p1) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, int p1) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, long p1) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, Object p1) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, char p1, char p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, char p1, int p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, char p1, long p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, char p1, Object p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, int p1, char p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, int p1, int p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, int p1, long p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, int p1, Object p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, long p1, char p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, long p1, int p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, long p1, long p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, long p1, Object p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, Object p1, char p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, Object p1, int p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, Object p1, long p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState(boolean b, String errorMessageTemplate, Object p1, Object p2) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState( - boolean b, String errorMessageTemplate, Object p1, Object p2, Object p3) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2, p3)); - } - } - - /** - * Ensures the truth of an expression involving the state of the calling instance, but not - * involving any parameters to the calling method. - * - *

    See {@link #checkState(boolean, String, Object...)} for details. - */ - public static void checkState( - boolean b, String errorMessageTemplate, Object p1, Object p2, Object p3, Object p4) { - if (!b) { - throw new IllegalStateException(format(errorMessageTemplate, p1, p2, p3, p4)); - } - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - * @param reference an object reference - * @return the non-null reference that was validated - * @throws NullPointerException if {@code reference} is null - */ - public static T checkNotNull(T reference) { - if (reference == null) { - throw new NullPointerException(); - } - return reference; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - * @param reference an object reference - * @param errorMessage the exception message to use if the check fails; will be converted to a - * string using {@link String#valueOf(Object)} - * @return the non-null reference that was validated - * @throws NullPointerException if {@code reference} is null - */ - public static T checkNotNull(T reference, Object errorMessage) { - if (reference == null) { - throw new NullPointerException(String.valueOf(errorMessage)); - } - return reference; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - * @param reference an object reference - * @param errorMessageTemplate a template for the exception message should the check fail. The - * message is formed by replacing each {@code %s} placeholder in the template with an - * argument. These are matched by position - the first {@code %s} gets {@code - * errorMessageArgs[0]}, etc. Unmatched arguments will be appended to the formatted message in - * square braces. Unmatched placeholders will be left as-is. - * @param errorMessageArgs the arguments to be substituted into the message template. 
Arguments - * are converted to strings using {@link String#valueOf(Object)}. - * @return the non-null reference that was validated - * @throws NullPointerException if {@code reference} is null - */ - public static T checkNotNull( - T reference, String errorMessageTemplate, Object... errorMessageArgs) { - if (reference == null) { - // If either of these parameters is null, the right thing happens anyway - throw new NullPointerException(format(errorMessageTemplate, errorMessageArgs)); - } - return reference; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, char p1) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, int p1) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, long p1) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, Object p1) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, char p1, char p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, char p1, int p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, char p1, long p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, char p1, Object p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, int p1, char p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, int p1, int p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, int p1, long p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, int p1, Object p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, long p1, char p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, long p1, int p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, long p1, long p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, long p1, Object p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, Object p1, char p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, Object p1, int p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, Object p1, long p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull(T obj, String errorMessageTemplate, Object p1, Object p2) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull( - T obj, String errorMessageTemplate, Object p1, Object p2, Object p3) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2, p3)); - } - return obj; - } - - /** - * Ensures that an object reference passed as a parameter to the calling method is not null. - * - *

    See {@link #checkNotNull(Object, String, Object...)} for details. - */ - public static T checkNotNull( - T obj, String errorMessageTemplate, Object p1, Object p2, Object p3, Object p4) { - if (obj == null) { - throw new NullPointerException(format(errorMessageTemplate, p1, p2, p3, p4)); - } - return obj; - } - - /* - * All recent hotspots (as of 2009) *really* like to have the natural code - * - * if (guardExpression) { - * throw new BadException(messageExpression); - * } - * - * refactored so that messageExpression is moved to a separate String-returning method. - * - * if (guardExpression) { - * throw new BadException(badMsg(...)); - * } - * - * The alternative natural refactorings into void or Exception-returning methods are much slower. - * This is a big deal - we're talking factors of 2-8 in microbenchmarks, not just 10-20%. (This is - * a hotspot optimizer bug, which should be fixed, but that's a separate, big project). - * - * The coding pattern above is heavily used in java.util, e.g. in ArrayList. There is a - * RangeCheckMicroBenchmark in the JDK that was used to test this. - * - * But the methods in this class want to throw different exceptions, depending on the args, so it - * appears that this pattern is not directly applicable. But we can use the ridiculous, devious - * trick of throwing an exception in the middle of the construction of another exception. Hotspot - * is fine with that. - */ - - /** - * Ensures that {@code index} specifies a valid element in an array, list or string of size - * {@code size}. An element index may range from zero, inclusive, to {@code size}, exclusive. 
- * - * @param index a user-supplied index identifying an element of an array, list or string - * @param size the size of that array, list or string - * @return the value of {@code index} - * @throws IndexOutOfBoundsException if {@code index} is negative or is not less than {@code size} - * @throws IllegalArgumentException if {@code size} is negative - */ - public static int checkElementIndex(int index, int size) { - return checkElementIndex(index, size, "index"); - } - - /** - * Ensures that {@code index} specifies a valid element in an array, list or string of size - * {@code size}. An element index may range from zero, inclusive, to {@code size}, exclusive. - * - * @param index a user-supplied index identifying an element of an array, list or string - * @param size the size of that array, list or string - * @param desc the text to use to describe this index in an error message - * @return the value of {@code index} - * @throws IndexOutOfBoundsException if {@code index} is negative or is not less than {@code size} - * @throws IllegalArgumentException if {@code size} is negative - */ - public static int checkElementIndex(int index, int size, String desc) { - // Carefully optimized for execution by hotspot (explanatory comment above) - if (index < 0 || index >= size) { - throw new IndexOutOfBoundsException(badElementIndex(index, size, desc)); - } - return index; - } - - private static String badElementIndex(int index, int size, String desc) { - if (index < 0) { - return format("%s (%s) must not be negative", desc, index); - } else if (size < 0) { - throw new IllegalArgumentException("negative size: " + size); - } else { // index >= size - return format("%s (%s) must be less than size (%s)", desc, index, size); - } - } - - /** - * Ensures that {@code index} specifies a valid position in an array, list or string of - * size {@code size}. A position index may range from zero to {@code size}, inclusive. 
- * - * @param index a user-supplied index identifying a position in an array, list or string - * @param size the size of that array, list or string - * @return the value of {@code index} - * @throws IndexOutOfBoundsException if {@code index} is negative or is greater than {@code size} - * @throws IllegalArgumentException if {@code size} is negative - */ - public static long checkPositionIndex(long index, long size) { - return checkPositionIndex(index, size, "index"); - } - - /** - * Ensures that {@code index} specifies a valid position in an array, list or string of - * size {@code size}. A position index may range from zero to {@code size}, inclusive. - * - * @param index a user-supplied index identifying a position in an array, list or string - * @param size the size of that array, list or string - * @param desc the text to use to describe this index in an error message - * @return the value of {@code index} - * @throws IndexOutOfBoundsException if {@code index} is negative or is greater than {@code size} - * @throws IllegalArgumentException if {@code size} is negative - */ - public static long checkPositionIndex(long index, long size, String desc) { - // Carefully optimized for execution by hotspot (explanatory comment above) - if (index < 0 || index > size) { - throw new IndexOutOfBoundsException(badPositionIndex(index, size, desc)); - } - return index; - } - - private static String badPositionIndex(long index, long size, String desc) { - if (index < 0) { - return format("%s (%s) must not be negative", desc, index); - } else if (size < 0) { - throw new IllegalArgumentException("negative size: " + size); - } else { // index > size - return format("%s (%s) must not be greater than size (%s)", desc, index, size); - } - } - - /** - * Ensures that {@code start} and {@code end} specify a valid positions in an array, list - * or string of size {@code size}, and are in order. A position index may range from zero to - * {@code size}, inclusive. 
- * - * @param start a user-supplied index identifying a starting position in an array, list or string - * @param end a user-supplied index identifying a ending position in an array, list or string - * @param size the size of that array, list or string - * @throws IndexOutOfBoundsException if either index is negative or is greater than {@code size}, - * or if {@code end} is less than {@code start} - * @throws IllegalArgumentException if {@code size} is negative - */ - public static void checkPositionIndexes(int start, int end, int size) { - // Carefully optimized for execution by hotspot (explanatory comment above) - if (start < 0 || end < start || end > size) { - throw new IndexOutOfBoundsException(badPositionIndexes(start, end, size)); - } - } - - private static String badPositionIndexes(int start, int end, int size) { - if (start < 0 || start > size) { - return badPositionIndex(start, size, "start index"); - } - if (end < 0 || end > size) { - return badPositionIndex(end, size, "end index"); - } - // end < start - return format("end index (%s) must not be less than start index (%s)", end, start); - } - - /** - * Substitutes each {@code %s} in {@code template} with an argument. These are matched by - * position: the first {@code %s} gets {@code args[0]}, etc. If there are more arguments than - * placeholders, the unmatched arguments will be appended to the end of the formatted message in - * square braces. - * - * @param template a non-null string containing 0 or more {@code %s} placeholders. - * @param args the arguments to be substituted into the message template. Arguments are converted - * to strings using {@link String#valueOf(Object)}. Arguments can be null. - */ - // Note that this is somewhat-improperly used from Verify.java as well. - static String format(String template, Object... 
args) { - template = String.valueOf(template); // null -> "null" - - // start substituting the arguments into the '%s' placeholders - StringBuilder builder = new StringBuilder(template.length() + 16 * args.length); - int templateStart = 0; - int i = 0; - while (i < args.length) { - int placeholderStart = template.indexOf("%s", templateStart); - if (placeholderStart == -1) { - break; - } - builder.append(template, templateStart, placeholderStart); - builder.append(args[i++]); - templateStart = placeholderStart + 2; - } - builder.append(template, templateStart, template.length()); - - // if we run out of placeholders, append the extra args in square braces - if (i < args.length) { - builder.append(" ["); - builder.append(args[i++]); - while (i < args.length) { - builder.append(", "); - builder.append(args[i++]); - } - builder.append(']'); - } - - return builder.toString(); - } -} diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/util/VisibleForTesting.java b/java/memory/memory-core/src/main/java/org/apache/arrow/util/VisibleForTesting.java deleted file mode 100644 index 44473dc8cc637..0000000000000 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/util/VisibleForTesting.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.util; - -/** - * Annotation to indicate a class member or class is visible only for the purposes of testing and - * otherwise should not be referenced by other classes. - */ -public @interface VisibleForTesting {} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/CountingAllocationListener.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/CountingAllocationListener.java deleted file mode 100644 index cd2038440c7fb..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/CountingAllocationListener.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import org.checkerframework.checker.nullness.qual.Nullable; - -/** - * Counting allocation listener. It counts the number of times it has been invoked, and how much - * memory allocation it has seen When set to 'expand on fail', it attempts to expand the associated - * allocator's limit. 
- */ -final class CountingAllocationListener implements AllocationListener { - private int numPreCalls; - private int numCalls; - private int numReleaseCalls; - private int numChildren; - private long totalMem; - private long currentMem; - private boolean expandOnFail; - @Nullable BufferAllocator expandAlloc; - long expandLimit; - - CountingAllocationListener() { - this.numCalls = 0; - this.numChildren = 0; - this.totalMem = 0; - this.currentMem = 0; - this.expandOnFail = false; - this.expandAlloc = null; - this.expandLimit = 0; - } - - @Override - public void onPreAllocation(long size) { - numPreCalls++; - } - - @Override - public void onAllocation(long size) { - numCalls++; - totalMem += size; - currentMem += size; - } - - @Override - public boolean onFailedAllocation(long size, AllocationOutcome outcome) { - if (expandOnFail) { - if (expandAlloc == null) { - throw new IllegalStateException( - "expandAlloc must be non-null because this " + "listener is set to expand on failure."); - } - expandAlloc.setLimit(expandLimit); - return true; - } - return false; - } - - @Override - public void onRelease(long size) { - numReleaseCalls++; - currentMem -= size; - } - - @Override - public void onChildAdded(BufferAllocator parentAllocator, BufferAllocator childAllocator) { - ++numChildren; - } - - @Override - public void onChildRemoved(BufferAllocator parentAllocator, BufferAllocator childAllocator) { - --numChildren; - } - - void setExpandOnFail(BufferAllocator expandAlloc, long expandLimit) { - this.expandOnFail = true; - this.expandAlloc = expandAlloc; - this.expandLimit = expandLimit; - } - - int getNumPreCalls() { - return numPreCalls; - } - - int getNumReleaseCalls() { - return numReleaseCalls; - } - - int getNumCalls() { - return numCalls; - } - - int getNumChildren() { - return numChildren; - } - - long getTotalMem() { - return totalMem; - } - - long getCurrentMem() { - return currentMem; - } -} diff --git 
a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java deleted file mode 100644 index 348ed3e7933b0..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/DefaultAllocationManagerFactory.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import org.apache.arrow.memory.util.MemoryUtil; - -/** - * The default Allocation Manager Factory for a module. - * - *

    This is only used by tests and contains only a simplistic allocator method. - */ -public class DefaultAllocationManagerFactory implements AllocationManager.Factory { - - public static final AllocationManager.Factory FACTORY = new DefaultAllocationManagerFactory(); - private static final ArrowBuf EMPTY = - new ArrowBuf(ReferenceManager.NO_OP, null, 0, MemoryUtil.allocateMemory(0)); - - @Override - public AllocationManager create(BufferAllocator accountingAllocator, long size) { - return new AllocationManager(accountingAllocator) { - private final long allocatedSize = size; - private final long address = MemoryUtil.allocateMemory(size); - - @Override - public long getSize() { - return allocatedSize; - } - - @Override - protected long memoryAddress() { - return address; - } - - @Override - protected void release0() { - MemoryUtil.freeMemory(address); - } - }; - } - - @Override - public ArrowBuf empty() { - return EMPTY; - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAccountant.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAccountant.java deleted file mode 100644 index 07663b4ab67ed..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAccountant.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.fail; - -import org.junit.jupiter.api.Test; - -public class TestAccountant { - - @Test - public void basic() { - ensureAccurateReservations(null); - } - - @Test - public void nested() { - final Accountant parent = new Accountant(null, "test", 0, Long.MAX_VALUE); - ensureAccurateReservations(parent); - assertEquals(0, parent.getAllocatedMemory()); - assertEquals(parent.getLimit() - parent.getAllocatedMemory(), parent.getHeadroom()); - } - - @Test - public void multiThread() throws InterruptedException { - final Accountant parent = new Accountant(null, "test", 0, Long.MAX_VALUE); - - final int numberOfThreads = 32; - final int loops = 100; - Thread[] threads = new Thread[numberOfThreads]; - - for (int i = 0; i < numberOfThreads; i++) { - Thread t = - new Thread() { - - @Override - public void run() { - try { - for (int i = 0; i < loops; i++) { - ensureAccurateReservations(parent); - } - } catch (Exception ex) { - ex.printStackTrace(); - fail(ex.getMessage()); - } - } - }; - threads[i] = t; - t.start(); - } - - for (Thread thread : threads) { - thread.join(); - } - - assertEquals(0, parent.getAllocatedMemory()); - assertEquals(parent.getLimit() - parent.getAllocatedMemory(), parent.getHeadroom()); - } - - private void ensureAccurateReservations(Accountant outsideParent) { - final Accountant parent = new Accountant(outsideParent, "test", 0, 10); - 
assertEquals(0, parent.getAllocatedMemory()); - - final Accountant child = new Accountant(parent, "test", 2, Long.MAX_VALUE); - assertEquals(2, parent.getAllocatedMemory()); - assertEquals(10, child.getHeadroom()); - { - AllocationOutcome first = child.allocateBytes(1); - assertEquals(AllocationOutcome.Status.SUCCESS, first.getStatus()); - } - - // child will have new allocation - assertEquals(1, child.getAllocatedMemory()); - - // root has no change since within reservation - assertEquals(2, parent.getAllocatedMemory()); - - { - AllocationOutcome first = child.allocateBytes(1); - assertEquals(AllocationOutcome.Status.SUCCESS, first.getStatus()); - } - - // child will have new allocation - assertEquals(2, child.getAllocatedMemory()); - - // root has no change since within reservation - assertEquals(2, parent.getAllocatedMemory()); - - child.releaseBytes(1); - - // child will have new allocation - assertEquals(1, child.getAllocatedMemory()); - - // root has no change since within reservation - assertEquals(2, parent.getAllocatedMemory()); - - { - AllocationOutcome first = child.allocateBytes(2); - assertEquals(AllocationOutcome.Status.SUCCESS, first.getStatus()); - } - - // child will have new allocation - assertEquals(3, child.getAllocatedMemory()); - - // went beyond reservation, now in parent accountant - assertEquals(3, parent.getAllocatedMemory()); - - assertEquals(7, child.getHeadroom()); - assertEquals(7, parent.getHeadroom()); - - { - AllocationOutcome first = child.allocateBytes(7); - assertEquals(AllocationOutcome.Status.SUCCESS, first.getStatus()); - } - - // child will have new allocation - assertEquals(10, child.getAllocatedMemory()); - - // went beyond reservation, now in parent accountant - assertEquals(10, parent.getAllocatedMemory()); - - child.releaseBytes(9); - - assertEquals(1, child.getAllocatedMemory()); - assertEquals(9, child.getHeadroom()); - - // back to reservation size - assertEquals(2, parent.getAllocatedMemory()); - assertEquals(8, 
parent.getHeadroom()); - - AllocationOutcome first = child.allocateBytes(10); - assertEquals(AllocationOutcome.Status.FAILED_PARENT, first.getStatus()); - - // unchanged - assertEquals(1, child.getAllocatedMemory()); - assertEquals(2, parent.getAllocatedMemory()); - - boolean withinLimit = child.forceAllocate(10); - assertFalse(withinLimit); - - // at new limit - assertEquals(child.getAllocatedMemory(), 11); - assertEquals(parent.getAllocatedMemory(), 11); - assertEquals(-1, child.getHeadroom()); - assertEquals(-1, parent.getHeadroom()); - - child.releaseBytes(11); - assertEquals(child.getAllocatedMemory(), 0); - assertEquals(parent.getAllocatedMemory(), 2); - assertEquals(10, child.getHeadroom()); - assertEquals(8, parent.getHeadroom()); - - child.close(); - parent.close(); - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAllocationManager.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAllocationManager.java deleted file mode 100644 index b1a360a7b3f3d..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestAllocationManager.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.junit.jupiter.api.Test; - -/** Test cases for {@link AllocationManager}. */ -public class TestAllocationManager { - - @Test - public void testAllocationManagerType() { - - // test unknown allocation manager type - System.clearProperty(DefaultAllocationManagerOption.ALLOCATION_MANAGER_TYPE_PROPERTY_NAME); - DefaultAllocationManagerOption.AllocationManagerType mgrType = - DefaultAllocationManagerOption.getDefaultAllocationManagerType(); - - assertEquals(DefaultAllocationManagerOption.AllocationManagerType.Unknown, mgrType); - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java deleted file mode 100644 index 60e118d557cf6..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import ch.qos.logback.classic.Level; -import ch.qos.logback.classic.Logger; -import java.lang.reflect.Field; -import java.lang.reflect.Modifier; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.util.Arrays; -import org.apache.arrow.memory.util.Float16; -import org.junit.jupiter.api.Test; -import org.slf4j.LoggerFactory; - -public class TestArrowBuf { - - @Test - public void testSliceOutOfBoundsLength_RaisesIndexOutOfBoundsException() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(2)) { - assertEquals(2, buf.capacity()); - assertThrows(IndexOutOfBoundsException.class, () -> buf.slice(0, 3)); - } - } - - @Test - public void testSliceOutOfBoundsIndexPlusLength_RaisesIndexOutOfBoundsException() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(2)) { - assertEquals(2, buf.capacity()); - assertThrows(IndexOutOfBoundsException.class, () -> buf.slice(1, 2)); - } - } - - @Test - public void testSliceOutOfBoundsIndex_RaisesIndexOutOfBoundsException() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(2)) { - assertEquals(2, buf.capacity()); - assertThrows(IndexOutOfBoundsException.class, () -> buf.slice(3, 0)); - } - } - - @Test - public void testSliceWithinBoundsLength_ReturnsSlice() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(2)) { - assertEquals(2, buf.capacity()); - assertEquals(1, buf.slice(1, 1).capacity()); - assertEquals(2, buf.slice(0, 2).capacity()); - } - } - - @Test - public void testSetBytesSliced() { - int 
arrLength = 64; - byte[] expected = new byte[arrLength]; - for (int i = 0; i < expected.length; i++) { - expected[i] = (byte) i; - } - ByteBuffer data = ByteBuffer.wrap(expected); - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(expected.length)) { - buf.setBytes(0, data, 0, data.capacity()); - - byte[] actual = new byte[expected.length]; - buf.getBytes(0, actual); - assertArrayEquals(expected, actual); - } - } - - @Test - public void testSetBytesUnsliced() { - int arrLength = 64; - byte[] arr = new byte[arrLength]; - for (int i = 0; i < arr.length; i++) { - arr[i] = (byte) i; - } - ByteBuffer data = ByteBuffer.wrap(arr); - - int from = 10; - int to = arrLength; - byte[] expected = Arrays.copyOfRange(arr, from, to); - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(expected.length)) { - buf.setBytes(0, data, from, to - from); - - byte[] actual = new byte[expected.length]; - buf.getBytes(0, actual); - assertArrayEquals(expected, actual); - } - } - - /** ARROW-9221: guard against big-endian byte buffers. */ - @Test - public void testSetBytesBigEndian() { - final byte[] expected = new byte[64]; - for (int i = 0; i < expected.length; i++) { - expected[i] = (byte) i; - } - // Only this code path is susceptible: others use unsafe or byte-by-byte copies, while this - // override copies longs. - final ByteBuffer data = ByteBuffer.wrap(expected).asReadOnlyBuffer(); - assertFalse(data.hasArray()); - assertFalse(data.isDirect()); - assertEquals(ByteOrder.BIG_ENDIAN, data.order()); - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(expected.length)) { - buf.setBytes(0, data); - byte[] actual = new byte[expected.length]; - buf.getBytes(0, actual); - assertArrayEquals(expected, actual); - } - } - - @Test - /* - * Test that allocation history is not recorded even though - * assertions are enabled in tests (GH-34338). 
- */ - public void testEnabledAssertion() { - ((Logger) LoggerFactory.getLogger("org.apache.arrow")).setLevel(Level.TRACE); - try (BufferAllocator allocator = new RootAllocator(128)) { - allocator.buffer(2); - Exception e = assertThrows(IllegalStateException.class, allocator::close); - assertFalse(e.getMessage().contains("event log for:")); - } finally { - ((Logger) LoggerFactory.getLogger("org.apache.arrow")).setLevel(null); - } - } - - @Test - public void testEnabledHistoricalLog() { - ((Logger) LoggerFactory.getLogger("org.apache.arrow")).setLevel(Level.TRACE); - try { - Field fieldDebug = BaseAllocator.class.getField("DEBUG"); - fieldDebug.setAccessible(true); - Field modifiersDebug = Field.class.getDeclaredField("modifiers"); - modifiersDebug.setAccessible(true); - modifiersDebug.setInt(fieldDebug, fieldDebug.getModifiers() & ~Modifier.FINAL); - fieldDebug.set(null, true); - try (BufferAllocator allocator = new RootAllocator(128)) { - allocator.buffer(2); - Exception e = assertThrows(IllegalStateException.class, allocator::close); - assertTrue( - e.getMessage().contains("event log for:"), // JDK8, JDK11 - "Exception had the following message: " + e.getMessage()); - } finally { - fieldDebug.set(null, false); - } - } catch (Exception e) { - assertTrue( - e.toString().contains("java.lang.NoSuchFieldException: modifiers"), - "Exception had the following toString(): " + e); // JDK17+ - } finally { - ((Logger) LoggerFactory.getLogger("org.apache.arrow")).setLevel(null); - } - } - - @Test - public void testArrowBufFloat16() { - try (BufferAllocator allocator = new RootAllocator(); - ArrowBuf buf = allocator.buffer(1024)) { - buf.setShort(0, Float16.toFloat16(+32.875f)); - assertEquals((short) 0x501c, buf.getShort(0)); - } - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java deleted file mode 100644 index 
87e9316964dfc..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java +++ /dev/null @@ -1,1141 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import org.apache.arrow.memory.AllocationOutcomeDetails.Entry; -import org.apache.arrow.memory.rounding.RoundingPolicy; -import org.apache.arrow.memory.rounding.SegmentRoundingPolicy; -import org.apache.arrow.memory.util.AssertionUtil; -import org.apache.arrow.memory.util.MemoryUtil; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -public class 
TestBaseAllocator { - - private static final int MAX_ALLOCATION = 8 * 1024; - - /* - // ---------------------------------------- DEBUG ----------------------------------- - - @After - public void checkBuffers() { - final int bufferCount = UnsafeDirectLittleEndian.getBufferCount(); - if (bufferCount != 0) { - UnsafeDirectLittleEndian.logBuffers(logger); - UnsafeDirectLittleEndian.releaseBuffers(); - } - - assertEquals(0, bufferCount); - } - - // @AfterClass - // public static void dumpBuffers() { - // UnsafeDirectLittleEndian.logBuffers(logger); - // } - - // ---------------------------------------- DEBUG ------------------------------------ - */ - - @Test - public void test_privateMax() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - final ArrowBuf arrowBuf1 = rootAllocator.buffer(MAX_ALLOCATION / 2); - assertNotNull(arrowBuf1, "allocation failed"); - - try (final BufferAllocator childAllocator = - rootAllocator.newChildAllocator("noLimits", 0, MAX_ALLOCATION)) { - final ArrowBuf arrowBuf2 = childAllocator.buffer(MAX_ALLOCATION / 2); - assertNotNull(arrowBuf2, "allocation failed"); - arrowBuf2.getReferenceManager().release(); - } - - arrowBuf1.getReferenceManager().release(); - } - } - - @Test - public void testRootAllocator_closeWithOutstanding() throws Exception { - assertThrows( - IllegalStateException.class, - () -> { - try { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - final ArrowBuf arrowBuf = rootAllocator.buffer(512); - assertNotNull(arrowBuf, "allocation failed"); - } - } finally { - /* - * We expect there to be one unreleased underlying buffer because we're closing - * without releasing it. 
- */ - /* - // ------------------------------- DEBUG --------------------------------- - final int bufferCount = UnsafeDirectLittleEndian.getBufferCount(); - UnsafeDirectLittleEndian.releaseBuffers(); - assertEquals(1, bufferCount); - // ------------------------------- DEBUG --------------------------------- - */ - } - }); - } - - @Test - @Disabled - public void testRootAllocator_getEmpty() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - final ArrowBuf arrowBuf = rootAllocator.buffer(0); - assertNotNull(arrowBuf, "allocation failed"); - assertEquals(0, arrowBuf.capacity(), "capacity was non-zero"); - assertTrue(arrowBuf.memoryAddress() != 0, "address should be valid"); - arrowBuf.getReferenceManager().release(); - } - } - - @Disabled // TODO(DRILL-2740) - @Test - public void testAllocator_unreleasedEmpty() throws Exception { - assertThrows( - IllegalStateException.class, - () -> { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - @SuppressWarnings("unused") - final ArrowBuf arrowBuf = rootAllocator.buffer(0); - } - }); - } - - @Test - public void testAllocator_transferOwnership() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - final BufferAllocator childAllocator1 = - rootAllocator.newChildAllocator("changeOwnership1", 0, MAX_ALLOCATION); - final BufferAllocator childAllocator2 = - rootAllocator.newChildAllocator("changeOwnership2", 0, MAX_ALLOCATION); - - final ArrowBuf arrowBuf1 = childAllocator1.buffer(MAX_ALLOCATION / 4); - rootAllocator.verify(); - final ReferenceManager referenceManager = arrowBuf1.getReferenceManager(); - OwnershipTransferResult transferOwnership = - referenceManager.transferOwnership(arrowBuf1, childAllocator2); - assertEquiv(arrowBuf1, transferOwnership.getTransferredBuffer()); - final boolean allocationFit = transferOwnership.getAllocationFit(); - rootAllocator.verify(); - assertTrue(allocationFit); - - 
arrowBuf1.getReferenceManager().release(); - childAllocator1.close(); - rootAllocator.verify(); - - transferOwnership.getTransferredBuffer().getReferenceManager().release(); - childAllocator2.close(); - } - } - - static boolean equalsIgnoreOrder(Collection c1, Collection c2) { - return (c1.size() == c2.size() && c1.containsAll(c2)); - } - - @Test - public void testAllocator_getParentAndChild() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - assertNull(rootAllocator.getParentAllocator()); - - try (final BufferAllocator childAllocator1 = - rootAllocator.newChildAllocator("child1", 0, MAX_ALLOCATION)) { - assertEquals(childAllocator1.getParentAllocator(), rootAllocator); - assertTrue( - equalsIgnoreOrder(Arrays.asList(childAllocator1), rootAllocator.getChildAllocators())); - - try (final BufferAllocator childAllocator2 = - rootAllocator.newChildAllocator("child2", 0, MAX_ALLOCATION)) { - assertEquals(childAllocator2.getParentAllocator(), rootAllocator); - assertTrue( - equalsIgnoreOrder( - Arrays.asList(childAllocator1, childAllocator2), - rootAllocator.getChildAllocators())); - - try (final BufferAllocator grandChildAllocator = - childAllocator1.newChildAllocator("grand-child", 0, MAX_ALLOCATION)) { - assertEquals(grandChildAllocator.getParentAllocator(), childAllocator1); - assertTrue( - equalsIgnoreOrder( - Arrays.asList(grandChildAllocator), childAllocator1.getChildAllocators())); - } - } - } - } - } - - @Test - public void testAllocator_childRemovedOnClose() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - try (final BufferAllocator childAllocator1 = - rootAllocator.newChildAllocator("child1", 0, MAX_ALLOCATION)) { - try (final BufferAllocator childAllocator2 = - rootAllocator.newChildAllocator("child2", 0, MAX_ALLOCATION)) { - - // root has two child allocators - assertTrue( - equalsIgnoreOrder( - Arrays.asList(childAllocator1, childAllocator2), - 
rootAllocator.getChildAllocators())); - - try (final BufferAllocator grandChildAllocator = - childAllocator1.newChildAllocator("grand-child", 0, MAX_ALLOCATION)) { - - // child1 has one allocator i.e grand-child - assertTrue( - equalsIgnoreOrder( - Arrays.asList(grandChildAllocator), childAllocator1.getChildAllocators())); - } - - // grand-child closed - assertTrue( - equalsIgnoreOrder(Collections.EMPTY_SET, childAllocator1.getChildAllocators())); - } - // root has only one child left - assertTrue( - equalsIgnoreOrder(Arrays.asList(childAllocator1), rootAllocator.getChildAllocators())); - } - // all child allocators closed. - assertTrue(equalsIgnoreOrder(Collections.EMPTY_SET, rootAllocator.getChildAllocators())); - } - } - - @Test - public void testAllocator_shareOwnership() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - final BufferAllocator childAllocator1 = - rootAllocator.newChildAllocator("shareOwnership1", 0, MAX_ALLOCATION); - final BufferAllocator childAllocator2 = - rootAllocator.newChildAllocator("shareOwnership2", 0, MAX_ALLOCATION); - final ArrowBuf arrowBuf1 = childAllocator1.buffer(MAX_ALLOCATION / 4); - rootAllocator.verify(); - - // share ownership of buffer. - final ArrowBuf arrowBuf2 = arrowBuf1.getReferenceManager().retain(arrowBuf1, childAllocator2); - rootAllocator.verify(); - assertNotNull(arrowBuf2); - assertNotEquals(arrowBuf2, arrowBuf1); - assertEquiv(arrowBuf1, arrowBuf2); - - // release original buffer (thus transferring ownership to allocator 2. 
(should leave - // allocator 1 in empty state) - arrowBuf1.getReferenceManager().release(); - rootAllocator.verify(); - childAllocator1.close(); - rootAllocator.verify(); - - final BufferAllocator childAllocator3 = - rootAllocator.newChildAllocator("shareOwnership3", 0, MAX_ALLOCATION); - final ArrowBuf arrowBuf3 = arrowBuf1.getReferenceManager().retain(arrowBuf1, childAllocator3); - assertNotNull(arrowBuf3); - assertNotEquals(arrowBuf3, arrowBuf1); - assertNotEquals(arrowBuf3, arrowBuf2); - assertEquiv(arrowBuf1, arrowBuf3); - rootAllocator.verify(); - - arrowBuf2.getReferenceManager().release(); - rootAllocator.verify(); - childAllocator2.close(); - rootAllocator.verify(); - - arrowBuf3.getReferenceManager().release(); - rootAllocator.verify(); - childAllocator3.close(); - } - } - - @Test - public void testRootAllocator_createChildAndUse() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - try (final BufferAllocator childAllocator = - rootAllocator.newChildAllocator("createChildAndUse", 0, MAX_ALLOCATION)) { - final ArrowBuf arrowBuf = childAllocator.buffer(512); - assertNotNull(arrowBuf, "allocation failed"); - arrowBuf.getReferenceManager().release(); - } - } - } - - @Test - public void testRootAllocator_createChildDontClose() throws Exception { - assertThrows( - IllegalStateException.class, - () -> { - try { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - final BufferAllocator childAllocator = - rootAllocator.newChildAllocator("createChildDontClose", 0, MAX_ALLOCATION); - final ArrowBuf arrowBuf = childAllocator.buffer(512); - assertNotNull(arrowBuf, "allocation failed"); - } - } finally { - /* - * We expect one underlying buffer because we closed a child allocator without - * releasing the buffer allocated from it. 
- */ - /* - // ------------------------------- DEBUG --------------------------------- - final int bufferCount = UnsafeDirectLittleEndian.getBufferCount(); - UnsafeDirectLittleEndian.releaseBuffers(); - assertEquals(1, bufferCount); - // ------------------------------- DEBUG --------------------------------- - */ - } - }); - } - - @Test - public void testSegmentAllocator() { - RoundingPolicy policy = new SegmentRoundingPolicy(1024L); - try (RootAllocator allocator = - new RootAllocator(AllocationListener.NOOP, 1024 * 1024, policy)) { - ArrowBuf buf = allocator.buffer(798); - assertEquals(1024, buf.capacity()); - buf.setInt(333, 959); - assertEquals(959, buf.getInt(333)); - buf.close(); - - buf = allocator.buffer(1025); - assertEquals(2048, buf.capacity()); - buf.setInt(193, 939); - assertEquals(939, buf.getInt(193)); - buf.close(); - } - } - - @Test - public void testSegmentAllocator_childAllocator() { - RoundingPolicy policy = new SegmentRoundingPolicy(1024L); - try (RootAllocator allocator = new RootAllocator(AllocationListener.NOOP, 1024 * 1024, policy); - BufferAllocator childAllocator = allocator.newChildAllocator("child", 0, 512 * 1024)) { - - assertEquals("child", childAllocator.getName()); - - ArrowBuf buf = childAllocator.buffer(798); - assertEquals(1024, buf.capacity()); - buf.setInt(333, 959); - assertEquals(959, buf.getInt(333)); - buf.close(); - - buf = childAllocator.buffer(1025); - assertEquals(2048, buf.capacity()); - buf.setInt(193, 939); - assertEquals(939, buf.getInt(193)); - buf.close(); - } - } - - @Test - public void testSegmentAllocator_smallSegment() { - IllegalArgumentException e = - assertThrows(IllegalArgumentException.class, () -> new SegmentRoundingPolicy(128L)); - assertEquals("The segment size cannot be smaller than 1024", e.getMessage()); - } - - @Test - public void testSegmentAllocator_segmentSizeNotPowerOf2() { - IllegalArgumentException e = - assertThrows(IllegalArgumentException.class, () -> new SegmentRoundingPolicy(4097L)); - 
assertEquals("The segment size must be a power of 2", e.getMessage()); - } - - @Test - public void testCustomizedAllocationManager() { - try (BaseAllocator allocator = createAllocatorWithCustomizedAllocationManager()) { - final ArrowBuf arrowBuf1 = allocator.buffer(MAX_ALLOCATION); - assertNotNull(arrowBuf1, "allocation failed"); - - arrowBuf1.setInt(0, 1); - assertEquals(1, arrowBuf1.getInt(0)); - - try { - allocator.buffer(1); - fail("allocated memory beyond max allowed"); - } catch (OutOfMemoryException e) { - // expected - } - arrowBuf1.getReferenceManager().release(); - - try { - arrowBuf1.getInt(0); - fail("data read from released buffer"); - } catch (RuntimeException e) { - // expected - } - } - } - - private BaseAllocator createAllocatorWithCustomizedAllocationManager() { - return new RootAllocator( - BaseAllocator.configBuilder() - .maxAllocation(MAX_ALLOCATION) - .allocationManagerFactory( - new AllocationManager.Factory() { - @Override - public AllocationManager create( - BufferAllocator accountingAllocator, long requestedSize) { - return new AllocationManager(accountingAllocator) { - private final long address = MemoryUtil.allocateMemory(requestedSize); - - @Override - protected long memoryAddress() { - return address; - } - - @Override - protected void release0() { - MemoryUtil.setMemory(address, requestedSize, (byte) 0); - MemoryUtil.freeMemory(address); - } - - @Override - public long getSize() { - return requestedSize; - } - }; - } - - @Override - public ArrowBuf empty() { - return null; - } - }) - .build()); - } - - @Test - public void testRootAllocator_listeners() throws Exception { - CountingAllocationListener listener1 = new CountingAllocationListener(); - assertEquals(0, listener1.getNumPreCalls()); - assertEquals(0, listener1.getNumCalls()); - assertEquals(0, listener1.getNumReleaseCalls()); - assertEquals(0, listener1.getNumChildren()); - assertEquals(0, listener1.getTotalMem()); - CountingAllocationListener listener2 = new 
CountingAllocationListener(); - assertEquals(0, listener2.getNumPreCalls()); - assertEquals(0, listener2.getNumCalls()); - assertEquals(0, listener2.getNumReleaseCalls()); - assertEquals(0, listener2.getNumChildren()); - assertEquals(0, listener2.getTotalMem()); - // root and first-level child share the first listener - // second-level and third-level child share the second listener - try (final RootAllocator rootAllocator = new RootAllocator(listener1, MAX_ALLOCATION)) { - try (final BufferAllocator c1 = rootAllocator.newChildAllocator("c1", 0, MAX_ALLOCATION)) { - assertEquals(1, listener1.getNumChildren()); - final ArrowBuf buf1 = c1.buffer(16); - assertNotNull(buf1, "allocation failed"); - assertEquals(1, listener1.getNumPreCalls()); - assertEquals(1, listener1.getNumCalls()); - assertEquals(0, listener1.getNumReleaseCalls()); - assertEquals(16, listener1.getTotalMem()); - buf1.getReferenceManager().release(); - try (final BufferAllocator c2 = c1.newChildAllocator("c2", listener2, 0, MAX_ALLOCATION)) { - assertEquals( - 2, listener1.getNumChildren()); // c1 got a new child, so listener1 is notified. 
- assertEquals(0, listener2.getNumChildren()); - final ArrowBuf buf2 = c2.buffer(32); - assertNotNull(buf2, "allocation failed"); - assertEquals(1, listener1.getNumCalls()); - assertEquals(16, listener1.getTotalMem()); - assertEquals(1, listener2.getNumPreCalls()); - assertEquals(1, listener2.getNumCalls()); - assertEquals(0, listener2.getNumReleaseCalls()); - assertEquals(32, listener2.getTotalMem()); - buf2.getReferenceManager().release(); - try (final BufferAllocator c3 = c2.newChildAllocator("c3", 0, MAX_ALLOCATION)) { - assertEquals(2, listener1.getNumChildren()); - assertEquals(1, listener2.getNumChildren()); - final ArrowBuf buf3 = c3.buffer(64); - assertNotNull(buf3, "allocation failed"); - assertEquals(1, listener1.getNumPreCalls()); - assertEquals(1, listener1.getNumCalls()); - assertEquals(1, listener1.getNumReleaseCalls()); - assertEquals(16, listener1.getTotalMem()); - assertEquals(2, listener2.getNumPreCalls()); - assertEquals(2, listener2.getNumCalls()); - assertEquals(1, listener2.getNumReleaseCalls()); - assertEquals(32 + 64, listener2.getTotalMem()); - buf3.getReferenceManager().release(); - } - assertEquals(2, listener1.getNumChildren()); - assertEquals(0, listener2.getNumChildren()); // third-level child removed - } - assertEquals(1, listener1.getNumChildren()); // second-level child removed - assertEquals(0, listener2.getNumChildren()); - } - assertEquals(0, listener1.getNumChildren()); // first-level child removed - - assertEquals(2, listener2.getNumReleaseCalls()); - } - } - - @Test - public void testRootAllocator_listenerAllocationFail() throws Exception { - CountingAllocationListener listener1 = new CountingAllocationListener(); - assertEquals(0, listener1.getNumCalls()); - assertEquals(0, listener1.getTotalMem()); - // Test attempts to allocate too much from a child whose limit is set to half of the max - // allocation. The listener's callback triggers, expanding the child allocator's limit, so then - // the allocation succeeds. 
- try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - try (final BufferAllocator c1 = - rootAllocator.newChildAllocator("c1", listener1, 0, MAX_ALLOCATION / 2)) { - try { - c1.buffer(MAX_ALLOCATION); - fail("allocated memory beyond max allowed"); - } catch (OutOfMemoryException e) { - // expected - } - assertEquals(0, listener1.getNumCalls()); - assertEquals(0, listener1.getTotalMem()); - - listener1.setExpandOnFail(c1, MAX_ALLOCATION); - ArrowBuf arrowBuf = c1.buffer(MAX_ALLOCATION); - assertNotNull(arrowBuf, "allocation failed"); - assertEquals(1, listener1.getNumCalls()); - assertEquals(MAX_ALLOCATION, listener1.getTotalMem()); - arrowBuf.getReferenceManager().release(); - } - } - } - - private static void allocateAndFree(final BufferAllocator allocator) { - final ArrowBuf arrowBuf = allocator.buffer(512); - assertNotNull(arrowBuf, "allocation failed"); - arrowBuf.getReferenceManager().release(); - - final ArrowBuf arrowBuf2 = allocator.buffer(MAX_ALLOCATION); - assertNotNull(arrowBuf2, "allocation failed"); - arrowBuf2.getReferenceManager().release(); - - final int nBufs = 8; - final ArrowBuf[] arrowBufs = new ArrowBuf[nBufs]; - for (int i = 0; i < arrowBufs.length; ++i) { - ArrowBuf arrowBufi = allocator.buffer(MAX_ALLOCATION / nBufs); - assertNotNull(arrowBufi, "allocation failed"); - arrowBufs[i] = arrowBufi; - } - for (ArrowBuf arrowBufi : arrowBufs) { - arrowBufi.getReferenceManager().release(); - } - } - - @Test - public void testAllocator_manyAllocations() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - try (final BufferAllocator childAllocator = - rootAllocator.newChildAllocator("manyAllocations", 0, MAX_ALLOCATION)) { - allocateAndFree(childAllocator); - } - } - } - - @Test - public void testAllocator_overAllocate() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - try (final BufferAllocator childAllocator = - 
rootAllocator.newChildAllocator("overAllocate", 0, MAX_ALLOCATION)) { - allocateAndFree(childAllocator); - - try { - childAllocator.buffer(MAX_ALLOCATION + 1); - fail("allocated memory beyond max allowed"); - } catch (OutOfMemoryException e) { - // expected - } - } - } - } - - @Test - public void testAllocator_overAllocateParent() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - try (final BufferAllocator childAllocator = - rootAllocator.newChildAllocator("overAllocateParent", 0, MAX_ALLOCATION)) { - final ArrowBuf arrowBuf1 = rootAllocator.buffer(MAX_ALLOCATION / 2); - assertNotNull(arrowBuf1, "allocation failed"); - final ArrowBuf arrowBuf2 = childAllocator.buffer(MAX_ALLOCATION / 2); - assertNotNull(arrowBuf2, "allocation failed"); - - try { - childAllocator.buffer(MAX_ALLOCATION / 4); - fail("allocated memory beyond max allowed"); - } catch (OutOfMemoryException e) { - // expected - } - - arrowBuf1.getReferenceManager().release(); - arrowBuf2.getReferenceManager().release(); - } - } - } - - @Test - public void testAllocator_failureAtParentLimitOutcomeDetails() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - try (final BufferAllocator childAllocator = - rootAllocator.newChildAllocator("child", 0, MAX_ALLOCATION / 2)) { - try (final BufferAllocator grandChildAllocator = - childAllocator.newChildAllocator("grandchild", MAX_ALLOCATION / 4, MAX_ALLOCATION)) { - OutOfMemoryException e = - assertThrows( - OutOfMemoryException.class, () -> grandChildAllocator.buffer(MAX_ALLOCATION)); - // expected - assertTrue(e.getMessage().contains("Unable to allocate buffer")); - - assertTrue(e.getOutcomeDetails().isPresent(), "missing outcome details"); - AllocationOutcomeDetails outcomeDetails = e.getOutcomeDetails().get(); - - assertEquals(outcomeDetails.getFailedAllocator(), childAllocator); - - // The order of allocators should be child to root (request propagates to parent 
if - // child cannot satisfy the request). - Iterator iterator = outcomeDetails.allocEntries.iterator(); - AllocationOutcomeDetails.Entry first = iterator.next(); - assertEquals(MAX_ALLOCATION / 4, first.getAllocatedSize()); - assertEquals(MAX_ALLOCATION, first.getRequestedSize()); - assertFalse(first.isAllocationFailed()); - - AllocationOutcomeDetails.Entry second = iterator.next(); - assertEquals(MAX_ALLOCATION - MAX_ALLOCATION / 4, second.getRequestedSize()); - assertEquals(0, second.getAllocatedSize()); - assertTrue(second.isAllocationFailed()); - - assertFalse(iterator.hasNext()); - } - } - } - } - - @Test - public void testAllocator_failureAtRootLimitOutcomeDetails() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - try (final BufferAllocator childAllocator = - rootAllocator.newChildAllocator("child", MAX_ALLOCATION / 2, Long.MAX_VALUE)) { - try (final BufferAllocator grandChildAllocator = - childAllocator.newChildAllocator("grandchild", MAX_ALLOCATION / 4, Long.MAX_VALUE)) { - OutOfMemoryException e = - assertThrows( - OutOfMemoryException.class, () -> grandChildAllocator.buffer(MAX_ALLOCATION * 2)); - - assertTrue(e.getMessage().contains("Unable to allocate buffer")); - assertTrue(e.getOutcomeDetails().isPresent(), "missing outcome details"); - AllocationOutcomeDetails outcomeDetails = e.getOutcomeDetails().get(); - - assertEquals(outcomeDetails.getFailedAllocator(), rootAllocator); - - // The order of allocators should be child to root (request propagates to parent if - // child cannot satisfy the request). 
- Iterator iterator = outcomeDetails.allocEntries.iterator(); - AllocationOutcomeDetails.Entry first = iterator.next(); - assertEquals(MAX_ALLOCATION / 4, first.getAllocatedSize()); - assertEquals(2 * MAX_ALLOCATION, first.getRequestedSize()); - assertFalse(first.isAllocationFailed()); - - AllocationOutcomeDetails.Entry second = iterator.next(); - assertEquals(MAX_ALLOCATION / 4, second.getAllocatedSize()); - assertEquals(2 * MAX_ALLOCATION - MAX_ALLOCATION / 4, second.getRequestedSize()); - assertFalse(second.isAllocationFailed()); - - AllocationOutcomeDetails.Entry third = iterator.next(); - assertEquals(0, third.getAllocatedSize()); - assertTrue(third.isAllocationFailed()); - - assertFalse(iterator.hasNext()); - } - } - } - } - - private static void testAllocator_sliceUpBufferAndRelease( - final RootAllocator rootAllocator, final BufferAllocator bufferAllocator) { - final ArrowBuf arrowBuf1 = bufferAllocator.buffer(MAX_ALLOCATION / 2); - rootAllocator.verify(); - - final ArrowBuf arrowBuf2 = arrowBuf1.slice(16, arrowBuf1.capacity() - 32); - rootAllocator.verify(); - final ArrowBuf arrowBuf3 = arrowBuf2.slice(16, arrowBuf2.capacity() - 32); - rootAllocator.verify(); - @SuppressWarnings("unused") - final ArrowBuf arrowBuf4 = arrowBuf3.slice(16, arrowBuf3.capacity() - 32); - rootAllocator.verify(); - - arrowBuf3 - .getReferenceManager() - .release(); // since they share refcounts, one is enough to release them all - rootAllocator.verify(); - } - - @Test - public void testAllocator_createSlices() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - testAllocator_sliceUpBufferAndRelease(rootAllocator, rootAllocator); - - try (final BufferAllocator childAllocator = - rootAllocator.newChildAllocator("createSlices", 0, MAX_ALLOCATION)) { - testAllocator_sliceUpBufferAndRelease(rootAllocator, childAllocator); - } - rootAllocator.verify(); - - testAllocator_sliceUpBufferAndRelease(rootAllocator, rootAllocator); - - try 
(final BufferAllocator childAllocator = - rootAllocator.newChildAllocator("createSlices", 0, MAX_ALLOCATION)) { - try (final BufferAllocator childAllocator2 = - childAllocator.newChildAllocator("createSlices", 0, MAX_ALLOCATION)) { - final ArrowBuf arrowBuf1 = childAllocator2.buffer(MAX_ALLOCATION / 8); - @SuppressWarnings("unused") - final ArrowBuf arrowBuf2 = arrowBuf1.slice(MAX_ALLOCATION / 16, MAX_ALLOCATION / 16); - testAllocator_sliceUpBufferAndRelease(rootAllocator, childAllocator); - arrowBuf1.getReferenceManager().release(); - rootAllocator.verify(); - } - rootAllocator.verify(); - - testAllocator_sliceUpBufferAndRelease(rootAllocator, childAllocator); - } - rootAllocator.verify(); - } - } - - @Test - public void testAllocator_sliceRanges() throws Exception { - // final AllocatorOwner allocatorOwner = new NamedOwner("sliceRanges"); - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - // Populate a buffer with byte values corresponding to their indices. 
- final ArrowBuf arrowBuf = rootAllocator.buffer(256); - assertEquals(256, arrowBuf.capacity()); - assertEquals(0, arrowBuf.readerIndex()); - assertEquals(0, arrowBuf.readableBytes()); - assertEquals(0, arrowBuf.writerIndex()); - assertEquals(256, arrowBuf.writableBytes()); - - final ArrowBuf slice3 = arrowBuf.slice(); - assertEquals(0, slice3.readerIndex()); - assertEquals(0, slice3.readableBytes()); - assertEquals(0, slice3.writerIndex()); - // assertEquals(256, slice3.capacity()); - // assertEquals(256, slice3.writableBytes()); - - for (int i = 0; i < 256; ++i) { - arrowBuf.writeByte(i); - } - assertEquals(0, arrowBuf.readerIndex()); - assertEquals(256, arrowBuf.readableBytes()); - assertEquals(256, arrowBuf.writerIndex()); - assertEquals(0, arrowBuf.writableBytes()); - - final ArrowBuf slice1 = arrowBuf.slice(); - assertEquals(0, slice1.readerIndex()); - assertEquals(256, slice1.readableBytes()); - for (int i = 0; i < 10; ++i) { - assertEquals(i, slice1.readByte()); - } - assertEquals(256 - 10, slice1.readableBytes()); - for (int i = 0; i < 256; ++i) { - assertEquals((byte) i, slice1.getByte(i)); - } - - final ArrowBuf slice2 = arrowBuf.slice(25, 25); - assertEquals(0, slice2.readerIndex()); - assertEquals(25, slice2.readableBytes()); - for (int i = 25; i < 50; ++i) { - assertEquals(i, slice2.readByte()); - } - - /* - for(int i = 256; i > 0; --i) { - slice3.writeByte(i - 1); - } - for(int i = 0; i < 256; ++i) { - assertEquals(255 - i, slice1.getByte(i)); - } - */ - - arrowBuf.getReferenceManager().release(); // all the derived buffers share this fate - } - } - - @Test - public void testAllocator_slicesOfSlices() throws Exception { - // final AllocatorOwner allocatorOwner = new NamedOwner("slicesOfSlices"); - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - // Populate a buffer with byte values corresponding to their indices. 
- final ArrowBuf arrowBuf = rootAllocator.buffer(256); - for (int i = 0; i < 256; ++i) { - arrowBuf.writeByte(i); - } - - // Slice it up. - final ArrowBuf slice0 = arrowBuf.slice(0, arrowBuf.capacity()); - for (int i = 0; i < 256; ++i) { - assertEquals((byte) i, arrowBuf.getByte(i)); - } - - final ArrowBuf slice10 = slice0.slice(10, arrowBuf.capacity() - 10); - for (int i = 10; i < 256; ++i) { - assertEquals((byte) i, slice10.getByte(i - 10)); - } - - final ArrowBuf slice20 = slice10.slice(10, arrowBuf.capacity() - 20); - for (int i = 20; i < 256; ++i) { - assertEquals((byte) i, slice20.getByte(i - 20)); - } - - final ArrowBuf slice30 = slice20.slice(10, arrowBuf.capacity() - 30); - for (int i = 30; i < 256; ++i) { - assertEquals((byte) i, slice30.getByte(i - 30)); - } - - arrowBuf.getReferenceManager().release(); - } - } - - @Test - public void testAllocator_transferSliced() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - final BufferAllocator childAllocator1 = - rootAllocator.newChildAllocator("transferSliced1", 0, MAX_ALLOCATION); - final BufferAllocator childAllocator2 = - rootAllocator.newChildAllocator("transferSliced2", 0, MAX_ALLOCATION); - - final ArrowBuf arrowBuf1 = childAllocator1.buffer(MAX_ALLOCATION / 8); - final ArrowBuf arrowBuf2 = childAllocator2.buffer(MAX_ALLOCATION / 8); - - final ArrowBuf arrowBuf1s = arrowBuf1.slice(0, arrowBuf1.capacity() / 2); - final ArrowBuf arrowBuf2s = arrowBuf2.slice(0, arrowBuf2.capacity() / 2); - - rootAllocator.verify(); - - OwnershipTransferResult result1 = - arrowBuf2s.getReferenceManager().transferOwnership(arrowBuf2s, childAllocator1); - assertEquiv(arrowBuf2s, result1.getTransferredBuffer()); - rootAllocator.verify(); - OwnershipTransferResult result2 = - arrowBuf1s.getReferenceManager().transferOwnership(arrowBuf1s, childAllocator2); - assertEquiv(arrowBuf1s, result2.getTransferredBuffer()); - rootAllocator.verify(); - - 
result1.getTransferredBuffer().getReferenceManager().release(); - result2.getTransferredBuffer().getReferenceManager().release(); - - arrowBuf1s.getReferenceManager().release(); // releases arrowBuf1 - arrowBuf2s.getReferenceManager().release(); // releases arrowBuf2 - - childAllocator1.close(); - childAllocator2.close(); - } - } - - @Test - public void testAllocator_shareSliced() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - final BufferAllocator childAllocator1 = - rootAllocator.newChildAllocator("transferSliced", 0, MAX_ALLOCATION); - final BufferAllocator childAllocator2 = - rootAllocator.newChildAllocator("transferSliced", 0, MAX_ALLOCATION); - - final ArrowBuf arrowBuf1 = childAllocator1.buffer(MAX_ALLOCATION / 8); - final ArrowBuf arrowBuf2 = childAllocator2.buffer(MAX_ALLOCATION / 8); - - final ArrowBuf arrowBuf1s = arrowBuf1.slice(0, arrowBuf1.capacity() / 2); - final ArrowBuf arrowBuf2s = arrowBuf2.slice(0, arrowBuf2.capacity() / 2); - - rootAllocator.verify(); - - final ArrowBuf arrowBuf2s1 = - arrowBuf2s.getReferenceManager().retain(arrowBuf2s, childAllocator1); - assertEquiv(arrowBuf2s, arrowBuf2s1); - final ArrowBuf arrowBuf1s2 = - arrowBuf1s.getReferenceManager().retain(arrowBuf1s, childAllocator2); - assertEquiv(arrowBuf1s, arrowBuf1s2); - rootAllocator.verify(); - - arrowBuf1s.getReferenceManager().release(); // releases arrowBuf1 - arrowBuf2s.getReferenceManager().release(); // releases arrowBuf2 - rootAllocator.verify(); - - arrowBuf2s1.getReferenceManager().release(); // releases the shared arrowBuf2 slice - arrowBuf1s2.getReferenceManager().release(); // releases the shared arrowBuf1 slice - - childAllocator1.close(); - childAllocator2.close(); - } - } - - @Test - public void testAllocator_transferShared() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - final BufferAllocator childAllocator1 = - 
rootAllocator.newChildAllocator("transferShared1", 0, MAX_ALLOCATION); - final BufferAllocator childAllocator2 = - rootAllocator.newChildAllocator("transferShared2", 0, MAX_ALLOCATION); - final BufferAllocator childAllocator3 = - rootAllocator.newChildAllocator("transferShared3", 0, MAX_ALLOCATION); - - final ArrowBuf arrowBuf1 = childAllocator1.buffer(MAX_ALLOCATION / 8); - - boolean allocationFit; - - ArrowBuf arrowBuf2 = arrowBuf1.getReferenceManager().retain(arrowBuf1, childAllocator2); - rootAllocator.verify(); - assertNotNull(arrowBuf2); - assertNotEquals(arrowBuf2, arrowBuf1); - assertEquiv(arrowBuf1, arrowBuf2); - - final ReferenceManager refManager1 = arrowBuf1.getReferenceManager(); - final OwnershipTransferResult result1 = - refManager1.transferOwnership(arrowBuf1, childAllocator3); - allocationFit = result1.getAllocationFit(); - final ArrowBuf arrowBuf3 = result1.getTransferredBuffer(); - assertTrue(allocationFit); - assertEquiv(arrowBuf1, arrowBuf3); - rootAllocator.verify(); - - // Since childAllocator3 now has childAllocator1's buffer, 1, can close - arrowBuf1.getReferenceManager().release(); - childAllocator1.close(); - rootAllocator.verify(); - - arrowBuf2.getReferenceManager().release(); - childAllocator2.close(); - rootAllocator.verify(); - - final BufferAllocator childAllocator4 = - rootAllocator.newChildAllocator("transferShared4", 0, MAX_ALLOCATION); - final ReferenceManager refManager3 = arrowBuf3.getReferenceManager(); - final OwnershipTransferResult result3 = - refManager3.transferOwnership(arrowBuf3, childAllocator4); - allocationFit = result3.getAllocationFit(); - final ArrowBuf arrowBuf4 = result3.getTransferredBuffer(); - assertTrue(allocationFit); - assertEquiv(arrowBuf3, arrowBuf4); - rootAllocator.verify(); - - arrowBuf3.getReferenceManager().release(); - childAllocator3.close(); - rootAllocator.verify(); - - arrowBuf4.getReferenceManager().release(); - childAllocator4.close(); - rootAllocator.verify(); - } - } - - @Test - public 
void testAllocator_unclaimedReservation() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - try (final BufferAllocator childAllocator1 = - rootAllocator.newChildAllocator("unclaimedReservation", 0, MAX_ALLOCATION)) { - try (final AllocationReservation reservation = childAllocator1.newReservation()) { - assertTrue(reservation.add(64L)); - } - rootAllocator.verify(); - } - } - } - - @Test - public void testAllocator_claimedReservation() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - - try (final BufferAllocator childAllocator1 = - rootAllocator.newChildAllocator("claimedReservation", 0, MAX_ALLOCATION)) { - - try (final AllocationReservation reservation = childAllocator1.newReservation()) { - assertTrue(reservation.add(32L)); - assertTrue(reservation.add(32L)); - - final ArrowBuf arrowBuf = reservation.allocateBuffer(); - assertEquals(64, arrowBuf.capacity()); - rootAllocator.verify(); - - arrowBuf.getReferenceManager().release(); - rootAllocator.verify(); - } - rootAllocator.verify(); - } - } - } - - @Test - public void testInitReservationAndLimit() throws Exception { - try (final RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - try (final BufferAllocator childAllocator = - rootAllocator.newChildAllocator("child", 2048, 4096)) { - assertEquals(2048, childAllocator.getInitReservation()); - assertEquals(4096, childAllocator.getLimit()); - } - } - } - - @Test - public void multiple() throws Exception { - final String owner = "test"; - try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { - - final int op = 100000; - - BufferAllocator frag1 = allocator.newChildAllocator(owner, 1500000, Long.MAX_VALUE); - BufferAllocator frag2 = allocator.newChildAllocator(owner, 500000, Long.MAX_VALUE); - - allocator.verify(); - - BufferAllocator allocator11 = frag1.newChildAllocator(owner, op, Long.MAX_VALUE); - ArrowBuf b11 = 
allocator11.buffer(1000000); - - allocator.verify(); - - BufferAllocator allocator12 = frag1.newChildAllocator(owner, op, Long.MAX_VALUE); - ArrowBuf b12 = allocator12.buffer(500000); - - allocator.verify(); - - BufferAllocator allocator21 = frag1.newChildAllocator(owner, op, Long.MAX_VALUE); - - allocator.verify(); - - BufferAllocator allocator22 = frag2.newChildAllocator(owner, op, Long.MAX_VALUE); - ArrowBuf b22 = allocator22.buffer(2000000); - - allocator.verify(); - - BufferAllocator frag3 = allocator.newChildAllocator(owner, 1000000, Long.MAX_VALUE); - - allocator.verify(); - - BufferAllocator allocator31 = frag3.newChildAllocator(owner, op, Long.MAX_VALUE); - ArrowBuf b31a = allocator31.buffer(200000); - - allocator.verify(); - - // Previously running operator completes - b22.getReferenceManager().release(); - - allocator.verify(); - - allocator22.close(); - - b31a.getReferenceManager().release(); - allocator31.close(); - - b12.getReferenceManager().release(); - allocator12.close(); - - allocator21.close(); - - b11.getReferenceManager().release(); - allocator11.close(); - - frag1.close(); - frag2.close(); - frag3.close(); - } - } - - // This test needs to run in non-debug mode. So disabling the assertion status through class - // loader for this. - // The test passes if run individually with -Dtest=TestBaseAllocator#testMemoryLeakWithReservation - // but fails generally since the assertion status cannot be changed once the class is initialized. 
- // So setting the test to @ignore - @Test - @Disabled - public void testMemoryLeakWithReservation() throws Exception { - // disabling assertion status - AssertionUtil.class - .getClassLoader() - .setClassAssertionStatus(AssertionUtil.class.getName(), false); - - try (RootAllocator rootAllocator = new RootAllocator(MAX_ALLOCATION)) { - ChildAllocator childAllocator1 = - (ChildAllocator) rootAllocator.newChildAllocator("child1", 1024, MAX_ALLOCATION); - rootAllocator.verify(); - - ChildAllocator childAllocator2 = - (ChildAllocator) childAllocator1.newChildAllocator("child2", 1024, MAX_ALLOCATION); - rootAllocator.verify(); - - childAllocator2.buffer(256); - - Exception exception = assertThrows(IllegalStateException.class, childAllocator2::close); - String exMessage = exception.getMessage(); - assertTrue(exMessage.contains("Memory leaked: (256)")); - - exception = assertThrows(IllegalStateException.class, childAllocator1::close); - exMessage = exception.getMessage(); - assertTrue(exMessage.contains("Memory leaked: (256)")); - } - } - - @Test - public void testOverlimit() { - try (BufferAllocator allocator = new RootAllocator(1024)) { - try (BufferAllocator child1 = allocator.newChildAllocator("ChildA", 0, 1024); - BufferAllocator child2 = allocator.newChildAllocator("ChildB", 1024, 1024)) { - assertThrows( - OutOfMemoryException.class, - () -> { - ArrowBuf buf1 = child1.buffer(8); - buf1.close(); - }); - assertEquals(0, child1.getAllocatedMemory()); - assertEquals(0, child2.getAllocatedMemory()); - assertEquals(1024, allocator.getAllocatedMemory()); - } - } - } - - @Test - public void testOverlimitOverflow() { - // Regression test for https://github.com/apache/arrow/issues/35960 - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { - try (BufferAllocator child1 = allocator.newChildAllocator("ChildA", 0, Long.MAX_VALUE); - BufferAllocator child2 = - allocator.newChildAllocator("ChildB", Long.MAX_VALUE, Long.MAX_VALUE)) { - assertThrows( - 
OutOfMemoryException.class, - () -> { - ArrowBuf buf1 = child1.buffer(1024); - buf1.close(); - }); - assertEquals(0, child1.getAllocatedMemory()); - assertEquals(0, child2.getAllocatedMemory()); - assertEquals(Long.MAX_VALUE, allocator.getAllocatedMemory()); - } - } - } - - public void assertEquiv(ArrowBuf origBuf, ArrowBuf newBuf) { - assertEquals(origBuf.readerIndex(), newBuf.readerIndex()); - assertEquals(origBuf.writerIndex(), newBuf.writerIndex()); - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBoundaryChecking.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBoundaryChecking.java deleted file mode 100644 index 7d4d1e7b67260..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBoundaryChecking.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.lang.reflect.Field; -import java.net.URLClassLoader; -import org.junit.jupiter.api.Test; - -/** Test cases for evaluating the value of {@link BoundsChecking#BOUNDS_CHECKING_ENABLED}. */ -public class TestBoundaryChecking { - - /** - * Get a copy of the current class loader. - * - * @return the newly created class loader. - */ - private ClassLoader copyClassLoader() { - ClassLoader curClassLoader = this.getClass().getClassLoader(); - if (curClassLoader instanceof URLClassLoader) { - // for Java 1.8 - return new URLClassLoader(((URLClassLoader) curClassLoader).getURLs(), null); - } - - // for Java 1.9 and Java 11. - return null; - } - - /** - * Get the value of flag {@link BoundsChecking#BOUNDS_CHECKING_ENABLED}. - * - * @param classLoader the class loader from which to get the flag value. - * @return value of the flag. - */ - private boolean getFlagValue(ClassLoader classLoader) throws Exception { - Class clazz = classLoader.loadClass("org.apache.arrow.memory.BoundsChecking"); - Field field = clazz.getField("BOUNDS_CHECKING_ENABLED"); - return (Boolean) field.get(null); - } - - /** - * Ensure the flag for bounds checking is enabled by default. This will protect users from JVM - * crashes. - */ - @Test - public void testDefaultValue() throws Exception { - ClassLoader classLoader = copyClassLoader(); - if (classLoader != null) { - boolean boundsCheckingEnabled = getFlagValue(classLoader); - assertTrue(boundsCheckingEnabled); - } - } - - /** - * Test setting the bounds checking flag by the old property. - * - * @throws Exception if loading class {@link BoundsChecking#BOUNDS_CHECKING_ENABLED} fails. 
- */ - @Test - public void testEnableOldProperty() throws Exception { - String savedOldProperty = System.getProperty("drill.enable_unsafe_memory_access"); - System.setProperty("drill.enable_unsafe_memory_access", "true"); - - ClassLoader classLoader = copyClassLoader(); - if (classLoader != null) { - boolean boundsCheckingEnabled = getFlagValue(classLoader); - assertFalse(boundsCheckingEnabled); - } - - // restore system property - if (savedOldProperty != null) { - System.setProperty("drill.enable_unsafe_memory_access", savedOldProperty); - } else { - System.clearProperty("drill.enable_unsafe_memory_access"); - } - } - - /** - * Test setting the bounds checking flag by the new property. - * - * @throws Exception if loading class {@link BoundsChecking#BOUNDS_CHECKING_ENABLED} fails. - */ - @Test - public void testEnableNewProperty() throws Exception { - String savedNewProperty = System.getProperty("arrow.enable_unsafe_memory_access"); - - System.setProperty("arrow.enable_unsafe_memory_access", "true"); - - ClassLoader classLoader = copyClassLoader(); - if (classLoader != null) { - boolean boundsCheckingEnabled = getFlagValue(classLoader); - assertFalse(boundsCheckingEnabled); - } - - // restore system property - if (savedNewProperty != null) { - System.setProperty("arrow.enable_unsafe_memory_access", savedNewProperty); - } else { - System.clearProperty("arrow.enable_unsafe_memory_access"); - } - } - - /** - * Test setting the bounds checking flag by both old and new properties. In this case, the new - * property should take precedence. - * - * @throws Exception if loading class {@link BoundsChecking#BOUNDS_CHECKING_ENABLED} fails. 
- */ - @Test - public void testEnableBothProperties() throws Exception { - String savedOldProperty = System.getProperty("drill.enable_unsafe_memory_access"); - String savedNewProperty = System.getProperty("arrow.enable_unsafe_memory_access"); - - System.setProperty("drill.enable_unsafe_memory_access", "false"); - System.setProperty("arrow.enable_unsafe_memory_access", "true"); - - // new property takes precedence. - ClassLoader classLoader = copyClassLoader(); - if (classLoader != null) { - boolean boundsCheckingEnabled = getFlagValue(classLoader); - assertFalse(boundsCheckingEnabled); - } - - // restore system property - if (savedOldProperty != null) { - System.setProperty("drill.enable_unsafe_memory_access", savedOldProperty); - } else { - System.clearProperty("drill.enable_unsafe_memory_access"); - } - - if (savedNewProperty != null) { - System.setProperty("arrow.enable_unsafe_memory_access", savedNewProperty); - } else { - System.clearProperty("arrow.enable_unsafe_memory_access"); - } - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestForeignAllocation.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestForeignAllocation.java deleted file mode 100644 index b19453df5e109..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestForeignAllocation.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.memory.util.MemoryUtil; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestForeignAllocation { - BufferAllocator allocator; - - @BeforeEach - public void before() { - allocator = new RootAllocator(); - } - - @AfterEach - public void after() { - allocator.close(); - } - - @Test - public void wrapForeignAllocation() { - final long bufferSize = 16; - UnsafeForeignAllocation allocation = new UnsafeForeignAllocation(bufferSize); - try { - assertEquals(0, allocator.getAllocatedMemory()); - ArrowBuf buf = allocator.wrapForeignAllocation(allocation); - assertEquals(bufferSize, buf.capacity()); - buf.close(); - assertTrue(allocation.released); - } finally { - allocation.release0(); - } - assertEquals(0, allocator.getAllocatedMemory()); - } - - @Test - public void wrapForeignAllocationWithAllocationListener() { - final long bufferSize = 16; - - final CountingAllocationListener listener = new CountingAllocationListener(); - try (BufferAllocator listenedAllocator = - allocator.newChildAllocator("child", listener, 0L, allocator.getLimit())) { - UnsafeForeignAllocation allocation = new UnsafeForeignAllocation(bufferSize); - try { - assertEquals(0, 
listenedAllocator.getAllocatedMemory()); - ArrowBuf buf = listenedAllocator.wrapForeignAllocation(allocation); - assertEquals(bufferSize, buf.capacity()); - assertEquals(16, listener.getCurrentMem()); - buf.close(); - assertEquals(0, listener.getCurrentMem()); - assertTrue(allocation.released); - } finally { - allocation.release0(); - } - assertEquals(0, listenedAllocator.getAllocatedMemory()); - } - assertEquals(1, listener.getNumPreCalls()); - assertEquals(1, listener.getNumCalls()); - assertEquals(1, listener.getNumReleaseCalls()); - assertEquals(16, listener.getTotalMem()); - } - - @Test - public void wrapForeignAllocationFailedWithAllocationListener() { - assertThrows( - OutOfMemoryException.class, - () -> { - final long bufferSize = 16; - final long limit = bufferSize - 1; - final CountingAllocationListener listener = new CountingAllocationListener(); - try (BufferAllocator listenedAllocator = - allocator.newChildAllocator("child", listener, 0L, limit)) { - UnsafeForeignAllocation allocation = new UnsafeForeignAllocation(bufferSize); - try { - assertEquals(0, listenedAllocator.getAllocatedMemory()); - ArrowBuf buf = listenedAllocator.wrapForeignAllocation(allocation); - assertEquals(bufferSize, buf.capacity()); - buf.close(); - assertTrue(allocation.released); - } finally { - allocation.release0(); - } - } - }); - } - - @Test - public void wrapForeignAllocationWithAllocationListenerReclaimingSpace() { - final long bufferSize = 16; - final long limit = 2 * bufferSize - 1; - - final List buffersToBeFreed = new ArrayList<>(); - final AllocationListener listener = - new AllocationListener() { - @Override - public boolean onFailedAllocation(long size, AllocationOutcome outcome) { - buffersToBeFreed.forEach(ArrowBuf::close); - return true; - } - }; - - try (BufferAllocator listenedAllocator = - allocator.newChildAllocator("child", listener, 0L, limit)) { - final ArrowBuf buffer1 = listenedAllocator.buffer(bufferSize); - buffersToBeFreed.add(buffer1); - 
UnsafeForeignAllocation allocation = new UnsafeForeignAllocation(bufferSize); - try (final ArrowBuf buffer2 = listenedAllocator.wrapForeignAllocation(allocation)) { - assertEquals(bufferSize, buffer2.capacity()); - assertEquals( - 0, buffer1.getReferenceManager().getRefCount()); // buffer1 was closed by listener - } - } - } - - private static class UnsafeForeignAllocation extends ForeignAllocation { - boolean released = false; - - public UnsafeForeignAllocation(long bufferSize) { - super(bufferSize, MemoryUtil.allocateMemory(bufferSize)); - } - - @Override - protected void release0() { - if (!released) { - MemoryUtil.freeMemory(memoryAddress()); - released = true; - } - } - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java deleted file mode 100644 index 5fbf6a55dc53f..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.junit.jupiter.api.Test; - -/** To test simplified implementation of IdentityHashMap. */ -public class TestLowCostIdentityHashMap { - - @Test - public void testIdentityHashMap() { - LowCostIdentityHashMap hashMap = new LowCostIdentityHashMap<>(); - - StringWithKey obj1 = new StringWithKey("s1key", "s1value"); - StringWithKey obj2 = new StringWithKey("s2key", "s2value"); - StringWithKey obj3 = new StringWithKey("s3key", "s3value"); - StringWithKey obj4 = new StringWithKey("s1key", "s4value"); - StringWithKey obj5 = new StringWithKey("s5key", "s5value"); - - assertNull(hashMap.put(obj1)); - assertNull(hashMap.put(obj2)); - assertNull(hashMap.put(obj3)); - assertEquals(obj1, hashMap.put(obj4)); - assertNull(hashMap.put(obj5)); - - assertEquals(4, hashMap.size()); - - assertEquals(obj4, hashMap.get("s1key")); - - assertNull(hashMap.remove("abc")); - - assertEquals(obj3, hashMap.remove("s3key")); - - assertEquals(3, hashMap.size()); - - assertFalse(hashMap.isEmpty()); - - StringWithKey nextValue = hashMap.getNextValue(); - - assertNotNull(nextValue); - - assertTrue( - (hashMap.get("s1key") == nextValue - || hashMap.get("s2key") == nextValue - || hashMap.get("s5key") == nextValue)); - - assertTrue(hashMap.containsValue(obj4)); - assertTrue(hashMap.containsValue(obj2)); - assertTrue(hashMap.containsValue(obj5)); - - assertEquals(obj4, hashMap.remove("s1key")); - - nextValue = hashMap.getNextValue(); - - assertNotNull(nextValue); - - assertTrue(hashMap.get("s2key") == nextValue || hashMap.get("s5key") == nextValue); - - assertEquals(2, hashMap.size()); - - assertEquals(obj2, hashMap.remove("s2key")); - 
assertEquals(obj5, hashMap.remove("s5key")); - - assertEquals(0, hashMap.size()); - - assertTrue(hashMap.isEmpty()); - } - - @Test - public void testLargeMap() { - LowCostIdentityHashMap hashMap = new LowCostIdentityHashMap<>(); - - String[] keys = new String[200]; - for (int i = 0; i < 200; i++) { - keys[i] = "s" + i + "key"; - } - - for (int i = 0; i < 100; i++) { - if (i % 5 == 0 && i != 0) { - StringWithKey obj = new StringWithKey(keys[i - 5], "s" + i + "value"); - StringWithKey retObj = hashMap.put(obj); - assertNotNull(retObj); - StringWithKey obj1 = new StringWithKey(keys[i], "s" + 2 * i + "value"); - StringWithKey retObj1 = hashMap.put(obj1); - assertNull(retObj1); - } else { - StringWithKey obj = new StringWithKey(keys[i], "s" + i + "value"); - StringWithKey retObj = hashMap.put(obj); - assertNull(retObj); - } - } - assertEquals(100, hashMap.size()); - for (int i = 0; i < 100; i++) { - StringWithKey returnObj = hashMap.get(keys[i]); - assertNotNull(returnObj); - if (i == 95) { - assertEquals("s190value", returnObj.getValue()); - continue; - } - if (i % 5 == 0) { - assertEquals("s" + (i + 5) + "value", returnObj.getValue()); - } else { - assertEquals("s" + i + "value", returnObj.getValue()); - } - } - - for (int i = 0; i < 100; i++) { - if (i % 4 == 0) { - StringWithKey returnObj = hashMap.remove(keys[i]); - assertNotNull(returnObj); - assertFalse(hashMap.containsKey(keys[i])); - } - StringWithKey obj = new StringWithKey(keys[100 + i], "s" + (100 + i) + "value"); - StringWithKey retObj = hashMap.put(obj); - assertNull(retObj); - assertTrue(hashMap.containsKey(keys[100 + i])); - } - assertEquals(175, hashMap.size()); - for (int i = 0; i < 100; i++) { - StringWithKey retObj = hashMap.getNextValue(); - assertNotNull(retObj); - hashMap.remove(retObj.getKey()); - } - assertFalse(hashMap.isEmpty()); - assertEquals(75, hashMap.size()); - hashMap.clear(); - assertTrue(hashMap.isEmpty()); - } - - private static class StringWithKey implements ValueWithKeyIncluded { - 
- private final String myValue; - private final String myKey; - - StringWithKey(String myKey, String myValue) { - this.myKey = myKey; - this.myValue = myValue; - } - - @Override - public String getKey() { - return myKey; - } - - String getValue() { - return myValue; - } - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestOpens.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestOpens.java deleted file mode 100644 index b5e0a71e7ee0e..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestOpens.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.condition.JRE.JAVA_16; - -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.EnabledForJreRange; - -public class TestOpens { - /** Instantiating the RootAllocator should poke MemoryUtil and fail. 
*/ - @Test - @EnabledForJreRange(min = JAVA_16) - public void testMemoryUtilFailsLoudly() { - // This test is configured by Maven to run WITHOUT add-opens. So this should fail on JDK16+ - // (where JEP396 means that add-opens is required to access JDK internals). - // The test will likely fail in your IDE if it doesn't correctly pick this up. - Throwable e = - assertThrows( - Throwable.class, - () -> { - BufferAllocator allocator = new RootAllocator(); - allocator.close(); - }); - boolean found = false; - while (e != null) { - e = e.getCause(); - if (e instanceof RuntimeException - && e.getMessage().contains("Failed to initialize MemoryUtil")) { - found = true; - break; - } - } - assertTrue(found, "Expected exception was not thrown"); - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java deleted file mode 100644 index bb94ed71a8dee..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory.util; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNotSame; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.memory.util.hash.SimpleHasher; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link ArrowBufPointer}. */ -public class TestArrowBufPointer { - - private final int BUFFER_LENGTH = 1024; - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testArrowBufPointersEqual() { - try (ArrowBuf buf1 = allocator.buffer(BUFFER_LENGTH); - ArrowBuf buf2 = allocator.buffer(BUFFER_LENGTH)) { - for (int i = 0; i < BUFFER_LENGTH / 4; i++) { - buf1.setInt(i * 4L, i * 1234); - buf2.setInt(i * 4L, i * 1234); - } - - ArrowBufPointer ptr1 = new ArrowBufPointer(null, 0, 100); - ArrowBufPointer ptr2 = new ArrowBufPointer(null, 100, 5032); - assertEquals(ptr1, ptr2); - for (int i = 0; i < BUFFER_LENGTH / 4; i++) { - ptr1.set(buf1, i * 4L, 4); - ptr2.set(buf2, i * 4L, 4); - assertEquals(ptr1, ptr2); - } - } - } - - @Test - public void testArrowBufPointersHashCode() { - final int vectorLength = 100; - try (ArrowBuf buf1 = allocator.buffer(vectorLength * 4); - ArrowBuf buf2 = allocator.buffer(vectorLength * 4)) { - for (int i = 0; i < vectorLength; i++) { - buf1.setInt(i * 4L, i); - buf2.setInt(i * 4L, i); - } - - CounterHasher hasher1 = new CounterHasher(); - CounterHasher hasher2 = new CounterHasher(); - - ArrowBufPointer 
pointer1 = new ArrowBufPointer(hasher1); - assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer1.hashCode()); - - ArrowBufPointer pointer2 = new ArrowBufPointer(hasher2); - assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer2.hashCode()); - - for (int i = 0; i < vectorLength; i++) { - pointer1.set(buf1, i * 4L, 4); - pointer2.set(buf2, i * 4L, 4); - - assertEquals(pointer1.hashCode(), pointer2.hashCode()); - - // verify that the hash codes have been re-computed - assertEquals(hasher1.counter, i + 1); - assertEquals(hasher2.counter, i + 1); - } - } - } - - @Test - public void testNullPointersHashCode() { - ArrowBufPointer pointer = new ArrowBufPointer(); - assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer.hashCode()); - - pointer.set(null, 0, 0); - assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer.hashCode()); - } - - @Test - public void testReuseHashCode() { - try (ArrowBuf buf = allocator.buffer(10)) { - buf.setInt(0, 10); - buf.setInt(4, 20); - - CounterHasher hasher = new CounterHasher(); - ArrowBufPointer pointer = new ArrowBufPointer(hasher); - - pointer.set(buf, 0, 4); - int hashCode = pointer.hashCode(); - - // hash code computed - assertEquals(1, hasher.counter); - - // no hash code re-compute - assertEquals(hashCode, pointer.hashCode()); - assertEquals(1, hasher.counter); - - // hash code re-computed - pointer.set(buf, 4, 4); - assertNotSame(hashCode, pointer.hashCode()); - assertEquals(2, hasher.counter); - } - } - - @Test - public void testHashersForEquality() { - try (ArrowBuf buf = allocator.buffer(10)) { - // pointer 1 uses the default hasher - ArrowBufPointer pointer1 = new ArrowBufPointer(buf, 0, 10); - - // pointer 2 uses the counter hasher - ArrowBufPointer pointer2 = new ArrowBufPointer(buf, 0, 10, new CounterHasher()); - - // the two pointers cannot be equal, since they have different hashers - assertNotEquals(pointer1, pointer2); - } - } - - @Test - public void testArrowBufPointersComparison() { - final int vectorLength = 100; - try 
(ArrowBuf buf1 = allocator.buffer(vectorLength); - ArrowBuf buf2 = allocator.buffer(vectorLength)) { - for (int i = 0; i < vectorLength; i++) { - buf1.setByte(i, i); - buf2.setByte(i, i); - } - - ArrowBufPointer pointer1 = new ArrowBufPointer(); - ArrowBufPointer pointer2 = new ArrowBufPointer(); - - pointer1.set(buf1, 0, 10); - pointer2.set(buf2, 0, 10); - assertEquals(0, pointer1.compareTo(pointer2)); - - pointer1.set(null, 0, 0); - pointer2.set(null, 0, 0); - assertEquals(0, pointer1.compareTo(pointer2)); - - pointer2.set(buf2, 0, 5); - assertTrue(pointer1.compareTo(pointer2) < 0); - - pointer1.set(buf1, 0, 10); - assertTrue(pointer1.compareTo(pointer2) > 0); - - pointer1.set(buf1, 1, 5); - pointer2.set(buf2, 3, 8); - assertTrue(pointer1.compareTo(pointer2) < 0); - } - } - - /** - * Hasher with a counter that increments each time a hash code is calculated. This is to validate - * that the hash code in {@link ArrowBufPointer} is reused. - */ - static class CounterHasher implements ArrowBufHasher { - - protected int counter = 0; - - @Override - public int hashCode(long address, long length) { - counter += 1; - return SimpleHasher.INSTANCE.hashCode(address, length); - } - - @Override - public int hashCode(ArrowBuf buf, long offset, long length) { - counter += 1; - return SimpleHasher.INSTANCE.hashCode(buf, offset, length); - } - - @Override - public int hashCode() { - return super.hashCode(); - } - - @Override - public boolean equals(Object o) { - return o instanceof CounterHasher; - } - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java deleted file mode 100644 index 757bc85b47f21..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * 
contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.util; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.nio.charset.StandardCharsets; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestByteFunctionHelpers { - - private BufferAllocator allocator; - - private static final int SIZE = 100; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testEquals() { - ArrowBuf buffer1 = allocator.buffer(SIZE); - ArrowBuf buffer2 = allocator.buffer(SIZE); - - for (int i = 0; i < SIZE; i++) { - buffer1.setByte(i, i); - buffer2.setByte(i, i); - } - - // test three cases, length>8, length>3, length<3 - - assertEquals(1, ByteFunctionHelpers.equal(buffer1, 0, SIZE - 1, buffer2, 0, SIZE - 1)); - assertEquals(1, ByteFunctionHelpers.equal(buffer1, 0, 6, buffer2, 0, 6)); - assertEquals(1, ByteFunctionHelpers.equal(buffer1, 0, 2, buffer2, 0, 2)); - - // change value at index1 - 
buffer1.setByte(1, 10); - - assertEquals(0, ByteFunctionHelpers.equal(buffer1, 0, SIZE - 1, buffer2, 0, SIZE - 1)); - assertEquals(0, ByteFunctionHelpers.equal(buffer1, 0, 6, buffer2, 0, 6)); - assertEquals(0, ByteFunctionHelpers.equal(buffer1, 0, 2, buffer2, 0, 2)); - - buffer1.close(); - buffer2.close(); - } - - @Test - public void testCompare() { - ArrowBuf buffer1 = allocator.buffer(SIZE); - ArrowBuf buffer2 = allocator.buffer(SIZE); - - for (int i = 0; i < SIZE; i++) { - buffer1.setByte(i, i); - buffer2.setByte(i, i); - } - - // test three cases, length>8, length>3, length<3 - - assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, SIZE - 1, buffer2, 0, SIZE - 1)); - assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, 6, buffer2, 0, 6)); - assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, 2, buffer2, 0, 2)); - - // change value at index 1 - buffer1.setByte(1, 0); - - assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, SIZE - 1, buffer2, 0, SIZE - 1)); - assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, 6, buffer2, 0, 6)); - assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, 2, buffer2, 0, 2)); - - buffer1.close(); - buffer2.close(); - } - - @Test - public void testStringCompare() { - String[] leftStrings = {"cat", "cats", "catworld", "dogs", "bags"}; - String[] rightStrings = {"dog", "dogs", "dogworld", "dog", "sgab"}; - - for (int i = 0; i < leftStrings.length; ++i) { - String leftStr = leftStrings[i]; - String rightStr = rightStrings[i]; - - ArrowBuf left = allocator.buffer(SIZE); - left.setBytes(0, leftStr.getBytes(StandardCharsets.UTF_8)); - ArrowBuf right = allocator.buffer(SIZE); - right.setBytes(0, rightStr.getBytes(StandardCharsets.UTF_8)); - - assertEquals( - leftStr.compareTo(rightStr) < 0 ? 
-1 : 1, - ByteFunctionHelpers.compare(left, 0, leftStr.length(), right, 0, rightStr.length())); - - left.close(); - right.close(); - } - } - - @Test - public void testCompareWithByteArray() { - ArrowBuf buffer1 = allocator.buffer(SIZE); - byte[] buffer2 = new byte[SIZE]; - - for (int i = 0; i < SIZE; i++) { - buffer1.setByte(i, i); - buffer2[i] = (byte) i; - } - - // test three cases, length>8, length>3, length<3 - - assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, SIZE - 1, buffer2, 0, SIZE - 1)); - assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, 6, buffer2, 0, 6)); - assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, 2, buffer2, 0, 2)); - - // change value at index 1 - buffer1.setByte(1, 0); - - assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, SIZE - 1, buffer2, 0, SIZE - 1)); - assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, 6, buffer2, 0, 6)); - assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, 2, buffer2, 0, 2)); - - buffer1.close(); - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestLargeMemoryUtil.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestLargeMemoryUtil.java deleted file mode 100755 index ae72e6714344e..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestLargeMemoryUtil.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.util; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertInstanceOf; - -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.net.URLClassLoader; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -public class TestLargeMemoryUtil { - - /** - * Get a copy of the current class loader. - * - * @return the newly created class loader. - */ - private ClassLoader copyClassLoader() { - ClassLoader curClassLoader = this.getClass().getClassLoader(); - if (curClassLoader instanceof URLClassLoader) { - // for Java 1.8 - return new URLClassLoader(((URLClassLoader) curClassLoader).getURLs(), null); - } - - // for Java 1.9 and Java 11. - return null; - } - - /** - * Use the checkedCastToInt method from the current classloader. - * - * @param classLoader the class loader from which to call the method. - * @return the return value of the method. 
- */ - private int checkedCastToInt(ClassLoader classLoader, long value) throws Exception { - Class clazz = classLoader.loadClass("org.apache.arrow.memory.util.LargeMemoryUtil"); - Method method = clazz.getMethod("checkedCastToInt", long.class); - return (int) method.invoke(null, value); - } - - private void checkExpectedOverflow(ClassLoader classLoader, long value) { - InvocationTargetException ex = - Assertions.assertThrows( - InvocationTargetException.class, () -> checkedCastToInt(classLoader, value)); - assertInstanceOf(ArithmeticException.class, ex.getCause()); - assertEquals("integer overflow", ex.getCause().getMessage()); - } - - @Test - public void testEnableLargeMemoryUtilCheck() throws Exception { - String savedNewProperty = System.getProperty("arrow.enable_unsafe_memory_access"); - System.setProperty("arrow.enable_unsafe_memory_access", "false"); - try { - ClassLoader classLoader = copyClassLoader(); - if (classLoader != null) { - assertEquals(Integer.MAX_VALUE, checkedCastToInt(classLoader, Integer.MAX_VALUE)); - checkExpectedOverflow(classLoader, Integer.MAX_VALUE + 1L); - checkExpectedOverflow(classLoader, Integer.MIN_VALUE - 1L); - } - } finally { - // restore system property - if (savedNewProperty != null) { - System.setProperty("arrow.enable_unsafe_memory_access", savedNewProperty); - } else { - System.clearProperty("arrow.enable_unsafe_memory_access"); - } - } - } - - @Test - public void testDisabledLargeMemoryUtilCheck() throws Exception { - String savedNewProperty = System.getProperty("arrow.enable_unsafe_memory_access"); - System.setProperty("arrow.enable_unsafe_memory_access", "true"); - try { - ClassLoader classLoader = copyClassLoader(); - if (classLoader != null) { - assertEquals(Integer.MAX_VALUE, checkedCastToInt(classLoader, Integer.MAX_VALUE)); - assertEquals(Integer.MIN_VALUE, checkedCastToInt(classLoader, Integer.MAX_VALUE + 1L)); - assertEquals(Integer.MAX_VALUE, checkedCastToInt(classLoader, Integer.MIN_VALUE - 1L)); - } - } finally 
{ - // restore system property - if (savedNewProperty != null) { - System.setProperty("arrow.enable_unsafe_memory_access", savedNewProperty); - } else { - System.clearProperty("arrow.enable_unsafe_memory_access"); - } - } - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java deleted file mode 100644 index c261fd090b923..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory.util.hash; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.nio.charset.StandardCharsets; -import java.util.stream.Stream; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** Test cases for {@link ArrowBufHasher} and its subclasses. */ -public class TestArrowBufHasher { - - private final int BUFFER_LENGTH = 1024; - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @ParameterizedTest(name = "hasher = {0}") - @MethodSource("getHasher") - void testHasher(String name, ArrowBufHasher hasher) { - try (ArrowBuf buf1 = allocator.buffer(BUFFER_LENGTH); - ArrowBuf buf2 = allocator.buffer(BUFFER_LENGTH)) { - // prepare data - for (int i = 0; i < BUFFER_LENGTH / 4; i++) { - buf1.setFloat(i * 4L, i / 10.0f); - buf2.setFloat(i * 4L, i / 10.0f); - } - - verifyHashCodesEqual(hasher, buf1, 0, 100, buf2, 0, 100); - verifyHashCodesEqual(hasher, buf1, 1, 5, buf2, 1, 5); - verifyHashCodesEqual(hasher, buf1, 10, 17, buf2, 10, 17); - verifyHashCodesEqual(hasher, buf1, 33, 25, buf2, 33, 25); - verifyHashCodesEqual(hasher, buf1, 22, 22, buf2, 22, 22); - verifyHashCodesEqual(hasher, buf1, 123, 333, buf2, 123, 333); - verifyHashCodesEqual(hasher, buf1, 374, 1, buf2, 374, 1); - verifyHashCodesEqual(hasher, buf1, 11, 0, buf2, 11, 0); - verifyHashCodesEqual(hasher, buf1, 75, 25, buf2, 75, 25); - verifyHashCodesEqual(hasher, 
buf1, 0, 1024, buf2, 0, 1024); - } - } - - private void verifyHashCodesEqual( - ArrowBufHasher hasher, - ArrowBuf buf1, - int offset1, - int length1, - ArrowBuf buf2, - int offset2, - int length2) { - int hashCode1 = hasher.hashCode(buf1, offset1, length1); - int hashCode2 = hasher.hashCode(buf2, offset2, length2); - assertEquals(hashCode1, hashCode2); - } - - @ParameterizedTest(name = "hasher = {0}") - @MethodSource("getHasher") - public void testHasherNegative(String name, ArrowBufHasher hasher) { - try (ArrowBuf buf = allocator.buffer(BUFFER_LENGTH)) { - // prepare data - for (int i = 0; i < BUFFER_LENGTH / 4; i++) { - buf.setFloat(i * 4L, i / 10.0f); - } - - assertThrows(IllegalArgumentException.class, () -> hasher.hashCode(buf, 0, -1)); - - assertThrows(IndexOutOfBoundsException.class, () -> hasher.hashCode(buf, 0, 1028)); - - assertThrows(IndexOutOfBoundsException.class, () -> hasher.hashCode(buf, 500, 1000)); - } - } - - @ParameterizedTest(name = "hasher = {0}") - @MethodSource("getHasher") - public void testHasherLessThanInt(String name, ArrowBufHasher hasher) { - try (ArrowBuf buf1 = allocator.buffer(4); - ArrowBuf buf2 = allocator.buffer(4)) { - buf1.writeBytes("foo1".getBytes(StandardCharsets.UTF_8)); - buf2.writeBytes("bar2".getBytes(StandardCharsets.UTF_8)); - - for (int i = 1; i <= 4; i++) { - verifyHashCodeNotEqual(hasher, buf1, i, buf2, i); - } - } - } - - private void verifyHashCodeNotEqual( - ArrowBufHasher hasher, ArrowBuf buf1, int length1, ArrowBuf buf2, int length2) { - int hashCode1 = hasher.hashCode(buf1, 0, length1); - int hashCode2 = hasher.hashCode(buf2, 0, length2); - assertNotEquals(hashCode1, hashCode2); - } - - private static Stream getHasher() { - return Stream.of( - Arguments.of(SimpleHasher.class.getSimpleName(), SimpleHasher.INSTANCE), - Arguments.of(MurmurHasher.class.getSimpleName(), new MurmurHasher())); - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/util/TestCollections2.java 
b/java/memory/memory-core/src/test/java/org/apache/arrow/util/TestCollections2.java deleted file mode 100644 index 20052e42b367f..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/util/TestCollections2.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.util; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.fail; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import org.junit.jupiter.api.Test; - -/** Tests for {@code Collections2} class. 
*/ -public class TestCollections2 { - - @Test - public void testToImmutableListFromIterable() { - final List source = new ArrayList<>(Arrays.asList("foo", "bar", "baz")); - - final List copy = Collections2.toImmutableList(source); - assertEquals(source, copy); - - try { - copy.add("unexpected"); - fail("add operation should not be supported"); - } catch (UnsupportedOperationException ignored) { - } - - try { - copy.set(0, "unexpected"); - fail("set operation should not be supported"); - } catch (UnsupportedOperationException ignored) { - } - - try { - copy.remove(0); - fail("remove operation should not be supported"); - } catch (UnsupportedOperationException ignored) { - } - - source.set(1, "newvalue"); - source.add("anothervalue"); - - assertEquals("bar", copy.get(1)); - assertEquals(3, copy.size()); - } - - @Test - public void testStringFromEmptyIterator() { - assertEquals("[]", Collections2.toString(Collections.emptyIterator())); - } - - @Test - public void testStringFromIterator() { - Iterator iterator = Arrays.asList("foo", "bar", "baz").iterator(); - iterator.next(); - - assertEquals("[bar, baz]", Collections2.toString(iterator)); - assertFalse(iterator.hasNext()); - } -} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/util/TestStackTrace.java b/java/memory/memory-core/src/test/java/org/apache/arrow/util/TestStackTrace.java deleted file mode 100644 index cf1e69a7b6577..0000000000000 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/util/TestStackTrace.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.util; - -import org.apache.arrow.memory.util.StackTrace; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -public class TestStackTrace { - /** Check that the stack trace includes the origin line. */ - @Test - public void testStackTraceComplete() { - final String stackTrace = new StackTrace().toString(); - Assertions.assertTrue(stackTrace.contains("TestStackTrace.testStackTraceComplete"), stackTrace); - } - - /** Check that the stack trace doesn't include getStackTrace or StackTrace. */ - @Test - public void testStackTraceOmit() { - final String stackTrace = new StackTrace().toString(); - Assertions.assertFalse(stackTrace.contains("Thread.getStackTrace"), stackTrace); - Assertions.assertFalse( - stackTrace.contains("org.apache.arrow.memory.util.StackTrace"), stackTrace); - } -} diff --git a/java/memory/memory-core/src/test/resources/logback.xml b/java/memory/memory-core/src/test/resources/logback.xml deleted file mode 100644 index 4c54d18a210ff..0000000000000 --- a/java/memory/memory-core/src/test/resources/logback.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - - diff --git a/java/memory/memory-netty-buffer-patch/pom.xml b/java/memory/memory-netty-buffer-patch/pom.xml deleted file mode 100644 index 07dc7d2403d9e..0000000000000 --- a/java/memory/memory-netty-buffer-patch/pom.xml +++ /dev/null @@ -1,50 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-memory - 19.0.0-SNAPSHOT - - - arrow-memory-netty-buffer-patch - 
Arrow Memory - Netty Buffer - Netty Buffer needed to patch that is consumed by Arrow Memory Netty - - - - org.apache.arrow - arrow-memory-core - - - io.netty - netty-buffer - - - io.netty - netty-common - - - org.slf4j - slf4j-api - - - diff --git a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/ExpandableByteBuf.java b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/ExpandableByteBuf.java deleted file mode 100644 index ef8afcac1f94f..0000000000000 --- a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/ExpandableByteBuf.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.netty.buffer; - -import org.apache.arrow.memory.BufferAllocator; - -/** - * Allows us to decorate ArrowBuf to make it expandable so that we can use them in the context of - * the Netty framework (thus supporting RPC level memory accounting). 
- */ -public class ExpandableByteBuf extends MutableWrappedByteBuf { - - private final BufferAllocator allocator; - - public ExpandableByteBuf(ByteBuf buffer, BufferAllocator allocator) { - super(buffer); - this.allocator = allocator; - } - - @Override - public ByteBuf copy(int index, int length) { - return new ExpandableByteBuf(buffer.copy(index, length), allocator); - } - - @Override - public ByteBuf capacity(int newCapacity) { - if (newCapacity > capacity()) { - ByteBuf newBuf = NettyArrowBuf.unwrapBuffer(allocator.buffer(newCapacity)); - newBuf.writeBytes(buffer, 0, buffer.capacity()); - newBuf.readerIndex(buffer.readerIndex()); - newBuf.writerIndex(buffer.writerIndex()); - buffer.release(); - buffer = newBuf; - return newBuf; - } else { - return super.capacity(newCapacity); - } - } -} diff --git a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/LargeBuffer.java b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/LargeBuffer.java deleted file mode 100644 index c29f1f0b2ad85..0000000000000 --- a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/LargeBuffer.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.netty.buffer; - -/** - * A MutableWrappedByteBuf that also maintains a metric of the number of huge buffer bytes and - * counts. - */ -public class LargeBuffer extends MutableWrappedByteBuf { - - public LargeBuffer(ByteBuf buffer) { - super(buffer); - } - - @Override - public ByteBuf copy(int index, int length) { - return new LargeBuffer(buffer.copy(index, length)); - } -} diff --git a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java deleted file mode 100644 index 1db4645a47311..0000000000000 --- a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/MutableWrappedByteBuf.java +++ /dev/null @@ -1,438 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.netty.buffer; - -import io.netty.util.ByteProcessor; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.channels.FileChannel; -import java.nio.channels.GatheringByteChannel; -import java.nio.channels.ScatteringByteChannel; - -/** - * This is basically a complete copy of netty's DuplicatedByteBuf. We copy because we want to - * override some behaviors and make buffer mutable. - */ -abstract class MutableWrappedByteBuf extends AbstractByteBuf { - - ByteBuf buffer; - - public MutableWrappedByteBuf(ByteBuf buffer) { - super(buffer.maxCapacity()); - - if (buffer instanceof MutableWrappedByteBuf) { - this.buffer = ((MutableWrappedByteBuf) buffer).buffer; - } else { - this.buffer = buffer; - } - - setIndex(buffer.readerIndex(), buffer.writerIndex()); - } - - @Override - public ByteBuffer nioBuffer(int index, int length) { - return unwrap().nioBuffer(index, length); - } - - @Override - public ByteBuf unwrap() { - return buffer; - } - - @Override - public ByteBufAllocator alloc() { - return buffer.alloc(); - } - - @Override - public ByteOrder order() { - return buffer.order(); - } - - @Override - public boolean isDirect() { - return buffer.isDirect(); - } - - @Override - public int capacity() { - return buffer.capacity(); - } - - @Override - public ByteBuf capacity(int newCapacity) { - buffer.capacity(newCapacity); - return this; - } - - @Override - public boolean hasArray() { - return buffer.hasArray(); - } - - @Override - public byte[] array() { - return buffer.array(); - } - - @Override - public int arrayOffset() { - return buffer.arrayOffset(); - } - - @Override - public boolean hasMemoryAddress() { - return buffer.hasMemoryAddress(); - } - - @Override - public long memoryAddress() { - return buffer.memoryAddress(); - } - - @Override - public byte getByte(int index) { - return _getByte(index); - } - - @Override - protected byte _getByte(int 
index) { - return buffer.getByte(index); - } - - @Override - public short getShort(int index) { - return _getShort(index); - } - - @Override - protected short _getShort(int index) { - return buffer.getShort(index); - } - - @Override - public short getShortLE(int index) { - return buffer.getShortLE(index); - } - - @Override - protected short _getShortLE(int index) { - return buffer.getShortLE(index); - } - - @Override - public int getUnsignedMedium(int index) { - return _getUnsignedMedium(index); - } - - @Override - protected int _getUnsignedMedium(int index) { - return buffer.getUnsignedMedium(index); - } - - @Override - public int getUnsignedMediumLE(int index) { - return buffer.getUnsignedMediumLE(index); - } - - @Override - protected int _getUnsignedMediumLE(int index) { - return buffer.getUnsignedMediumLE(index); - } - - @Override - public int getInt(int index) { - return _getInt(index); - } - - @Override - protected int _getInt(int index) { - return buffer.getInt(index); - } - - @Override - public int getIntLE(int index) { - return buffer.getIntLE(index); - } - - @Override - protected int _getIntLE(int index) { - return buffer.getIntLE(index); - } - - @Override - public long getLong(int index) { - return _getLong(index); - } - - @Override - protected long _getLong(int index) { - return buffer.getLong(index); - } - - @Override - public long getLongLE(int index) { - return buffer.getLongLE(index); - } - - @Override - protected long _getLongLE(int index) { - return buffer.getLongLE(index); - } - - @Override - public abstract ByteBuf copy(int index, int length); - - @Override - public ByteBuf slice(int index, int length) { - return new SlicedByteBuf(this, index, length); - } - - @Override - public ByteBuf getBytes(int index, ByteBuf dst, int dstIndex, int length) { - buffer.getBytes(index, dst, dstIndex, length); - return this; - } - - @Override - public ByteBuf getBytes(int index, byte[] dst, int dstIndex, int length) { - buffer.getBytes(index, dst, dstIndex, 
length); - return this; - } - - @Override - public ByteBuf getBytes(int index, ByteBuffer dst) { - buffer.getBytes(index, dst); - return this; - } - - @Override - public ByteBuf setByte(int index, int value) { - _setByte(index, value); - return this; - } - - @Override - protected void _setByte(int index, int value) { - buffer.setByte(index, value); - } - - @Override - public ByteBuf setShort(int index, int value) { - _setShort(index, value); - return this; - } - - @Override - protected void _setShort(int index, int value) { - buffer.setShort(index, value); - } - - @Override - public ByteBuf setShortLE(int index, int value) { - buffer.setShortLE(index, value); - return this; - } - - @Override - protected void _setShortLE(int index, int value) { - buffer.setShortLE(index, value); - } - - @Override - public ByteBuf setMedium(int index, int value) { - _setMedium(index, value); - return this; - } - - @Override - protected void _setMedium(int index, int value) { - buffer.setMedium(index, value); - } - - @Override - public ByteBuf setMediumLE(int index, int value) { - buffer.setMediumLE(index, value); - return this; - } - - @Override - protected void _setMediumLE(int index, int value) { - buffer.setMediumLE(index, value); - } - - @Override - public ByteBuf setInt(int index, int value) { - _setInt(index, value); - return this; - } - - @Override - protected void _setInt(int index, int value) { - buffer.setInt(index, value); - } - - @Override - public ByteBuf setIntLE(int index, int value) { - buffer.setIntLE(index, value); - return this; - } - - @Override - protected void _setIntLE(int index, int value) { - buffer.setIntLE(index, value); - } - - @Override - public ByteBuf setLong(int index, long value) { - _setLong(index, value); - return this; - } - - @Override - protected void _setLong(int index, long value) { - buffer.setLong(index, value); - } - - @Override - public ByteBuf setLongLE(int index, long value) { - buffer.setLongLE(index, value); - return this; - } - - 
@Override - protected void _setLongLE(int index, long value) { - buffer.setLongLE(index, value); - } - - @Override - public ByteBuf setBytes(int index, byte[] src, int srcIndex, int length) { - buffer.setBytes(index, src, srcIndex, length); - return this; - } - - @Override - public ByteBuf setBytes(int index, ByteBuf src, int srcIndex, int length) { - buffer.setBytes(index, src, srcIndex, length); - return this; - } - - @Override - public ByteBuf setBytes(int index, ByteBuffer src) { - buffer.setBytes(index, src); - return this; - } - - @Override - public int setBytes(int index, FileChannel in, long position, int length) throws IOException { - return buffer.setBytes(index, in, position, length); - } - - @Override - public ByteBuf getBytes(int index, OutputStream out, int length) throws IOException { - buffer.getBytes(index, out, length); - return this; - } - - @Override - public int getBytes(int index, GatheringByteChannel out, int length) throws IOException { - return buffer.getBytes(index, out, length); - } - - @Override - public int setBytes(int index, InputStream in, int length) throws IOException { - return buffer.setBytes(index, in, length); - } - - @Override - public int setBytes(int index, ScatteringByteChannel in, int length) throws IOException { - return buffer.setBytes(index, in, length); - } - - @Override - public int getBytes(int index, FileChannel out, long position, int length) throws IOException { - return buffer.getBytes(index, out, position, length); - } - - @Override - public int nioBufferCount() { - return buffer.nioBufferCount(); - } - - @Override - public ByteBuffer[] nioBuffers(int index, int length) { - return buffer.nioBuffers(index, length); - } - - @Override - public ByteBuffer internalNioBuffer(int index, int length) { - return nioBuffer(index, length); - } - - @Override - public int forEachByte(int index, int length, ByteProcessor processor) { - return buffer.forEachByte(index, length, processor); - } - - @Override - public int 
forEachByteDesc(int index, int length, ByteProcessor processor) { - return buffer.forEachByteDesc(index, length, processor); - } - - @Override - public final int refCnt() { - return unwrap().refCnt(); - } - - @Override - public final ByteBuf touch() { - unwrap().touch(); - return this; - } - - @Override - public final ByteBuf touch(Object hint) { - unwrap().touch(hint); - return this; - } - - @Override - public final ByteBuf retain() { - unwrap().retain(); - return this; - } - - @Override - public final ByteBuf retain(int increment) { - unwrap().retain(increment); - return this; - } - - @Override - public boolean release() { - return release(1); - } - - @Override - public boolean release(int decrement) { - boolean released = unwrap().release(decrement); - return released; - } -} diff --git a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/NettyArrowBuf.java b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/NettyArrowBuf.java deleted file mode 100644 index 9319d15aaa9a9..0000000000000 --- a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/NettyArrowBuf.java +++ /dev/null @@ -1,634 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.netty.buffer; - -import io.netty.util.internal.PlatformDependent; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.channels.FileChannel; -import java.nio.channels.GatheringByteChannel; -import java.nio.channels.ScatteringByteChannel; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BoundsChecking; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.patch.ArrowByteBufAllocator; -import org.apache.arrow.memory.util.LargeMemoryUtil; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.util.VisibleForTesting; - -/** Netty specific wrapper over ArrowBuf for use in Netty framework. */ -public class NettyArrowBuf extends AbstractByteBuf implements AutoCloseable { - - private final ArrowBuf arrowBuf; - private final ArrowByteBufAllocator arrowByteBufAllocator; - private long length; - private final long address; - - /** - * Constructs a new instance. - * - * @param arrowBuf The buffer to wrap. - * @param bufferAllocator The allocator for the buffer. - * @param length The length of this buffer. - * @deprecated Use {@link #NettyArrowBuf(ArrowBuf, BufferAllocator, long)} instead. - */ - @Deprecated(forRemoval = true) - public NettyArrowBuf( - final ArrowBuf arrowBuf, final BufferAllocator bufferAllocator, final int length) { - this(arrowBuf, bufferAllocator, (long) length); - } - - /** - * Constructs a new instance. - * - * @param arrowBuf The buffer to wrap. - * @param bufferAllocator The allocator for the buffer. - * @param length The length of this buffer. 
- */ - public NettyArrowBuf( - final ArrowBuf arrowBuf, final BufferAllocator bufferAllocator, final long length) { - super((int) length); - this.arrowBuf = arrowBuf; - this.arrowByteBufAllocator = new ArrowByteBufAllocator(bufferAllocator); - this.length = length; - this.address = arrowBuf.memoryAddress(); - } - - @Override - public ByteBuf copy() { - throw new UnsupportedOperationException(); - } - - @Override - public ByteBuf copy(int index, int length) { - throw new UnsupportedOperationException(); - } - - @Override - public ByteBuf retain() { - arrowBuf.getReferenceManager().retain(); - return this; - } - - public ArrowBuf arrowBuf() { - return arrowBuf; - } - - @Override - public ByteBuf retain(final int increment) { - arrowBuf.getReferenceManager().retain(increment); - return this; - } - - @Override - public boolean isDirect() { - return true; - } - - @Override - public synchronized ByteBuf capacity(int newCapacity) { - if (newCapacity == length) { - return this; - } - Preconditions.checkArgument(newCapacity >= 0); - if (newCapacity < length) { - length = newCapacity; - return this; - } - throw new UnsupportedOperationException( - "Buffers don't support resizing that increases the size."); - } - - @Override - public ByteBuf unwrap() { - - // According to Netty's ByteBuf interface, unwrap() should return null if the buffer cannot be - // unwrapped - // https://github.com/netty/netty/blob/9fe796e10a433b6cd20ad78b2c39cd56b86ccd2e/buffer/src/main/java/io/netty/buffer/ByteBuf.java#L305 - - // Throwing here breaks toString() in AbstractByteBuf - // Since toString() is used to build debug / error messages, this can cause strange behavior - - return null; - } - - @Override - public int refCnt() { - return arrowBuf.getReferenceManager().getRefCount(); - } - - @Override - public ArrowByteBufAllocator alloc() { - return arrowByteBufAllocator; - } - - @Override - public boolean hasArray() { - return false; - } - - @Override - public byte[] array() { - throw new 
UnsupportedOperationException("Operation not supported on direct buffer"); - } - - @Override - public int arrayOffset() { - throw new UnsupportedOperationException("Operation not supported on direct buffer"); - } - - @Override - public boolean hasMemoryAddress() { - return true; - } - - @Override - public long memoryAddress() { - return this.address; - } - - @Override - public ByteBuf touch() { - return this; - } - - @Override - public ByteBuf touch(Object hint) { - return this; - } - - @Override - public int capacity() { - return (int) Math.min(Integer.MAX_VALUE, arrowBuf.capacity()); - } - - @Override - public NettyArrowBuf slice() { - return unwrapBuffer(arrowBuf.slice(readerIndex, writerIndex - readerIndex)); - } - - @Override - public NettyArrowBuf slice(int index, int length) { - return unwrapBuffer(arrowBuf.slice(index, length)); - } - - @Override - public void close() { - arrowBuf.close(); - } - - @Override - public boolean release() { - return arrowBuf.getReferenceManager().release(); - } - - @Override - public boolean release(int decrement) { - return arrowBuf.getReferenceManager().release(decrement); - } - - @Override - public NettyArrowBuf readerIndex(int readerIndex) { - super.readerIndex(readerIndex); - return this; - } - - @Override - public NettyArrowBuf writerIndex(int writerIndex) { - super.writerIndex(writerIndex); - return this; - } - - @Override - public int nioBufferCount() { - return 1; - } - - @Override - public ByteBuffer internalNioBuffer(int index, int length) { - ByteBuffer nioBuf = getDirectBuffer(index); - // Follows convention from other ByteBuf implementations. 
- return (ByteBuffer) nioBuf.clear().limit(length); - } - - @Override - public ByteBuffer[] nioBuffers() { - return new ByteBuffer[] {nioBuffer()}; - } - - @Override - public ByteBuffer[] nioBuffers(int index, int length) { - return new ByteBuffer[] {nioBuffer(index, length)}; - } - - @Override - public ByteBuffer nioBuffer() { - return nioBuffer(readerIndex(), readableBytes()); - } - - /** - * Returns a buffer that is zero positioned but points to a slice of the original buffer starting - * at given index. - */ - @Override - public ByteBuffer nioBuffer(int index, int length) { - chk(index, length); - final ByteBuffer buffer = getDirectBuffer(index); - buffer.limit(length); - return buffer; - } - - /** - * Returns a buffer that is zero positioned but points to a slice of the original buffer starting - * at given index. - */ - public ByteBuffer nioBuffer(long index, int length) { - chk(index, length); - final ByteBuffer buffer = getDirectBuffer(index); - buffer.limit(length); - return buffer; - } - - /** - * Get this ArrowBuf as a direct {@link ByteBuffer}. - * - * @return ByteBuffer - */ - private ByteBuffer getDirectBuffer(long index) { - return PlatformDependent.directBuffer( - addr(index), LargeMemoryUtil.checkedCastToInt(length - index)); - } - - @Override - public ByteBuf getBytes(int index, ByteBuffer dst) { - arrowBuf.getBytes(index, dst); - return this; - } - - @Override - public ByteBuf setBytes(int index, ByteBuffer src) { - arrowBuf.setBytes(index, src); - return this; - } - - @Override - public ByteBuf getBytes(int index, byte[] dst, int dstIndex, int length) { - arrowBuf.getBytes(index, dst, dstIndex, length); - return this; - } - - @Override - public ByteBuf setBytes(int index, byte[] src, int srcIndex, int length) { - arrowBuf.setBytes(index, src, srcIndex, length); - return this; - } - - /** - * Determine if the requested {@code index} and {@code length} will fit within {@code capacity}. - * - * @param index The starting index. 
- * @param length The length which will be utilized (starting from {@code index}). - * @param capacity The capacity that {@code index + length} is allowed to be within. - * @return {@code true} if the requested {@code index} and {@code length} will fit within {@code - * capacity}. {@code false} if this would result in an index out of bounds exception. - */ - private static boolean isOutOfBounds(int index, int length, int capacity) { - return (index | length | (index + length) | (capacity - (index + length))) < 0; - } - - @Override - public ByteBuf getBytes(int index, ByteBuf dst, int dstIndex, int length) { - chk(index, length); - Preconditions.checkArgument(dst != null, "Expecting valid dst ByteBuffer"); - if (isOutOfBounds(dstIndex, length, dst.capacity())) { - throw new IndexOutOfBoundsException("dstIndex: " + dstIndex + " length: " + length); - } else { - final long srcAddress = addr(index); - if (dst.hasMemoryAddress()) { - final long dstAddress = dst.memoryAddress() + (long) dstIndex; - PlatformDependent.copyMemory(srcAddress, dstAddress, (long) length); - } else if (dst.hasArray()) { - dstIndex += dst.arrayOffset(); - PlatformDependent.copyMemory(srcAddress, dst.array(), dstIndex, (long) length); - } else { - dst.setBytes(dstIndex, this, index, length); - } - } - return this; - } - - @Override - public ByteBuf setBytes(int index, ByteBuf src, int srcIndex, int length) { - chk(index, length); - Preconditions.checkArgument(src != null, "Expecting valid src ByteBuffer"); - if (isOutOfBounds(srcIndex, length, src.capacity())) { - throw new IndexOutOfBoundsException("srcIndex: " + srcIndex + " length: " + length); - } else { - if (length != 0) { - final long dstAddress = addr(index); - if (src.hasMemoryAddress()) { - final long srcAddress = src.memoryAddress() + (long) srcIndex; - PlatformDependent.copyMemory(srcAddress, dstAddress, (long) length); - } else if (src.hasArray()) { - srcIndex += src.arrayOffset(); - PlatformDependent.copyMemory(src.array(), 
srcIndex, dstAddress, (long) length); - } else { - src.getBytes(srcIndex, this, index, length); - } - } - } - return this; - } - - @Override - public ByteBuf getBytes(int index, OutputStream out, int length) throws IOException { - arrowBuf.getBytes(index, out, length); - return this; - } - - @Override - public int setBytes(int index, InputStream in, int length) throws IOException { - return arrowBuf.setBytes(index, in, length); - } - - @Override - public int getBytes(int index, GatheringByteChannel out, int length) throws IOException { - Preconditions.checkArgument(out != null, "expecting valid gathering byte channel"); - chk(index, length); - if (length == 0) { - return 0; - } else { - final ByteBuffer tmpBuf = getDirectBuffer(index); - tmpBuf.clear().limit(length); - return out.write(tmpBuf); - } - } - - @Override - public int getBytes(int index, FileChannel out, long position, int length) throws IOException { - chk(index, length); - if (length == 0) { - return 0; - } else { - final ByteBuffer tmpBuf = getDirectBuffer(index); - tmpBuf.clear().limit(length); - return out.write(tmpBuf, position); - } - } - - @Override - public int setBytes(int index, ScatteringByteChannel in, int length) throws IOException { - return (int) in.read(nioBuffers(index, length)); - } - - @Override - public int setBytes(int index, FileChannel in, long position, int length) throws IOException { - return (int) in.read(nioBuffers(index, length)); - } - - @Override - public ByteOrder order() { - return ByteOrder.LITTLE_ENDIAN; - } - - @Override - public ByteBuf order(ByteOrder endianness) { - return this; - } - - @Override - protected int _getUnsignedMedium(int index) { - return getUnsignedMedium(index); - } - - @Override - protected int _getUnsignedMediumLE(int index) { - this.chk(index, 3); - long addr = this.addr(index); - return (PlatformDependent.getByte(addr) & 255) - | (Short.reverseBytes(PlatformDependent.getShort(addr + 1L)) & '\uffff') << 8; - } - - 
/*-------------------------------------------------* - | | - | get() APIs | - | | - *-------------------------------------------------*/ - - @Override - protected byte _getByte(int index) { - return getByte(index); - } - - @Override - public byte getByte(int index) { - return arrowBuf.getByte(index); - } - - @Override - protected short _getShortLE(int index) { - short s = getShort(index); - return Short.reverseBytes(s); - } - - @Override - protected short _getShort(int index) { - return getShort(index); - } - - @Override - public short getShort(int index) { - return arrowBuf.getShort(index); - } - - @Override - protected int _getIntLE(int index) { - int value = getInt(index); - return Integer.reverseBytes(value); - } - - @Override - protected int _getInt(int index) { - return getInt(index); - } - - @Override - public int getInt(int index) { - return arrowBuf.getInt(index); - } - - @Override - protected long _getLongLE(int index) { - long value = getLong(index); - return Long.reverseBytes(value); - } - - @Override - protected long _getLong(int index) { - return getLong(index); - } - - @Override - public long getLong(int index) { - return arrowBuf.getLong(index); - } - - /*-------------------------------------------------* - | | - | set() APIs | - | | - *-------------------------------------------------*/ - - @Override - protected void _setByte(int index, int value) { - setByte(index, value); - } - - @Override - public NettyArrowBuf setByte(int index, int value) { - arrowBuf.setByte(index, value); - return this; - } - - @Override - protected void _setShortLE(int index, int value) { - this.chk(index, 2); - PlatformDependent.putShort(this.addr(index), Short.reverseBytes((short) value)); - } - - @Override - protected void _setShort(int index, int value) { - setShort(index, value); - } - - @Override - public NettyArrowBuf setShort(int index, int value) { - arrowBuf.setShort(index, value); - return this; - } - - private long addr(long index) { - return address + index; - 
} - - /** - * Helper function to do bounds checking at a particular index for particular length of data. - * - * @param index index (0 based relative to this ArrowBuf) - * @param fieldLength provided length of data for get/set - */ - private void chk(long index, long fieldLength) { - if (BoundsChecking.BOUNDS_CHECKING_ENABLED) { - // check reference count - ensureAccessible(); - // check bounds - if (fieldLength < 0) { - throw new IllegalArgumentException("length: " + fieldLength + " (expected: >= 0)"); - } - if (index < 0 || index > capacity() - fieldLength) { - throw new IndexOutOfBoundsException( - String.format( - "index: %d, length: %d (expected: range(0, %d))", index, fieldLength, capacity())); - } - } - } - - @Override - protected void _setMedium(int index, int value) { - setMedium(index, value); - } - - @Override - protected void _setMediumLE(int index, int value) { - this.chk(index, 3); - long addr = this.addr(index); - PlatformDependent.putByte(addr, (byte) value); - PlatformDependent.putShort(addr + 1L, Short.reverseBytes((short) (value >>> 8))); - } - - @Override - public NettyArrowBuf setMedium(int index, int value) { - chk(index, 3); - final long addr = addr(index); - // we need to store 3 bytes starting from least significant byte - // and ignoring the most significant byte - // since arrow memory format is little endian, we will - // first store the first 2 bytes followed by third byte - // example: if the 4 byte int value is ABCD where A is MSB - // D is LSB then we effectively want to store DCB in increasing - // address to get Little Endian byte order - // (short)value will give us CD and PlatformDependent.putShort() - // will store them in LE order as DC starting at address addr - // in order to get B, we do ABCD >>> 16 = 00AB => (byte)AB which - // gives B. We store this at address addr + 2. 
So finally we get - // DCB - PlatformDependent.putShort(addr, (short) value); - PlatformDependent.putByte(addr + 2, (byte) (value >>> 16)); - return this; - } - - @Override - @VisibleForTesting - protected void _setInt(int index, int value) { - setInt(index, value); - } - - @Override - @VisibleForTesting - protected void _setIntLE(int index, int value) { - this.chk(index, 4); - PlatformDependent.putInt(this.addr(index), Integer.reverseBytes(value)); - } - - @Override - public NettyArrowBuf setInt(int index, int value) { - arrowBuf.setInt(index, value); - return this; - } - - @Override - protected void _setLong(int index, long value) { - setLong(index, value); - } - - @Override - public void _setLongLE(int index, long value) { - this.chk(index, 8); - PlatformDependent.putLong(this.addr(index), Long.reverseBytes(value)); - } - - @Override - public NettyArrowBuf setLong(int index, long value) { - arrowBuf.setLong(index, value); - return this; - } - - /** unwrap arrow buffer into a netty buffer. */ - public static NettyArrowBuf unwrapBuffer(ArrowBuf buf) { - final NettyArrowBuf nettyArrowBuf = - new NettyArrowBuf( - buf, - buf.getReferenceManager().getAllocator(), - LargeMemoryUtil.checkedCastToInt(buf.capacity())); - nettyArrowBuf.readerIndex(LargeMemoryUtil.checkedCastToInt(buf.readerIndex())); - nettyArrowBuf.writerIndex(LargeMemoryUtil.checkedCastToInt(buf.writerIndex())); - return nettyArrowBuf; - } -} diff --git a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java deleted file mode 100644 index b3d79c34cea0b..0000000000000 --- a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java +++ /dev/null @@ -1,273 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.netty.buffer; - -import io.netty.util.internal.OutOfDirectMemoryError; -import io.netty.util.internal.StringUtil; -import java.lang.reflect.Field; -import java.nio.ByteBuffer; -import java.util.concurrent.atomic.AtomicLong; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.util.AssertionUtil; -import org.apache.arrow.memory.util.LargeMemoryUtil; - -/** - * The base allocator that we use for all of Arrow's memory management. Returns - * UnsafeDirectLittleEndian buffers. 
- */ -public class PooledByteBufAllocatorL { - - private static final org.slf4j.Logger memoryLogger = - org.slf4j.LoggerFactory.getLogger("arrow.allocator"); - - private static final int MEMORY_LOGGER_FREQUENCY_SECONDS = 60; - public final UnsafeDirectLittleEndian empty; - private final AtomicLong hugeBufferSize = new AtomicLong(0); - private final AtomicLong hugeBufferCount = new AtomicLong(0); - private final AtomicLong normalBufferSize = new AtomicLong(0); - private final AtomicLong normalBufferCount = new AtomicLong(0); - private final InnerAllocator allocator; - - public PooledByteBufAllocatorL() { - allocator = new InnerAllocator(); - empty = new UnsafeDirectLittleEndian(new DuplicatedByteBuf(Unpooled.EMPTY_BUFFER)); - } - - /** Returns a {@linkplain UnsafeDirectLittleEndian} of the given size. */ - public UnsafeDirectLittleEndian allocate(long size) { - try { - return allocator.directBuffer(LargeMemoryUtil.checkedCastToInt(size), Integer.MAX_VALUE); - } catch (OutOfMemoryError e) { - /* - * OutOfDirectMemoryError is thrown by Netty when we exceed the direct memory limit defined by - * -XX:MaxDirectMemorySize. OutOfMemoryError with "Direct buffer memory" message is thrown by - * java.nio.Bits when we exceed the direct memory limit. This should never be hit in practice - * as Netty is expected to throw an OutOfDirectMemoryError first. 
- */ - if (e instanceof OutOfDirectMemoryError || "Direct buffer memory".equals(e.getMessage())) { - throw new OutOfMemoryException("Failure allocating buffer.", e); - } - throw e; - } - } - - public int getChunkSize() { - return allocator.chunkSize(); - } - - public long getHugeBufferSize() { - return hugeBufferSize.get(); - } - - public long getHugeBufferCount() { - return hugeBufferCount.get(); - } - - public long getNormalBufferSize() { - return normalBufferSize.get(); - } - - public long getNormalBufferCount() { - return normalBufferSize.get(); - } - - private static class AccountedUnsafeDirectLittleEndian extends UnsafeDirectLittleEndian { - - private final long initialCapacity; - private final AtomicLong count; - private final AtomicLong size; - - private AccountedUnsafeDirectLittleEndian(LargeBuffer buf, AtomicLong count, AtomicLong size) { - super(buf); - this.initialCapacity = buf.capacity(); - this.count = count; - this.size = size; - } - - private AccountedUnsafeDirectLittleEndian( - PooledUnsafeDirectByteBuf buf, AtomicLong count, AtomicLong size) { - super(buf); - this.initialCapacity = buf.capacity(); - this.count = count; - this.size = size; - } - - @Override - public ByteBuf copy() { - throw new UnsupportedOperationException("copy method is not supported"); - } - - @Override - public ByteBuf copy(int index, int length) { - throw new UnsupportedOperationException("copy method is not supported"); - } - - @Override - public boolean release(int decrement) { - boolean released = super.release(decrement); - if (released) { - count.decrementAndGet(); - size.addAndGet(-initialCapacity); - } - return released; - } - } - - private class InnerAllocator extends PooledByteBufAllocator { - - private final PoolArena[] directArenas; - private final MemoryStatusThread statusThread; - - public InnerAllocator() { - super(true); - - try { - Field f = PooledByteBufAllocator.class.getDeclaredField("directArenas"); - f.setAccessible(true); - this.directArenas = 
(PoolArena[]) f.get(this); - } catch (Exception e) { - throw new RuntimeException( - "Failure while initializing allocator. Unable to retrieve direct arenas field.", e); - } - - if (memoryLogger.isTraceEnabled()) { - statusThread = new MemoryStatusThread(this); - statusThread.start(); - } else { - statusThread = null; - } - } - - private UnsafeDirectLittleEndian newDirectBufferL(int initialCapacity, int maxCapacity) { - PoolThreadCache cache = threadCache(); - PoolArena directArena = cache.directArena; - - if (directArena != null) { - - if (initialCapacity > chunkSize()) { - // This is beyond chunk size so we'll allocate separately. - ByteBuf buf = UnpooledByteBufAllocator.DEFAULT.directBuffer(initialCapacity, maxCapacity); - - hugeBufferSize.addAndGet(buf.capacity()); - hugeBufferCount.incrementAndGet(); - - // logger.debug("Allocating huge buffer of size {}", initialCapacity, new Exception()); - return new AccountedUnsafeDirectLittleEndian( - new LargeBuffer(buf), hugeBufferCount, hugeBufferSize); - } else { - // within chunk, use arena. - ByteBuf buf = directArena.allocate(cache, initialCapacity, maxCapacity); - if (!(buf instanceof PooledUnsafeDirectByteBuf)) { - fail(); - } - - if (!AssertionUtil.ASSERT_ENABLED) { - return new UnsafeDirectLittleEndian((PooledUnsafeDirectByteBuf) buf); - } - - normalBufferSize.addAndGet(buf.capacity()); - normalBufferCount.incrementAndGet(); - - return new AccountedUnsafeDirectLittleEndian( - (PooledUnsafeDirectByteBuf) buf, normalBufferCount, normalBufferSize); - } - - } else { - throw fail(); - } - } - - private UnsupportedOperationException fail() { - return new UnsupportedOperationException( - "Arrow requires that the JVM used supports access sun.misc.Unsafe. 
This platform " - + "didn't provide that functionality."); - } - - @Override - public UnsafeDirectLittleEndian directBuffer(int initialCapacity, int maxCapacity) { - if (initialCapacity == 0 && maxCapacity == 0) { - newDirectBuffer(initialCapacity, maxCapacity); - } - validate(initialCapacity, maxCapacity); - return newDirectBufferL(initialCapacity, maxCapacity); - } - - @Override - public ByteBuf heapBuffer(int initialCapacity, int maxCapacity) { - throw new UnsupportedOperationException("Arrow doesn't support using heap buffers."); - } - - private void validate(int initialCapacity, int maxCapacity) { - if (initialCapacity < 0) { - throw new IllegalArgumentException( - "initialCapacity: " + initialCapacity + " (expected: 0+)"); - } - if (initialCapacity > maxCapacity) { - throw new IllegalArgumentException( - String.format( - "initialCapacity: %d (expected: not greater than maxCapacity(%d)", - initialCapacity, maxCapacity)); - } - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(); - buf.append(directArenas.length); - buf.append(" direct arena(s):"); - buf.append(StringUtil.NEWLINE); - for (PoolArena a : directArenas) { - buf.append(a); - } - - buf.append("Large buffers outstanding: "); - buf.append(hugeBufferCount.get()); - buf.append(" totaling "); - buf.append(hugeBufferSize.get()); - buf.append(" bytes."); - buf.append('\n'); - buf.append("Normal buffers outstanding: "); - buf.append(normalBufferCount.get()); - buf.append(" totaling "); - buf.append(normalBufferSize.get()); - buf.append(" bytes."); - return buf.toString(); - } - - private class MemoryStatusThread extends Thread { - private final InnerAllocator allocator; - - public MemoryStatusThread(InnerAllocator allocator) { - super("allocation.logger"); - this.setDaemon(true); - this.allocator = allocator; - } - - @Override - public void run() { - while (true) { - memoryLogger.trace("Memory Usage: \n{}", allocator); - try { - Thread.sleep(MEMORY_LOGGER_FREQUENCY_SECONDS 
* 1000); - } catch (InterruptedException e) { - return; - } - } - } - } - } -} diff --git a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java deleted file mode 100644 index 9dd5b58d248e8..0000000000000 --- a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.netty.buffer; - -import io.netty.util.internal.PlatformDependent; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.ByteOrder; -import java.util.concurrent.atomic.AtomicLong; - -/** - * The underlying class we use for little-endian access to memory. Is used underneath ArrowBufs to - * abstract away the Netty classes and underlying Netty memory management. 
- */ -public class UnsafeDirectLittleEndian extends WrappedByteBuf { - private static final AtomicLong ID_GENERATOR = new AtomicLong(0); - public final long id = ID_GENERATOR.incrementAndGet(); - private final AbstractByteBuf wrapped; - private final long memoryAddress; - - /** - * Constructs a new instance. - * - * @param buf The buffer to wrap - */ - public UnsafeDirectLittleEndian(AbstractByteBuf buf) { - super(buf); - - this.wrapped = buf; - this.memoryAddress = buf.memoryAddress(); - } - - private long addr(int index) { - return memoryAddress + index; - } - - @Override - public long getLong(int index) { - // wrapped.checkIndex(index, 8); - long v = PlatformDependent.getLong(addr(index)); - return v; - } - - @Override - public float getFloat(int index) { - return Float.intBitsToFloat(getInt(index)); - } - - @Override - public ByteBuf slice() { - return slice(this.readerIndex(), readableBytes()); - } - - @Override - public ByteBuf slice(int index, int length) { - return new SlicedByteBuf(this, index, length); - } - - @Override - public ByteBuf order(ByteOrder endianness) { - return this; - } - - @Override - public double getDouble(int index) { - return Double.longBitsToDouble(getLong(index)); - } - - @Override - public char getChar(int index) { - return (char) getShort(index); - } - - @Override - public long getUnsignedInt(int index) { - return getInt(index) & 0xFFFFFFFFL; - } - - @Override - public int getInt(int index) { - int v = PlatformDependent.getInt(addr(index)); - return v; - } - - @Override - public int getUnsignedShort(int index) { - return getShort(index) & 0xFFFF; - } - - @Override - public short getShort(int index) { - short v = PlatformDependent.getShort(addr(index)); - return v; - } - - @Override - public ByteBuf setShort(int index, int value) { - wrapped.checkIndex(index, 2); - setShort_(index, value); - return this; - } - - @Override - public ByteBuf setInt(int index, int value) { - wrapped.checkIndex(index, 4); - setInt_(index, value); - 
return this; - } - - @Override - public ByteBuf setLong(int index, long value) { - wrapped.checkIndex(index, 8); - setLong_(index, value); - return this; - } - - @Override - public ByteBuf setChar(int index, int value) { - setShort(index, value); - return this; - } - - @Override - public ByteBuf setFloat(int index, float value) { - setInt(index, Float.floatToRawIntBits(value)); - return this; - } - - @Override - public ByteBuf setDouble(int index, double value) { - setLong(index, Double.doubleToRawLongBits(value)); - return this; - } - - @Override - public ByteBuf writeShort(int value) { - wrapped.ensureWritable(2); - setShort_(wrapped.writerIndex, value); - wrapped.writerIndex += 2; - return this; - } - - @Override - public ByteBuf writeInt(int value) { - wrapped.ensureWritable(4); - setInt_(wrapped.writerIndex, value); - wrapped.writerIndex += 4; - return this; - } - - @Override - public ByteBuf writeLong(long value) { - wrapped.ensureWritable(8); - setLong_(wrapped.writerIndex, value); - wrapped.writerIndex += 8; - return this; - } - - @Override - public ByteBuf writeChar(int value) { - writeShort(value); - return this; - } - - @Override - public ByteBuf writeFloat(float value) { - writeInt(Float.floatToRawIntBits(value)); - return this; - } - - @Override - public ByteBuf writeDouble(double value) { - writeLong(Double.doubleToRawLongBits(value)); - return this; - } - - private void setShort_(int index, int value) { - PlatformDependent.putShort(addr(index), (short) value); - } - - private void setInt_(int index, int value) { - PlatformDependent.putInt(addr(index), value); - } - - private void setLong_(int index, long value) { - PlatformDependent.putLong(addr(index), value); - } - - @Override - public byte getByte(int index) { - return PlatformDependent.getByte(addr(index)); - } - - @Override - public ByteBuf setByte(int index, int value) { - PlatformDependent.putByte(addr(index), (byte) value); - return this; - } - - @Override - public boolean release() { - 
return release(1); - } - - @Override - public int setBytes(int index, InputStream in, int length) throws IOException { - wrapped.checkIndex(index, length); - byte[] tmp = new byte[length]; - int readBytes = in.read(tmp); - if (readBytes > 0) { - PlatformDependent.copyMemory(tmp, 0, addr(index), readBytes); - } - return readBytes; - } - - @Override - public ByteBuf getBytes(int index, OutputStream out, int length) throws IOException { - wrapped.checkIndex(index, length); - if (length != 0) { - byte[] tmp = new byte[length]; - PlatformDependent.copyMemory(addr(index), tmp, 0, length); - out.write(tmp); - } - return this; - } - - @Override - public int hashCode() { - return System.identityHashCode(this); - } - - @Override - public boolean equals(Object obj) { - return this == obj; - } -} diff --git a/java/memory/memory-netty-buffer-patch/src/main/java/org/apache/arrow/memory/patch/ArrowByteBufAllocator.java b/java/memory/memory-netty-buffer-patch/src/main/java/org/apache/arrow/memory/patch/ArrowByteBufAllocator.java deleted file mode 100644 index 930ad95145e03..0000000000000 --- a/java/memory/memory-netty-buffer-patch/src/main/java/org/apache/arrow/memory/patch/ArrowByteBufAllocator.java +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.patch; - -import io.netty.buffer.AbstractByteBufAllocator; -import io.netty.buffer.ByteBuf; -import io.netty.buffer.CompositeByteBuf; -import io.netty.buffer.ExpandableByteBuf; -import io.netty.buffer.NettyArrowBuf; -import org.apache.arrow.memory.BufferAllocator; - -/** - * An implementation of ByteBufAllocator that wraps a Arrow BufferAllocator. This allows the RPC - * layer to be accounted and managed using Arrow's BufferAllocator infrastructure. The only thin - * different from a typical BufferAllocator is the signature and the fact that this Allocator - * returns ExpandableByteBufs which enable otherwise non-expandable ArrowBufs to be expandable. - * - * @deprecated This class may be removed in a future release. - */ -@Deprecated -public class ArrowByteBufAllocator extends AbstractByteBufAllocator { - - private static final int DEFAULT_BUFFER_SIZE = 4096; - private static final int DEFAULT_MAX_COMPOSITE_COMPONENTS = 16; - - private final BufferAllocator allocator; - - public ArrowByteBufAllocator(BufferAllocator allocator) { - this.allocator = allocator; - } - - public BufferAllocator unwrap() { - return allocator; - } - - @Override - public ByteBuf buffer() { - return buffer(DEFAULT_BUFFER_SIZE); - } - - @Override - public ByteBuf buffer(int initialCapacity) { - return new ExpandableByteBuf( - NettyArrowBuf.unwrapBuffer(allocator.buffer(initialCapacity)), allocator); - } - - @Override - public ByteBuf buffer(int initialCapacity, int maxCapacity) { - return buffer(initialCapacity); - } - - @Override - public ByteBuf ioBuffer() { - return buffer(); - } - - @Override - public ByteBuf ioBuffer(int initialCapacity) { - return buffer(initialCapacity); - } - - @Override - public ByteBuf ioBuffer(int initialCapacity, int maxCapacity) { - return buffer(initialCapacity); - } - - @Override - public ByteBuf directBuffer() { - 
return buffer(); - } - - @Override - public ByteBuf directBuffer(int initialCapacity) { - return NettyArrowBuf.unwrapBuffer(allocator.buffer(initialCapacity)); - } - - @Override - public ByteBuf directBuffer(int initialCapacity, int maxCapacity) { - return buffer(initialCapacity, maxCapacity); - } - - @Override - public CompositeByteBuf compositeBuffer() { - return compositeBuffer(DEFAULT_MAX_COMPOSITE_COMPONENTS); - } - - @Override - public CompositeByteBuf compositeBuffer(int maxNumComponents) { - return new CompositeByteBuf(this, true, maxNumComponents); - } - - @Override - public CompositeByteBuf compositeDirectBuffer() { - return compositeBuffer(); - } - - @Override - public CompositeByteBuf compositeDirectBuffer(int maxNumComponents) { - return compositeBuffer(maxNumComponents); - } - - @Override - public boolean isDirectBufferPooled() { - return false; - } - - @Override - public ByteBuf heapBuffer() { - throw fail(); - } - - @Override - public ByteBuf heapBuffer(int initialCapacity) { - throw fail(); - } - - @Override - public ByteBuf heapBuffer(int initialCapacity, int maxCapacity) { - throw fail(); - } - - @Override - public CompositeByteBuf compositeHeapBuffer() { - throw fail(); - } - - @Override - public CompositeByteBuf compositeHeapBuffer(int maxNumComponents) { - throw fail(); - } - - @Override - protected ByteBuf newHeapBuffer(int initialCapacity, int maxCapacity) { - throw fail(); - } - - @Override - protected ByteBuf newDirectBuffer(int initialCapacity, int maxCapacity) { - return buffer(initialCapacity, maxCapacity); - } - - private RuntimeException fail() { - throw new UnsupportedOperationException("Allocator doesn't support heap-based memory."); - } -} diff --git a/java/memory/memory-netty-buffer-patch/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java b/java/memory/memory-netty-buffer-patch/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java deleted file mode 100644 index e548bbf0c2187..0000000000000 --- 
a/java/memory/memory-netty-buffer-patch/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.netty.buffer; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import org.junit.jupiter.api.Test; - -public class TestUnsafeDirectLittleEndian { - - @Test - @SuppressWarnings("CatchAndPrintStackTrace") - public void testPrimitiveGetSet() { - ByteBuf byteBuf = Unpooled.directBuffer(64); - UnsafeDirectLittleEndian unsafeDirect = getUnsafeDirectLittleEndian(byteBuf); - - assertEquals(Byte.MAX_VALUE, unsafeDirect.getByte(0)); - assertEquals(-1, unsafeDirect.getByte(1)); - assertEquals(Short.MAX_VALUE, unsafeDirect.getShort(2)); - assertEquals(-2, unsafeDirect.getShort(4)); - assertEquals((char) 65534, unsafeDirect.getChar(4)); - assertEquals(Integer.MAX_VALUE, unsafeDirect.getInt(8)); - assertEquals(-66052, unsafeDirect.getInt(12)); - assertEquals(4294901244L, unsafeDirect.getUnsignedInt(12)); - assertEquals(Long.MAX_VALUE, unsafeDirect.getLong(16)); - 
assertEquals(-4295098372L, unsafeDirect.getLong(24)); - assertEquals(1.23F, unsafeDirect.getFloat(32), 0.0); - assertEquals(-1.23F, unsafeDirect.getFloat(36), 0.0); - assertEquals(1.234567D, unsafeDirect.getDouble(40), 0.0); - assertEquals(-1.234567D, unsafeDirect.getDouble(48), 0.0); - - byte[] inBytes = "1234567".getBytes(StandardCharsets.UTF_8); - try (ByteArrayInputStream bais = new ByteArrayInputStream(inBytes); - ByteArrayOutputStream baos = new ByteArrayOutputStream()) { - assertEquals(5, unsafeDirect.setBytes(56, bais, 5)); - unsafeDirect.getBytes(56, baos, 5); - assertEquals("12345", new String(baos.toByteArray(), StandardCharsets.UTF_8)); - } catch (IOException e) { - e.printStackTrace(); - } - } - - private static UnsafeDirectLittleEndian getUnsafeDirectLittleEndian(ByteBuf byteBuf) { - UnsafeDirectLittleEndian unsafeDirect = new UnsafeDirectLittleEndian(new LargeBuffer(byteBuf)); - - unsafeDirect.setByte(0, Byte.MAX_VALUE); - unsafeDirect.setByte(1, -1); // 0xFF - unsafeDirect.setShort(2, Short.MAX_VALUE); - unsafeDirect.setShort(4, -2); // 0xFFFE - unsafeDirect.setInt(8, Integer.MAX_VALUE); - unsafeDirect.setInt(12, -66052); // 0xFFFE FDFC - unsafeDirect.setLong(16, Long.MAX_VALUE); - unsafeDirect.setLong(24, -4295098372L); // 0xFFFF FFFE FFFD FFFC - unsafeDirect.setFloat(32, 1.23F); - unsafeDirect.setFloat(36, -1.23F); - unsafeDirect.setDouble(40, 1.234567D); - unsafeDirect.setDouble(48, -1.234567D); - return unsafeDirect; - } -} diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml deleted file mode 100644 index 6d660da117379..0000000000000 --- a/java/memory/memory-netty/pom.xml +++ /dev/null @@ -1,92 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-memory - 19.0.0-SNAPSHOT - - - arrow-memory-netty - Arrow Memory - Netty - Netty allocator and utils for allocating memory in Arrow - - - - org.apache.arrow - arrow-memory-core - - - org.apache.arrow - arrow-memory-netty-buffer-patch - ${project.version} - - - io.netty - 
netty-buffer - provided - - - io.netty - netty-common - - - org.slf4j - slf4j-api - test - - - ch.qos.logback - logback-core - test - - - org.immutables - value-annotations - - - - - - - integration-tests - - - - org.apache.maven.plugins - maven-failsafe-plugin - - - default-it - - integration-test - verify - - - - - - - - - diff --git a/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/DefaultAllocationManagerFactory.java b/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/DefaultAllocationManagerFactory.java deleted file mode 100644 index 985999cab9180..0000000000000 --- a/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/DefaultAllocationManagerFactory.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.netty; - -import org.apache.arrow.memory.AllocationManager; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; - -/** The default Allocation Manager Factory for a module. 
*/ -public class DefaultAllocationManagerFactory implements AllocationManager.Factory { - - public static final AllocationManager.Factory FACTORY = NettyAllocationManager.FACTORY; - - @Override - public AllocationManager create(BufferAllocator accountingAllocator, long size) { - return FACTORY.create(accountingAllocator, size); - } - - @Override - public ArrowBuf empty() { - return FACTORY.empty(); - } -} diff --git a/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/NettyAllocationManager.java b/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/NettyAllocationManager.java deleted file mode 100644 index 5b44096ab0efa..0000000000000 --- a/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/NettyAllocationManager.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory.netty; - -import io.netty.buffer.PooledByteBufAllocatorL; -import io.netty.buffer.UnsafeDirectLittleEndian; -import io.netty.util.internal.PlatformDependent; -import org.apache.arrow.memory.AllocationManager; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReferenceManager; - -/** - * The default implementation of {@link AllocationManager}. The implementation is responsible for - * managing when memory is allocated and returned to the Netty-based PooledByteBufAllocatorL. - */ -public class NettyAllocationManager extends AllocationManager { - - public static final AllocationManager.Factory FACTORY = - new AllocationManager.Factory() { - - @Override - public AllocationManager create(BufferAllocator accountingAllocator, long size) { - return new NettyAllocationManager(accountingAllocator, size); - } - - @Override - public ArrowBuf empty() { - return EMPTY_BUFFER; - } - }; - - /** - * The default cut-off value for switching allocation strategies. If the request size is not - * greater than the cut-off value, we will allocate memory by {@link PooledByteBufAllocatorL} - * APIs, otherwise, we will use {@link PlatformDependent} APIs. - */ - public static final int DEFAULT_ALLOCATION_CUTOFF_VALUE = Integer.MAX_VALUE; - - private static final PooledByteBufAllocatorL INNER_ALLOCATOR = new PooledByteBufAllocatorL(); - static final UnsafeDirectLittleEndian EMPTY = INNER_ALLOCATOR.empty; - static final ArrowBuf EMPTY_BUFFER = - new ArrowBuf(ReferenceManager.NO_OP, null, 0, NettyAllocationManager.EMPTY.memoryAddress()); - static final long CHUNK_SIZE = INNER_ALLOCATOR.getChunkSize(); - - private final long allocatedSize; - private final UnsafeDirectLittleEndian memoryChunk; - private final long allocatedAddress; - - /** The cut-off value for switching allocation strategies. 
*/ - NettyAllocationManager( - BufferAllocator accountingAllocator, long requestedSize, int allocationCutOffValue) { - super(accountingAllocator); - - if (requestedSize > allocationCutOffValue) { - this.memoryChunk = null; - this.allocatedAddress = PlatformDependent.allocateMemory(requestedSize); - this.allocatedSize = requestedSize; - } else { - this.memoryChunk = INNER_ALLOCATOR.allocate(requestedSize); - this.allocatedAddress = memoryChunk.memoryAddress(); - this.allocatedSize = memoryChunk.capacity(); - } - } - - NettyAllocationManager(BufferAllocator accountingAllocator, long requestedSize) { - this(accountingAllocator, requestedSize, DEFAULT_ALLOCATION_CUTOFF_VALUE); - } - - /** - * Get the underlying memory chunk managed by this AllocationManager. - * - * @return the underlying memory chunk if the request size is not greater than the cutoff value - * provided in the constructor , or null otherwise. - * @deprecated this method will be removed in a future release. - */ - @Deprecated - UnsafeDirectLittleEndian getMemoryChunk() { - return memoryChunk; - } - - @Override - protected long memoryAddress() { - return allocatedAddress; - } - - @Override - protected void release0() { - if (memoryChunk == null) { - PlatformDependent.freeMemory(allocatedAddress); - } else { - memoryChunk.release(); - } - } - - /** - * Returns the underlying memory chunk size managed. - * - *

    NettyAllocationManager rounds requested size up to the next power of two. - */ - @Override - public long getSize() { - return allocatedSize; - } -} diff --git a/java/memory/memory-netty/src/test/java/io/netty/buffer/TestExpandableByteBuf.java b/java/memory/memory-netty/src/test/java/io/netty/buffer/TestExpandableByteBuf.java deleted file mode 100644 index 45aa0f38a590b..0000000000000 --- a/java/memory/memory-netty/src/test/java/io/netty/buffer/TestExpandableByteBuf.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package io.netty.buffer; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Test; - -public class TestExpandableByteBuf { - - @Test - public void testCapacity() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(20)) { - NettyArrowBuf srcByteBuf = NettyArrowBuf.unwrapBuffer(buf); - ExpandableByteBuf expandableByteBuf = new ExpandableByteBuf(srcByteBuf, allocator); - ByteBuf newByteBuf = expandableByteBuf.capacity(31); - int capacity = newByteBuf.capacity(); - assertEquals(32, capacity); - } - } - - @Test - public void testCapacity1() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(20)) { - NettyArrowBuf srcByteBuf = NettyArrowBuf.unwrapBuffer(buf); - ExpandableByteBuf expandableByteBuf = new ExpandableByteBuf(srcByteBuf, allocator); - ByteBuf newByteBuf = expandableByteBuf.capacity(32); - int capacity = newByteBuf.capacity(); - assertEquals(32, capacity); - } - } - - @Test - public void testSetAndGetIntValues() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(20)) { - NettyArrowBuf srcByteBuf = NettyArrowBuf.unwrapBuffer(buf); - ExpandableByteBuf expandableByteBuf = new ExpandableByteBuf(srcByteBuf, allocator); - int[] intVals = - new int[] { - Integer.MIN_VALUE, - Short.MIN_VALUE - 1, - Short.MIN_VALUE, - 0, - Short.MAX_VALUE, - Short.MAX_VALUE + 1, - Integer.MAX_VALUE - }; - for (int intValue : intVals) { - expandableByteBuf.setInt(0, intValue); - assertEquals(expandableByteBuf.getInt(0), intValue); - assertEquals(expandableByteBuf.getIntLE(0), Integer.reverseBytes(intValue)); - } - } - } - - @Test - public void testSetAndGetLongValues() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(20)) { - 
NettyArrowBuf srcByteBuf = NettyArrowBuf.unwrapBuffer(buf); - ExpandableByteBuf expandableByteBuf = new ExpandableByteBuf(srcByteBuf, allocator); - long[] longVals = new long[] {Long.MIN_VALUE, 0, Long.MAX_VALUE}; - for (long longValue : longVals) { - expandableByteBuf.setLong(0, longValue); - assertEquals(expandableByteBuf.getLong(0), longValue); - assertEquals(expandableByteBuf.getLongLE(0), Long.reverseBytes(longValue)); - } - } - } - - @Test - public void testSetAndGetShortValues() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(20)) { - NettyArrowBuf srcByteBuf = NettyArrowBuf.unwrapBuffer(buf); - ExpandableByteBuf expandableByteBuf = new ExpandableByteBuf(srcByteBuf, allocator); - short[] shortVals = new short[] {Short.MIN_VALUE, 0, Short.MAX_VALUE}; - for (short shortValue : shortVals) { - expandableByteBuf.setShort(0, shortValue); - assertEquals(expandableByteBuf.getShort(0), shortValue); - assertEquals(expandableByteBuf.getShortLE(0), Short.reverseBytes(shortValue)); - } - } - } - - @Test - public void testSetAndGetByteValues() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(20)) { - NettyArrowBuf srcByteBuf = NettyArrowBuf.unwrapBuffer(buf); - ExpandableByteBuf expandableByteBuf = new ExpandableByteBuf(srcByteBuf, allocator); - byte[] byteVals = new byte[] {Byte.MIN_VALUE, 0, Byte.MAX_VALUE}; - for (short byteValue : byteVals) { - expandableByteBuf.setByte(0, byteValue); - assertEquals(expandableByteBuf.getByte(0), byteValue); - } - } - } -} diff --git a/java/memory/memory-netty/src/test/java/io/netty/buffer/TestNettyArrowBuf.java b/java/memory/memory-netty/src/test/java/io/netty/buffer/TestNettyArrowBuf.java deleted file mode 100644 index 19d793d0fc036..0000000000000 --- a/java/memory/memory-netty/src/test/java/io/netty/buffer/TestNettyArrowBuf.java +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * 
contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.netty.buffer; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; - -import java.nio.ByteBuffer; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.memory.patch.ArrowByteBufAllocator; -import org.junit.jupiter.api.Test; - -public class TestNettyArrowBuf { - - @Test - public void testSliceWithoutArgs() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(20)) { - NettyArrowBuf nettyBuf = NettyArrowBuf.unwrapBuffer(buf); - nettyBuf.writerIndex(20); - nettyBuf.readerIndex(10); - NettyArrowBuf slicedBuffer = nettyBuf.slice(); - int readableBytes = slicedBuffer.readableBytes(); - assertEquals(10, readableBytes); - } - } - - @Test - public void testNioBuffer() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(20)) { - NettyArrowBuf nettyBuf = NettyArrowBuf.unwrapBuffer(buf); - ByteBuffer byteBuffer = nettyBuf.nioBuffer(4, 6); - // Nio Buffers should always be 0 indexed - assertEquals(0, byteBuffer.position()); - assertEquals(6, byteBuffer.limit()); - // Underlying buffer has 
size 32 excluding 4 should have capacity of 28. - assertEquals(28, byteBuffer.capacity()); - } - } - - @Test - public void testInternalNioBuffer() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(20)) { - NettyArrowBuf nettyBuf = NettyArrowBuf.unwrapBuffer(buf); - ByteBuffer byteBuffer = nettyBuf.internalNioBuffer(4, 6); - assertEquals(0, byteBuffer.position()); - assertEquals(6, byteBuffer.limit()); - // Underlying buffer has size 32 excluding 4 should have capacity of 28. - assertEquals(28, byteBuffer.capacity()); - } - } - - @Test - public void testSetLEValues() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(20)) { - NettyArrowBuf nettyBuf = NettyArrowBuf.unwrapBuffer(buf); - int[] intVals = - new int[] { - Integer.MIN_VALUE, - Short.MIN_VALUE - 1, - Short.MIN_VALUE, - 0, - Short.MAX_VALUE, - Short.MAX_VALUE + 1, - Integer.MAX_VALUE - }; - for (int intValue : intVals) { - nettyBuf._setInt(0, intValue); - assertEquals(nettyBuf._getIntLE(0), Integer.reverseBytes(intValue)); - } - - long[] longVals = new long[] {Long.MIN_VALUE, 0, Long.MAX_VALUE}; - for (long longValue : longVals) { - nettyBuf._setLong(0, longValue); - assertEquals(nettyBuf._getLongLE(0), Long.reverseBytes(longValue)); - } - - short[] shortVals = new short[] {Short.MIN_VALUE, 0, Short.MAX_VALUE}; - for (short shortValue : shortVals) { - nettyBuf._setShort(0, shortValue); - assertEquals(nettyBuf._getShortLE(0), Short.reverseBytes(shortValue)); - } - } - } - - @Test - public void testSetCompositeBuffer() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(20); - NettyArrowBuf buf2 = NettyArrowBuf.unwrapBuffer(allocator.buffer(20))) { - CompositeByteBuf byteBufs = - new CompositeByteBuf(new ArrowByteBufAllocator(allocator), true, 1); - int expected = 4; - buf2.setInt(0, expected); - buf2.writerIndex(4); - byteBufs.addComponent(true, buf2); - 
NettyArrowBuf.unwrapBuffer(buf).setBytes(0, byteBufs, 4); - int actual = buf.getInt(0); - assertEquals(expected, actual); - } - } - - @Test - public void testGetCompositeBuffer() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(20)) { - CompositeByteBuf byteBufs = - new CompositeByteBuf(new ArrowByteBufAllocator(allocator), true, 1); - int expected = 4; - buf.setInt(0, expected); - NettyArrowBuf buf2 = NettyArrowBuf.unwrapBuffer(allocator.buffer(20)); - // composite buffers are a bit weird, need to jump hoops - // to set capacity. - byteBufs.addComponent(true, buf2); - byteBufs.capacity(20); - NettyArrowBuf.unwrapBuffer(buf).getBytes(0, byteBufs, 4); - int actual = byteBufs.getInt(0); - assertEquals(expected, actual); - byteBufs.component(0).release(); - } - } - - @Test - public void testUnwrapReturnsNull() { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(20)) { - NettyArrowBuf nettyBuf = NettyArrowBuf.unwrapBuffer(buf); - // NettyArrowBuf cannot be unwrapped, so unwrap() should return null per the Netty ByteBuf API - assertNull(nettyBuf.unwrap()); - } - } -} diff --git a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/ITTestLargeArrowBuf.java b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/ITTestLargeArrowBuf.java deleted file mode 100644 index 1029ed738407c..0000000000000 --- a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/ITTestLargeArrowBuf.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.netty; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Integration test for large (more than 2GB) {@link org.apache.arrow.memory.ArrowBuf}. To run this - * test, please make sure there is at least 4GB memory in the system. - */ -public class ITTestLargeArrowBuf { - private static final Logger logger = LoggerFactory.getLogger(ITTestLargeArrowBuf.class); - - private void run(long bufSize) { - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - ArrowBuf largeBuf = allocator.buffer(bufSize)) { - assertEquals(bufSize, largeBuf.capacity()); - logger.trace("Successfully allocated a buffer with capacity {}", largeBuf.capacity()); - - for (long i = 0; i < bufSize / 8; i++) { - largeBuf.setLong(i * 8, i); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully written {} long words", i + 1); - } - } - logger.trace("Successfully written {} long words", bufSize / 8); - - for (long i = 0; i < bufSize / 8; i++) { - long val = largeBuf.getLong(i * 8); - assertEquals(i, val); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully read {} long words", i + 1); - } - } - logger.trace("Successfully read {} long words", bufSize / 8); - } - logger.trace("Successfully released the large buffer."); - } - - @Test - public void testLargeArrowBuf() { - 
run(4 * 1024 * 1024 * 1024L); - } - - @Test - public void testMaxIntArrowBuf() { - run(Integer.MAX_VALUE); - } -} diff --git a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestAllocationManagerNetty.java b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestAllocationManagerNetty.java deleted file mode 100644 index 655129909a535..0000000000000 --- a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestAllocationManagerNetty.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.netty; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.apache.arrow.memory.AllocationManager; -import org.apache.arrow.memory.DefaultAllocationManagerOption; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link AllocationManager}. 
*/ -public class TestAllocationManagerNetty { - - @Test - public void testAllocationManagerType() { - // test netty allocation manager type - System.setProperty( - DefaultAllocationManagerOption.ALLOCATION_MANAGER_TYPE_PROPERTY_NAME, "Netty"); - DefaultAllocationManagerOption.AllocationManagerType mgrType = - DefaultAllocationManagerOption.getDefaultAllocationManagerType(); - - assertEquals(DefaultAllocationManagerOption.AllocationManagerType.Netty, mgrType); - } -} diff --git a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEmptyArrowBuf.java b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEmptyArrowBuf.java deleted file mode 100644 index 3ce0705b4ead0..0000000000000 --- a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEmptyArrowBuf.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory.netty; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; - -import io.netty.buffer.PooledByteBufAllocatorL; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.ReferenceManager; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -public class TestEmptyArrowBuf { - - private static final int MAX_ALLOCATION = 8 * 1024; - private static RootAllocator allocator; - - @BeforeAll - public static void beforeClass() { - allocator = new RootAllocator(MAX_ALLOCATION); - } - - /** Ensure the allocator is closed. */ - @AfterAll - public static void afterClass() { - if (allocator != null) { - allocator.close(); - } - } - - @Test - public void testZeroBuf() { - // Exercise the historical log inside the empty ArrowBuf. This is initialized statically, and - // there is a circular - // dependency between ArrowBuf and BaseAllocator, so if the initialization happens in the wrong - // order, the - // historical log will be null even though RootAllocator.DEBUG is true. 
- allocator.getEmpty().print(new StringBuilder(), 0, RootAllocator.Verbosity.LOG_WITH_STACKTRACE); - } - - @Test - public void testEmptyArrowBuf() { - ArrowBuf buf = - new ArrowBuf( - ReferenceManager.NO_OP, - null, - 1024, - new PooledByteBufAllocatorL().empty.memoryAddress()); - - buf.getReferenceManager().retain(); - buf.getReferenceManager().retain(8); - assertEquals(1024, buf.capacity()); - assertEquals(1, buf.getReferenceManager().getRefCount()); - assertEquals(0, buf.getActualMemoryConsumed()); - - for (int i = 0; i < 10; i++) { - buf.setByte(i, i); - } - assertEquals(0, buf.getActualMemoryConsumed()); - assertEquals(0, buf.getReferenceManager().getSize()); - assertEquals(0, buf.getReferenceManager().getAccountedSize()); - assertFalse(buf.getReferenceManager().release()); - assertFalse(buf.getReferenceManager().release(2)); - assertEquals(0, buf.getReferenceManager().getAllocator().getLimit()); - assertEquals( - buf, buf.getReferenceManager().transferOwnership(buf, allocator).getTransferredBuffer()); - assertEquals(0, buf.readerIndex()); - assertEquals(0, buf.writerIndex()); - assertEquals(1, buf.refCnt()); - - ArrowBuf derive = buf.getReferenceManager().deriveBuffer(buf, 0, 100); - assertEquals(derive, buf); - assertEquals(1, buf.refCnt()); - assertEquals(1, derive.refCnt()); - - buf.close(); - } -} diff --git a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEndianness.java b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEndianness.java deleted file mode 100644 index eb5c3a43ab8f0..0000000000000 --- a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEndianness.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.netty; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import io.netty.buffer.ByteBuf; -import io.netty.buffer.NettyArrowBuf; -import java.nio.ByteOrder; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Test; - -public class TestEndianness { - - @Test - public void testNativeEndian() { - final BufferAllocator a = new RootAllocator(10000); - final ByteBuf b = NettyArrowBuf.unwrapBuffer(a.buffer(4)); - b.setInt(0, 35); - if (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN) { - assertEquals(35, b.getByte(0)); - assertEquals(0, b.getByte(1)); - assertEquals(0, b.getByte(2)); - assertEquals(0, b.getByte(3)); - } else { - assertEquals(0, b.getByte(0)); - assertEquals(0, b.getByte(1)); - assertEquals(0, b.getByte(2)); - assertEquals(35, b.getByte(3)); - } - b.release(); - a.close(); - } -} diff --git a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocationManager.java b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocationManager.java deleted file mode 100644 index 2144b2d930276..0000000000000 --- a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocationManager.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under 
one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.netty; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertInstanceOf; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; - -import org.apache.arrow.memory.AllocationManager; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.BufferLedger; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link NettyAllocationManager}. 
*/ -public class TestNettyAllocationManager { - - static int CUSTOMIZED_ALLOCATION_CUTOFF_VALUE = 1024; - - private RootAllocator createCustomizedAllocator() { - return new RootAllocator( - RootAllocator.configBuilder() - .allocationManagerFactory( - new AllocationManager.Factory() { - @Override - public AllocationManager create(BufferAllocator accountingAllocator, long size) { - return new NettyAllocationManager( - accountingAllocator, size, CUSTOMIZED_ALLOCATION_CUTOFF_VALUE); - } - - @Override - public ArrowBuf empty() { - return null; - } - }) - .build()); - } - - private void readWriteArrowBuf(ArrowBuf buffer) { - // write buffer - for (long i = 0; i < buffer.capacity() / 8; i++) { - buffer.setLong(i * 8, i); - } - - // read buffer - for (long i = 0; i < buffer.capacity() / 8; i++) { - long val = buffer.getLong(i * 8); - assertEquals(i, val); - } - } - - /** Test the allocation strategy for small buffers.. */ - @Test - public void testSmallBufferAllocation() { - final long bufSize = CUSTOMIZED_ALLOCATION_CUTOFF_VALUE - 512L; - try (RootAllocator allocator = createCustomizedAllocator(); - ArrowBuf buffer = allocator.buffer(bufSize)) { - - assertInstanceOf(BufferLedger.class, buffer.getReferenceManager()); - BufferLedger bufferLedger = (BufferLedger) buffer.getReferenceManager(); - - // make sure we are using netty allocation manager - AllocationManager allocMgr = bufferLedger.getAllocationManager(); - assertInstanceOf(NettyAllocationManager.class, allocMgr); - NettyAllocationManager nettyMgr = (NettyAllocationManager) allocMgr; - - // for the small buffer allocation strategy, the chunk is not null - assertNotNull(nettyMgr.getMemoryChunk()); - - readWriteArrowBuf(buffer); - } - } - - /** Test the allocation strategy for large buffers.. 
*/ - @Test - public void testLargeBufferAllocation() { - final long bufSize = CUSTOMIZED_ALLOCATION_CUTOFF_VALUE + 1024L; - try (RootAllocator allocator = createCustomizedAllocator(); - ArrowBuf buffer = allocator.buffer(bufSize)) { - assertInstanceOf(BufferLedger.class, buffer.getReferenceManager()); - BufferLedger bufferLedger = (BufferLedger) buffer.getReferenceManager(); - - // make sure we are using netty allocation manager - AllocationManager allocMgr = bufferLedger.getAllocationManager(); - assertInstanceOf(NettyAllocationManager.class, allocMgr); - NettyAllocationManager nettyMgr = (NettyAllocationManager) allocMgr; - - // for the large buffer allocation strategy, the chunk is null - assertNull(nettyMgr.getMemoryChunk()); - - readWriteArrowBuf(buffer); - } - } -} diff --git a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java deleted file mode 100644 index 5be42ecb089c9..0000000000000 --- a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory.netty; - -import static org.junit.jupiter.api.Assertions.assertTrue; - -import ch.qos.logback.classic.Level; -import ch.qos.logback.classic.Logger; -import ch.qos.logback.classic.spi.ILoggingEvent; -import ch.qos.logback.core.read.ListAppender; -import io.netty.buffer.PooledByteBufAllocatorL; -import java.util.Collections; -import java.util.stream.Collectors; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.ReferenceManager; -import org.junit.jupiter.api.Test; -import org.slf4j.LoggerFactory; - -/** Test netty allocators. */ -public class TestNettyAllocator { - - @Test - @SuppressWarnings("SynchronizeOnNonFinalField") - public void testMemoryUsage() { - ListAppender memoryLogsAppender = new ListAppender<>(); - memoryLogsAppender.list = Collections.synchronizedList(memoryLogsAppender.list); - Logger logger = (Logger) LoggerFactory.getLogger("arrow.allocator"); - try { - logger.setLevel(Level.TRACE); - logger.addAppender(memoryLogsAppender); - memoryLogsAppender.start(); - try (ArrowBuf buf = - new ArrowBuf( - ReferenceManager.NO_OP, - null, - 1024, - new PooledByteBufAllocatorL().empty.memoryAddress())) { - buf.memoryAddress(); - } - boolean result = false; - long startTime = System.currentTimeMillis(); - while ((System.currentTimeMillis() - startTime) - < 10000) { // 10 seconds maximum for time to read logs - // Lock on the list backing the appender since a background thread might try to add more - // logs - // while stream() is iterating over list elements. This would throw a flakey - // ConcurrentModificationException. 
- synchronized (memoryLogsAppender.list) { - result = - memoryLogsAppender.list.stream() - .anyMatch( - log -> - log.toString().contains("Memory Usage: \n") - && log.toString().contains("Large buffers outstanding: ") - && log.toString().contains("Normal buffers outstanding: ") - && log.getLevel().equals(Level.TRACE)); - } - if (result) { - break; - } - } - synchronized (memoryLogsAppender.list) { - assertTrue( - result, - "Log messages are:\n" - + memoryLogsAppender.list.stream() - .map(ILoggingEvent::toString) - .collect(Collectors.joining("\n"))); - } - - } finally { - memoryLogsAppender.stop(); - logger.detachAppender(memoryLogsAppender); - logger.setLevel(null); - } - } -} diff --git a/java/memory/memory-netty/src/test/resources/logback.xml b/java/memory/memory-netty/src/test/resources/logback.xml deleted file mode 100644 index 4c54d18a210ff..0000000000000 --- a/java/memory/memory-netty/src/test/resources/logback.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - - diff --git a/java/memory/memory-unsafe/pom.xml b/java/memory/memory-unsafe/pom.xml deleted file mode 100644 index 92dc0c9fe5dc2..0000000000000 --- a/java/memory/memory-unsafe/pom.xml +++ /dev/null @@ -1,43 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-memory - 19.0.0-SNAPSHOT - - - arrow-memory-unsafe - Arrow Memory - Unsafe - Allocator and utils for allocating memory in Arrow based on sun.misc.Unsafe - - - - org.apache.arrow - arrow-memory-core - - - org.immutables - value-annotations - - - - diff --git a/java/memory/memory-unsafe/src/main/java/module-info.java b/java/memory/memory-unsafe/src/main/java/module-info.java deleted file mode 100644 index 526ebbdabbf8e..0000000000000 --- a/java/memory/memory-unsafe/src/main/java/module-info.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -module org.apache.arrow.memory.unsafe { - exports org.apache.arrow.memory.unsafe to - org.apache.arrow.memory.core; - - requires org.apache.arrow.memory.core; -} diff --git a/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/unsafe/DefaultAllocationManagerFactory.java b/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/unsafe/DefaultAllocationManagerFactory.java deleted file mode 100644 index aed20209ffae5..0000000000000 --- a/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/unsafe/DefaultAllocationManagerFactory.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.unsafe; - -import org.apache.arrow.memory.AllocationManager; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; - -/** The default Allocation Manager Factory for a module. */ -public class DefaultAllocationManagerFactory implements AllocationManager.Factory { - - public static final AllocationManager.Factory FACTORY = UnsafeAllocationManager.FACTORY; - - @Override - public AllocationManager create(BufferAllocator accountingAllocator, long size) { - return FACTORY.create(accountingAllocator, size); - } - - @Override - public ArrowBuf empty() { - return UnsafeAllocationManager.FACTORY.empty(); - } -} diff --git a/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/unsafe/UnsafeAllocationManager.java b/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/unsafe/UnsafeAllocationManager.java deleted file mode 100644 index 67d7e0d2af7cb..0000000000000 --- a/java/memory/memory-unsafe/src/main/java/org/apache/arrow/memory/unsafe/UnsafeAllocationManager.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.unsafe; - -import org.apache.arrow.memory.AllocationManager; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReferenceManager; -import org.apache.arrow.memory.util.MemoryUtil; - -/** Allocation manager based on unsafe API. */ -public final class UnsafeAllocationManager extends AllocationManager { - - private static final ArrowBuf EMPTY = - new ArrowBuf(ReferenceManager.NO_OP, null, 0, MemoryUtil.allocateMemory(0)); - - public static final AllocationManager.Factory FACTORY = - new Factory() { - @Override - public AllocationManager create(BufferAllocator accountingAllocator, long size) { - return new UnsafeAllocationManager(accountingAllocator, size); - } - - @Override - public ArrowBuf empty() { - return EMPTY; - } - }; - - private final long allocatedSize; - - private final long allocatedAddress; - - UnsafeAllocationManager(BufferAllocator accountingAllocator, long requestedSize) { - super(accountingAllocator); - allocatedAddress = MemoryUtil.allocateMemory(requestedSize); - allocatedSize = requestedSize; - } - - @Override - public long getSize() { - return allocatedSize; - } - - @Override - protected long memoryAddress() { - return allocatedAddress; - } - - @Override - protected void release0() { - MemoryUtil.freeMemory(allocatedAddress); - } -} diff --git a/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/unsafe/TestAllocationManagerUnsafe.java 
b/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/unsafe/TestAllocationManagerUnsafe.java deleted file mode 100644 index 0187d63046f41..0000000000000 --- a/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/unsafe/TestAllocationManagerUnsafe.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory.unsafe; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.apache.arrow.memory.AllocationManager; -import org.apache.arrow.memory.DefaultAllocationManagerOption; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link AllocationManager}. 
*/ -public class TestAllocationManagerUnsafe { - - @Test - public void testAllocationManagerType() { - - // test unsafe allocation manager type - System.setProperty( - DefaultAllocationManagerOption.ALLOCATION_MANAGER_TYPE_PROPERTY_NAME, "Unsafe"); - DefaultAllocationManagerOption.AllocationManagerType mgrType = - DefaultAllocationManagerOption.getDefaultAllocationManagerType(); - - assertEquals(DefaultAllocationManagerOption.AllocationManagerType.Unsafe, mgrType); - } -} diff --git a/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/unsafe/TestUnsafeAllocationManager.java b/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/unsafe/TestUnsafeAllocationManager.java deleted file mode 100644 index 947570f76b82e..0000000000000 --- a/java/memory/memory-unsafe/src/test/java/org/apache/arrow/memory/unsafe/TestUnsafeAllocationManager.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory.unsafe; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertInstanceOf; - -import org.apache.arrow.memory.AllocationManager; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.BufferLedger; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link UnsafeAllocationManager}. */ -public class TestUnsafeAllocationManager { - - private BufferAllocator createUnsafeAllocator() { - return new RootAllocator( - RootAllocator.configBuilder() - .allocationManagerFactory(UnsafeAllocationManager.FACTORY) - .build()); - } - - private void readWriteArrowBuf(ArrowBuf buffer) { - // write buffer - for (long i = 0; i < buffer.capacity() / 8; i++) { - buffer.setLong(i * 8, i); - } - - // read buffer - for (long i = 0; i < buffer.capacity() / 8; i++) { - long val = buffer.getLong(i * 8); - assertEquals(i, val); - } - } - - /** Test the memory allocation for {@link UnsafeAllocationManager}. 
*/ - @Test - public void testBufferAllocation() { - final long bufSize = 4096L; - try (BufferAllocator allocator = createUnsafeAllocator(); - ArrowBuf buffer = allocator.buffer(bufSize)) { - assertInstanceOf(BufferLedger.class, buffer.getReferenceManager()); - BufferLedger bufferLedger = (BufferLedger) buffer.getReferenceManager(); - - // make sure we are using unsafe allocation manager - AllocationManager allocMgr = bufferLedger.getAllocationManager(); - assertInstanceOf(UnsafeAllocationManager.class, allocMgr); - UnsafeAllocationManager unsafeMgr = (UnsafeAllocationManager) allocMgr; - - assertEquals(bufSize, unsafeMgr.getSize()); - readWriteArrowBuf(buffer); - } - } -} diff --git a/java/memory/memory-unsafe/src/test/resources/logback.xml b/java/memory/memory-unsafe/src/test/resources/logback.xml deleted file mode 100644 index 4c54d18a210ff..0000000000000 --- a/java/memory/memory-unsafe/src/test/resources/logback.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - - diff --git a/java/memory/pom.xml b/java/memory/pom.xml deleted file mode 100644 index bc34c260505f9..0000000000000 --- a/java/memory/pom.xml +++ /dev/null @@ -1,37 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - - arrow-memory - pom - Arrow Memory - - - memory-core - memory-unsafe - memory-netty-buffer-patch - memory-netty - - diff --git a/java/performance/pom.xml b/java/performance/pom.xml deleted file mode 100644 index 3f18188e3a3d2..0000000000000 --- a/java/performance/pom.xml +++ /dev/null @@ -1,176 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - - arrow-performance - jar - Arrow Performance Benchmarks - JMH Performance benchmarks for other Arrow libraries. 
- - - 1.37 - benchmarks - true - .* - 1 - - 5 - 5 - - jmh-result.json - json - - - - - org.openjdk.jmh - jmh-core - ${jmh.version} - - - org.apache.arrow - arrow-vector - ${arrow.vector.classifier} - - - org.apache.arrow - arrow-memory-core - - - org.apache.arrow - arrow-memory-netty - runtime - - - org.apache.avro - avro - ${dep.avro.version} - - - org.apache.arrow - arrow-avro - - - com.h2database - h2 - 2.3.232 - runtime - - - org.apache.arrow - arrow-jdbc - - - org.apache.arrow - arrow-algorithm - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - - org.openjdk.jmh - jmh-generator-annprocess - ${jmh.version} - - - - - - org.apache.maven.plugins - maven-shade-plugin - - - - shade - - package - - ${uberjar.name} - false - - - org.openjdk.jmh.Main - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - org.codehaus.mojo - exec-maven-plugin - - ${skip.perf.benchmarks} - test - java - - -classpath - - org.openjdk.jmh.Main - ${benchmark.filter} - -f - ${benchmark.forks} - -jvmArgs - ${benchmark.jvmargs} - -wi - ${benchmark.warmups} - -i - ${benchmark.runs} - ${benchmark.list} - -rff - ${benchmark.resultfile} - -rf - ${benchmark.resultformat} - - - - - run-java-benchmarks - - exec - - integration-test - - - - - - diff --git a/java/performance/src/main/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java deleted file mode 100644 index 6032a79ecec77..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.adapter; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.adapter.avro.AvroToArrow; -import org.apache.arrow.adapter.avro.AvroToArrowConfig; -import org.apache.arrow.adapter.avro.AvroToArrowConfigBuilder; -import org.apache.arrow.adapter.avro.AvroToArrowVectorIterator; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.io.BinaryDecoder; -import org.apache.avro.io.BinaryEncoder; -import org.apache.avro.io.DatumWriter; -import org.apache.avro.io.Decoder; -import org.apache.avro.io.DecoderFactory; -import org.apache.avro.io.EncoderFactory; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; 
-import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for avro adapter. */ -@State(Scope.Benchmark) -public class AvroAdapterBenchmarks { - - private final int valueCount = 3000; - - private AvroToArrowConfig config; - - private Schema schema; - private BinaryDecoder decoder; - - /** Setup benchmarks. */ - @Setup - public void prepare() throws Exception { - BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - config = new AvroToArrowConfigBuilder(allocator).build(); - - String schemaStr = - "{\n" - + " \"namespace\": \"org.apache.arrow.avro\",\n" - + " \"type\": \"record\",\n" - + " \"name\": \"testBenchmark\",\n" - + " \"fields\": [\n" - + " {\"name\": \"f0\", \"type\": \"string\"},\n" - + " {\"name\": \"f1\", \"type\": \"int\"},\n" - + " {\"name\": \"f2\", \"type\": \"long\"},\n" - + " {\"name\": \"f3\", \"type\": \"boolean\"},\n" - + " {\"name\": \"f4\", \"type\": \"float\"}\n" - + " ]\n" - + "}"; - schema = new Schema.Parser().parse(schemaStr); - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - BinaryEncoder encoder = new EncoderFactory().directBinaryEncoder(out, null); - DatumWriter writer = new GenericDatumWriter(schema); - - for (int i = 0; i < valueCount; i++) { - GenericRecord record = new GenericData.Record(schema); - record.put(0, "test" + i); - record.put(1, i); - record.put(2, i + 1L); - record.put(3, i % 2 == 0); - record.put(4, i + 0.1f); - writer.write(record, encoder); - } - - decoder = - new DecoderFactory().directBinaryDecoder(new ByteArrayInputStream(out.toByteArray()), null); - } - - /** Tear down benchmarks. */ - @TearDown - public void tearDown() { - config.getAllocator().close(); - } - - /** - * Test {@link AvroToArrow#avroToArrowIterator(Schema, Decoder, AvroToArrowConfig)}. - * - * @return useless. To avoid DCE by JIT. 
- */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public int testAvroToArrow() throws Exception { - decoder.inputStream().reset(); - int sum = 0; - try (AvroToArrowVectorIterator iter = - AvroToArrow.avroToArrowIterator(schema, decoder, config)) { - while (iter.hasNext()) { - VectorSchemaRoot root = iter.next(); - IntVector intVector = (IntVector) root.getVector("f1"); - for (int i = 0; i < intVector.getValueCount(); i++) { - sum += intVector.get(i); - } - root.close(); - } - } - return sum; - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder().include(AvroAdapterBenchmarks.class.getSimpleName()).forks(1).build(); - - new Runner(opt).run(); - } -} diff --git a/java/performance/src/main/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java deleted file mode 100644 index a2eb9674e4d37..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java +++ /dev/null @@ -1,356 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.adapter.jdbc; - -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.Statement; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.adapter.jdbc.consumer.BigIntConsumer; -import org.apache.arrow.adapter.jdbc.consumer.BitConsumer; -import org.apache.arrow.adapter.jdbc.consumer.IntConsumer; -import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer; -import org.apache.arrow.adapter.jdbc.consumer.VarCharConsumer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for Jdbc adapter. 
*/ -public class JdbcAdapterBenchmarks { - // checkstyle:off: MissingJavadocMethod - - private static final int VALUE_COUNT = 3000; - - private static final String CREATE_STATEMENT = - "CREATE TABLE test_table (f0 INT, f1 LONG, f2 VARCHAR, f3 BOOLEAN);"; - private static final String INSERT_STATEMENT = - "INSERT INTO test_table (f0, f1, f2, f3) VALUES (?, ?, ?, ?);"; - private static final String QUERY = "SELECT f0, f1, f2, f3 FROM test_table;"; - private static final String DROP_STATEMENT = "DROP TABLE test_table;"; - - private static final String URL = "jdbc:h2:mem:JdbcAdapterBenchmarks"; - private static final String DRIVER = "org.h2.Driver"; - - /** State object for the jdbc e2e benchmark. */ - @State(Scope.Benchmark) - public static class JdbcState { - - private Connection conn = null; - - private ResultSet resultSet = null; - - private BufferAllocator allocator; - - private Statement statement; - - private JdbcToArrowConfig config; - - @Setup(Level.Trial) - public void prepareState() throws Exception { - allocator = new RootAllocator(Integer.MAX_VALUE); - config = - new JdbcToArrowConfigBuilder().setAllocator(allocator).setTargetBatchSize(1024).build(); - Class.forName(DRIVER); - conn = DriverManager.getConnection(URL); - - try (Statement stmt = conn.createStatement()) { - stmt.executeUpdate(CREATE_STATEMENT); - } - - for (int i = 0; i < VALUE_COUNT; i++) { - // Insert data - try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) { - - stmt.setInt(1, i); - stmt.setLong(2, i); - stmt.setString(3, "test" + i); - stmt.setBoolean(4, i % 2 == 0); - stmt.executeUpdate(); - } - } - } - - @Setup(Level.Invocation) - public void prepareInvoke() throws Exception { - statement = conn.createStatement(); - resultSet = statement.executeQuery(QUERY); - } - - @TearDown(Level.Invocation) - public void tearDownInvoke() throws Exception { - resultSet.close(); - statement.close(); - } - - @TearDown(Level.Trial) - public void tearDownState() throws Exception { - 
try (Statement stmt = conn.createStatement()) { - stmt.executeUpdate(DROP_STATEMENT); - } - allocator.close(); - } - } - - /** State object for the consume benchmark. */ - @State(Scope.Benchmark) - public static class ConsumeState { - - private static final boolean NULLABLE = true; - - private Connection conn = null; - - private ResultSet resultSet = null; - - private BufferAllocator allocator; - - private Statement statement; - - private IntVector intVector; - - private BigIntVector longVector; - - private VarCharVector varCharVector; - - private BitVector bitVector; - - private JdbcConsumer intConsumer; - - private JdbcConsumer longConsumer; - - private JdbcConsumer varCharConsumer; - - private JdbcConsumer bitConsumer; - - private JdbcToArrowConfig config; - - @Setup(Level.Trial) - public void prepare() throws Exception { - allocator = new RootAllocator(Integer.MAX_VALUE); - config = - new JdbcToArrowConfigBuilder().setAllocator(allocator).setTargetBatchSize(1024).build(); - - Class.forName(DRIVER); - conn = DriverManager.getConnection(URL); - try (Statement stmt = conn.createStatement()) { - stmt.executeUpdate(CREATE_STATEMENT); - } - - for (int i = 0; i < VALUE_COUNT; i++) { - // Insert data - try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) { - - stmt.setInt(1, i); - stmt.setLong(2, i); - stmt.setString(3, "test" + i); - stmt.setBoolean(4, i % 2 == 0); - stmt.executeUpdate(); - } - } - - statement = conn.createStatement(); - resultSet = statement.executeQuery(QUERY); - resultSet.next(); - - intVector = new IntVector("", allocator); - intVector.allocateNew(VALUE_COUNT); - intConsumer = IntConsumer.createConsumer(intVector, 1, NULLABLE); - - longVector = new BigIntVector("", allocator); - longVector.allocateNew(VALUE_COUNT); - longConsumer = BigIntConsumer.createConsumer(longVector, 2, NULLABLE); - - varCharVector = new VarCharVector("", allocator); - varCharVector.allocateNew(VALUE_COUNT); - varCharConsumer = 
VarCharConsumer.createConsumer(varCharVector, 3, NULLABLE); - - bitVector = new BitVector("", allocator); - bitVector.allocateNew(VALUE_COUNT); - bitConsumer = BitConsumer.createConsumer(bitVector, 4, NULLABLE); - } - - @TearDown(Level.Trial) - public void tearDown() throws Exception { - try (Statement stmt = conn.createStatement()) { - stmt.executeUpdate(DROP_STATEMENT); - } - - resultSet.close(); - statement.close(); - conn.close(); - - intVector.close(); - intConsumer.close(); - - longVector.close(); - longConsumer.close(); - - varCharVector.close(); - varCharConsumer.close(); - - bitVector.close(); - bitConsumer.close(); - - allocator.close(); - } - } - - /** State object for the jdbc row consume benchmark. */ - @State(Scope.Benchmark) - public static class RowConsumeState { - - private Connection conn = null; - - private ResultSet resultSet = null; - - private BufferAllocator allocator; - - private Statement statement; - - private JdbcToArrowConfig config; - - private ArrowVectorIterator iter; - - private VectorSchemaRoot root; - - @Setup(Level.Trial) - public void prepareState() throws Exception { - allocator = new RootAllocator(Integer.MAX_VALUE); - config = - new JdbcToArrowConfigBuilder() - .setAllocator(allocator) - .setTargetBatchSize(VALUE_COUNT) - .build(); - Class.forName(DRIVER); - conn = DriverManager.getConnection(URL); - - try (Statement stmt = conn.createStatement()) { - stmt.executeUpdate(CREATE_STATEMENT); - } - - for (int i = 0; i < VALUE_COUNT; i++) { - // Insert data - try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) { - - stmt.setInt(1, i); - stmt.setLong(2, i); - stmt.setString(3, "test" + i); - stmt.setBoolean(4, i % 2 == 0); - stmt.executeUpdate(); - } - } - } - - @Setup(Level.Invocation) - public void prepareInvoke() throws Exception { - statement = conn.createStatement(); - resultSet = statement.executeQuery(QUERY); - - iter = JdbcToArrow.sqlToArrowVectorIterator(resultSet, config); - root = iter.next(); - 
iter.compositeConsumer.resetVectorSchemaRoot(root); - } - - @TearDown(Level.Invocation) - public void tearDownInvoke() throws Exception { - resultSet.close(); - statement.close(); - iter.close(); - } - - @TearDown(Level.Trial) - public void tearDownState() throws Exception { - try (Statement stmt = conn.createStatement()) { - stmt.executeUpdate(DROP_STATEMENT); - } - allocator.close(); - } - } - - /** - * Test {@link JdbcToArrow#sqlToArrowVectorIterator(ResultSet, JdbcToArrowConfig)}. - * - * @return useless. To avoid DCE by JIT. - */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public int testJdbcToArrow(JdbcState state) throws Exception { - int valueCount = 0; - try (ArrowVectorIterator iter = - JdbcToArrow.sqlToArrowVectorIterator(state.resultSet, state.config)) { - while (iter.hasNext()) { - VectorSchemaRoot root = iter.next(); - IntVector intVector = (IntVector) root.getFieldVectors().get(0); - valueCount += intVector.getValueCount(); - root.close(); - } - } - return valueCount; - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void consumeBenchmark(ConsumeState state) throws Exception { - state.intConsumer.resetValueVector(state.intVector); - state.longConsumer.resetValueVector(state.longVector); - state.varCharConsumer.resetValueVector(state.varCharVector); - state.bitConsumer.resetValueVector(state.bitVector); - for (int i = 0; i < VALUE_COUNT; i++) { - state.intConsumer.consume(state.resultSet); - state.longConsumer.consume(state.resultSet); - state.varCharConsumer.consume(state.resultSet); - state.bitConsumer.consume(state.resultSet); - } - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void consumeRowsBenchmark(RowConsumeState state) throws Exception { - for (int i = 0; i < VALUE_COUNT; i++) { - state.iter.compositeConsumer.consume(state.resultSet); - } - } - - public static void main(String[] args) 
throws RunnerException { - Options opt = - new OptionsBuilder().include(JdbcAdapterBenchmarks.class.getSimpleName()).forks(1).build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/performance/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java deleted file mode 100644 index 83d555560d372..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.algorithm.search; - -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Param; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link ParallelSearcher}. */ -public class ParallelSearcherBenchmarks { - // checkstyle:off: MissingJavadocMethod - - private static final int VECTOR_LENGTH = 1024 * 1024; - - /** State object for the benchmarks. 
*/ - @State(Scope.Benchmark) - public static class SearchState { - - @Param({"1", "2", "5", "10", "20", "50", "100"}) - int numThreads; - - BufferAllocator allocator; - - ExecutorService threadPool; - - IntVector targetVector; - - IntVector keyVector; - - ParallelSearcher searcher; - - @Setup(Level.Trial) - public void prepare() { - allocator = new RootAllocator(Integer.MAX_VALUE); - targetVector = new IntVector("target vector", allocator); - targetVector.allocateNew(VECTOR_LENGTH); - keyVector = new IntVector("key vector", allocator); - keyVector.allocateNew(1); - threadPool = Executors.newFixedThreadPool(numThreads); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - targetVector.set(i, i); - } - targetVector.setValueCount(VECTOR_LENGTH); - - keyVector.set(0, VECTOR_LENGTH / 3); - keyVector.setValueCount(1); - } - - @Setup(Level.Invocation) - public void prepareInvoke() { - searcher = new ParallelSearcher<>(targetVector, threadPool, numThreads); - } - - @TearDown(Level.Trial) - public void tearDownState() { - targetVector.close(); - keyVector.close(); - allocator.close(); - threadPool.shutdown(); - } - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void searchBenchmark(SearchState state) throws Exception { - state.searcher.search(state.keyVector, 0); - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder() - .include(ParallelSearcherBenchmarks.class.getSimpleName()) - .forks(1) - .build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/performance/src/main/java/org/apache/arrow/memory/AllocatorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/memory/AllocatorBenchmarks.java deleted file mode 100644 index 1154809cae753..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/memory/AllocatorBenchmarks.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation 
(ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.rounding.RoundingPolicy; -import org.apache.arrow.memory.rounding.SegmentRoundingPolicy; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for allocators. */ -public class AllocatorBenchmarks { - - /** Benchmark for the default allocator. 
*/ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void defaultAllocatorBenchmark() { - final int bufferSize = 1024; - final int numBuffers = 1024; - - try (RootAllocator allocator = new RootAllocator(numBuffers * bufferSize)) { - ArrowBuf[] buffers = new ArrowBuf[numBuffers]; - - for (int i = 0; i < numBuffers; i++) { - buffers[i] = allocator.buffer(bufferSize); - } - - for (int i = 0; i < numBuffers; i++) { - buffers[i].close(); - } - } - } - - /** Benchmark for allocator with segment rounding policy. */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void segmentRoundingPolicyBenchmark() { - final long bufferSize = 1024L; - final int numBuffers = 1024; - final long segmentSize = 1024L; - - RoundingPolicy policy = new SegmentRoundingPolicy(segmentSize); - try (RootAllocator allocator = - new RootAllocator(AllocationListener.NOOP, bufferSize * numBuffers, policy)) { - ArrowBuf[] buffers = new ArrowBuf[numBuffers]; - - for (int i = 0; i < numBuffers; i++) { - buffers[i] = allocator.buffer(bufferSize); - } - - for (int i = 0; i < numBuffers; i++) { - buffers[i].close(); - } - } - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder().include(AllocatorBenchmarks.class.getSimpleName()).forks(1).build(); - - new Runner(opt).run(); - } -} diff --git a/java/performance/src/main/java/org/apache/arrow/memory/ArrowBufBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/memory/ArrowBufBenchmarks.java deleted file mode 100644 index 8aaee28e77417..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/memory/ArrowBufBenchmarks.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.memory; - -import java.util.concurrent.TimeUnit; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link ArrowBuf}. */ -@State(Scope.Benchmark) -public class ArrowBufBenchmarks { - - private static final int BUFFER_CAPACITY = 1024 * 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private ArrowBuf buffer; - - /** Setup benchmarks. */ - @Setup - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - buffer = allocator.buffer(BUFFER_CAPACITY); - } - - /** Tear down benchmarks. 
*/ - @TearDown - public void tearDown() { - buffer.close(); - allocator.close(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public void setZero() { - buffer.setZero(0, BUFFER_CAPACITY); - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder().include(ArrowBufBenchmarks.class.getSimpleName()).forks(1).build(); - - new Runner(opt).run(); - } -} diff --git a/java/performance/src/main/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java deleted file mode 100644 index 3d82b46b094c6..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory.util; - -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link ArrowBufPointer}. */ -@State(Scope.Benchmark) -public class ArrowBufPointerBenchmarks { - - private static final int BUFFER_CAPACITY = 1000; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private ArrowBuf buffer1; - - private ArrowBuf buffer2; - - private ArrowBufPointer pointer1; - - private ArrowBufPointer pointer2; - - /** Setup benchmarks. */ - @Setup - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - buffer1 = allocator.buffer(BUFFER_CAPACITY); - buffer2 = allocator.buffer(BUFFER_CAPACITY); - - for (int i = 0; i < BUFFER_CAPACITY; i++) { - buffer1.setByte(i, i); - buffer2.setByte(i, i); - } - - // make the last bytes different - buffer1.setByte(BUFFER_CAPACITY - 1, 12); - buffer1.setByte(BUFFER_CAPACITY - 1, 123); - - pointer1 = new ArrowBufPointer(buffer1, 0, BUFFER_CAPACITY); - pointer2 = new ArrowBufPointer(buffer2, 0, BUFFER_CAPACITY); - } - - /** Tear down benchmarks. 
*/ - @TearDown - public void tearDown() { - buffer1.close(); - buffer2.close(); - allocator.close(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public int compareBenchmark() { - return pointer1.compareTo(pointer2); - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder() - .include(ArrowBufPointerBenchmarks.class.getSimpleName()) - .forks(1) - .build(); - - new Runner(opt).run(); - } -} diff --git a/java/performance/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java deleted file mode 100644 index c5f1036d69127..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.memory.util; - -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link ByteFunctionHelpers}. */ -public class ByteFunctionHelpersBenchmarks { - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - /** - * State object for the {@link ByteFunctionHelpersBenchmarks#arrowBufEquals(ArrowEqualState)} - * benchmark. 
- */ - @State(Scope.Benchmark) - public static class ArrowEqualState { - // checkstyle:off: MissingJavadocMethod - private static final int BUFFER_CAPACITY = 7; - - private BufferAllocator allocator; - - private ArrowBuf buffer1; - - private ArrowBuf buffer2; - - @Setup(Level.Trial) - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - buffer1 = allocator.buffer(BUFFER_CAPACITY); - buffer2 = allocator.buffer(BUFFER_CAPACITY); - - for (int i = 0; i < BUFFER_CAPACITY; i++) { - buffer1.setByte(i, i); - buffer2.setByte(i, i); - } - } - - @TearDown(Level.Trial) - public void tearDown() { - buffer1.close(); - buffer2.close(); - allocator.close(); - } - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public void arrowBufEquals(ArrowEqualState state) { - ByteFunctionHelpers.equal( - state.buffer1, - 0, - ArrowEqualState.BUFFER_CAPACITY - 1, - state.buffer2, - 0, - ArrowEqualState.BUFFER_CAPACITY - 1); - } - - /** - * State object for the {@link - * ByteFunctionHelpersBenchmarks#arrowBufArrayEquals(ArrowArrayEqualState)} benchmark. 
- */ - @State(Scope.Benchmark) - public static class ArrowArrayEqualState { - - private static final int BUFFER_CAPACITY = 1024; - - private BufferAllocator allocator; - - private ArrowBuf buffer1; - - private byte[] buffer2; - - @Setup(Level.Trial) - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - buffer1 = allocator.buffer(BUFFER_CAPACITY); - buffer2 = new byte[BUFFER_CAPACITY]; - - for (int i = 0; i < BUFFER_CAPACITY; i++) { - buffer1.setByte(i, i); - buffer2[i] = (byte) i; - } - } - - @TearDown(Level.Trial) - public void tearDown() { - buffer1.close(); - allocator.close(); - } - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public int arrowBufArrayEquals(ArrowArrayEqualState state) { - return ByteFunctionHelpers.compare( - state.buffer1, - 0, - ArrowArrayEqualState.BUFFER_CAPACITY, - state.buffer2, - 0, - ArrowArrayEqualState.BUFFER_CAPACITY); - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder() - .include(ByteFunctionHelpersBenchmarks.class.getSimpleName()) - .forks(1) - .build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java deleted file mode 100644 index bd53b13823622..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link BaseValueVector}. */ -@State(Scope.Benchmark) -public class BaseValueVectorBenchmarks { - - private static final int VECTOR_LENGTH = 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private IntVector vector; - - /** Setup benchmarks. */ - @Setup - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - vector = new IntVector("vector", allocator); - vector.allocateNew(VECTOR_LENGTH); - } - - /** Tear down benchmarks. */ - @TearDown - public void tearDown() { - vector.close(); - allocator.close(); - } - - /** - * Test {@link BaseValueVector#computeCombinedBufferSize(int, int)}. - * - * @return useless. To avoid DCE by JIT. 
- */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public int testComputeCombinedBufferSize() { - int totalSize = 0; - for (int i = 0; i < VECTOR_LENGTH; i++) { - totalSize += vector.computeCombinedBufferSize(i, 4); - } - return totalSize; - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder() - .include(BaseValueVectorBenchmarks.class.getSimpleName()) - .forks(1) - .build(); - - new Runner(opt).run(); - } -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java deleted file mode 100644 index f794efe91b88b..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link BitVectorHelper}. */ -public class BitVectorHelperBenchmarks { - // checkstyle:off: MissingJavadocMethod - - /** State object for general benchmarks. */ - @State(Scope.Benchmark) - public static class BenchmarkState { - - private static final int VALIDITY_BUFFER_CAPACITY = 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private ArrowBuf validityBuffer; - - private ArrowBuf oneBitValidityBuffer; - - /** Setup benchmarks. 
*/ - @Setup(Level.Trial) - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - validityBuffer = allocator.buffer(VALIDITY_BUFFER_CAPACITY / 8); - - for (int i = 0; i < VALIDITY_BUFFER_CAPACITY; i++) { - if (i % 7 == 0) { - BitVectorHelper.setBit(validityBuffer, i); - } else { - BitVectorHelper.unsetBit(validityBuffer, i); - } - } - - // only one 1 bit in the middle of the buffer - oneBitValidityBuffer = allocator.buffer(VALIDITY_BUFFER_CAPACITY / 8); - oneBitValidityBuffer.setZero(0, VALIDITY_BUFFER_CAPACITY / 8); - BitVectorHelper.setBit(oneBitValidityBuffer, VALIDITY_BUFFER_CAPACITY / 2); - } - - /** Tear down benchmarks. */ - @TearDown(Level.Trial) - public void tearDown() { - validityBuffer.close(); - oneBitValidityBuffer.close(); - allocator.close(); - } - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public int getNullCountBenchmark(BenchmarkState state) { - return BitVectorHelper.getNullCount( - state.validityBuffer, BenchmarkState.VALIDITY_BUFFER_CAPACITY); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public boolean allBitsNullBenchmark(BenchmarkState state) { - return BitVectorHelper.checkAllBitsEqualTo( - state.oneBitValidityBuffer, BenchmarkState.VALIDITY_BUFFER_CAPACITY, true); - } - - /** State object for {@link #loadValidityBufferAllOne(NonNullableValidityBufferState)}.. */ - @State(Scope.Benchmark) - public static class NonNullableValidityBufferState { - - private static final int VALIDITY_BUFFER_CAPACITY = 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private ArrowBuf validityBuffer; - - private ArrowBuf loadResult; - - private ArrowFieldNode fieldNode; - - /** Setup benchmarks. 
*/ - @Setup(Level.Trial) - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - validityBuffer = allocator.buffer(VALIDITY_BUFFER_CAPACITY / 8); - - for (int i = 0; i < VALIDITY_BUFFER_CAPACITY; i++) { - BitVectorHelper.setBit(validityBuffer, i); - } - - fieldNode = new ArrowFieldNode(VALIDITY_BUFFER_CAPACITY, 0); - } - - @TearDown(Level.Invocation) - public void tearDownInvoke() { - loadResult.close(); - } - - /** Tear down benchmarks. */ - @TearDown(Level.Trial) - public void tearDown() { - validityBuffer.close(); - allocator.close(); - } - } - - /** - * Benchmark for {@link BitVectorHelper#loadValidityBuffer(ArrowFieldNode, ArrowBuf, - * BufferAllocator)} when all elements are not null. - */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public void loadValidityBufferAllOne(NonNullableValidityBufferState state) { - state.loadResult = - BitVectorHelper.loadValidityBuffer(state.fieldNode, state.validityBuffer, state.allocator); - } - - /** State object for {@link #setValidityBitBenchmark(ClearBitStateState)}. */ - @State(Scope.Benchmark) - public static class ClearBitStateState { - - private static final int VALIDITY_BUFFER_CAPACITY = 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private ArrowBuf validityBuffer; - - private int bitToSet = 0; - - /** Setup benchmarks. */ - @Setup(Level.Trial) - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - validityBuffer = allocator.buffer(VALIDITY_BUFFER_CAPACITY / 8); - } - - /** Tear down benchmarks. 
*/ - @TearDown(Level.Trial) - public void tearDown() { - validityBuffer.close(); - allocator.close(); - } - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void setValidityBitBenchmark(ClearBitStateState state) { - for (int i = 0; i < ClearBitStateState.VALIDITY_BUFFER_CAPACITY; i++) { - BitVectorHelper.setValidityBit(state.validityBuffer, i, state.bitToSet); - } - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void setValidityBitToZeroBenchmark(ClearBitStateState state) { - for (int i = 0; i < ClearBitStateState.VALIDITY_BUFFER_CAPACITY; i++) { - BitVectorHelper.unsetBit(state.validityBuffer, i); - } - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder() - .include(BitVectorHelperBenchmarks.class.getSimpleName()) - .forks(1) - .build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java deleted file mode 100644 index 6cb54bc62a3a6..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import java.math.BigDecimal; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link DecimalVector}. */ -@State(Scope.Benchmark) -public class DecimalVectorBenchmarks { - - private static final int VECTOR_LENGTH = 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private DecimalVector vector; - - private ArrowBuf fromBuf; - - byte[] fromByteArray; - - /** Setup benchmarks. 
*/ - @Setup - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - vector = new DecimalVector("vector", allocator, 38, 16); - vector.allocateNew(VECTOR_LENGTH); - - fromBuf = allocator.buffer(VECTOR_LENGTH * DecimalVector.TYPE_WIDTH); - for (int i = 0; i < VECTOR_LENGTH; i++) { - byte[] bytes = BigDecimal.valueOf(i).unscaledValue().toByteArray(); - fromBuf.setBytes(i * DecimalVector.TYPE_WIDTH, bytes); - } - - fromByteArray = new byte[DecimalVector.TYPE_WIDTH]; - fromBuf.getBytes(0, fromByteArray); - } - - /** Tear down benchmarks. */ - @TearDown - public void tearDown() { - fromBuf.close(); - vector.close(); - allocator.close(); - } - - /** Test writing on {@link DecimalVector} from arrow buf. */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void setBigEndianArrowBufBenchmark() { - int offset = 0; - - for (int i = 0; i < VECTOR_LENGTH; i++) { - vector.setBigEndianSafe(i, offset, fromBuf, DecimalVector.TYPE_WIDTH); - offset += 8; - } - } - - /** Test writing on {@link DecimalVector} from byte array. 
*/ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void setBigEndianByteArrayBenchmark() { - for (int i = 0; i < VECTOR_LENGTH; i++) { - vector.setBigEndian(i, fromByteArray); - } - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder() - .include(DecimalVectorBenchmarks.class.getSimpleName()) - .forks(1) - .build(); - - new Runner(opt).run(); - } -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/Float8Benchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/Float8Benchmarks.java deleted file mode 100644 index f2b6a134bdb57..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/Float8Benchmarks.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.BoundsChecking; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link Float8Vector}. */ -@State(Scope.Benchmark) -public class Float8Benchmarks { - // checkstyle:off: MissingJavadocMethod - - private static final int VECTOR_LENGTH = 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private Float8Vector vector; - - private Float8Vector fromVector; - - /** Setup benchmarks. */ - @Setup - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - vector = new Float8Vector("vector", allocator); - vector.allocateNew(VECTOR_LENGTH); - - fromVector = new Float8Vector("vector", allocator); - fromVector.allocateNew(VECTOR_LENGTH); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 3 == 0) { - fromVector.setNull(i); - } else { - fromVector.set(i, i * i); - } - } - fromVector.setValueCount(VECTOR_LENGTH); - } - - /** Tear down benchmarks. */ - @TearDown - public void tearDown() { - vector.close(); - fromVector.close(); - allocator.close(); - } - - /** - * Test reading/writing on {@link Float8Vector}. The performance of this benchmark is influenced - * by the states of two flags: 1. The flag for boundary checking. For details, please see {@link - * BoundsChecking}. 
2. The flag for null checking in get methods. For details, please see {@link - * NullCheckingForGet}. - * - * @return useless. To avoid DCE by JIT. - */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public double readWriteBenchmark() { - double sum = 0; - for (int i = 0; i < VECTOR_LENGTH; i++) { - vector.set(i, i + 10.0); - sum += vector.get(i); - } - return sum; - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void copyFromBenchmark() { - for (int i = 0; i < VECTOR_LENGTH; i++) { - vector.copyFrom(i, i, (Float8Vector) fromVector); - } - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder().include(Float8Benchmarks.class.getSimpleName()).forks(1).build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/FloatingPointBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/FloatingPointBenchmarks.java deleted file mode 100644 index a7e6789889277..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/FloatingPointBenchmarks.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.compare.ApproxEqualsVisitor; -import org.apache.arrow.vector.compare.Range; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for floating point vectors. */ -@State(Scope.Benchmark) -public class FloatingPointBenchmarks { - // checkstyle:off: MissingJavadocMethod - - private static final int VECTOR_LENGTH = 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private Float4Vector floatVector1; - - private Float4Vector floatVector2; - - private Float8Vector doubleVector1; - - private Float8Vector doubleVector2; - - private ApproxEqualsVisitor floatVisitor; - - private ApproxEqualsVisitor doubleVisitor; - - private Range range; - - /** Setup benchmarks. 
*/ - @Setup - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - floatVector1 = new Float4Vector("vector", allocator); - floatVector2 = new Float4Vector("vector", allocator); - doubleVector1 = new Float8Vector("vector", allocator); - doubleVector2 = new Float8Vector("vector", allocator); - - floatVector1.allocateNew(VECTOR_LENGTH); - floatVector2.allocateNew(VECTOR_LENGTH); - doubleVector1.allocateNew(VECTOR_LENGTH); - doubleVector2.allocateNew(VECTOR_LENGTH); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 3 == 0) { - floatVector1.setNull(i); - floatVector2.setNull(i); - doubleVector1.setNull(i); - doubleVector2.setNull(i); - } else { - floatVector1.set(i, i * i); - floatVector2.set(i, i * i); - doubleVector1.set(i, i * i); - doubleVector2.set(i, i * i); - } - } - floatVector1.setValueCount(VECTOR_LENGTH); - floatVector2.setValueCount(VECTOR_LENGTH); - doubleVector1.setValueCount(VECTOR_LENGTH); - doubleVector2.setValueCount(VECTOR_LENGTH); - - floatVisitor = new ApproxEqualsVisitor(floatVector1, floatVector2, 0.01f, 0.01); - doubleVisitor = new ApproxEqualsVisitor(doubleVector1, doubleVector2, 0.01f, 0.01); - range = new Range(0, 0, VECTOR_LENGTH); - } - - /** Tear down benchmarks. */ - @TearDown - public void tearDown() { - floatVector1.close(); - floatVector2.close(); - doubleVector1.close(); - doubleVector2.close(); - allocator.close(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public int approxEqualsBenchmark() { - boolean floatResult = floatVisitor.visit(floatVector1, range); - boolean doubleResult = doubleVisitor.visit(doubleVector1, range); - return (floatResult ? 1 : 0) + (doubleResult ? 
1 : 0); - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder() - .include(FloatingPointBenchmarks.class.getSimpleName()) - .forks(1) - .build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/IntBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/IntBenchmarks.java deleted file mode 100644 index 315f1f83ccdee..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/IntBenchmarks.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.complex.impl.IntWriterImpl; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link IntVector}. */ -@State(Scope.Benchmark) -public class IntBenchmarks { - // checkstyle:off: MissingJavadocMethod - - private static final int VECTOR_LENGTH = 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private IntVector vector; - - @Setup - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - vector = new IntVector("vector", allocator); - vector.allocateNew(VECTOR_LENGTH); - vector.setValueCount(VECTOR_LENGTH); - } - - @TearDown - public void tearDown() { - vector.close(); - allocator.close(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void setWithValueHolder() { - for (int i = 0; i < VECTOR_LENGTH; i++) { - NullableIntHolder holder = new NullableIntHolder(); - holder.isSet = i % 3 == 0 ? 
0 : 1; - if (holder.isSet == 1) { - holder.value = i; - } - vector.setSafe(i, holder); - } - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void setIntDirectly() { - for (int i = 0; i < VECTOR_LENGTH; i++) { - vector.setSafe(i, i % 3 == 0 ? 0 : 1, i); - } - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void setWithWriter() { - IntWriterImpl writer = new IntWriterImpl(vector); - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 3 != 0) { - writer.writeInt(i); - } - } - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder().include(IntBenchmarks.class.getSimpleName()).forks(1).build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/VarCharBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/VarCharBenchmarks.java deleted file mode 100644 index b3924a7dd9461..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/VarCharBenchmarks.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link VarCharVector}. */ -@State(Scope.Benchmark) -public class VarCharBenchmarks { - // checkstyle:off: MissingJavadocMethod - - private static final int VECTOR_LENGTH = 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private VarCharVector vector; - - private VarCharVector fromVector; - - /** Setup benchmarks. */ - @Setup - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - vector = new VarCharVector("vector", allocator); - vector.allocateNew(ALLOCATOR_CAPACITY / 4, VECTOR_LENGTH); - - fromVector = new VarCharVector("vector", allocator); - fromVector.allocateNew(ALLOCATOR_CAPACITY / 4, VECTOR_LENGTH); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 3 == 0) { - fromVector.setNull(i); - } else { - fromVector.set(i, String.valueOf(i * 1000).getBytes()); - } - } - fromVector.setValueCount(VECTOR_LENGTH); - } - - /** Tear down benchmarks. 
*/ - @TearDown - public void tearDown() { - vector.close(); - fromVector.close(); - allocator.close(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void copyFromBenchmark() { - for (int i = 0; i < VECTOR_LENGTH; i++) { - vector.copyFrom(i, i, fromVector); - } - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder().include(VarCharBenchmarks.class.getSimpleName()).forks(1).build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java deleted file mode 100644 index 0bce6569d268f..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.holders.NullableVarCharHolder; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link BaseVariableWidthVector}. */ -@State(Scope.Benchmark) -public class VariableWidthVectorBenchmarks { - // checkstyle:off: MissingJavadocMethod - - private static final int VECTOR_CAPACITY = 16 * 1024; - - private static final int VECTOR_LENGTH = 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private static byte[] bytes = VariableWidthVectorBenchmarks.class.getName().getBytes(); - private ArrowBuf arrowBuff; - - private BufferAllocator allocator; - - private VarCharVector vector; - - /** Setup benchmarks. */ - @Setup - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - vector = new VarCharVector("vector", allocator); - vector.allocateNew(VECTOR_CAPACITY, VECTOR_LENGTH); - arrowBuff = allocator.buffer(VECTOR_LENGTH); - arrowBuff.setBytes(0, bytes, 0, bytes.length); - } - - /** Tear down benchmarks. */ - @TearDown - public void tearDown() { - arrowBuff.close(); - vector.close(); - allocator.close(); - } - - /** - * Test {@link BaseVariableWidthVector#getValueCapacity()}. - * - * @return useless. To avoid DCE by JIT. 
- */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public int getValueCapacity() { - return vector.getValueCapacity(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public int setSafeFromArray() { - for (int i = 0; i < 500; ++i) { - vector.setSafe(i * 40, bytes); - } - return vector.getBufferSize(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MILLISECONDS) - public int setSafeFromNullableVarcharHolder() { - NullableVarCharHolder nvch = new NullableVarCharHolder(); - nvch.buffer = arrowBuff; - nvch.start = 0; - nvch.end = bytes.length; - for (int i = 0; i < 50; ++i) { - nvch.isSet = 0; - for (int j = 0; j < 9; ++j) { - int idx = 10 * i + j; - vector.setSafe(idx, nvch); - } - nvch.isSet = 1; - vector.setSafe(10 * (i + 1), nvch); - } - return vector.getBufferSize(); - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder() - .include(VariableWidthVectorBenchmarks.class.getSimpleName()) - .forks(1) - .build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/VectorLoaderBenchmark.java b/java/performance/src/main/java/org/apache/arrow/vector/VectorLoaderBenchmark.java deleted file mode 100644 index 85a4b33a4e5da..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/VectorLoaderBenchmark.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link VectorLoader}. */ -public class VectorLoaderBenchmark { - // checkstyle:off: MissingJavadocMethod - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private static final int VECTOR_COUNT = 10; - - /** State for vector load benchmark. */ - @State(Scope.Benchmark) - public static class LoadState { - - private BufferAllocator allocator; - - private VarCharVector[] vectors; - - private ArrowRecordBatch recordBatch; - - private VectorSchemaRoot root; - - private VectorLoader loader; - - /** Setup benchmarks. 
*/ - @Setup(Level.Trial) - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - } - - @Setup(Level.Invocation) - public void prepareInvoke() { - vectors = new VarCharVector[VECTOR_COUNT]; - for (int i = 0; i < VECTOR_COUNT; i++) { - vectors[i] = new VarCharVector("vector", allocator); - vectors[i].allocateNew(100, 10); - } - - root = VectorSchemaRoot.of(vectors); - VectorUnloader unloader = new VectorUnloader(root); - recordBatch = unloader.getRecordBatch(); - - loader = new VectorLoader(root); - } - - @TearDown(Level.Invocation) - public void tearDownInvoke() { - recordBatch.close(); - root.close(); - } - - /** Tear down benchmarks. */ - @TearDown(Level.Trial) - public void tearDown() { - allocator.close(); - } - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void loadBenchmark(LoadState state) { - state.loader.load(state.recordBatch); - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder().include(VectorLoaderBenchmark.class.getSimpleName()).forks(1).build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java b/java/performance/src/main/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java deleted file mode 100644 index 2f7edc6f8aa17..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link VectorUnloader}. */ -@State(Scope.Benchmark) -public class VectorUnloaderBenchmark { - // checkstyle:off: MissingJavadocMethod - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private static final int VECTOR_COUNT = 10; - - private BufferAllocator allocator; - - private VarCharVector[] vectors; - - private VectorUnloader unloader; - - private ArrowRecordBatch recordBatch; - - /** Setup benchmarks. 
*/ - @Setup(Level.Trial) - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - } - - @Setup(Level.Invocation) - public void prepareInvoke() { - vectors = new VarCharVector[VECTOR_COUNT]; - for (int i = 0; i < VECTOR_COUNT; i++) { - vectors[i] = new VarCharVector("vector", allocator); - vectors[i].allocateNew(100, 10); - } - - unloader = new VectorUnloader(VectorSchemaRoot.of(vectors)); - } - - @TearDown(Level.Invocation) - public void tearDownInvoke() { - if (recordBatch != null) { - recordBatch.close(); - } - for (int i = 0; i < VECTOR_COUNT; i++) { - vectors[i].close(); - } - } - - /** Tear down benchmarks. */ - @TearDown(Level.Trial) - public void tearDown() { - allocator.close(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public void unloadBenchmark() { - recordBatch = unloader.getRecordBatch(); - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder() - .include(VectorUnloaderBenchmark.class.getSimpleName()) - .forks(1) - .build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java deleted file mode 100644 index c24399dbca388..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.dictionary; - -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link DictionaryEncoder}. */ -@State(Scope.Benchmark) -public class DictionaryEncoderBenchmarks { - - private BufferAllocator allocator; - - private static final int DATA_SIZE = 1000; - private static final int KEY_SIZE = 100; - - private static final int KEY_LENGTH = 10; - - private List keys = new ArrayList<>(); - - private VarCharVector vector; - - private VarCharVector dictionaryVector; - - /** Setup benchmarks. 
*/ - @Setup - public void prepare() { - - for (int i = 0; i < KEY_SIZE; i++) { - keys.add(generateUniqueKey(KEY_LENGTH)); - } - - allocator = new RootAllocator(10 * 1024 * 1024); - - vector = new VarCharVector("vector", allocator); - dictionaryVector = new VarCharVector("dict", allocator); - - vector.allocateNew(10240, DATA_SIZE); - vector.setValueCount(DATA_SIZE); - for (int i = 0; i < DATA_SIZE; i++) { - byte[] value = keys.get(generateRandomIndex(KEY_SIZE)).getBytes(StandardCharsets.UTF_8); - vector.setSafe(i, value, 0, value.length); - } - - dictionaryVector.allocateNew(1024, 100); - dictionaryVector.setValueCount(100); - for (int i = 0; i < KEY_SIZE; i++) { - byte[] value = keys.get(i).getBytes(StandardCharsets.UTF_8); - dictionaryVector.setSafe(i, value, 0, value.length); - } - } - - /** Tear down benchmarks. */ - @TearDown - public void tearDown() { - vector.close(); - dictionaryVector.close(); - keys.clear(); - allocator.close(); - } - - /** - * Test encode for {@link DictionaryEncoder}. - * - * @return useless. To avoid DCE by JIT. 
- */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public int testEncode() { - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary); - encoded.close(); - return 0; - } - - private int generateRandomIndex(int max) { - Random random = new Random(); - return random.nextInt(max); - } - - private String generateUniqueKey(int length) { - String str = "abcdefghijklmnopqrstuvwxyz"; - Random random = new Random(); - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < length; i++) { - int number = random.nextInt(26); - sb.append(str.charAt(number)); - } - if (keys.contains(sb.toString())) { - return generateUniqueKey(length); - } - return sb.toString(); - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder() - .include(DictionaryEncoderBenchmarks.class.getSimpleName()) - .forks(1) - .build(); - - new Runner(opt).run(); - } -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java b/java/performance/src/main/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java deleted file mode 100644 index 4698d0acf0000..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.util.concurrent.TimeUnit; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Param; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link WriteChannel}. */ -public class WriteChannelBenchmark { - // checkstyle:off: MissingJavadocMethod - - /** State object for align benchmark. 
*/ - @State(Scope.Benchmark) - public static class AlignState { - - private ByteArrayOutputStream baos; - - private WriteChannel writeChannel; - - @Param({"1", "2", "3", "4", "5", "6", "7"}) - public int alignSize; - - @Setup(Level.Invocation) - public void prepareInvoke() throws IOException { - baos = new ByteArrayOutputStream(8); - writeChannel = new WriteChannel(Channels.newChannel(baos)); - writeChannel.write(new byte[8 - alignSize]); - } - - @TearDown(Level.Invocation) - public void tearDownInvoke() throws IOException { - writeChannel.close(); - baos.close(); - } - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public void alignBenchmark(AlignState state) throws IOException { - state.writeChannel.align(); - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder().include(WriteChannelBenchmark.class.getSimpleName()).forks(1).build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java deleted file mode 100644 index 9b2250ee72f17..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VarCharVector; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link ArrowRecordBatch}. */ -@State(Scope.Benchmark) -public class ArrowRecordBatchBenchmarks { - // checkstyle:off: MissingJavadocMethod - - private static final int VECTOR_CAPACITY = 16 * 1024; - - private static final int VECTOR_LENGTH = 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private VarCharVector vector; - - private List nodes; - - /** Setup benchmarks. 
*/ - @Setup - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - vector = new VarCharVector("vector", allocator); - vector.allocateNew(VECTOR_CAPACITY, VECTOR_LENGTH); - - nodes = new ArrayList<>(); - nodes.add(new ArrowFieldNode(VECTOR_LENGTH, 0)); - nodes.add(new ArrowFieldNode(VECTOR_LENGTH, 0)); - } - - /** Tear down benchmarks. */ - @TearDown - public void tearDown() { - vector.close(); - allocator.close(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public long createAndGetLength() { - try (ArrowRecordBatch batch = - new ArrowRecordBatch(VECTOR_LENGTH, nodes, vector.getFieldBuffers())) { - return batch.computeBodyLength(); - } - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder() - .include(ArrowRecordBatchBenchmarks.class.getSimpleName()) - .forks(1) - .build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/performance/src/main/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java deleted file mode 100644 index bb68d1848a4e2..0000000000000 --- a/java/performance/src/main/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import java.nio.charset.StandardCharsets; -import java.util.concurrent.TimeUnit; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** Benchmarks for {@link TransferPair}. */ -@State(Scope.Benchmark) -public class TransferPairBenchmarks { - // checkstyle:off: MissingJavadocMethod - - private static final int VECTOR_LENGTH = 1024; - - private static final int ALLOCATOR_CAPACITY = 1024 * 1024; - - private BufferAllocator allocator; - - private IntVector intVector; - - private VarCharVector varCharVector; - - /** Setup benchmarks. 
*/ - @Setup - public void prepare() { - allocator = new RootAllocator(ALLOCATOR_CAPACITY); - intVector = new IntVector("intVector", allocator); - varCharVector = new VarCharVector("varcharVector", allocator); - - intVector.allocateNew(VECTOR_LENGTH); - varCharVector.allocateNew(VECTOR_LENGTH); - - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i % 3 == 0) { - intVector.setNull(i); - varCharVector.setNull(i); - } else { - intVector.setSafe(i, i * i); - varCharVector.setSafe(i, ("teststring" + i).getBytes(StandardCharsets.UTF_8)); - } - } - intVector.setValueCount(VECTOR_LENGTH); - varCharVector.setValueCount(VECTOR_LENGTH); - } - - /** Tear down benchmarks. */ - @TearDown - public void tearDown() { - intVector.close(); - varCharVector.close(); - allocator.close(); - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public int splitAndTransferIntVector() { - IntVector toVector = new IntVector("intVector", allocator); - toVector.setValueCount(VECTOR_LENGTH); - TransferPair transferPair = intVector.makeTransferPair(toVector); - transferPair.splitAndTransfer(0, VECTOR_LENGTH); - toVector.close(); - return 0; - } - - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.MICROSECONDS) - public int splitAndTransferVarcharVector() { - VarCharVector toVector = new VarCharVector("varcharVector", allocator); - toVector.setValueCount(VECTOR_LENGTH); - TransferPair transferPair = varCharVector.makeTransferPair(toVector); - transferPair.splitAndTransfer(0, VECTOR_LENGTH); - toVector.close(); - return 0; - } - - public static void main(String[] args) throws RunnerException { - Options opt = - new OptionsBuilder().include(TransferPairBenchmarks.class.getSimpleName()).forks(1).build(); - - new Runner(opt).run(); - } - // checkstyle:on: MissingJavadocMethod -} diff --git a/java/pom.xml b/java/pom.xml deleted file mode 100644 index f2c8d8f1f6513..0000000000000 --- a/java/pom.xml +++ /dev/null @@ -1,1336 +0,0 @@ - - - - 
4.0.0 - - - org.apache - apache - 33 - - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - pom - - Apache Arrow Java Root POM - Apache Arrow is open source, in-memory columnar data structures and low-overhead messaging - https://arrow.apache.org/ - - - - Developer List - dev-subscribe@arrow.apache.org - dev-unsubscribe@arrow.apache.org - dev@arrow.apache.org - https://lists.apache.org/list.html?dev@arrow.apache.org - - - Commits List - commits-subscribe@arrow.apache.org - commits-unsubscribe@arrow.apache.org - commits@arrow.apache.org - https://lists.apache.org/list.html?commits@arrow.apache.org - - - Issues List - issues-subscribe@arrow.apache.org - issues-unsubscribe@arrow.apache.org - https://lists.apache.org/list.html?issues@arrow.apache.org - - - GitHub List - github-subscribe@arrow.apache.org - github-unsubscribe@arrow.apache.org - https://lists.apache.org/list.html?github@arrow.apache.org - - - - - bom - format - memory - vector - tools - adapter/jdbc - flight - performance - algorithm - adapter/avro - compression - - - - scm:git:https://github.com/apache/arrow.git - scm:git:https://github.com/apache/arrow.git - main - https://github.com/apache/arrow/tree/${project.scm.tag} - - - - GitHub - https://github.com/apache/arrow/issues - - - - ${project.build.directory}/generated-sources - 1.9.0 - 5.11.3 - 2.0.16 - 33.3.1-jre - 4.1.115.Final - 1.65.0 - 3.25.5 - 2.18.1 - 3.4.1 - 24.3.25 - 1.12.0 - - 2 - 10.20.1 - true - 2.31.0 - 5.14.2 - 3.48.2 - 1.5.12 - none - -Xdoclint:none - - --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - 11 - 11 - 11 - 11 - - 3.2.2 - - - - - - - org.apache.arrow - arrow-bom - ${project.version} - pom - import - - - org.checkerframework - checker-qual - ${checker.framework.version} - provided - - - com.google.flatbuffers - flatbuffers-java - ${dep.fbs.version} - - - com.google.errorprone - error_prone_annotations - ${error_prone_core.version} - provided - - - org.slf4j - slf4j-api - ${dep.slf4j.version} - - - 
org.slf4j - slf4j-jdk14 - ${dep.slf4j.version} - - - javax.annotation - javax.annotation-api - 1.3.2 - - - org.assertj - assertj-core - 3.26.3 - test - - - org.immutables - value-annotations - 2.10.1 - provided - - - org.hamcrest - hamcrest - 3.0 - - - com.fasterxml.jackson - jackson-bom - ${dep.jackson-bom.version} - pom - import - - - com.google.guava - guava-bom - ${dep.guava-bom.version} - pom - import - - - io.netty - netty-bom - ${dep.netty-bom.version} - pom - import - - - io.grpc - grpc-bom - ${dep.grpc-bom.version} - pom - import - - - com.google.protobuf - protobuf-bom - ${dep.protobuf-bom.version} - pom - import - - - ch.qos.logback - logback-classic - ${logback.version} - - - ch.qos.logback - logback-core - ${logback.version} - - - - - - - - org.slf4j - jul-to-slf4j - ${dep.slf4j.version} - test - - - - org.slf4j - jcl-over-slf4j - ${dep.slf4j.version} - test - - - - org.slf4j - log4j-over-slf4j - ${dep.slf4j.version} - test - - - - org.junit.jupiter - junit-jupiter-engine - ${dep.junit.jupiter.version} - test - - - org.junit.jupiter - junit-jupiter-api - ${dep.junit.jupiter.version} - test - - - org.junit.jupiter - junit-jupiter-params - ${dep.junit.jupiter.version} - test - - - org.mockito - mockito-junit-jupiter - 5.14.2 - test - - - ch.qos.logback - logback-classic - ${logback.version} - test - - - de.huxhorn.lilith - de.huxhorn.lilith.logback.appender.multiplex-classic - 8.3.0 - test - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - true - false - - - org.immutables - value - 2.10.1 - - - - - - maven-surefire-plugin - - ${surefire.add-opens.argLine} - true - true - ${forkCount} - true - - ${project.build.directory} - true - UTC - - 1048576 - - false - - - - maven-failsafe-plugin - - ${surefire.add-opens.argLine} - - ${project.build.directory} - true - UTC - - - - - org.jacoco - jacoco-maven-plugin - 0.8.12 - - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - org.apache.maven.plugins - maven-antrun-plugin - [1.6,) - - 
run - - - - - - - - - org.apache.maven.plugins - maven-enforcer-plugin - [1.2,) - - enforce - - - - - - - - - org.apache.maven.plugins - maven-remote-resources-plugin - [1.1,) - - process - - - - - - - - - org.apache.rat - apache-rat-plugin - [0.10,) - - check - - - - - - - - - org.apache.maven.plugins - maven-checkstyle-plugin - [0,) - - check - - - - - - - - - org.apache.drill.tools - drill-fmpp-maven-plugin - [1.0,) - - generate - - - - - false - true - - - - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - - - **/module-info.java - - arrow-memory-netty-buffer-patch,arrow-memory-netty,flight-sql-jdbc-core,flight-integration-tests,arrow-performance - - - - com.gradle - develocity-maven-extension - - - - - - arrow-git.properties - properties/flight.properties - - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - - testingData - - ${arrow.test.dataRoot} - - - - - - - - - - - org.xolstice.maven.plugins - protobuf-maven-plugin - 0.6.1 - - com.google.protobuf:protoc:${dep.protobuf-bom.version}:exe:${os.detected.classifier} - grpc-java - io.grpc:protoc-gen-grpc-java:${dep.grpc-bom.version}:exe:${os.detected.classifier} - @generated=omit - - - - com.diffplug.spotless - spotless-maven-plugin - 2.30.0 - - - org.codehaus.mojo - build-helper-maven-plugin - 3.6.0 - - - org.codehaus.mojo - properties-maven-plugin - 1.2.1 - - - org.codehaus.mojo - exec-maven-plugin - 3.5.0 - - - org.codehaus.mojo - versions-maven-plugin - 2.18.0 - - - pl.project13.maven - git-commit-id-plugin - 4.9.10 - - - org.cyclonedx - cyclonedx-maven-plugin - 2.9.0 - - - org.apache.drill.tools - drill-fmpp-maven-plugin - 1.21.2 - - - - - - - org.apache.rat - apache-rat-plugin - - false - - **/dependency-reduced-pom.xml - **/*.log - **/*.css - **/*.js - **/*.md - **/*.eps - **/*.json - **/*.seq - **/*.parquet - **/*.sql - **/arrow-git.properties - **/*.csv - **/*.csvh - **/*.csvh-test - **/*.tsv - **/*.txt - **/*.ssv - **/arrow-*.conf - **/.buildpath - **/*.proto - **/*.fmpp - 
**/target/** - **/*.tdd - **/*.project - **/TAGS - **/*.checkstyle - **/.classpath - **/.factorypath - **/.settings/** - .*/** - **/*.patch - **/*.pb.cc - **/*.pb.h - **/*.linux - **/client/build/** - **/*.tbl - **/*.iml - **/flight.properties - **/*.idea/** - - - - - rat-checks - - check - - validate - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - **/logging.properties - **/logback-test.xml - **/logback.out.xml - **/logback.xml - - - - org.apache.arrow - ${username} - https://arrow.apache.org/ - - - - - - - test-jar - - - true - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - 2048m - true - - - - maven-enforcer-plugin - - - avoid_bad_dependencies - - enforce - - verify - - - - - commons-logging - javax.servlet:servlet-api - org.mortbay.jetty:servlet-api - org.mortbay.jetty:servlet-api-2.5 - log4j:log4j - - org.immutables:value - org.checkerframework:checker - - junit:junit:4.* - org.junit.vintage:junit-vintage-engine - - - - - - - - - pl.project13.maven - git-commit-id-plugin - - dd.MM.yyyy '@' HH:mm:ss z - false - false - true - false - - false - false - 7 - -dirty - true - - - - - for-jars - - revision - - true - - target/classes/arrow-git.properties - - - - for-source-tarball - - revision - - false - - ./arrow-git.properties - - - - - - - org.apache.maven.plugins - maven-checkstyle-plugin - - **/module-info.java - dev/checkstyle/checkstyle.xml - dev/license/asf-java.license - dev/checkstyle/suppressions.xml - true - UTF-8 - true - ${checkstyle.failOnViolation} - ${checkstyle.failOnViolation} - warning - xml - ${project.build.directory}/test/checkstyle-errors.xml - false - - - - com.puppycrawl.tools - checkstyle - ${checkstyle.version} - - - org.slf4j - jcl-over-slf4j - ${dep.slf4j.version} - - - - - validate - - check - - validate - - - - - org.apache.maven.plugins - maven-dependency-plugin - - - analyze - - analyze-only - - verify - - true - true - - - javax.annotation:javax.annotation-api:* - org.apache.hadoop:hadoop-client-api 
- - - - - - - org.cyclonedx - cyclonedx-maven-plugin - - - - makeBom - - package - - - - - org.apache.maven.plugins - maven-project-info-reports-plugin - - - org.apache.maven.plugins - maven-site-plugin - - - com.diffplug.spotless - spotless-maven-plugin - - - - ${maven.multiModuleProjectDirectory}/dev/license/asf-xml.license - (<configuration|<project) - - - - - - 1.7 - - - - ${maven.multiModuleProjectDirectory}/dev/license/asf-java.license - package - - - - - - spotless-check - - check - - - - - - - - - kr.motd.maven - os-maven-plugin - 1.7.1 - - - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - - - **/module-info.java - - arrow-memory-netty-buffer-patch,arrow-memory-netty,flight-sql-jdbc-core,flight-integration-tests,arrow-performance - - - - - - - javadoc - - - - - aggregate - - - aggregate - - false - - - - - org.apache.maven.plugins - maven-project-info-reports-plugin - - - org.apache.maven.plugins - maven-site-plugin - - - - - - - - arrow-c-data - - c - - - - - - arrow-jni - - - adapter/orc - gandiva - dataset - c - - - - - - shade-flatbuffers - - shade-format-flatbuffers - - - - - - error-prone - - - !m2e.version - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - -XDcompilePolicy=simple - -Xplugin:ErrorProne -XepExcludedPaths:.*/(target/generated-source|format/src/main/java/org/apache/arrow/flatbuf)/.* - -J--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED - -J--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED - -J--add-exports=jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED - -J--add-exports=jdk.compiler/com.sun.tools.javac.model=ALL-UNNAMED - -J--add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED - -J--add-exports=jdk.compiler/com.sun.tools.javac.processing=ALL-UNNAMED - -J--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED - -J--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED - -J--add-opens=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED - 
-J--add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED - - - - com.google.errorprone - error_prone_core - ${error_prone_core.version} - - - - - - - - - - code-coverage - - - - - org.jacoco - jacoco-maven-plugin - - - default-prepare-agent - - prepare-agent - - - - default-prepare-agent-integration - - prepare-agent-integration - - - - - - - - - - org.jacoco - jacoco-maven-plugin - - - - - - report - - false - - - - - - - - - windows - - [17,] - - windows - - - - - - maven-surefire-plugin - - false - - - - - - - - generate-libs-cdata-all-os - - java-dist - - - - - org.codehaus.mojo - exec-maven-plugin - - - cdata-cmake - - exec - - generate-resources - - cmake - -S java - -B java-jni - -DARROW_JAVA_JNI_ENABLE_C=ON - -DARROW_JAVA_JNI_ENABLE_DEFAULT=OFF - -DBUILD_TESTING=OFF - -DCMAKE_BUILD_TYPE=Release - -DCMAKE_INSTALL_PREFIX=${arrow.c.jni.dist.dir} - ../ - - - - cdata-build - - exec - - generate-resources - - cmake - --build java-jni --target install --config Release - ../ - - - - - - - - - - generate-libs-jni-macos-linux - - java-dist - false - ON - ON - ON - ON - OFF - ON - ON - ON - - - - - org.codehaus.mojo - exec-maven-plugin - - - jni-cpp-cmake - - exec - - generate-resources - - cmake - -S cpp - -B cpp-jni - -DARROW_BUILD_SHARED=OFF - -DARROW_CSV=${ARROW_DATASET} - -DARROW_DATASET=${ARROW_DATASET} - -DARROW_DEPENDENCY_SOURCE=BUNDLED - -DARROW_DEPENDENCY_USE_SHARED=OFF - -DARROW_FILESYSTEM=ON - -DARROW_GANDIVA=${ARROW_GANDIVA} - -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON - -DARROW_JSON=${ARROW_DATASET} - -DARROW_ORC=${ARROW_ORC} - -DARROW_PARQUET=${ARROW_PARQUET} - -DARROW_S3=ON - -DARROW_SUBSTRAIT=${ARROW_DATASET} - -DARROW_USE_CCACHE=ON - -DCMAKE_BUILD_TYPE=Release - -DCMAKE_INSTALL_PREFIX=java-dist - -DCMAKE_UNITY_BUILD=ON - ../ - ${cpp.dependencies.builded} - - - - jni-cpp-build - - exec - - generate-resources - - cmake - --build cpp-jni --target install --config Release - ../ - ${cpp.dependencies.builded} - - - - jni-cmake - - exec - - 
generate-resources - - cmake - -S java - -B java-jni - -DARROW_JAVA_JNI_ENABLE_C=${ARROW_JAVA_JNI_ENABLE_C} - -DARROW_JAVA_JNI_ENABLE_DATASET=${ARROW_JAVA_JNI_ENABLE_DATASET} - -DARROW_JAVA_JNI_ENABLE_GANDIVA=${ARROW_JAVA_JNI_ENABLE_GANDIVA} - -DARROW_JAVA_JNI_ENABLE_ORC=${ARROW_JAVA_JNI_ENABLE_ORC} - -DARROW_JAVA_JNI_ENABLE_DEFAULT=ON - -DBUILD_TESTING=OFF - -DCMAKE_BUILD_TYPE=Release - -DCMAKE_INSTALL_PREFIX=${arrow.dataset.jni.dist.dir} - -DCMAKE_PREFIX_PATH=${project.basedir}/../java-dist/lib/${os.detected.arch}/cmake - -DProtobuf_USE_STATIC_LIBS=ON - -DProtobuf_ROOT=${project.basedir}/../cpp-jni/protobuf_ep-install - ../ - - - - jni-build - - exec - - generate-resources - - cmake - --build java-jni --target install --config Release - ../ - - - - - - - - - - generate-libs-jni-windows - - java-dist - false - ON - OFF - ON - ON - OFF - ON - OFF - ON - - - - - org.codehaus.mojo - exec-maven-plugin - - - jni-cpp-cmake - - exec - - generate-resources - - cmake - -S cpp - -B cpp-jni - -DARROW_BUILD_SHARED=OFF - -DARROW_CSV=${ARROW_DATASET} - -DARROW_DATASET=${ARROW_DATASET} - -DARROW_DEPENDENCY_USE_SHARED=OFF - -DARROW_FILESYSTEM=ON - -DARROW_GANDIVA=${ARROW_GANDIVA} - -DARROW_JSON=${ARROW_DATASET} - -DARROW_ORC=${ARROW_ORC} - -DARROW_PARQUET=${ARROW_PARQUET} - -DARROW_S3=ON - -DARROW_SUBSTRAIT=${ARROW_DATASET} - -DARROW_USE_CCACHE=ON - -DARROW_WITH_BROTLI=ON - -DARROW_WITH_LZ4=ON - -DARROW_WITH_SNAPPY=ON - -DARROW_WITH_ZLIB=ON - -DARROW_WITH_ZSTD=ON - -DCMAKE_BUILD_TYPE=Release - -DCMAKE_INSTALL_PREFIX=java-dist - -DCMAKE_UNITY_BUILD=ON - -GNinja - ../ - ${cpp.dependencies.builded} - - - - jni-cpp-build - - exec - - generate-resources - - ninja - install - ../cpp-jni - ${cpp.dependencies.builded} - - - - jni-cmake - - exec - - generate-resources - - cmake - -S java - -B java-jni - -DARROW_JAVA_JNI_ENABLE_C=${ARROW_JAVA_JNI_ENABLE_C} - -DARROW_JAVA_JNI_ENABLE_DATASET=${ARROW_JAVA_JNI_ENABLE_DATASET} - -DARROW_JAVA_JNI_ENABLE_GANDIVA=${ARROW_JAVA_JNI_ENABLE_GANDIVA} - 
-DARROW_JAVA_JNI_ENABLE_ORC=${ARROW_JAVA_JNI_ENABLE_ORC} - -DARROW_JAVA_JNI_ENABLE_DEFAULT=ON - -DBUILD_TESTING=OFF - -DCMAKE_BUILD_TYPE=Release - -DCMAKE_INSTALL_PREFIX=${arrow.dataset.jni.dist.dir} - -DCMAKE_PREFIX_PATH=${project.basedir}/../java-dist/lib/${os.detected.arch}/cmake - ../ - - - - jni-build - - exec - - generate-resources - - cmake - --build java-jni --target install --config Release - ../ - - - - - - - - - - - cross-jdk-testing - - - arrow.test.jdk-version - - - - - - maven-enforcer-plugin - - - check-jdk-version-property - - enforce - - validate - - - - arrow.test.jdk-version - "JDK version used for test must be specified." - ^\d{2,} - "JDK version used for test must be 11, 17, 21, ..." - - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - ${arrow.test.jdk-version} - - - - - org.apache.maven.plugins - maven-failsafe-plugin - - - ${arrow.test.jdk-version} - - - - - - - - diff --git a/java/tools/pom.xml b/java/tools/pom.xml deleted file mode 100644 index f06ded294a763..0000000000000 --- a/java/tools/pom.xml +++ /dev/null @@ -1,142 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - - arrow-tools - Arrow Tools - Java applications for working with Arrow ValueVectors. 
- - - - org.apache.arrow - arrow-memory-core - - - org.apache.arrow - arrow-vector - ${arrow.vector.classifier} - - - org.apache.arrow - arrow-compression - - - org.immutables - value-annotations - - - com.google.guava - guava - test - - - commons-cli - commons-cli - 1.9.0 - - - ch.qos.logback - logback-classic - test - - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - org.slf4j - slf4j-api - - - org.apache.arrow - arrow-memory-netty - runtime - - - org.apache.arrow - arrow-vector - ${project.version} - tests - test-jar - test - - - - - - - maven-shade-plugin - - - make-assembly - - shade - - package - - false - true - jar-with-dependencies - - - - **/module-info.class - - - - - - - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - - analyze - verify - - - com.fasterxml.jackson.core:* - - - - - - - - diff --git a/java/tools/src/main/java/module-info.java b/java/tools/src/main/java/module-info.java deleted file mode 100644 index 4f0817f9a11f6..0000000000000 --- a/java/tools/src/main/java/module-info.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -module org.apache.arrow.tools { - exports org.apache.arrow.tools; - - requires com.fasterxml.jackson.databind; - requires org.apache.arrow.compression; - requires org.apache.arrow.memory.core; - requires org.apache.arrow.vector; - requires org.apache.commons.cli; - requires org.slf4j; -} diff --git a/java/tools/src/main/java/org/apache/arrow/tools/EchoServer.java b/java/tools/src/main/java/org/apache/arrow/tools/EchoServer.java deleted file mode 100644 index 13b81e7a19b6b..0000000000000 --- a/java/tools/src/main/java/org/apache/arrow/tools/EchoServer.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.tools; - -import java.io.IOException; -import java.net.ServerSocket; -import java.net.Socket; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowStreamReader; -import org.apache.arrow.vector.ipc.ArrowStreamWriter; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Simple server that echoes back data received. 
*/ -public class EchoServer { - private static final Logger LOGGER = LoggerFactory.getLogger(EchoServer.class); - private final ServerSocket serverSocket; - private boolean closed = false; - - /** Constructs a new instance that binds to the given port. */ - public EchoServer(int port) throws IOException { - LOGGER.debug("Starting echo server."); - serverSocket = new ServerSocket(port); - LOGGER.debug("Running echo server on port: " + port()); - } - - /** Main method to run the server, the first argument is an optional port number. */ - public static void main(String[] args) throws Exception { - int port; - if (args.length > 0) { - port = Integer.parseInt(args[0]); - } else { - port = 8080; - } - new EchoServer(port).run(); - } - - public int port() { - return serverSocket.getLocalPort(); - } - - /** Starts the main server event loop. */ - public void run() throws IOException { - try { - Socket clientSocket = null; - ClientConnection client = null; - while (!closed) { - LOGGER.debug("Waiting to accept new client connection."); - clientSocket = serverSocket.accept(); - LOGGER.debug("Accepted new client connection."); - client = new ClientConnection(clientSocket); - try { - client.run(); - } catch (IOException e) { - LOGGER.warn("Error handling client connection.", e); - } - LOGGER.debug("Closed connection with client"); - } - } catch (java.net.SocketException ex) { - if (!closed) { - throw ex; - } - } finally { - serverSocket.close(); - LOGGER.debug("Server closed."); - } - } - - public void close() throws IOException { - closed = true; - serverSocket.close(); - } - - /** Handler for each client connection to the server. */ - public static class ClientConnection implements AutoCloseable { - public final Socket socket; - - public ClientConnection(Socket socket) { - this.socket = socket; - } - - /** Reads a record batch off the socket and writes it back out. 
*/ - public void run() throws IOException { - // Read the entire input stream and write it back - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE)) { - ArrowStreamReader reader = new ArrowStreamReader(socket.getInputStream(), allocator); - VectorSchemaRoot root = reader.getVectorSchemaRoot(); - // load the first batch before instantiating the writer so that we have any dictionaries - reader.loadNextBatch(); - ArrowStreamWriter writer = new ArrowStreamWriter(root, reader, socket.getOutputStream()); - writer.start(); - int echoed = 0; - while (true) { - int rowCount = reader.getVectorSchemaRoot().getRowCount(); - if (rowCount == 0) { - break; - } else { - writer.writeBatch(); - echoed += rowCount; - reader.loadNextBatch(); - } - } - writer.end(); - Preconditions.checkState(reader.bytesRead() == writer.bytesWritten()); - LOGGER.debug(String.format("Echoed %d records", echoed)); - reader.close(false); - writer.close(); - } - } - - @Override - public void close() throws IOException { - socket.close(); - } - } -} diff --git a/java/tools/src/main/java/org/apache/arrow/tools/FileRoundtrip.java b/java/tools/src/main/java/org/apache/arrow/tools/FileRoundtrip.java deleted file mode 100644 index 45205bab54655..0000000000000 --- a/java/tools/src/main/java/org/apache/arrow/tools/FileRoundtrip.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.tools; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.PrintStream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowFileReader; -import org.apache.arrow.vector.ipc.ArrowFileWriter; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.apache.commons.cli.PosixParser; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Application that verifies data can be round-tripped through a file. 
*/ -public class FileRoundtrip { - private static final Logger LOGGER = LoggerFactory.getLogger(FileRoundtrip.class); - private final Options options; - private final PrintStream err; - - FileRoundtrip(PrintStream err) { - this.err = err; - this.options = new Options(); - this.options.addOption("i", "in", true, "input file"); - this.options.addOption("o", "out", true, "output file"); - } - - public static void main(String[] args) { - System.exit(new FileRoundtrip(System.err).run(args)); - } - - private File validateFile(String type, String fileName) throws IOException { - if (fileName == null) { - throw new IllegalArgumentException("missing " + type + " file parameter"); - } - File f = new File(fileName); - if (type.equals("input")) { - if (!f.exists() || f.isDirectory()) { - throw new IllegalArgumentException(type + " file not found: " + f.getAbsolutePath()); - } - } else if (type.equals("output")) { - File parentDir = f.getParentFile(); - if (parentDir != null && !parentDir.exists()) { - if (!parentDir.mkdirs()) { - throw new IOException( - "Failed to create parent directory: " + parentDir.getAbsolutePath()); - } - } - } - return f; - } - - int run(String[] args) { - try { - CommandLineParser parser = new PosixParser(); - CommandLine cmd = parser.parse(options, args, false); - - String inFileName = cmd.getOptionValue("in"); - String outFileName = cmd.getOptionValue("out"); - - File inFile = validateFile("input", inFileName); - File outFile = validateFile("output", outFileName); - - try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - FileInputStream fileInputStream = new FileInputStream(inFile); - ArrowFileReader arrowReader = - new ArrowFileReader(fileInputStream.getChannel(), allocator)) { - - VectorSchemaRoot root = arrowReader.getVectorSchemaRoot(); - Schema schema = root.getSchema(); - LOGGER.debug("Input file size: " + inFile.length()); - LOGGER.debug("Found schema: " + schema); - - try (FileOutputStream fileOutputStream = new 
FileOutputStream(outFile); - ArrowFileWriter arrowWriter = - new ArrowFileWriter(root, arrowReader, fileOutputStream.getChannel())) { - arrowWriter.start(); - while (true) { - if (!arrowReader.loadNextBatch()) { - break; - } else { - arrowWriter.writeBatch(); - } - } - arrowWriter.end(); - } - LOGGER.debug("Output file size: " + outFile.length()); - } - } catch (ParseException e) { - return fatalError("Invalid parameters", e); - } catch (IOException e) { - return fatalError("Error accessing files", e); - } - return 0; - } - - private int fatalError(String message, Throwable e) { - err.println(message); - LOGGER.error(message, e); - return 1; - } -} diff --git a/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java b/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java deleted file mode 100644 index e8c62680a6532..0000000000000 --- a/java/tools/src/main/java/org/apache/arrow/tools/FileToStream.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.tools; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowFileReader; -import org.apache.arrow.vector.ipc.ArrowStreamWriter; - -/** - * Converts an Arrow file to an Arrow stream. The file should be specified as the first argument and - * the output is written to standard out. - */ -public class FileToStream { - private FileToStream() {} - - /** Reads an Arrow file from in and writes it back to out. */ - public static void convert(FileInputStream in, OutputStream out) throws IOException { - BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - try (ArrowFileReader reader = new ArrowFileReader(in.getChannel(), allocator)) { - VectorSchemaRoot root = reader.getVectorSchemaRoot(); - // load the first batch before instantiating the writer so that we have any dictionaries - // only writeBatches if we loaded one in the first place. - boolean writeBatches = reader.loadNextBatch(); - try (ArrowStreamWriter writer = new ArrowStreamWriter(root, reader, out)) { - writer.start(); - while (writeBatches) { - writer.writeBatch(); - if (!reader.loadNextBatch()) { - break; - } - } - writer.end(); - } - } - } - - /** - * Main method. The first arg is the file path. The second, optional argument, is an output file - * location (defaults to standard out). - */ - public static void main(String[] args) throws IOException { - if (args.length != 1 && args.length != 2) { - System.err.println("Usage: FileToStream [output file]"); - System.exit(1); - } - - FileInputStream in = new FileInputStream(new File(args[0])); - OutputStream out = args.length == 1 ? 
System.out : new FileOutputStream(new File(args[1])); - - convert(in, out); - } -} diff --git a/java/tools/src/main/java/org/apache/arrow/tools/Integration.java b/java/tools/src/main/java/org/apache/arrow/tools/Integration.java deleted file mode 100644 index 7deeae9e8bf03..0000000000000 --- a/java/tools/src/main/java/org/apache/arrow/tools/Integration.java +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.tools; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import org.apache.arrow.compression.CommonsCompressionFactory; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowFileReader; -import org.apache.arrow.vector.ipc.ArrowFileWriter; -import org.apache.arrow.vector.ipc.JsonFileReader; -import org.apache.arrow.vector.ipc.JsonFileWriter; -import org.apache.arrow.vector.ipc.message.ArrowBlock; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.Validator; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.apache.commons.cli.PosixParser; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Application for cross language integration testing. */ -public class Integration { - private static final Logger LOGGER = LoggerFactory.getLogger(Integration.class); - private final Options options; - - Integration() { - this.options = new Options(); - this.options.addOption("a", "arrow", true, "arrow file"); - this.options.addOption("j", "json", true, "json file"); - this.options.addOption( - "c", "command", true, "command to execute: " + Arrays.toString(Command.values())); - } - - /** Main method. 
*/ - public static void main(String[] args) { - try { - new Integration().run(args); - } catch (ParseException e) { - fatalError("Invalid parameters", e); - } catch (IOException e) { - fatalError("Error accessing files", e); - } catch (RuntimeException e) { - fatalError("Incompatible files", e); - } - } - - private static void fatalError(String message, Throwable e) { - System.err.println(message); - System.err.println(e.getMessage()); - LOGGER.error(message, e); - System.exit(1); - } - - private File validateFile(String type, String fileName, boolean shouldExist) { - if (fileName == null) { - throw new IllegalArgumentException("missing " + type + " file parameter"); - } - File f = new File(fileName); - if (shouldExist && (!f.exists() || f.isDirectory())) { - throw new IllegalArgumentException(type + " file not found: " + f.getAbsolutePath()); - } - if (!shouldExist && f.exists()) { - throw new IllegalArgumentException(type + " file already exists: " + f.getAbsolutePath()); - } - return f; - } - - static void extractDictionaryEncodings(List fields, List encodings) { - for (Field field : fields) { - DictionaryEncoding encoding = field.getDictionary(); - if (encoding != null) { - encodings.add(encoding); - } - - extractDictionaryEncodings(field.getChildren(), encodings); - } - } - - void run(String[] args) throws ParseException, IOException { - CommandLineParser parser = new PosixParser(); - CommandLine cmd = parser.parse(options, args, false); - - Command command = toCommand(cmd.getOptionValue("command")); - File arrowFile = validateFile("arrow", cmd.getOptionValue("arrow"), command.arrowExists); - File jsonFile = validateFile("json", cmd.getOptionValue("json"), command.jsonExists); - command.execute(arrowFile, jsonFile); - } - - private Command toCommand(String commandName) { - try { - return Command.valueOf(commandName); - } catch (IllegalArgumentException e) { - throw new IllegalArgumentException( - "Unknown command: " - + commandName - + " expected one of " - + 
Arrays.toString(Command.values())); - } - } - - /** Commands (actions) the application can perform. */ - enum Command { - ARROW_TO_JSON(true, false) { - @Override - public void execute(File arrowFile, File jsonFile) throws IOException { - try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - FileInputStream fileInputStream = new FileInputStream(arrowFile); - ArrowFileReader arrowReader = - new ArrowFileReader(fileInputStream.getChannel(), allocator)) { - VectorSchemaRoot root = arrowReader.getVectorSchemaRoot(); - Schema schema = root.getSchema(); - LOGGER.debug("Input file size: " + arrowFile.length()); - LOGGER.debug("Found schema: " + schema); - try (JsonFileWriter writer = - new JsonFileWriter(jsonFile, JsonFileWriter.config().pretty(true))) { - writer.start(schema, arrowReader); - for (ArrowBlock rbBlock : arrowReader.getRecordBlocks()) { - if (!arrowReader.loadRecordBatch(rbBlock)) { - throw new IOException("Expected to load record batch"); - } - writer.write(root); - } - } - LOGGER.debug("Output file size: " + jsonFile.length()); - } - } - }, - JSON_TO_ARROW(false, true) { - @Override - public void execute(File arrowFile, File jsonFile) throws IOException { - try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - JsonFileReader reader = new JsonFileReader(jsonFile, allocator)) { - Schema schema = reader.start(); - LOGGER.debug("Input file size: " + jsonFile.length()); - LOGGER.debug("Found schema: " + schema); - try (FileOutputStream fileOutputStream = new FileOutputStream(arrowFile); - VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator); - // TODO json dictionaries - ArrowFileWriter arrowWriter = - new ArrowFileWriter(root, reader, fileOutputStream.getChannel())) { - arrowWriter.start(); - while (reader.read(root)) { - arrowWriter.writeBatch(); - } - arrowWriter.end(); - } - LOGGER.debug("Output file size: " + arrowFile.length()); - } - } - }, - VALIDATE(true, true) { - @Override - public void execute(File 
arrowFile, File jsonFile) throws IOException { - try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - JsonFileReader jsonReader = new JsonFileReader(jsonFile, allocator); - FileInputStream fileInputStream = new FileInputStream(arrowFile); - ArrowFileReader arrowReader = - new ArrowFileReader( - fileInputStream.getChannel(), allocator, CommonsCompressionFactory.INSTANCE)) { - Schema jsonSchema = jsonReader.start(); - VectorSchemaRoot arrowRoot = arrowReader.getVectorSchemaRoot(); - Schema arrowSchema = arrowRoot.getSchema(); - LOGGER.debug("Arrow Input file size: " + arrowFile.length()); - LOGGER.debug("ARROW schema: " + arrowSchema); - LOGGER.debug("JSON Input file size: " + jsonFile.length()); - LOGGER.debug("JSON schema: " + jsonSchema); - Validator.compareSchemas(jsonSchema, arrowSchema); - - List recordBatches = arrowReader.getRecordBlocks(); - Iterator iterator = recordBatches.iterator(); - VectorSchemaRoot jsonRoot; - int totalBatches = 0; - while ((jsonRoot = jsonReader.read()) != null && iterator.hasNext()) { - ArrowBlock rbBlock = iterator.next(); - if (!arrowReader.loadRecordBatch(rbBlock)) { - throw new IOException("Expected to load record batch"); - } - Validator.compareVectorSchemaRoot(arrowRoot, jsonRoot); - jsonRoot.close(); - totalBatches++; - } - - // Validate Dictionaries after ArrowFileReader has read batches - List encodingsJson = new ArrayList<>(); - extractDictionaryEncodings(jsonSchema.getFields(), encodingsJson); - List encodingsArrow = new ArrayList<>(); - extractDictionaryEncodings(arrowSchema.getFields(), encodingsArrow); - Validator.compareDictionaries(encodingsJson, encodingsArrow, jsonReader, arrowReader); - - boolean hasMoreJSON = jsonRoot != null; - boolean hasMoreArrow = iterator.hasNext(); - if (hasMoreJSON || hasMoreArrow) { - throw new IllegalArgumentException( - "Unexpected RecordBatches. 
Total: " - + totalBatches - + " J:" - + hasMoreJSON - + " " - + "A:" - + hasMoreArrow); - } - } - } - }; - - public final boolean arrowExists; - public final boolean jsonExists; - - Command(boolean arrowExists, boolean jsonExists) { - this.arrowExists = arrowExists; - this.jsonExists = jsonExists; - } - - public abstract void execute(File arrowFile, File jsonFile) throws IOException; - } -} diff --git a/java/tools/src/main/java/org/apache/arrow/tools/StreamToFile.java b/java/tools/src/main/java/org/apache/arrow/tools/StreamToFile.java deleted file mode 100644 index b004e12efd3ad..0000000000000 --- a/java/tools/src/main/java/org/apache/arrow/tools/StreamToFile.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.tools; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.channels.Channels; -import org.apache.arrow.compression.CommonsCompressionFactory; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ipc.ArrowFileWriter; -import org.apache.arrow.vector.ipc.ArrowStreamReader; - -/** Converts an Arrow stream to an Arrow file. */ -public class StreamToFile { - /** Reads an Arrow stream from in and writes it to out. */ - public static void convert(InputStream in, OutputStream out) throws IOException { - BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - try (ArrowStreamReader reader = - new ArrowStreamReader(in, allocator, CommonsCompressionFactory.INSTANCE)) { - VectorSchemaRoot root = reader.getVectorSchemaRoot(); - // load the first batch before instantiating the writer so that we have any dictionaries. - // Only writeBatches if we load the first one. - boolean writeBatches = reader.loadNextBatch(); - try (ArrowFileWriter writer = new ArrowFileWriter(root, reader, Channels.newChannel(out))) { - writer.start(); - while (writeBatches) { - writer.writeBatch(); - if (!reader.loadNextBatch()) { - break; - } - } - writer.end(); - } - } - } - - /** - * Main method. Defaults to reading from standard in and standard out. If there are two arguments - * the first is interpreted as the input file path, the second is the output file path. 
- */ - public static void main(String[] args) throws IOException { - InputStream in = System.in; - OutputStream out = System.out; - if (args.length == 2) { - in = new FileInputStream(new File(args[0])); - out = new FileOutputStream(new File(args[1])); - } - convert(in, out); - } -} diff --git a/java/tools/src/test/java/org/apache/arrow/tools/ArrowFileTestFixtures.java b/java/tools/src/test/java/org/apache/arrow/tools/ArrowFileTestFixtures.java deleted file mode 100644 index d53db2bc54253..0000000000000 --- a/java/tools/src/test/java/org/apache/arrow/tools/ArrowFileTestFixtures.java +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.tools; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.ViewVarBinaryVector; -import org.apache.arrow.vector.ViewVarCharVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; -import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.complex.writer.BigIntWriter; -import org.apache.arrow.vector.complex.writer.IntWriter; -import org.apache.arrow.vector.complex.writer.ViewVarBinaryWriter; -import org.apache.arrow.vector.complex.writer.ViewVarCharWriter; -import org.apache.arrow.vector.ipc.ArrowFileReader; -import org.apache.arrow.vector.ipc.ArrowFileWriter; -import org.apache.arrow.vector.ipc.message.ArrowBlock; -import org.apache.arrow.vector.util.Text; - -public class ArrowFileTestFixtures { - static final int COUNT = 10; - - static void writeData(int count, NonNullableStructVector parent) { - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - IntWriter intWriter = rootWriter.integer("int"); - BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); - for (int i = 0; i < count; i++) { - intWriter.setPosition(i); - intWriter.writeInt(i); - bigIntWriter.setPosition(i); - bigIntWriter.writeBigInt(i); - } - writer.setValueCount(count); - } - - private static String generateString(int length) { - StringBuilder stringBuilder 
= new StringBuilder(length); - - for (int i = 0; i < length; i++) { - stringBuilder.append(i); - } - - return stringBuilder.toString(); - } - - private static byte[] generateBytes(int length) { - byte[] bytes = new byte[length]; - for (int i = 0; i < length; i++) { - bytes[i] = (byte) i; - } - return bytes; - } - - static void writeVariableWidthViewData(int count, NonNullableStructVector parent) { - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - ViewVarCharWriter viewVarCharWriter = rootWriter.viewVarChar("viewVarChar"); - ViewVarBinaryWriter viewVarBinaryWriter = rootWriter.viewVarBinary("viewVarBinary"); - IntWriter intWriter = rootWriter.integer("int"); - BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); - for (int i = 0; i < count; i++) { - viewVarCharWriter.setPosition(i); - viewVarCharWriter.writeViewVarChar(generateString(i)); - viewVarBinaryWriter.setPosition(i); - viewVarBinaryWriter.writeViewVarBinary(generateBytes(i)); - intWriter.setPosition(i); - intWriter.writeInt(i); - bigIntWriter.setPosition(i); - bigIntWriter.writeBigInt(i); - } - writer.setValueCount(count); - } - - static void validateOutput(File testOutFile, BufferAllocator allocator) throws Exception { - // read - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - FileInputStream fileInputStream = new FileInputStream(testOutFile); - ArrowFileReader arrowReader = - new ArrowFileReader(fileInputStream.getChannel(), readerAllocator)) { - VectorSchemaRoot root = arrowReader.getVectorSchemaRoot(); - for (ArrowBlock rbBlock : arrowReader.getRecordBlocks()) { - if (!arrowReader.loadRecordBatch(rbBlock)) { - throw new IOException("Expected to read record batch"); - } - validateContent(COUNT, root); - } - } - } - - static void validateVariadicOutput(File testOutFile, BufferAllocator allocator, int count) - throws Exception { - // read - try (BufferAllocator readerAllocator = - 
allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - FileInputStream fileInputStream = new FileInputStream(testOutFile); - ArrowFileReader arrowReader = - new ArrowFileReader(fileInputStream.getChannel(), readerAllocator)) { - VectorSchemaRoot root = arrowReader.getVectorSchemaRoot(); - for (ArrowBlock rbBlock : arrowReader.getRecordBlocks()) { - if (!arrowReader.loadRecordBatch(rbBlock)) { - throw new IOException("Expected to read record batch"); - } - validateVariadicContent(count, root); - } - } - } - - static void validateContent(int count, VectorSchemaRoot root) { - assertEquals(count, root.getRowCount()); - for (int i = 0; i < count; i++) { - assertEquals(i, root.getVector("int").getObject(i)); - assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i)); - } - } - - static void validateVariadicContent(int count, VectorSchemaRoot root) { - assertEquals(count, root.getRowCount()); - ViewVarCharVector viewVarCharVector = (ViewVarCharVector) root.getVector("viewVarChar"); - ViewVarBinaryVector viewVarBinaryVector = (ViewVarBinaryVector) root.getVector("viewVarBinary"); - IntVector intVector = (IntVector) root.getVector("int"); - BigIntVector bigIntVector = (BigIntVector) root.getVector("bigInt"); - for (int i = 0; i < count; i++) { - assertEquals(new Text(generateString(i)), viewVarCharVector.getObject(i)); - assertArrayEquals(generateBytes(i), viewVarBinaryVector.get(i)); - assertEquals(i, intVector.getObject(i)); - assertEquals(Long.valueOf(i), bigIntVector.getObject(i)); - } - } - - static void write(FieldVector parent, File file) throws IOException { - VectorSchemaRoot root = new VectorSchemaRoot(parent); - try (FileOutputStream fileOutputStream = new FileOutputStream(file); - ArrowFileWriter arrowWriter = - new ArrowFileWriter(root, null, fileOutputStream.getChannel())) { - arrowWriter.writeBatch(); - } - } - - static void writeInput(File testInFile, BufferAllocator allocator) throws IOException { - try (BufferAllocator vectorAllocator = 
- allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - NonNullableStructVector parent = NonNullableStructVector.empty("parent", vectorAllocator)) { - writeData(COUNT, parent); - write(parent.getChild("root"), testInFile); - } - } - - static void writeVariableWidthViewInput(File testInFile, BufferAllocator allocator, int count) - throws IOException { - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original view vectors", 0, Integer.MAX_VALUE); - NonNullableStructVector parent = NonNullableStructVector.empty("parent", vectorAllocator)) { - writeVariableWidthViewData(count, parent); - write(parent.getChild("root"), testInFile); - } - } -} diff --git a/java/tools/src/test/java/org/apache/arrow/tools/EchoServerTest.java b/java/tools/src/test/java/org/apache/arrow/tools/EchoServerTest.java deleted file mode 100644 index 239913ca4ea1e..0000000000000 --- a/java/tools/src/test/java/org/apache/arrow/tools/EchoServerTest.java +++ /dev/null @@ -1,298 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.tools; - -import static java.util.Arrays.asList; -import static org.apache.arrow.vector.types.Types.MinorType.TINYINT; -import static org.apache.arrow.vector.types.Types.MinorType.VARCHAR; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import com.google.common.collect.ImmutableList; -import java.io.IOException; -import java.net.Socket; -import java.net.UnknownHostException; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider; -import org.apache.arrow.vector.ipc.ArrowStreamReader; -import org.apache.arrow.vector.ipc.ArrowStreamWriter; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.Int; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; 
-import org.apache.arrow.vector.util.Text; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -public class EchoServerTest { - - private static EchoServer server; - private static int serverPort; - private static Thread serverThread; - - @BeforeAll - public static void startEchoServer() throws IOException { - server = new EchoServer(0); - serverPort = server.port(); - serverThread = - new Thread() { - @Override - public void run() { - try { - server.run(); - } catch (IOException e) { - e.printStackTrace(); - } - } - }; - serverThread.start(); - } - - @AfterAll - public static void stopEchoServer() throws IOException, InterruptedException { - server.close(); - serverThread.join(); - } - - private void testEchoServer(int serverPort, Field field, TinyIntVector vector, int batches) - throws UnknownHostException, IOException { - VectorSchemaRoot root = new VectorSchemaRoot(asList(field), asList((FieldVector) vector), 0); - try (BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE); - Socket socket = new Socket("localhost", serverPort); - ArrowStreamWriter writer = new ArrowStreamWriter(root, null, socket.getOutputStream()); - ArrowStreamReader reader = new ArrowStreamReader(socket.getInputStream(), alloc)) { - writer.start(); - for (int i = 0; i < batches; i++) { - vector.allocateNew(16); - for (int j = 0; j < 8; j++) { - vector.set(j, j + i); - vector.set(j + 8, 0, (byte) (j + i)); - } - vector.setValueCount(16); - root.setRowCount(16); - writer.writeBatch(); - } - writer.end(); - - assertEquals(new Schema(asList(field)), reader.getVectorSchemaRoot().getSchema()); - - TinyIntVector readVector = - (TinyIntVector) reader.getVectorSchemaRoot().getFieldVectors().get(0); - for (int i = 0; i < batches; i++) { - assertTrue(reader.loadNextBatch()); - assertEquals(16, reader.getVectorSchemaRoot().getRowCount()); - assertEquals(16, readVector.getValueCount()); - for (int j = 0; j < 8; j++) { - assertEquals(j 
+ i, readVector.get(j)); - assertTrue(readVector.isNull(j + 8)); - } - } - assertFalse(reader.loadNextBatch()); - assertEquals(0, reader.getVectorSchemaRoot().getRowCount()); - assertEquals(reader.bytesRead(), writer.bytesWritten()); - } - } - - @Test - public void basicTest() throws InterruptedException, IOException { - BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE); - - Field field = - new Field( - "testField", - new FieldType(true, new ArrowType.Int(8, true), null, null), - Collections.emptyList()); - TinyIntVector vector = - new TinyIntVector("testField", FieldType.nullable(TINYINT.getType()), alloc); - - // Try an empty stream, just the header. - testEchoServer(serverPort, field, vector, 0); - - // Try with one batch. - testEchoServer(serverPort, field, vector, 1); - - // Try with a few - testEchoServer(serverPort, field, vector, 10); - } - - @Test - public void testFlatDictionary() throws IOException { - DictionaryEncoding writeEncoding = new DictionaryEncoding(1L, false, null); - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - IntVector writeVector = - new IntVector( - "varchar", - new FieldType(true, MinorType.INT.getType(), writeEncoding, null), - allocator); - VarCharVector writeDictionaryVector = - new VarCharVector("dict", FieldType.nullable(VARCHAR.getType()), allocator)) { - - ValueVectorDataPopulator.setVector(writeVector, 0, 1, null, 2, 1, 2); - ValueVectorDataPopulator.setVector( - writeDictionaryVector, - "foo".getBytes(StandardCharsets.UTF_8), - "bar".getBytes(StandardCharsets.UTF_8), - "baz".getBytes(StandardCharsets.UTF_8)); - - List fields = ImmutableList.of(writeVector.getField()); - List vectors = ImmutableList.of((FieldVector) writeVector); - VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 6); - - DictionaryProvider writeProvider = - new MapDictionaryProvider(new Dictionary(writeDictionaryVector, writeEncoding)); - - try (Socket socket = new Socket("localhost", serverPort); - ArrowStreamWriter 
writer = - new ArrowStreamWriter(root, writeProvider, socket.getOutputStream()); - ArrowStreamReader reader = new ArrowStreamReader(socket.getInputStream(), allocator)) { - writer.start(); - writer.writeBatch(); - writer.end(); - - reader.loadNextBatch(); - VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); - assertEquals(6, readerRoot.getRowCount()); - - FieldVector readVector = readerRoot.getFieldVectors().get(0); - assertNotNull(readVector); - - DictionaryEncoding readEncoding = readVector.getField().getDictionary(); - assertNotNull(readEncoding); - assertEquals(1L, readEncoding.getId()); - - assertEquals(6, readVector.getValueCount()); - assertEquals(0, readVector.getObject(0)); - assertEquals(1, readVector.getObject(1)); - assertEquals(null, readVector.getObject(2)); - assertEquals(2, readVector.getObject(3)); - assertEquals(1, readVector.getObject(4)); - assertEquals(2, readVector.getObject(5)); - - Dictionary dictionary = reader.lookup(1L); - assertNotNull(dictionary); - VarCharVector dictionaryVector = ((VarCharVector) dictionary.getVector()); - assertEquals(3, dictionaryVector.getValueCount()); - assertEquals(new Text("foo"), dictionaryVector.getObject(0)); - assertEquals(new Text("bar"), dictionaryVector.getObject(1)); - assertEquals(new Text("baz"), dictionaryVector.getObject(2)); - } - } - } - - @Test - public void testNestedDictionary() throws IOException { - DictionaryEncoding writeEncoding = new DictionaryEncoding(2L, false, null); - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - VarCharVector writeDictionaryVector = - new VarCharVector("dictionary", FieldType.nullable(VARCHAR.getType()), allocator); - ListVector writeVector = ListVector.empty("list", allocator)) { - - // data being written: - // [['foo', 'bar'], ['foo'], ['bar']] -> [[0, 1], [0], [1]] - - writeDictionaryVector.allocateNew(); - writeDictionaryVector.set(0, "foo".getBytes(StandardCharsets.UTF_8)); - writeDictionaryVector.set(1, 
"bar".getBytes(StandardCharsets.UTF_8)); - writeDictionaryVector.setValueCount(2); - - writeVector.addOrGetVector(new FieldType(true, MinorType.INT.getType(), writeEncoding, null)); - writeVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(writeVector); - listWriter.startList(); - listWriter.writeInt(0); - listWriter.writeInt(1); - listWriter.endList(); - listWriter.startList(); - listWriter.writeInt(0); - listWriter.endList(); - listWriter.startList(); - listWriter.writeInt(1); - listWriter.endList(); - listWriter.setValueCount(3); - - List fields = ImmutableList.of(writeVector.getField()); - List vectors = ImmutableList.of((FieldVector) writeVector); - VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 3); - - DictionaryProvider writeProvider = - new MapDictionaryProvider(new Dictionary(writeDictionaryVector, writeEncoding)); - - try (Socket socket = new Socket("localhost", serverPort); - ArrowStreamWriter writer = - new ArrowStreamWriter(root, writeProvider, socket.getOutputStream()); - ArrowStreamReader reader = new ArrowStreamReader(socket.getInputStream(), allocator)) { - writer.start(); - writer.writeBatch(); - writer.end(); - - reader.loadNextBatch(); - VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); - assertEquals(3, readerRoot.getRowCount()); - - ListVector readVector = (ListVector) readerRoot.getFieldVectors().get(0); - assertNotNull(readVector); - - assertNull(readVector.getField().getDictionary()); - DictionaryEncoding readEncoding = - readVector.getField().getChildren().get(0).getDictionary(); - assertNotNull(readEncoding); - assertEquals(2L, readEncoding.getId()); - - Field nestedField = readVector.getField().getChildren().get(0); - - DictionaryEncoding encoding = nestedField.getDictionary(); - assertNotNull(encoding); - assertEquals(2L, encoding.getId()); - assertEquals(new Int(32, true), encoding.getIndexType()); - - assertEquals(3, readVector.getValueCount()); - assertEquals(Arrays.asList(0, 1), 
readVector.getObject(0)); - assertEquals(Arrays.asList(0), readVector.getObject(1)); - assertEquals(Arrays.asList(1), readVector.getObject(2)); - - Dictionary readDictionary = reader.lookup(2L); - assertNotNull(readDictionary); - VarCharVector dictionaryVector = ((VarCharVector) readDictionary.getVector()); - assertEquals(2, dictionaryVector.getValueCount()); - assertEquals(new Text("foo"), dictionaryVector.getObject(0)); - assertEquals(new Text("bar"), dictionaryVector.getObject(1)); - } - } - } -} diff --git a/java/tools/src/test/java/org/apache/arrow/tools/TestFileRoundtrip.java b/java/tools/src/test/java/org/apache/arrow/tools/TestFileRoundtrip.java deleted file mode 100644 index 5839f50383213..0000000000000 --- a/java/tools/src/test/java/org/apache/arrow/tools/TestFileRoundtrip.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.tools; - -import static org.apache.arrow.tools.ArrowFileTestFixtures.validateOutput; -import static org.apache.arrow.tools.ArrowFileTestFixtures.writeInput; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.File; -import java.io.FileOutputStream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.ipc.InvalidArrowFileException; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -public class TestFileRoundtrip { - - @TempDir public File testFolder; - @TempDir public File testAnotherFolder; - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Integer.MAX_VALUE); - } - - @AfterEach - public void tearDown() { - allocator.close(); - } - - @Test - public void test() throws Exception { - File testInFile = new File(testFolder, "testIn.arrow"); - File testOutFile = new File(testFolder, "testOut.arrow"); - - writeInput(testInFile, allocator); - - String[] args = {"-i", testInFile.getAbsolutePath(), "-o", testOutFile.getAbsolutePath()}; - int result = new FileRoundtrip(System.err).run(args); - assertEquals(0, result); - - validateOutput(testOutFile, allocator); - } - - @Test - public void testDiffFolder() throws Exception { - File testInFile = new File(testFolder, "testIn.arrow"); - File testOutFile = new File(testAnotherFolder, "testOut.arrow"); - - writeInput(testInFile, allocator); - - String[] args = {"-i", testInFile.getAbsolutePath(), "-o", testOutFile.getAbsolutePath()}; - int result = new FileRoundtrip(System.err).run(args); - assertEquals(0, result); - - validateOutput(testOutFile, allocator); - } - - @Test - public void testNotFoundInput() { - 
File testInFile = new File(testFolder, "testIn.arrow"); - File testOutFile = new File(testFolder, "testOut.arrow"); - - String[] args = {"-i", testInFile.getAbsolutePath(), "-o", testOutFile.getAbsolutePath()}; - Exception exception = - assertThrows( - IllegalArgumentException.class, - () -> { - new FileRoundtrip(System.err).run(args); - }); - - assertTrue(exception.getMessage().contains("input file not found")); - } - - @Test - public void testSmallSizeInput() throws Exception { - File testInFile = new File(testFolder, "testIn.arrow"); - File testOutFile = new File(testFolder, "testOut.arrow"); - - // create an empty file - new FileOutputStream(testInFile).close(); - - String[] args = {"-i", testInFile.getAbsolutePath(), "-o", testOutFile.getAbsolutePath()}; - Exception exception = - assertThrows( - InvalidArrowFileException.class, - () -> { - new FileRoundtrip(System.err).run(args); - }); - - assertEquals("file too small: 0", exception.getMessage()); - } -} diff --git a/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java b/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java deleted file mode 100644 index 28f9a9010f3f1..0000000000000 --- a/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.tools; - -import static org.apache.arrow.tools.ArrowFileTestFixtures.validateOutput; -import static org.apache.arrow.tools.ArrowFileTestFixtures.validateVariadicOutput; -import static org.apache.arrow.tools.ArrowFileTestFixtures.write; -import static org.apache.arrow.tools.ArrowFileTestFixtures.writeData; -import static org.apache.arrow.tools.ArrowFileTestFixtures.writeInput; -import static org.apache.arrow.tools.ArrowFileTestFixtures.writeVariableWidthViewInput; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import com.fasterxml.jackson.core.util.DefaultPrettyPrinter; -import com.fasterxml.jackson.core.util.DefaultPrettyPrinter.NopIndenter; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.SerializationFeature; -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.io.StringReader; -import java.util.Map; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.tools.Integration.Command; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; -import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.complex.writer.BigIntWriter; -import org.apache.arrow.vector.complex.writer.Float8Writer; -import org.apache.arrow.vector.complex.writer.IntWriter; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -public class TestIntegration { - - @TempDir public File 
testFolder; - - private BufferAllocator allocator; - private ObjectMapper om = new ObjectMapper(); - - { - DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter(); - prettyPrinter.indentArraysWith(NopIndenter.instance); - om.setDefaultPrettyPrinter(prettyPrinter); - om.enable(SerializationFeature.INDENT_OUTPUT); - om.enable(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS); - } - - static void writeInputFloat(File testInFile, BufferAllocator allocator, double... f) - throws IOException { - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - NonNullableStructVector parent = NonNullableStructVector.empty("parent", vectorAllocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - Float8Writer floatWriter = rootWriter.float8("float"); - for (int i = 0; i < f.length; i++) { - floatWriter.setPosition(i); - floatWriter.writeFloat8(f[i]); - } - writer.setValueCount(f.length); - write(parent.getChild("root"), testInFile); - } - } - - static void writeInput2(File testInFile, BufferAllocator allocator) throws IOException { - int count = ArrowFileTestFixtures.COUNT; - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - NonNullableStructVector parent = NonNullableStructVector.empty("parent", vectorAllocator)) { - writeData(count, parent); - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - IntWriter intWriter = rootWriter.integer("int"); - BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); - intWriter.setPosition(5); - intWriter.writeInt(999); - bigIntWriter.setPosition(4); - bigIntWriter.writeBigInt(777L); - writer.setValueCount(count); - write(parent.getChild("root"), testInFile); - } - } - - @BeforeEach - public void init() { - allocator = new RootAllocator(Integer.MAX_VALUE); - } - - @AfterEach - public 
void tearDown() { - allocator.close(); - } - - @Test - public void testValid() throws Exception { - File testInFile = new File(testFolder, "testIn.arrow"); - File testJSONFile = new File(testFolder, "testOut.json"); - testJSONFile.delete(); - File testOutFile = new File(testFolder, "testOut.arrow"); - testOutFile.delete(); - - // generate an arrow file - writeInput(testInFile, allocator); - - Integration integration = new Integration(); - - // convert it to json - String[] args1 = { - "-arrow", - testInFile.getAbsolutePath(), - "-json", - testJSONFile.getAbsolutePath(), - "-command", - Command.ARROW_TO_JSON.name() - }; - integration.run(args1); - - // convert back to arrow - String[] args2 = { - "-arrow", - testOutFile.getAbsolutePath(), - "-json", - testJSONFile.getAbsolutePath(), - "-command", - Command.JSON_TO_ARROW.name() - }; - integration.run(args2); - - // check it is the same - validateOutput(testOutFile, allocator); - - // validate arrow against json - String[] args3 = { - "-arrow", - testInFile.getAbsolutePath(), - "-json", - testJSONFile.getAbsolutePath(), - "-command", - Command.VALIDATE.name() - }; - integration.run(args3); - } - - @Test - public void testJSONRoundTripWithVariableWidth() throws Exception { - File testJSONFile = - new File("../../docs/source/format/integration_json_examples/simple.json") - .getCanonicalFile(); - if (!testJSONFile.exists()) { - testJSONFile = new File("../docs/source/format/integration_json_examples/simple.json"); - } - File testOutFile = new File(testFolder, "testOut.arrow"); - File testRoundTripJSONFile = new File(testFolder, "testOut.json"); - testOutFile.delete(); - testRoundTripJSONFile.delete(); - - Integration integration = new Integration(); - - // convert to arrow - String[] args1 = { - "-arrow", - testOutFile.getAbsolutePath(), - "-json", - testJSONFile.getAbsolutePath(), - "-command", - Command.JSON_TO_ARROW.name() - }; - integration.run(args1); - - // convert back to json - String[] args2 = { - "-arrow", - 
testOutFile.getAbsolutePath(), - "-json", - testRoundTripJSONFile.getAbsolutePath(), - "-command", - Command.ARROW_TO_JSON.name() - }; - integration.run(args2); - - BufferedReader orig = readNormalized(testJSONFile); - BufferedReader rt = readNormalized(testRoundTripJSONFile); - String i; - String o; - int j = 0; - while ((i = orig.readLine()) != null && (o = rt.readLine()) != null) { - assertEquals(i, o, "line: " + j); - ++j; - } - } - - @Test - public void testJSONRoundTripWithStruct() throws Exception { - File testJSONFile = - new File("../../docs/source/format/integration_json_examples/struct.json") - .getCanonicalFile(); - if (!testJSONFile.exists()) { - testJSONFile = new File("../docs/source/format/integration_json_examples/struct.json"); - } - File testOutFile = new File(testFolder, "testOutStruct.arrow"); - File testRoundTripJSONFile = new File(testFolder, "testOutStruct.json"); - testOutFile.delete(); - testRoundTripJSONFile.delete(); - - Integration integration = new Integration(); - - // convert to arrow - String[] args1 = { - "-arrow", - testOutFile.getAbsolutePath(), - "-json", - testJSONFile.getAbsolutePath(), - "-command", - Command.JSON_TO_ARROW.name() - }; - integration.run(args1); - - // convert back to json - String[] args2 = { - "-arrow", - testOutFile.getAbsolutePath(), - "-json", - testRoundTripJSONFile.getAbsolutePath(), - "-command", - Command.ARROW_TO_JSON.name() - }; - integration.run(args2); - - BufferedReader orig = readNormalized(testJSONFile); - BufferedReader rt = readNormalized(testRoundTripJSONFile); - String i; - String o; - int j = 0; - while ((i = orig.readLine()) != null && (o = rt.readLine()) != null) { - assertEquals(i, o, "line: " + j); - ++j; - } - } - - private BufferedReader readNormalized(File f) throws IOException { - Map tree = om.readValue(f.getCanonicalFile(), Map.class); - String normalized = om.writeValueAsString(tree); - return new BufferedReader(new StringReader(normalized)); - } - - /** The test should not be 
sensitive to small variations in float representation. */ - @Test - public void testFloat() throws Exception { - File testValidInFile = new File(testFolder, "testValidFloatIn.arrow"); - File testInvalidInFile = new File(testFolder, "testAlsoValidFloatIn.arrow"); - File testJSONFile = new File(testFolder, "testValidOut.json"); - testJSONFile.delete(); - - // generate an arrow file - writeInputFloat(testValidInFile, allocator, 912.4140000000002, 912.414); - // generate a different arrow file - writeInputFloat(testInvalidInFile, allocator, 912.414, 912.4140000000002); - - Integration integration = new Integration(); - - // convert the "valid" file to json - String[] args1 = { - "-arrow", - testValidInFile.getAbsolutePath(), - "-json", - testJSONFile.getAbsolutePath(), - "-command", - Command.ARROW_TO_JSON.name() - }; - integration.run(args1); - - // compare the "invalid" file to the "valid" json - String[] args3 = { - "-arrow", - testInvalidInFile.getAbsolutePath(), - "-json", - testJSONFile.getAbsolutePath(), - "-command", - Command.VALIDATE.name() - }; - // this should fail - integration.run(args3); - } - - @Test - public void testInvalid() throws Exception { - File testValidInFile = new File(testFolder, "testValidIn.arrow"); - File testInvalidInFile = new File(testFolder, "testInvalidIn.arrow"); - File testJSONFile = new File(testFolder, "testInvalidOut.json"); - testJSONFile.delete(); - - // generate an arrow file - writeInput(testValidInFile, allocator); - // generate a different arrow file - writeInput2(testInvalidInFile, allocator); - - Integration integration = new Integration(); - - // convert the "valid" file to json - String[] args1 = { - "-arrow", - testValidInFile.getAbsolutePath(), - "-json", - testJSONFile.getAbsolutePath(), - "-command", - Command.ARROW_TO_JSON.name() - }; - integration.run(args1); - - // compare the "invalid" file to the "valid" json - String[] args3 = { - "-arrow", - testInvalidInFile.getAbsolutePath(), - "-json", - 
testJSONFile.getAbsolutePath(), - "-command", - Command.VALIDATE.name() - }; - // this should fail - IllegalArgumentException e = - assertThrows( - IllegalArgumentException.class, - () -> { - integration.run(args3); - }); - - assertTrue(e.getMessage().contains("Different values in column"), e.getMessage()); - assertTrue(e.getMessage().contains("999"), e.getMessage()); - } - - @Test - public void testValidateVariableWidthView() throws Exception { - final int valueCount = 256; - final int multiplier = 6; - - for (int i = 1; i < multiplier; i++) { - File testInFile = new File(testFolder, "testIn.arrow"); - File testJSONFile = new File(testFolder, "testOut.json"); - testJSONFile.delete(); - File testOutFile = new File(testFolder, "testOut.arrow"); - testOutFile.delete(); - - writeVariableWidthViewInput(testInFile, allocator, multiplier * valueCount); - - Integration integration = new Integration(); - - // convert it to json - String[] args1 = { - "-arrow", - testInFile.getAbsolutePath(), - "-json", - testJSONFile.getAbsolutePath(), - "-command", - Command.ARROW_TO_JSON.name() - }; - integration.run(args1); - - // convert back to arrow - String[] args2 = { - "-arrow", - testOutFile.getAbsolutePath(), - "-json", - testJSONFile.getAbsolutePath(), - "-command", - Command.JSON_TO_ARROW.name() - }; - integration.run(args2); - - // check it is the same - validateVariadicOutput(testOutFile, allocator, multiplier * valueCount); - - // validate arrow against json - String[] args3 = { - "-arrow", - testInFile.getAbsolutePath(), - "-json", - testJSONFile.getAbsolutePath(), - "-command", - Command.VALIDATE.name() - }; - integration.run(args3); - } - } -} diff --git a/java/tools/src/test/resources/logback.xml b/java/tools/src/test/resources/logback.xml deleted file mode 100644 index ff848da2a8be1..0000000000000 --- a/java/tools/src/test/resources/logback.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - diff 
--git a/java/vector/pom.xml b/java/vector/pom.xml deleted file mode 100644 index 7cd25cd43e237..0000000000000 --- a/java/vector/pom.xml +++ /dev/null @@ -1,212 +0,0 @@ - - - - 4.0.0 - - org.apache.arrow - arrow-java-root - 19.0.0-SNAPSHOT - - arrow-vector - Arrow Vectors - An off-heap reference implementation for Arrow columnar data format. - - - - org.apache.arrow - arrow-format - - - org.apache.arrow - arrow-memory-core - - - org.immutables - value-annotations - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-annotations - - - com.fasterxml.jackson.core - jackson-databind - - - com.fasterxml.jackson.datatype - jackson-datatype-jsr310 - - - commons-codec - commons-codec - 1.17.1 - - - org.apache.arrow - arrow-memory-netty - test - - - org.apache.arrow - arrow-memory-unsafe - test - - - com.google.flatbuffers - flatbuffers-java - ${dep.fbs.version} - - - org.slf4j - slf4j-api - - - org.hamcrest - hamcrest - test - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - default-test - test - - - org.apache.arrow:arrow-memory-unsafe - - - - - run-unsafe - - test - - test - - - org.apache.arrow:arrow-memory-netty - - netty - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - -Werror - - - - - - org.apache.drill.tools - drill-fmpp-maven-plugin - - - generate-fmpp - - generate - - generate-sources - - src/main/codegen/config.fmpp - ${project.build.directory}/generated-sources/fmpp - src/main/codegen/templates - - - - - - org.apache.maven.plugins - maven-shade-plugin - - - - shade - - package - - - - org.apache.arrow:arrow-format - com.google.flatbuffers:* - - - false - true - shade-format-flatbuffers - - - com.google.flatbuffers - arrow.vector.com.google.flatbuffers - - - - - - - - - - - - - integration-tests - - - - org.apache.maven.plugins - maven-failsafe-plugin - - 3600 - - false - - - - - default-it - - integration-test - verify - - - - - - - - - diff --git a/java/vector/src/main/codegen/config.fmpp 
b/java/vector/src/main/codegen/config.fmpp deleted file mode 100644 index ef5a5072a75a7..0000000000000 --- a/java/vector/src/main/codegen/config.fmpp +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -data: { - # TODO: Rename to ~valueVectorModesAndTypes for clarity. - vv: tdd(../data/ValueVectorTypes.tdd), - arrowTypes: tdd(../data/ArrowTypes.tdd) - -} -freemarkerLinks: { - includes: includes/ -} diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd deleted file mode 100644 index 5a0b30e47ee52..0000000000000 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ /dev/null @@ -1,149 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -{ - types: [ - { - name: "Null", - fields: [], - complex: false - }, - { - name: "Struct_", - fields: [], - complex: true - }, - { - name: "List", - fields: [], - complex: true - }, - { - name: "LargeList", - fields: [], - complex: true - }, - { - name: "FixedSizeList", - fields: [{name: "listSize", type: int}], - complex: true - }, - { - name: "Union", - fields: [{name: "mode", type: short, valueType: UnionMode}, {name: "typeIds", type: "int[]"}], - complex: true - }, - { - name: "Map", - fields: [{name: "keysSorted", type: boolean}], - complex: true - }, - { - name: "Int", - fields: [{name: "bitWidth", type: int}, {name: "isSigned", type: boolean}], - complex: false - }, - { - name: "FloatingPoint", - fields: [{name: precision, type: short, valueType: FloatingPointPrecision}], - complex: false - }, - { - name: "Utf8", - fields: [], - complex: false - }, - { - name: "Utf8View", - fields: [], - complex: false - }, - { - name: "LargeUtf8", - fields: [], - complex: false - }, - { - name: "Binary", - fields: [], - complex: false - }, - { - name: "BinaryView", - fields: [], - complex: false - }, - { - name: "LargeBinary", - fields: [], - complex: false - }, - { - name: "FixedSizeBinary", - fields: [{name: "byteWidth", type: int}], - complex: false - } - { - name: "Bool", - fields: [], - complex: false - }, - { - name: "Decimal", - fields: [{name: "precision", type: int}, {name: "scale", type: int}, {name: "bitWidth", type: int}], - complex: false - }, - { - name: "Date", - fields: [{name: "unit", type: short, valueType: DateUnit}] - complex: false - }, - { - 
name: "Time", - fields: [{name: "unit", type: short, valueType: TimeUnit}, {name: "bitWidth", type: int}], - complex: false - }, - { - name: "Timestamp", - fields: [{name: "unit", type: short, valueType: TimeUnit}, {name: "timezone", type: String}] - complex: false - }, - { - name: "Interval", - fields: [{name: "unit", type: short, valueType: IntervalUnit}], - complex: false - }, - { - name: "Duration", - fields: [{name: "unit", type: short, valueType: TimeUnit}], - complex: false - }, - { - name: "ListView", - fields: [], - complex: true - }, - { - name: "LargeListView", - fields: [], - complex: true - }, - { - name: "RunEndEncoded", - fields: [], - complex: true - } - ] -} diff --git a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd deleted file mode 100644 index ad1f1b93bb3aa..0000000000000 --- a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd +++ /dev/null @@ -1,218 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -{ - modes: [ - {name: "Optional", prefix: "Nullable"}, - {name: "Required", prefix: ""} - ], - types: [ - { - major: "Fixed", - width: 1, - javaType: "byte", - boxedType: "Byte", - fields: [{name: "value", type: "byte"}], - minor: [ - { class: "TinyInt", valueHolder: "IntHolder" }, - { class: "UInt1", valueHolder: "UInt1Holder" } - ] - }, - { - major: "Fixed", - width: 2, - javaType: "char", - boxedType: "Character", - fields: [{name: "value", type: "char"}], - minor: [ - { class: "UInt2", valueHolder: "UInt2Holder"} - ] - }, { - major: "Fixed", - width: 2, - javaType: "short", - boxedType: "Short", - fields: [{name: "value", type: "short"}], - minor: [ - { class: "SmallInt", valueHolder: "Int2Holder"}, - ] - }, - { - major: "Fixed", - width: 2, - javaType: "short", - boxedType: "Short", - fields: [{name: "value", type: "short"}], - minor: [ - { class: "Float2", valueHolder: "Int2Holder"}, - ] - }, - { - major: "Fixed", - width: 4, - javaType: "int", - boxedType: "Integer", - fields: [{name: "value", type: "int"}], - minor: [ - { class: "Int", valueHolder: "IntHolder"}, - { class: "UInt4", valueHolder: "UInt4Holder" }, - { class: "Float4", javaType: "float" , boxedType: "Float", fields: [{name: "value", type: "float"}]}, - { class: "DateDay" }, - { class: "IntervalYear", javaType: "int", friendlyType: "Period" }, - { class: "TimeSec" }, - { class: "TimeMilli", javaType: "int", friendlyType: "LocalDateTime" } - ] - }, - { - major: "Fixed", - width: 8, - javaType: "long", - boxedType: "Long", - fields: [{name: "value", type: "long"}], - minor: [ - { class: "BigInt"}, - { class: "UInt8" }, - { class: "Float8", javaType: "double", boxedType: "Double", fields: [{name: "value", type: "double"}] }, - { class: "DateMilli", javaType: "long", friendlyType: "LocalDateTime" }, - { class: "Duration", javaType: "long", friendlyType: "Duration", - arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Duration", - typeParams: [ {name: "unit", type: 
"org.apache.arrow.vector.types.TimeUnit"} ], - arrowTypeConstructorParams: ["unit"]} - { class: "TimeStampSec", javaType: "long", boxedType: "Long", friendlyType: "LocalDateTime" }, - { class: "TimeStampMilli", javaType: "long", boxedType: "Long", friendlyType: "LocalDateTime" }, - { class: "TimeStampMicro", javaType: "long", boxedType: "Long", friendlyType: "LocalDateTime" }, - { class: "TimeStampNano", javaType: "long", boxedType: "Long", friendlyType: "LocalDateTime" }, - { class: "TimeStampSecTZ", javaType: "long", boxedType: "Long", - typeParams: [ {name: "timezone", type: "String"} ], - arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Timestamp", - arrowTypeConstructorParams: ["org.apache.arrow.vector.types.TimeUnit.SECOND", "timezone"] }, - { class: "TimeStampMilliTZ", javaType: "long", boxedType: "Long", - typeParams: [ {name: "timezone", type: "String"} ], - arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Timestamp", - arrowTypeConstructorParams: ["org.apache.arrow.vector.types.TimeUnit.MILLISECOND", "timezone"] }, - { class: "TimeStampMicroTZ", javaType: "long", boxedType: "Long", - typeParams: [ {name: "timezone", type: "String"} ], - arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Timestamp", - arrowTypeConstructorParams: ["org.apache.arrow.vector.types.TimeUnit.MICROSECOND", "timezone"] }, - { class: "TimeStampNanoTZ", javaType: "long", boxedType: "Long", - typeParams: [ {name: "timezone", type: "String"} ], - arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Timestamp", - arrowTypeConstructorParams: ["org.apache.arrow.vector.types.TimeUnit.NANOSECOND", "timezone"] }, - { class: "TimeMicro" }, - { class: "TimeNano" } - ] - }, - { - major: "Fixed", - width: 8, - javaType: "ArrowBuf", - boxedType: "ArrowBuf", - minor: [ - { class: "IntervalDay", millisecondsOffset: 4, friendlyType: "Duration", fields: [ {name: "days", type:"int"}, {name: "milliseconds", type:"int"}] } - ] - }, - { - major: "Fixed", - width: 16, - 
javaType: "ArrowBuf", - boxedType: "ArrowBuf", - minor: [ - { class: "IntervalMonthDayNano", daysOffset: 4, nanosecondsOffset: 8, friendlyType: "PeriodDuration", fields: [ {name: "months", type:"int"}, {name: "days", type:"int"}, {name: "nanoseconds", type:"long"}] } - ] - }, - - { - major: "Fixed", - width: 32, - javaType: "ArrowBuf", - boxedType: "ArrowBuf", - - minor: [ - { - class: "Decimal256", - maxPrecisionDigits: 76, nDecimalDigits: 4, friendlyType: "BigDecimal", - typeParams: [ {name: "scale", type: "int"}, { name: "precision", type: "int"}], - arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Decimal", - fields: [{name: "start", type: "long"}, {name: "buffer", type: "ArrowBuf"}] - } - ] - }, - { - major: "Fixed", - width: 16, - javaType: "ArrowBuf", - boxedType: "ArrowBuf", - - minor: [ - { - class: "Decimal", - maxPrecisionDigits: 38, nDecimalDigits: 4, friendlyType: "BigDecimal", - typeParams: [ {name: "scale", type: "int"}, { name: "precision", type: "int"}], - arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Decimal", - fields: [{name: "start", type: "long"}, {name: "buffer", type: "ArrowBuf"}] - } - ] - }, - - { - major: "Fixed", - width: -1, - javaType: "byte[]", - boxedType: "ArrowBuf", - minor: [ - { - class: "FixedSizeBinary", - typeParams: [ {name: "byteWidth", type: "int"} ], - arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary", - friendlyType: "byte[]", - fields: [{name: "buffer", type: "ArrowBuf"}], - } - ] - }, - { - major: "VarLen", - width: 4, - javaType: "int", - boxedType: "ArrowBuf", - fields: [{name: "start", type: "int"}, {name: "end", type: "int"}, {name: "buffer", type: "ArrowBuf"}], - minor: [ - { class: "VarBinary" , friendlyType: "byte[]" }, - { class: "VarChar" , friendlyType: "Text" }, - { class: "ViewVarBinary" , friendlyType: "byte[]" }, - { class: "ViewVarChar" , friendlyType: "Text" } - ] - }, - { - major: "VarLen", - width: 8, - javaType: "long", - boxedType: "ArrowBuf", - fields: 
[{name: "start", type: "long"}, {name: "end", type: "long"}, {name: "buffer", type: "ArrowBuf"}], - minor: [ - { class: "LargeVarChar" , friendlyType: "Text" } - { class: "LargeVarBinary" , friendlyType: "byte[]" } - ] - }, - { - major: "Bit", - width: 1, - javaType: "int", - boxedType: "Integer", - minor: [ - { class: "Bit" , friendlyType: "Boolean", fields: [{name: "value", type: "int"}] } - ] - } - ] -} diff --git a/java/vector/src/main/codegen/includes/license.ftl b/java/vector/src/main/codegen/includes/license.ftl deleted file mode 100644 index c6a5afeef509f..0000000000000 --- a/java/vector/src/main/codegen/includes/license.ftl +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ \ No newline at end of file diff --git a/java/vector/src/main/codegen/includes/vv_imports.ftl b/java/vector/src/main/codegen/includes/vv_imports.ftl deleted file mode 100644 index f4c72a1a6cbae..0000000000000 --- a/java/vector/src/main/codegen/includes/vv_imports.ftl +++ /dev/null @@ -1,58 +0,0 @@ -<#-- - ~ Licensed to the Apache Software Foundation (ASF) under one or more - ~ contributor license agreements. 
See the NOTICE file distributed with - ~ this work for additional information regarding copyright ownership. - ~ The ASF licenses this file to You under the Apache License, Version 2.0 - ~ (the "License"); you may not use this file except in compliance with - ~ the License. You may obtain a copy of the License at - ~ - ~ http://www.apache.org/licenses/LICENSE-2.0 - ~ - ~ Unless required by applicable law or agreed to in writing, software - ~ distributed under the License is distributed on an "AS IS" BASIS, - ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ~ See the License for the specific language governing permissions and - ~ limitations under the License. - --> - -import static org.apache.arrow.util.Preconditions.checkArgument; -import static org.apache.arrow.util.Preconditions.checkState; - -import com.google.flatbuffers.FlatBufferBuilder; - -import org.apache.arrow.memory.*; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.Types.*; -import org.apache.arrow.vector.types.pojo.*; -import org.apache.arrow.vector.types.pojo.ArrowType.*; -import org.apache.arrow.vector.types.*; -import org.apache.arrow.vector.*; -import org.apache.arrow.vector.holders.*; -import org.apache.arrow.vector.util.*; -import org.apache.arrow.vector.complex.*; -import org.apache.arrow.vector.complex.reader.*; -import org.apache.arrow.vector.complex.impl.*; -import org.apache.arrow.vector.complex.writer.*; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; -import org.apache.arrow.vector.util.JsonStringArrayList; - -import java.util.Arrays; -import java.util.Random; -import java.util.List; - -import java.io.Closeable; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.nio.ByteBuffer; - -import 
java.math.BigDecimal; -import java.math.BigInteger; -import java.time.Duration; -import java.time.LocalDateTime; -import java.time.Period; -import java.time.ZonedDateTime; - - diff --git a/java/vector/src/main/codegen/templates/AbstractFieldReader.java b/java/vector/src/main/codegen/templates/AbstractFieldReader.java deleted file mode 100644 index e3c8729469c74..0000000000000 --- a/java/vector/src/main/codegen/templates/AbstractFieldReader.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/AbstractFieldReader.java" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> - -/** - * Source code generated using FreeMarker template ${.template_name} - */ -@SuppressWarnings("unused") -abstract class AbstractFieldReader extends AbstractBaseReader implements FieldReader{ - - AbstractFieldReader(){ - super(); - } - - /** - * Returns true if the current value of the reader is not null - * @return whether the current value is set - */ - public boolean isSet() { - return true; - } - - @Override - public Field getField() { - fail("getField"); - return null; - } - - <#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean", - "LocalDateTime", "Duration", "Period", "Double", "Float", - "Character", "Text", "String", "Byte", "byte[]", "PeriodDuration"] as friendlyType> - <#assign safeType=friendlyType /> - <#if safeType=="byte[]"><#assign safeType="ByteArray" /> - public ${friendlyType} read${safeType}(int arrayIndex) { - fail("read${safeType}(int arrayIndex)"); - return null; - } - - public ${friendlyType} read${safeType}() { - fail("read${safeType}()"); - return null; - } - - - public void copyAsValue(StructWriter writer) { - fail("CopyAsValue StructWriter"); - } - - public void copyAsField(String name, StructWriter writer) { - fail("CopyAsField StructWriter"); - } - - public void copyAsField(String name, ListWriter writer) { - fail("CopyAsFieldList"); - } - - public void copyAsField(String name, MapWriter writer) { - fail("CopyAsFieldMap"); - } - - <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> - <#assign boxedType = (minor.boxedType!type.boxedType) /> - public void read(${name}Holder holder) { - fail("${name}"); - } - - public void read(Nullable${name}Holder holder) { - fail("${name}"); - } - - public void read(int arrayIndex, 
${name}Holder holder) { - fail("Repeated${name}"); - } - - public void read(int arrayIndex, Nullable${name}Holder holder) { - fail("Repeated${name}"); - } - - public void copyAsValue(${name}Writer writer) { - fail("CopyAsValue${name}"); - } - - public void copyAsField(String name, ${name}Writer writer) { - fail("CopyAsField${name}"); - } - - - public FieldReader reader(String name) { - fail("reader(String name)"); - return null; - } - - public FieldReader reader() { - fail("reader()"); - return null; - } - - public int size() { - fail("size()"); - return -1; - } - - private void fail(String name) { - throw new IllegalArgumentException(String.format("You tried to read a [%s] type when you are using a field reader of type [%s].", name, this.getClass().getSimpleName())); - } -} - - - diff --git a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java deleted file mode 100644 index 5ebfb6877fc5b..0000000000000 --- a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/AbstractFieldWriter.java" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> - -/* - * This class is generated using freemarker and the ${.template_name} template. - * Note that changes to the AbstractFieldWriter template should also get reflected in the - * AbstractPromotableFieldWriter, ComplexWriters, UnionFixedSizeListWriter, UnionListWriter - * and UnionWriter templates and the PromotableWriter concrete code. - */ -@SuppressWarnings("unused") -abstract class AbstractFieldWriter extends AbstractBaseWriter implements FieldWriter { - - protected boolean addVectorAsNullable = true; - - /** - * Set flag to control the FieldType.nullable property when a writer creates a new vector. - * If true then vectors created will be nullable, this is the default behavior. If false then - * vectors created will be non-nullable. 
- * - * @param nullable Whether or not to create nullable vectors (default behavior is true) - */ - public void setAddVectorAsNullable(boolean nullable) { - addVectorAsNullable = nullable; - } - - @Override - public void start() { - throw new IllegalStateException(String.format("You tried to start when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); - } - - @Override - public void end() { - throw new IllegalStateException(String.format("You tried to end when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); - } - - @Override - public void startList() { - throw new IllegalStateException(String.format("You tried to start a list when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); - } - - @Override - public void endList() { - throw new IllegalStateException(String.format("You tried to end a list when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); - } - - @Override - public void startListView() { - throw new IllegalStateException(String.format("You tried to start a list view when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); - } - - @Override - public void endListView() { - throw new IllegalStateException(String.format("You tried to end a list view when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); - } - - @Override - public void startMap() { - throw new IllegalStateException(String.format("You tried to start a map when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); - } - - @Override - public void endMap() { - throw new IllegalStateException(String.format("You tried to end a map when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); - } - - @Override - public void startEntry() { - throw new IllegalStateException(String.format("You tried to start a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); - 
} - - @Override - public MapWriter key() { - throw new IllegalStateException(String.format("You tried to start a map key when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); - } - - @Override - public MapWriter value() { - throw new IllegalStateException(String.format("You tried to start a map value when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); - } - - @Override - public void endEntry() { - throw new IllegalStateException(String.format("You tried to end a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName())); - } - - <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) /> - @Override - public void write(${name}Holder holder) { - fail("${name}"); - } - - public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { - fail("${name}"); - } - - <#if minor.class?starts_with("Decimal")> - public void write${minor.class}(${friendlyType} value) { - fail("${name}"); - } - - public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, , ArrowType arrowType) { - fail("${name}"); - } - - public void writeBigEndianBytesTo${minor.class}(byte[] value) { - fail("${name}"); - } - - public void writeBigEndianBytesTo${minor.class}(byte[] value, ArrowType arrowType) { - fail("${name}"); - } - - - <#if minor.class?ends_with("VarBinary")> - public void write${minor.class}(byte[] value) { - fail("${name}"); - } - - public void write${minor.class}(byte[] value, int offset, int length) { - fail("${name}"); - } - - public void write${minor.class}(ByteBuffer value) { - fail("${name}"); - } - - public void write${minor.class}(ByteBuffer value, int offset, int length) { - fail("${name}"); - } - - - <#if minor.class?ends_with("VarChar")> - 
public void write${minor.class}(${friendlyType} value) { - fail("${name}"); - } - - public void write${minor.class}(String value) { - fail("${name}"); - } - - - - - public void writeNull() { - fail("${name}"); - } - - /** - * This implementation returns {@code false}. - *

    - * Must be overridden by struct writers. - *

    - */ - @Override - public boolean isEmptyStruct() { - return false; - } - - @Override - public StructWriter struct() { - fail("Struct"); - return null; - } - - @Override - public ListWriter list() { - fail("List"); - return null; - } - - @Override - public ListWriter listView() { - fail("ListView"); - return null; - } - - @Override - public MapWriter map() { - fail("Map"); - return null; - } - - @Override - public StructWriter struct(String name) { - fail("Struct"); - return null; - } - - @Override - public ListWriter list(String name) { - fail("List"); - return null; - } - - @Override - public ListWriter listView(String name) { - fail("ListView"); - return null; - } - - @Override - public MapWriter map(String name) { - fail("Map"); - return null; - } - - @Override - public MapWriter map(boolean keysSorted) { - fail("Map"); - return null; - } - - @Override - public MapWriter map(String name, boolean keysSorted) { - fail("Map"); - return null; - } - <#list vv.types as type><#list type.minor as minor> - <#assign lowerName = minor.class?uncap_first /> - <#if lowerName == "int" ><#assign lowerName = "integer" /> - <#assign upperName = minor.class?upper_case /> - <#assign capName = minor.class?cap_first /> - <#if minor.typeParams?? 
> - - @Override - public ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}) { - fail("${capName}(" + <#list minor.typeParams as typeParam>"${typeParam.name}: " + ${typeParam.name} + ", " + ")"); - return null; - } - - - @Override - public ${capName}Writer ${lowerName}(String name) { - fail("${capName}"); - return null; - } - - @Override - public ${capName}Writer ${lowerName}() { - fail("${capName}"); - return null; - } - - - - public void copyReader(FieldReader reader) { - fail("Copy FieldReader"); - } - - public void copyReaderToField(String name, FieldReader reader) { - fail("Copy FieldReader to STring"); - } - - private void fail(String name) { - throw new IllegalArgumentException(String.format("You tried to write a %s type when you are using a ValueWriter of type %s.", name, this.getClass().getSimpleName())); - } -} diff --git a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java deleted file mode 100644 index 06cb235f7dd99..0000000000000 --- a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java +++ /dev/null @@ -1,353 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/AbstractPromotableFieldWriter.java" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> - -<#function is_timestamp_tz type> - <#return type?starts_with("TimeStamp") && type?ends_with("TZ")> - - -/* - * A FieldWriter which delegates calls to another FieldWriter. The delegate FieldWriter can be promoted to a new type - * when necessary. Classes that extend this class are responsible for handling promotion. - * - * This class is generated using freemarker and the ${.template_name} template. - * - */ -@SuppressWarnings("unused") -abstract class AbstractPromotableFieldWriter extends AbstractFieldWriter { - /** - * Retrieve the FieldWriter, promoting if it is not a FieldWriter of the specified type - * @param type the type of the values we want to write - * @return the corresponding field writer - */ - protected FieldWriter getWriter(MinorType type) { - return getWriter(type, null); - } - - abstract protected FieldWriter getWriter(MinorType type, ArrowType arrowType); - - /** - * @return the current FieldWriter - */ - abstract protected FieldWriter getWriter(); - - @Override - public void start() { - getWriter(MinorType.STRUCT).start(); - } - - @Override - public void end() { - getWriter(MinorType.STRUCT).end(); - setPosition(idx() + 1); - } - - @Override - public void startList() { - getWriter(MinorType.LIST).startList(); - } - - @Override - public void endList() { - getWriter(MinorType.LIST).endList(); - setPosition(idx() + 1); - } - - @Override - public void startListView() { - getWriter(MinorType.LISTVIEW).startListView(); - } - - @Override - public void endListView() { - getWriter(MinorType.LISTVIEW).endListView(); - setPosition(idx() + 1); - } - - @Override - public void 
startMap() { - getWriter(MinorType.MAP).startMap(); - } - - @Override - public void endMap() { - getWriter(MinorType.MAP).endMap(); - setPosition(idx() + 1); - } - - @Override - public void startEntry() { - getWriter(MinorType.MAP).startEntry(); - } - - @Override - public MapWriter key() { - return getWriter(MinorType.MAP).key(); - } - - @Override - public MapWriter value() { - return getWriter(MinorType.MAP).value(); - } - - @Override - public void endEntry() { - getWriter(MinorType.MAP).endEntry(); - } - - <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#if minor.class == "Decimal"> - @Override - public void write(DecimalHolder holder) { - getWriter(MinorType.DECIMAL).write(holder); - } - - public void writeDecimal(int start, ArrowBuf buffer, ArrowType arrowType) { - getWriter(MinorType.DECIMAL).writeDecimal(start, buffer, arrowType); - } - - public void writeDecimal(int start, ArrowBuf buffer) { - getWriter(MinorType.DECIMAL).writeDecimal(start, buffer); - } - - public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType) { - getWriter(MinorType.DECIMAL).writeBigEndianBytesToDecimal(value, arrowType); - } - - public void writeBigEndianBytesToDecimal(byte[] value) { - getWriter(MinorType.DECIMAL).writeBigEndianBytesToDecimal(value); - } - <#elseif minor.class == "Decimal256"> - @Override - public void write(Decimal256Holder holder) { - getWriter(MinorType.DECIMAL256).write(holder); - } - - public void writeDecimal256(long start, ArrowBuf buffer, ArrowType arrowType) { - getWriter(MinorType.DECIMAL256).writeDecimal256(start, buffer, arrowType); - } - - public void writeDecimal256(long start, ArrowBuf buffer) { - getWriter(MinorType.DECIMAL256).writeDecimal256(start, buffer); - } - public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) { - getWriter(MinorType.DECIMAL256).writeBigEndianBytesToDecimal256(value, arrowType); - } - - 
public void writeBigEndianBytesToDecimal256(byte[] value) { - getWriter(MinorType.DECIMAL256).writeBigEndianBytesToDecimal256(value); - } - <#elseif is_timestamp_tz(minor.class)> - @Override - public void write(${name}Holder holder) { - ArrowType.Timestamp arrowTypeWithoutTz = (ArrowType.Timestamp) MinorType.${name?upper_case?remove_ending("TZ")}.getType(); - // Take the holder.timezone similar to how PromotableWriter.java:write(DecimalHolder) takes the scale from the holder. - ArrowType.Timestamp arrowType = new ArrowType.Timestamp(arrowTypeWithoutTz.getUnit(), holder.timezone); - getWriter(MinorType.${name?upper_case}, arrowType).write(holder); - } - - /** - * @deprecated - * The holder version should be used instead otherwise the timezone will default to UTC. - * @see #write(${name}Holder) - */ - @Deprecated - @Override - public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { - ArrowType.Timestamp arrowTypeWithoutTz = (ArrowType.Timestamp) MinorType.${name?upper_case?remove_ending("TZ")}.getType(); - // Assumes UTC if no timezone is provided - ArrowType.Timestamp arrowType = new ArrowType.Timestamp(arrowTypeWithoutTz.getUnit(), "UTC"); - getWriter(MinorType.${name?upper_case}, arrowType).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, ); - } - <#elseif minor.class == "Duration"> - @Override - public void write(${name}Holder holder) { - ArrowType.Duration arrowType = new ArrowType.Duration(holder.unit); - getWriter(MinorType.${name?upper_case}, arrowType).write(holder); - } - - /** - * @deprecated - * If you experience errors with using this version of the method, switch to the holder version. - * The errors occur when using an untyped or unioned PromotableWriter, because this version of the - * method does not have enough information to infer the ArrowType. 
- * @see #write(${name}Holder) - */ - @Deprecated - @Override - public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { - getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, ); - } - <#elseif minor.class == "FixedSizeBinary"> - @Override - public void write(${name}Holder holder) { - ArrowType.FixedSizeBinary arrowType = new ArrowType.FixedSizeBinary(holder.byteWidth); - getWriter(MinorType.${name?upper_case}, arrowType).write(holder); - } - - /** - * @deprecated - * If you experience errors with using this version of the method, switch to the holder version. - * The errors occur when using an untyped or unioned PromotableWriter, because this version of the - * method does not have enough information to infer the ArrowType. - * @see #write(${name}Holder) - */ - @Deprecated - @Override - public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { - getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, ); - } - <#else> - @Override - public void write(${name}Holder holder) { - getWriter(MinorType.${name?upper_case}).write(holder); - } - - public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { - getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, ); - } - - - <#if minor.class?ends_with("VarBinary")> - @Override - public void write${minor.class}(byte[] value) { - getWriter(MinorType.${name?upper_case}).write${minor.class}(value); - } - - @Override - public void write${minor.class}(byte[] value, int offset, int length) { - getWriter(MinorType.${name?upper_case}).write${minor.class}(value, offset, length); - } - - @Override - public void write${minor.class}(ByteBuffer value) { - 
getWriter(MinorType.${name?upper_case}).write${minor.class}(value); - } - - @Override - public void write${minor.class}(ByteBuffer value, int offset, int length) { - getWriter(MinorType.${name?upper_case}).write${minor.class}(value, offset, length); - } - <#elseif minor.class?ends_with("VarChar")> - @Override - public void write${minor.class}(Text value) { - getWriter(MinorType.${name?upper_case}).write${minor.class}(value); - } - - @Override - public void write${minor.class}(String value) { - getWriter(MinorType.${name?upper_case}).write${minor.class}(value); - } - - - - public void writeNull() { - } - - @Override - public StructWriter struct() { - return getWriter(MinorType.LIST).struct(); - } - - @Override - public ListWriter list() { - return getWriter(MinorType.LIST).list(); - } - - @Override - public ListWriter listView() { - return getWriter(MinorType.LISTVIEW).listView(); - } - - @Override - public MapWriter map() { - return getWriter(MinorType.LIST).map(); - } - - @Override - public MapWriter map(boolean keysSorted) { - return getWriter(MinorType.MAP, new ArrowType.Map(keysSorted)); - } - - @Override - public StructWriter struct(String name) { - return getWriter(MinorType.STRUCT).struct(name); - } - - @Override - public ListWriter list(String name) { - return getWriter(MinorType.STRUCT).list(name); - } - - @Override - public ListWriter listView(String name) { - return getWriter(MinorType.STRUCT).listView(name); - } - - @Override - public MapWriter map(String name) { - return getWriter(MinorType.STRUCT).map(name); - } - - @Override - public MapWriter map(String name, boolean keysSorted) { - return getWriter(MinorType.STRUCT).map(name, keysSorted); - } - - <#list vv.types as type><#list type.minor as minor> - <#assign lowerName = minor.class?uncap_first /> - <#if lowerName == "int" ><#assign lowerName = "integer" /> - <#assign upperName = minor.class?upper_case /> - <#assign capName = minor.class?cap_first /> - - <#if minor.typeParams?? 
> - @Override - public ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}) { - return getWriter(MinorType.STRUCT).${lowerName}(name<#list minor.typeParams as typeParam>, ${typeParam.name}); - } - - - @Override - public ${capName}Writer ${lowerName}(String name) { - return getWriter(MinorType.STRUCT).${lowerName}(name); - } - - @Override - public ${capName}Writer ${lowerName}() { - return getWriter(MinorType.LIST).${lowerName}(); - } - - - - public void copyReader(FieldReader reader) { - getWriter().copyReader(reader); - } - - public void copyReaderToField(String name, FieldReader reader) { - getWriter().copyReaderToField(name, reader); - } -} diff --git a/java/vector/src/main/codegen/templates/ArrowType.java b/java/vector/src/main/codegen/templates/ArrowType.java deleted file mode 100644 index b08d4ad0afac0..0000000000000 --- a/java/vector/src/main/codegen/templates/ArrowType.java +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/types/pojo/ArrowType.java" /> -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.types.pojo; - -import com.google.flatbuffers.FlatBufferBuilder; - -import java.util.Objects; - -import org.apache.arrow.flatbuf.Type; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.*; -import org.apache.arrow.vector.FieldVector; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonTypeInfo; - -/** - * Arrow types - * Source code generated using FreeMarker template ${.template_name} - **/ -@JsonTypeInfo( - use = JsonTypeInfo.Id.NAME, - include = JsonTypeInfo.As.PROPERTY, - property = "name") -@JsonSubTypes({ -<#list arrowTypes.types as type> - @JsonSubTypes.Type(value = ArrowType.${type.name?remove_ending("_")}.class, name = "${type.name?remove_ending("_")?lower_case}"), - -}) -public abstract class ArrowType { - - public static abstract class PrimitiveType extends ArrowType { - - private PrimitiveType() { - } - - @Override - public boolean isComplex() { - return false; - } - } - - public static abstract class ComplexType extends ArrowType { - - private ComplexType() { - } - - @Override - public boolean isComplex() { - return true; - } - } - - public static enum ArrowTypeID { - <#list arrowTypes.types as type> - <#assign name = type.name> - ${name?remove_ending("_")}(Type.${name}), - - NONE(Type.NONE); - - private final byte flatbufType; - - public byte getFlatbufID() { - return this.flatbufType; - } - - private ArrowTypeID(byte flatbufType) { - this.flatbufType = flatbufType; - } - } - - @JsonIgnore - public abstract ArrowTypeID getTypeID(); - @JsonIgnore - public abstract boolean isComplex(); - public abstract int 
getType(FlatBufferBuilder builder); - public abstract T accept(ArrowTypeVisitor visitor); - - /** - * to visit the ArrowTypes - * - * type.accept(new ArrowTypeVisitor<Type>() { - * ... - * }); - * - */ - public static interface ArrowTypeVisitor { - <#list arrowTypes.types as type> - T visit(${type.name?remove_ending("_")} type); - - default T visit(ExtensionType type) { - return type.storageType().accept(this); - } - } - - /** - * to visit the Complex ArrowTypes and bundle Primitive ones in one case - */ - public static abstract class ComplexTypeVisitor implements ArrowTypeVisitor { - - public T visit(PrimitiveType type) { - throw new UnsupportedOperationException("Unexpected Primitive type: " + type); - } - - <#list arrowTypes.types as type> - <#if !type.complex> - public final T visit(${type.name?remove_ending("_")} type) { - return visit((PrimitiveType) type); - } - - - } - - /** - * to visit the Primitive ArrowTypes and bundle Complex ones under one case - */ - public static abstract class PrimitiveTypeVisitor implements ArrowTypeVisitor { - - public T visit(ComplexType type) { - throw new UnsupportedOperationException("Unexpected Complex type: " + type); - } - - <#list arrowTypes.types as type> - <#if type.complex> - public final T visit(${type.name?remove_ending("_")} type) { - return visit((ComplexType) type); - } - - - } - - <#list arrowTypes.types as type> - <#assign name = type.name?remove_ending("_")> - <#assign fields = type.fields> - public static class ${name} extends <#if type.complex>ComplexType<#else>PrimitiveType { - public static final ArrowTypeID TYPE_TYPE = ArrowTypeID.${name}; - <#if type.fields?size == 0> - public static final ${name} INSTANCE = new ${name}(); - <#else> - - <#list fields as field> - <#assign fieldType = field.valueType!field.type> - ${fieldType} ${field.name}; - - - - <#if type.name == "Decimal"> - // Needed to support golden file integration tests. 
- @JsonCreator - public static Decimal createDecimal( - @JsonProperty("precision") int precision, - @JsonProperty("scale") int scale, - @JsonProperty("bitWidth") Integer bitWidth) { - - return new Decimal(precision, scale, bitWidth == null ? 128 : bitWidth); - } - - /** - * Construct Decimal with 128 bits. - * - * This is kept mainly for the sake of backward compatibility. - * Please use {@link org.apache.arrow.vector.types.pojo.ArrowType.Decimal#Decimal(int, int, int)} instead. - * - * @deprecated This API will be removed in a future release. - */ - @Deprecated - public Decimal(int precision, int scale) { - this(precision, scale, 128); - } - - <#else> - @JsonCreator - - public ${type.name}( - <#list type.fields as field> - <#assign fieldType = field.valueType!field.type> - @JsonProperty("${field.name}") ${fieldType} ${field.name}<#if field_has_next>, - - ) { - <#list type.fields as field> - this.${field.name} = ${field.name}; - - } - - <#list fields as field> - <#assign fieldType = field.valueType!field.type> - public ${fieldType} get${field.name?cap_first}() { - return ${field.name}; - } - - - - @Override - public ArrowTypeID getTypeID() { - return TYPE_TYPE; - } - - @Override - public int getType(FlatBufferBuilder builder) { - <#list type.fields as field> - <#if field.type == "String"> - int ${field.name} = this.${field.name} == null ? -1 : builder.createString(this.${field.name}); - - <#if field.type == "int[]"> - int ${field.name} = this.${field.name} == null ? 
-1 : org.apache.arrow.flatbuf.${type.name}.create${field.name?cap_first}Vector(builder, this.${field.name}); - - - org.apache.arrow.flatbuf.${type.name}.start${type.name}(builder); - <#list type.fields as field> - <#if field.type == "String" || field.type == "int[]"> - if (this.${field.name} != null) { - org.apache.arrow.flatbuf.${type.name}.add${field.name?cap_first}(builder, ${field.name}); - } - <#else> - org.apache.arrow.flatbuf.${type.name}.add${field.name?cap_first}(builder, this.${field.name}<#if field.valueType??>.getFlatbufID()); - - - return org.apache.arrow.flatbuf.${type.name}.end${type.name}(builder); - } - - public String toString() { - return "${name}" - <#if fields?size != 0> - + "(" - <#list fields as field> - + <#if field.type == "int[]">java.util.Arrays.toString(${field.name})<#else>${field.name}<#if field_has_next> + ", " - - + ")" - - ; - } - - @Override - public int hashCode() { - return java.util.Arrays.deepHashCode(new Object[] {<#list type.fields as field>${field.name}<#if field_has_next>, }); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof ${name})) { - return false; - } - <#if type.fields?size == 0> - return true; - <#else> - ${type.name} that = (${type.name}) obj; - return <#list type.fields as field>Objects.deepEquals(this.${field.name}, that.${field.name}) <#if field_has_next>&&<#else>; - - - } - - @Override - public T accept(ArrowTypeVisitor visitor) { - return visitor.visit(this); - } - } - - - /** - * A user-defined data type that wraps an underlying storage type. - */ - public abstract static class ExtensionType extends ComplexType { - /** The on-wire type for this user-defined type. */ - public abstract ArrowType storageType(); - /** The name of this user-defined type. Used to identify the type during serialization. */ - public abstract String extensionName(); - /** Check equality of this type to another user-defined type. 
*/ - public abstract boolean extensionEquals(ExtensionType other); - /** Save any metadata for this type. */ - public abstract String serialize(); - /** Given saved metadata and the underlying storage type, construct a new instance of the user type. */ - public abstract ArrowType deserialize(ArrowType storageType, String serializedData); - /** Construct a vector for the user type. */ - public abstract FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator); - - /** The field metadata key storing the name of the extension type. */ - public static final String EXTENSION_METADATA_KEY_NAME = "ARROW:extension:name"; - /** The field metadata key storing metadata for the extension type. */ - public static final String EXTENSION_METADATA_KEY_METADATA = "ARROW:extension:metadata"; - - @Override - public ArrowTypeID getTypeID() { - return storageType().getTypeID(); - } - - @Override - public int getType(FlatBufferBuilder builder) { - return storageType().getType(builder); - } - - public String toString() { - return "ExtensionType(" + extensionName() + ", " + storageType().toString() + ")"; - } - - @Override - public int hashCode() { - return java.util.Arrays.deepHashCode(new Object[] {storageType(), extensionName()}); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof ExtensionType)) { - return false; - } - return this.extensionEquals((ExtensionType) obj); - } - - @Override - public T accept(ArrowTypeVisitor visitor) { - return visitor.visit(this); - } - } - - private static final int defaultDecimalBitWidth = 128; - - public static org.apache.arrow.vector.types.pojo.ArrowType getTypeForField(org.apache.arrow.flatbuf.Field field) { - switch(field.typeType()) { - <#list arrowTypes.types as type> - <#assign name = type.name?remove_ending("_")> - <#assign nameLower = type.name?lower_case> - <#assign fields = type.fields> - case Type.${type.name}: { - org.apache.arrow.flatbuf.${type.name} ${nameLower}Type = 
(org.apache.arrow.flatbuf.${type.name}) field.type(new org.apache.arrow.flatbuf.${type.name}()); - <#list type.fields as field> - <#if field.type == "int[]"> - ${field.type} ${field.name} = new int[${nameLower}Type.${field.name}Length()]; - for (int i = 0; i< ${field.name}.length; ++i) { - ${field.name}[i] = ${nameLower}Type.${field.name}(i); - } - <#else> - ${field.type} ${field.name} = ${nameLower}Type.${field.name}(); - - - <#if type.name == "Decimal"> - if (bitWidth != defaultDecimalBitWidth && bitWidth != 256) { - throw new IllegalArgumentException("Library only supports 128-bit and 256-bit decimal values"); - } - - return new ${name}(<#list type.fields as field><#if field.valueType??>${field.valueType}.fromFlatbufID(${field.name})<#else>${field.name}<#if field_has_next>, ); - } - - default: - throw new UnsupportedOperationException("Unsupported type: " + field.typeType()); - } - } - - public static Int getInt(org.apache.arrow.flatbuf.Field field) { - org.apache.arrow.flatbuf.Int intType = (org.apache.arrow.flatbuf.Int) field.type(new org.apache.arrow.flatbuf.Int()); - return new Int(intType.bitWidth(), intType.isSigned()); - } -} - - diff --git a/java/vector/src/main/codegen/templates/BaseReader.java b/java/vector/src/main/codegen/templates/BaseReader.java deleted file mode 100644 index 85d582a53bf5d..0000000000000 --- a/java/vector/src/main/codegen/templates/BaseReader.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/reader/BaseReader.java" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.reader; - -<#include "/@includes/vv_imports.ftl" /> - -/** - * Source code generated using FreeMarker template ${.template_name} - */ -@SuppressWarnings("unused") -public interface BaseReader extends Positionable{ - Field getField(); - MinorType getMinorType(); - void reset(); - void read(UnionHolder holder); - void read(int index, UnionHolder holder); - void copyAsValue(UnionWriter writer); - void read(DenseUnionHolder holder); - void read(int index, DenseUnionHolder holder); - void copyAsValue(DenseUnionWriter writer); - boolean isSet(); - - public interface StructReader extends BaseReader, Iterable{ - FieldReader reader(String name); - } - - public interface RepeatedStructReader extends StructReader{ - boolean next(); - int size(); - void copyAsValue(StructWriter writer); - } - - public interface ListReader extends BaseReader{ - FieldReader reader(); - } - - public interface RepeatedListReader extends ListReader{ - boolean next(); - int size(); - void copyAsValue(ListWriter writer); - } - - public interface MapReader extends BaseReader{ - FieldReader reader(); - } - - public interface RepeatedMapReader extends MapReader{ - boolean next(); - int size(); - void copyAsValue(MapWriter writer); - } - - public interface ScalarReader extends - <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Reader, - 
BaseReader {} - - interface ComplexReader{ - StructReader rootAsStruct(); - ListReader rootAsList(); - boolean rootIsStruct(); - boolean ok(); - } -} - diff --git a/java/vector/src/main/codegen/templates/BaseWriter.java b/java/vector/src/main/codegen/templates/BaseWriter.java deleted file mode 100644 index e952d46f1f241..0000000000000 --- a/java/vector/src/main/codegen/templates/BaseWriter.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/writer/BaseWriter.java" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.writer; - -<#include "/@includes/vv_imports.ftl" /> - -/* - * File generated from ${.template_name} using FreeMarker. - */ -@SuppressWarnings("unused") -public interface BaseWriter extends AutoCloseable, Positionable { - int getValueCapacity(); - void writeNull(); - - public interface StructWriter extends BaseWriter { - - Field getField(); - - /** - * Whether this writer is a struct writer and is empty (has no children). - * - *

    - * Intended only for use in determining whether to add dummy vector to - * avoid empty (zero-column) schema, as in JsonReader. - *

    - * @return whether the struct is empty - */ - boolean isEmptyStruct(); - - <#list vv.types as type><#list type.minor as minor> - <#assign lowerName = minor.class?uncap_first /> - <#if lowerName == "int" ><#assign lowerName = "integer" /> - <#assign upperName = minor.class?upper_case /> - <#assign capName = minor.class?cap_first /> - <#if minor.typeParams?? > - ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}); - - ${capName}Writer ${lowerName}(String name); - - - void copyReaderToField(String name, FieldReader reader); - StructWriter struct(String name); - ListWriter list(String name); - ListWriter listView(String name); - MapWriter map(String name); - MapWriter map(String name, boolean keysSorted); - void start(); - void end(); - } - - public interface ListWriter extends BaseWriter { - void startList(); - void endList(); - void startListView(); - void endListView(); - StructWriter struct(); - ListWriter list(); - ListWriter listView(); - MapWriter map(); - MapWriter map(boolean keysSorted); - void copyReader(FieldReader reader); - - <#list vv.types as type><#list type.minor as minor> - <#assign lowerName = minor.class?uncap_first /> - <#if lowerName == "int" ><#assign lowerName = "integer" /> - <#assign upperName = minor.class?upper_case /> - <#assign capName = minor.class?cap_first /> - ${capName}Writer ${lowerName}(); - - } - - public interface MapWriter extends ListWriter { - void startMap(); - void endMap(); - - void startEntry(); - void endEntry(); - - MapWriter key(); - MapWriter value(); - } - - public interface ScalarWriter extends - <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Writer, BaseWriter {} - - public interface ComplexWriter { - void allocate(); - void clear(); - void copyReader(FieldReader reader); - StructWriter rootAsStruct(); - ListWriter rootAsList(); - ListWriter rootAsListView(); - MapWriter rootAsMap(boolean 
keysSorted); - - void setPosition(int index); - void setValueCount(int count); - void reset(); - } - - public interface StructOrListWriter { - void start(); - void end(); - StructOrListWriter struct(String name); - /** - * @deprecated use {@link #listOfStruct()} instead. - */ - @Deprecated - StructOrListWriter listoftstruct(String name); - StructOrListWriter listOfStruct(String name); - StructOrListWriter list(String name); - boolean isStructWriter(); - boolean isListWriter(); - VarCharWriter varChar(String name); - IntWriter integer(String name); - BigIntWriter bigInt(String name); - Float4Writer float4(String name); - Float8Writer float8(String name); - BitWriter bit(String name); - VarBinaryWriter binary(String name); - } -} diff --git a/java/vector/src/main/codegen/templates/CaseSensitiveStructWriters.java b/java/vector/src/main/codegen/templates/CaseSensitiveStructWriters.java deleted file mode 100644 index cc0dd7b335c54..0000000000000 --- a/java/vector/src/main/codegen/templates/CaseSensitiveStructWriters.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -<@pp.dropOutputFile /> -<#list ["Nullable", "Single"] as mode> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${mode}CaseSensitiveStructWriter.java" /> -<#assign index = "idx()"> -<#if mode == "Single"> -<#assign containerClass = "NonNullableStructVector" /> -<#else> -<#assign containerClass = "StructVector" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> -/* - * This class is generated using FreeMarker and the ${.template_name} template. - */ -@SuppressWarnings("unused") -public class ${mode}CaseSensitiveStructWriter extends ${mode}StructWriter { - public ${mode}CaseSensitiveStructWriter(${containerClass} container) { - super(container); - } - - @Override - protected String handleCase(final String input){ - return input; - } - - @Override - protected NullableStructWriterFactory getNullableStructWriterFactory() { - return NullableStructWriterFactory.getNullableCaseSensitiveStructWriterFactoryInstance(); - } - -} - diff --git a/java/vector/src/main/codegen/templates/ComplexCopier.java b/java/vector/src/main/codegen/templates/ComplexCopier.java deleted file mode 100644 index 5adad523120da..0000000000000 --- a/java/vector/src/main/codegen/templates/ComplexCopier.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.arrow.vector.complex.impl.UnionMapReader; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.types.Types; - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/ComplexCopier.java" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> - -/* - * This class is generated using freemarker and the ${.template_name} template. - */ -@SuppressWarnings("unused") -public class ComplexCopier { - - /** - * Do a deep copy of the value in input into output - * @param input field to read from - * @param output field to write to - */ - public static void copy(FieldReader input, FieldWriter output) { - writeValue(input, output); - } - - private static void writeValue(FieldReader reader, FieldWriter writer) { - final MinorType mt = reader.getMinorType(); - - switch (mt) { - - case LIST: - case LISTVIEW: - case LARGELIST: - case LARGELISTVIEW: - case FIXED_SIZE_LIST: - if (reader.isSet()) { - writer.startList(); - while (reader.next()) { - FieldReader childReader = reader.reader(); - FieldWriter childWriter = getListWriterForReader(childReader, writer); - if (childReader.isSet()) { - writeValue(childReader, childWriter); - } else { - childWriter.writeNull(); - } - } - writer.endList(); - } else { - writer.writeNull(); - } - break; - case MAP: - if (reader.isSet()) { - UnionMapReader mapReader = (UnionMapReader) reader; - 
writer.startMap(); - while (mapReader.next()) { - FieldReader structReader = reader.reader(); - if (structReader.isSet()) { - writer.startEntry(); - writeValue(mapReader.key(), getMapWriterForReader(mapReader.key(), writer.key())); - writeValue(mapReader.value(), getMapWriterForReader(mapReader.value(), writer.value())); - writer.endEntry(); - } else { - writer.writeNull(); - } - } - writer.endMap(); - } else { - writer.writeNull(); - } - break; - case STRUCT: - if (reader.isSet()) { - writer.start(); - for(String name : reader){ - FieldReader childReader = reader.reader(name); - if (childReader.getMinorType() != Types.MinorType.NULL) { - FieldWriter childWriter = getStructWriterForReader(childReader, writer, name); - if (childReader.isSet()) { - writeValue(childReader, childWriter); - } else { - childWriter.writeNull(); - } - } - } - writer.end(); - } else { - writer.writeNull(); - } - break; - <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - - <#if !minor.typeParams?? 
|| minor.class?starts_with("Decimal") > - - case ${name?upper_case}: - if (reader.isSet()) { - Nullable${name}Holder ${uncappedName}Holder = new Nullable${name}Holder(); - reader.read(${uncappedName}Holder); - if (${uncappedName}Holder.isSet == 1) { - writer.write${name}(<#list fields as field>${uncappedName}Holder.${field.name}<#if field_has_next>, <#if minor.class?starts_with("Decimal")>, new ArrowType.Decimal(${uncappedName}Holder.precision, ${uncappedName}Holder.scale, ${name}Holder.WIDTH * 8)); - } - } else { - writer.writeNull(); - } - break; - - - - } - } - - private static FieldWriter getStructWriterForReader(FieldReader reader, StructWriter writer, String name) { - switch (reader.getMinorType()) { - <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if !minor.typeParams??> - case ${name?upper_case}: - return (FieldWriter) writer.<#if name == "Int">integer<#else>${uncappedName}(name); - - <#if minor.class?starts_with("Decimal")> - case ${name?upper_case}: - if (reader.getField().getType() instanceof ArrowType.Decimal) { - ArrowType.Decimal type = (ArrowType.Decimal) reader.getField().getType(); - return (FieldWriter) writer.${uncappedName}(name, type.getScale(), type.getPrecision()); - } else { - return (FieldWriter) writer.${uncappedName}(name); - } - - - - case STRUCT: - return (FieldWriter) writer.struct(name); - case FIXED_SIZE_LIST: - case LIST: - return (FieldWriter) writer.list(name); - case MAP: - return (FieldWriter) writer.map(name); - case LISTVIEW: - return (FieldWriter) writer.listView(name); - default: - throw new UnsupportedOperationException(reader.getMinorType().toString()); - } - } - - private static FieldWriter getListWriterForReader(FieldReader reader, ListWriter writer) { - switch (reader.getMinorType()) { - <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> - 
<#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if !minor.typeParams?? || minor.class?starts_with("Decimal") > - case ${name?upper_case}: - return (FieldWriter) writer.<#if name == "Int">integer<#else>${uncappedName}(); - - - case STRUCT: - return (FieldWriter) writer.struct(); - case FIXED_SIZE_LIST: - case LIST: - case MAP: - case NULL: - return (FieldWriter) writer.list(); - case LISTVIEW: - return (FieldWriter) writer.listView(); - default: - throw new UnsupportedOperationException(reader.getMinorType().toString()); - } - } - - private static FieldWriter getMapWriterForReader(FieldReader reader, MapWriter writer) { - switch (reader.getMinorType()) { - <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if !minor.typeParams?? || minor.class?starts_with("Decimal") > - case ${name?upper_case}: - return (FieldWriter) writer.<#if name == "Int">integer<#else>${uncappedName}(); - - - case STRUCT: - return (FieldWriter) writer.struct(); - case FIXED_SIZE_LIST: - case LIST: - case NULL: - return (FieldWriter) writer.list(); - case LISTVIEW: - return (FieldWriter) writer.listView(); - case MAP: - return (FieldWriter) writer.map(false); - default: - throw new UnsupportedOperationException(reader.getMinorType().toString()); - } - } -} diff --git a/java/vector/src/main/codegen/templates/ComplexReaders.java b/java/vector/src/main/codegen/templates/ComplexReaders.java deleted file mode 100644 index 48fb6603ad5e3..0000000000000 --- a/java/vector/src/main/codegen/templates/ComplexReaders.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.lang.Override; -import java.util.List; - -import org.apache.arrow.record.TransferPair; -import org.apache.arrow.vector.complex.IndexHolder; -import org.apache.arrow.vector.complex.writer.IntervalWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; - -<@pp.dropOutputFile /> -<#list vv.types as type> -<#list type.minor as minor> -<#list [""] as mode> -<#assign lowerName = minor.class?uncap_first /> -<#if lowerName == "int" ><#assign lowerName = "integer" /> -<#assign name = minor.class?cap_first /> -<#assign javaType = (minor.javaType!type.javaType) /> -<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) /> -<#assign safeType=friendlyType /> -<#if safeType=="byte[]"><#assign safeType="ByteArray" /> - -<#assign hasFriendly = minor.friendlyType!"no" == "no" /> - -<#list ["Nullable"] as nullMode> -<#if mode == "" > -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${name}ReaderImpl.java" /> -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> - -/** - * Source code generated using FreeMarker template ${.template_name} - */ -@SuppressWarnings("unused") -public class ${name}ReaderImpl extends AbstractFieldReader { - - private final ${name}Vector vector; - - public ${name}ReaderImpl(${name}Vector vector){ - super(); - this.vector = 
vector; - } - - public MinorType getMinorType(){ - return vector.getMinorType(); - } - - public Field getField(){ - return vector.getField(); - } - - public boolean isSet(){ - return !vector.isNull(idx()); - } - - public void copyAsValue(${minor.class?cap_first}Writer writer){ - ${minor.class?cap_first}WriterImpl impl = (${minor.class?cap_first}WriterImpl) writer; - impl.vector.copyFromSafe(idx(), impl.idx(), vector); - } - - public void copyAsField(String name, StructWriter writer){ - ${minor.class?cap_first}WriterImpl impl = (${minor.class?cap_first}WriterImpl) writer.${lowerName}(name); - impl.vector.copyFromSafe(idx(), impl.idx(), vector); - } - - <#if nullMode != "Nullable"> - public void read(${minor.class?cap_first}Holder h){ - vector.get(idx(), h); - } - - - public void read(Nullable${minor.class?cap_first}Holder h){ - vector.get(idx(), h); - } - - public ${friendlyType} read${safeType}(){ - return vector.getObject(idx()); - } - - <#if minor.class == "TimeStampSec" || - minor.class == "TimeStampMilli" || - minor.class == "TimeStampMicro" || - minor.class == "TimeStampNano"> - @Override - public ${minor.boxedType} read${minor.boxedType}(){ - return vector.get(idx()); - } - - - public void copyValue(FieldWriter w){ - - } - - public Object readObject(){ - return (Object)vector.getObject(idx()); - } -} - - -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/reader/${name}Reader.java" /> -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.reader; - -<#include "/@includes/vv_imports.ftl" /> -/** - * Source code generated using FreeMarker template ${.template_name} - */ -@SuppressWarnings("unused") -public interface ${name}Reader extends BaseReader{ - - public void read(${minor.class?cap_first}Holder h); - public void read(Nullable${minor.class?cap_first}Holder h); - public Object readObject(); - // read friendly type - public ${friendlyType} read${safeType}(); - public boolean isSet(); - public void 
copyAsValue(${minor.class}Writer writer); - public void copyAsField(String name, ${minor.class}Writer writer); - -} - - - - - - - - diff --git a/java/vector/src/main/codegen/templates/ComplexWriters.java b/java/vector/src/main/codegen/templates/ComplexWriters.java deleted file mode 100644 index 2e3caae1f0f22..0000000000000 --- a/java/vector/src/main/codegen/templates/ComplexWriters.java +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -<@pp.dropOutputFile /> -<#list vv.types as type> -<#list type.minor as minor> -<#list ["Nullable"] as mode> -<#assign name = minor.class?cap_first /> -<#assign eName = name /> -<#assign javaType = (minor.javaType!type.javaType) /> -<#assign fields = minor.fields!type.fields /> -<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) /> - -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${eName}WriterImpl.java" /> -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> - -<#function is_timestamp_tz type> - <#return type?starts_with("TimeStamp") && type?ends_with("TZ")> - - -/* - * This class is generated using FreeMarker on the ${.template_name} template. - */ -@SuppressWarnings("unused") -public class ${eName}WriterImpl extends AbstractFieldWriter { - - final ${name}Vector vector; - -<#if minor.class?ends_with("VarChar")> - private final Text textBuffer = new Text(); - - -public ${eName}WriterImpl(${name}Vector vector) { - this.vector = vector; - } - - @Override - public Field getField() { - return vector.getField(); - } - - @Override - public int getValueCapacity() { - return vector.getValueCapacity(); - } - - @Override - public void allocate() { - vector.allocateNew(); - } - - @Override - public void close() { - vector.close(); - } - - @Override - public void clear() { - vector.clear(); - } - - @Override - protected int idx() { - return super.idx(); - } - - <#if mode == "Repeated"> - - public void write(${minor.class?cap_first}Holder h) { - mutator.addSafe(idx(), h); - vector.setValueCount(idx()+1); - } - - public void write(${minor.class?cap_first}Holder h) { - mutator.addSafe(idx(), h); - vector.setValueCount(idx()+1); - } - - public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { - mutator.addSafe(idx(), <#list fields as field>${field.name}<#if field_has_next>, ); - 
vector.setValueCount(idx()+1); - } - - public void setPosition(int idx) { - super.setPosition(idx); - mutator.startNewValue(idx); - } - - - <#else> - - <#if !minor.class?starts_with("Decimal")> - public void write(${minor.class}Holder h) { - vector.setSafe(idx(), h); - vector.setValueCount(idx()+1); - } - - public void write(Nullable${minor.class}Holder h) { - vector.setSafe(idx(), h); - vector.setValueCount(idx()+1); - } - - public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { - vector.setSafe(idx(), 1<#list fields as field><#if field.include!true >, ${field.name}); - vector.setValueCount(idx()+1); - } - - - <#if minor.class?ends_with("VarChar")> - @Override - public void write${minor.class}(${friendlyType} value) { - vector.setSafe(idx(), value); - vector.setValueCount(idx()+1); - } - - @Override - public void write${minor.class}(String value) { - textBuffer.set(value); - vector.setSafe(idx(), textBuffer); - vector.setValueCount(idx()+1); - } - - - <#if minor.class?starts_with("Decimal")> - - public void write(${minor.class}Holder h){ - DecimalUtility.checkPrecisionAndScale(h.precision, h.scale, vector.getPrecision(), vector.getScale()); - vector.setSafe(idx(), h); - vector.setValueCount(idx() + 1); - } - - public void write(Nullable${minor.class}Holder h){ - if (h.isSet == 1) { - DecimalUtility.checkPrecisionAndScale(h.precision, h.scale, vector.getPrecision(), vector.getScale()); - } - vector.setSafe(idx(), h); - vector.setValueCount(idx() + 1); - } - - public void write${minor.class}(long start, ArrowBuf buffer){ - vector.setSafe(idx(), 1, start, buffer); - vector.setValueCount(idx() + 1); - } - - public void write${minor.class}(long start, ArrowBuf buffer, ArrowType arrowType){ - DecimalUtility.checkPrecisionAndScale(((ArrowType.Decimal) arrowType).getPrecision(), - ((ArrowType.Decimal) arrowType).getScale(), vector.getPrecision(), vector.getScale()); - vector.setSafe(idx(), 1, start, buffer); - 
vector.setValueCount(idx() + 1); - } - - public void write${minor.class}(BigDecimal value){ - // vector.setSafe already does precision and scale checking - vector.setSafe(idx(), value); - vector.setValueCount(idx() + 1); - } - - public void writeBigEndianBytesTo${minor.class}(byte[] value, ArrowType arrowType){ - DecimalUtility.checkPrecisionAndScale(((ArrowType.Decimal) arrowType).getPrecision(), - ((ArrowType.Decimal) arrowType).getScale(), vector.getPrecision(), vector.getScale()); - vector.setBigEndianSafe(idx(), value); - vector.setValueCount(idx() + 1); - } - - public void writeBigEndianBytesTo${minor.class}(byte[] value){ - vector.setBigEndianSafe(idx(), value); - vector.setValueCount(idx() + 1); - } - - - - public void writeNull() { - vector.setNull(idx()); - vector.setValueCount(idx()+1); - } - - - <#if minor.class?ends_with("VarBinary")> - public void write${minor.class}(byte[] value) { - vector.setSafe(idx(), value); - vector.setValueCount(idx() + 1); - } - - public void write${minor.class}(byte[] value, int offset, int length) { - vector.setSafe(idx(), value, offset, length); - vector.setValueCount(idx() + 1); - } - - public void write${minor.class}(ByteBuffer value) { - vector.setSafe(idx(), value, 0, value.remaining()); - vector.setValueCount(idx() + 1); - } - - public void write${minor.class}(ByteBuffer value, int offset, int length) { - vector.setSafe(idx(), value, offset, length); - vector.setValueCount(idx() + 1); - } - -} - -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/writer/${eName}Writer.java" /> -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.writer; - -<#include "/@includes/vv_imports.ftl" /> -/* - * This class is generated using FreeMarker on the ${.template_name} template. 
- */ -@SuppressWarnings("unused") -public interface ${eName}Writer extends BaseWriter { - public void write(${minor.class}Holder h); - -<#if minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - /** - * @deprecated - * The holder version should be used instead because the plain value version does not contain enough information - * to fully specify this field type. - * @see #write(${minor.class}Holder) - */ - @Deprecated - - public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ); -<#if minor.class?starts_with("Decimal")> - - public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, , ArrowType arrowType); - - public void write${minor.class}(${friendlyType} value); - - public void writeBigEndianBytesTo${minor.class}(byte[] value, ArrowType arrowType); - - /** - * @deprecated - * Use either the version that additionally takes in an ArrowType or use the holder version. - * This version does not contain enough information to fully specify this field type. 
- * @see #writeBigEndianBytesTo${minor.class}(byte[], ArrowType) - * @see #write(${minor.class}Holder) - */ - @Deprecated - public void writeBigEndianBytesTo${minor.class}(byte[] value); - - -<#if minor.class?ends_with("VarBinary")> - public void write${minor.class}(byte[] value); - - public void write${minor.class}(byte[] value, int offset, int length); - - public void write${minor.class}(ByteBuffer value); - - public void write${minor.class}(ByteBuffer value, int offset, int length); - - -<#if minor.class?ends_with("VarChar")> - public void write${minor.class}(${friendlyType} value); - - public void write${minor.class}(String value); - -} - - - - diff --git a/java/vector/src/main/codegen/templates/DenseUnionReader.java b/java/vector/src/main/codegen/templates/DenseUnionReader.java deleted file mode 100644 index a085e03ea64e0..0000000000000 --- a/java/vector/src/main/codegen/templates/DenseUnionReader.java +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -import org.apache.arrow.vector.complex.impl.UnionListReader; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/DenseUnionReader.java" /> - - -<#include "/@includes/license.ftl" /> - - package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> -/** - * Source code generated using FreeMarker template ${.template_name} - */ -@SuppressWarnings("unused") -public class DenseUnionReader extends AbstractFieldReader { - - private BaseReader[] readers = new BaseReader[Byte.MAX_VALUE + 1]; - public DenseUnionVector data; - - public DenseUnionReader(DenseUnionVector data) { - this.data = data; - } - - public MinorType getMinorType() { - byte typeId = data.getTypeId(idx()); - return data.getVectorByType(typeId).getMinorType(); - } - - public byte getTypeId() { - return data.getTypeId(idx()); - } - - @Override - public Field getField() { - return data.getField(); - } - - public boolean isSet(){ - return !data.isNull(idx()); - } - - public void read(DenseUnionHolder holder) { - holder.reader = this; - holder.isSet = this.isSet() ? 
1 : 0; - holder.typeId = getTypeId(); - } - - public void read(int index, UnionHolder holder) { - byte typeId = data.getTypeId(index); - getList(typeId).read(index, holder); - } - - private FieldReader getReaderForIndex(int index) { - byte typeId = data.getTypeId(index); - MinorType minorType = data.getVectorByType(typeId).getMinorType(); - FieldReader reader = (FieldReader) readers[typeId]; - if (reader != null) { - return reader; - } - switch (minorType) { - case NULL: - reader = NullReader.INSTANCE; - break; - case STRUCT: - reader = (FieldReader) getStruct(typeId); - break; - case LIST: - reader = (FieldReader) getList(typeId); - break; - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign uncappedName = name?uncap_first/> - <#if !minor.typeParams?? || minor.class?starts_with("Decimal")> - case ${name?upper_case}: - reader = (FieldReader) get${name}(typeId); - break; - - - - default: - throw new UnsupportedOperationException("Unsupported type: " + MinorType.values()[typeId]); - } - return reader; - } - - private SingleStructReaderImpl structReader; - - private StructReader getStruct(byte typeId) { - StructReader structReader = (StructReader) readers[typeId]; - if (structReader == null) { - structReader = (SingleStructReaderImpl) data.getVectorByType(typeId).getReader(); - structReader.setPosition(idx()); - readers[typeId] = structReader; - } - return structReader; - } - - private UnionListReader listReader; - - private FieldReader getList(byte typeId) { - UnionListReader listReader = (UnionListReader) readers[typeId]; - if (listReader == null) { - listReader = new UnionListReader((ListVector) data.getVectorByType(typeId)); - listReader.setPosition(idx()); - readers[typeId] = listReader; - } - return listReader; - } - - private UnionMapReader mapReader; - - private FieldReader getMap(byte typeId) { - UnionMapReader mapReader = (UnionMapReader) readers[typeId]; - if (mapReader == null) { - mapReader = new 
UnionMapReader((MapVector) data.getVectorByType(typeId)); - mapReader.setPosition(idx()); - readers[typeId] = mapReader; - } - return mapReader; - } - - @Override - public java.util.Iterator iterator() { - throw new UnsupportedOperationException(); - } - - @Override - public void copyAsValue(UnionWriter writer) { - writer.data.copyFrom(idx(), writer.idx(), data); - } - - <#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean", - "LocalDateTime", "Duration", "Period", "Double", "Float", - "Character", "Text", "Byte", "byte[]", "PeriodDuration"] as friendlyType> - <#assign safeType=friendlyType /> - <#if safeType=="byte[]"><#assign safeType="ByteArray" /> - - @Override - public ${friendlyType} read${safeType}() { - return getReaderForIndex(idx()).read${safeType}(); - } - - - - public int size() { - return getReaderForIndex(idx()).size(); - } - - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign uncappedName = name?uncap_first/> - <#assign boxedType = (minor.boxedType!type.boxedType) /> - <#assign javaType = (minor.javaType!type.javaType) /> - <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) /> - <#assign safeType=friendlyType /> - <#if safeType=="byte[]"><#assign safeType="ByteArray" /> - <#if !minor.typeParams?? 
|| minor.class?starts_with("Decimal")> - - private ${name}ReaderImpl get${name}(byte typeId) { - ${name}ReaderImpl reader = (${name}ReaderImpl) readers[typeId]; - if (reader == null) { - reader = new ${name}ReaderImpl((${name}Vector) data.getVectorByType(typeId)); - reader.setPosition(idx()); - readers[typeId] = reader; - } - return reader; - } - - public void read(Nullable${name}Holder holder){ - getReaderForIndex(idx()).read(holder); - } - - public void copyAsValue(${name}Writer writer){ - getReaderForIndex(idx()).copyAsValue(writer); - } - - - - - @Override - public void copyAsValue(ListWriter writer) { - ComplexCopier.copy(this, (FieldWriter) writer); - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - byte typeId = data.getTypeId(index); - if (readers[typeId] != null) { - int offset = data.getOffset(index); - readers[typeId].setPosition(offset); - } - } - - public FieldReader reader(byte typeId, String name){ - return getStruct(typeId).reader(name); - } - - public FieldReader reader(byte typeId) { - return getList(typeId).reader(); - } - - public boolean next() { - return getReaderForIndex(idx()).next(); - } -} diff --git a/java/vector/src/main/codegen/templates/DenseUnionVector.java b/java/vector/src/main/codegen/templates/DenseUnionVector.java deleted file mode 100644 index e9a9a2878b88c..0000000000000 --- a/java/vector/src/main/codegen/templates/DenseUnionVector.java +++ /dev/null @@ -1,999 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReferenceManager; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.AbstractStructVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.compare.RangeEqualsVisitor; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.DataSizeRoundingUtil; -import org.apache.arrow.vector.util.TransferPair; - -import java.util.Arrays; -import java.util.stream.Collectors; - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/DenseUnionVector.java" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex; - -<#include "/@includes/vv_imports.ftl" /> -import java.util.ArrayList; 
-import java.util.Collections; -import java.util.Iterator; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.memory.util.hash.SimpleHasher; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.impl.ComplexCopier; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.util.Preconditions; - -import static org.apache.arrow.vector.types.UnionMode.Dense; - - - -/* - * This class is generated using freemarker and the ${.template_name} template. - */ -@SuppressWarnings("unused") - - -/** - * A vector which can hold values of different types. It does so by using a StructVector which contains a vector for each - * primitive type that is stored. StructVector is used in order to take advantage of its serialization/deserialization methods, - * as well as the addOrGet method. - * - * For performance reasons, DenseUnionVector stores a cached reference to each subtype vector, to avoid having to do the struct lookup - * each time the vector is accessed. - * Source code generated using FreeMarker template ${.template_name} - */ -public class DenseUnionVector extends AbstractContainerVector implements FieldVector, ValueIterableVector { - int valueCount; - - NonNullableStructVector internalStruct; - private ArrowBuf typeBuffer; - private ArrowBuf offsetBuffer; - - /** - * The key is type Id, and the value is vector. - */ - private ValueVector[] childVectors = new ValueVector[Byte.MAX_VALUE + 1]; - - /** - * The index is the type id, and the value is the type field. 
- */ - private Field[] typeFields = new Field[Byte.MAX_VALUE + 1]; - /** - * The index is the index into the typeFields array, and the value is the logical field id. - */ - private byte[] typeMapFields = new byte[Byte.MAX_VALUE + 1]; - - /** - * The next type id to allocate. - */ - private byte nextTypeId = 0; - - private FieldReader reader; - - private long typeBufferAllocationSizeInBytes; - private long offsetBufferAllocationSizeInBytes; - - private final FieldType fieldType; - - public static final byte TYPE_WIDTH = 1; - public static final byte OFFSET_WIDTH = 4; - - private static final FieldType INTERNAL_STRUCT_TYPE = new FieldType(/*nullable*/ false, - ArrowType.Struct.INSTANCE, /*dictionary*/ null, /*metadata*/ null); - - public static DenseUnionVector empty(String name, BufferAllocator allocator) { - FieldType fieldType = FieldType.notNullable(new ArrowType.Union( - UnionMode.Dense, null)); - return new DenseUnionVector(name, allocator, fieldType, null); - } - - public DenseUnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - super(name, allocator, callBack); - this.fieldType = fieldType; - this.internalStruct = new NonNullableStructVector( - "internal", - allocator, - INTERNAL_STRUCT_TYPE, - callBack, - AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE, - false); - this.typeBuffer = allocator.getEmpty(); - this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH; - this.offsetBuffer = allocator.getEmpty(); - this.offsetBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; - } - - public BufferAllocator getAllocator() { - return allocator; - } - - @Override - public MinorType getMinorType() { - return MinorType.DENSEUNION; - } - - @Override - public void initializeChildrenFromFields(List children) { - for (Field field : children) { - byte typeId = registerNewTypeId(field); - FieldVector vector = (FieldVector) internalStruct.add(field.getName(), 
field.getFieldType()); - vector.initializeChildrenFromFields(field.getChildren()); - childVectors[typeId] = vector; - } - } - - @Override - public List getChildrenFromFields() { - return internalStruct.getChildrenFromFields(); - } - - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - if (ownBuffers.size() != 2) { - throw new IllegalArgumentException("Illegal buffer count for dense union with type " + getField().getFieldType() + - ", expected " + 2 + ", got: " + ownBuffers.size()); - } - - ArrowBuf buffer = ownBuffers.get(0); - typeBuffer.getReferenceManager().release(); - typeBuffer = buffer.getReferenceManager().retain(buffer, allocator); - typeBufferAllocationSizeInBytes = typeBuffer.capacity(); - - buffer = ownBuffers.get(1); - offsetBuffer.getReferenceManager().release(); - offsetBuffer = buffer.getReferenceManager().retain(buffer, allocator); - offsetBufferAllocationSizeInBytes = offsetBuffer.capacity(); - - this.valueCount = fieldNode.getLength(); - } - - @Override - public List getFieldBuffers() { - List result = new ArrayList<>(2); - setReaderAndWriterIndex(); - result.add(typeBuffer); - result.add(offsetBuffer); - - return result; - } - - private void setReaderAndWriterIndex() { - typeBuffer.readerIndex(0); - typeBuffer.writerIndex(valueCount * TYPE_WIDTH); - - offsetBuffer.readerIndex(0); - offsetBuffer.writerIndex((long) valueCount * OFFSET_WIDTH); - } - - /** - * Get the inner vectors. - * - * @deprecated This API will be removed as the current implementations no longer support inner vectors. - * - * @return the inner vectors for this field as defined by the TypeLayout - */ - @Override - @Deprecated - public List getFieldInnerVectors() { - throw new UnsupportedOperationException("There are no inner vectors. 
Use geFieldBuffers"); - } - - private String fieldName(byte typeId, MinorType type) { - return type.name().toLowerCase() + typeId; - } - - private FieldType fieldType(MinorType type) { - return FieldType.nullable(type.getType()); - } - - public synchronized byte registerNewTypeId(Field field) { - if (nextTypeId == typeFields.length) { - throw new IllegalStateException("Dense union vector support at most " + - typeFields.length + " relative types. Please use union of union instead"); - } - byte typeId = nextTypeId; - if (this.fieldType != null) { - int[] typeIds = ((ArrowType.Union) this.fieldType.getType()).getTypeIds(); - if (typeIds != null) { - int thisTypeId = typeIds[nextTypeId]; - if (thisTypeId > Byte.MAX_VALUE) { - throw new IllegalStateException("Dense union vector types must be bytes. " + thisTypeId + " is too large"); - } - typeId = (byte) thisTypeId; - } - } - typeFields[typeId] = field; - typeMapFields[nextTypeId] = typeId; - this.nextTypeId += 1; - return typeId; - } - - private T addOrGet(byte typeId, MinorType minorType, Class c) { - return internalStruct.addOrGet(fieldName(typeId, minorType), fieldType(minorType), c); - } - - private T addOrGet(byte typeId, MinorType minorType, ArrowType arrowType, Class c) { - return internalStruct.addOrGet(fieldName(typeId, minorType), FieldType.nullable(arrowType), c); - } - - @Override - public long getOffsetBufferAddress() { - return offsetBuffer.memoryAddress(); - } - - @Override - public long getDataBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public long getValidityBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getValidityBuffer() { throw new UnsupportedOperationException(); } - - @Override - public ArrowBuf getOffsetBuffer() { return offsetBuffer; } - - public ArrowBuf getTypeBuffer() { return typeBuffer; } - - @Override - public ArrowBuf getDataBuffer() { throw new UnsupportedOperationException(); } - - public 
StructVector getStruct(byte typeId) { - StructVector structVector = typeId < 0 ? null : (StructVector) childVectors[typeId]; - if (structVector == null) { - int vectorCount = internalStruct.size(); - structVector = addOrGet(typeId, MinorType.STRUCT, StructVector.class); - if (internalStruct.size() > vectorCount) { - structVector.allocateNew(); - childVectors[typeId] = structVector; - if (callBack != null) { - callBack.doWork(); - } - } - } - return structVector; - } - - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#assign lowerCaseName = name?lower_case/> - <#if !minor.typeParams?? || minor.class?starts_with("Decimal")> - - public ${name}Vector get${name}Vector(byte typeId<#if minor.class?starts_with("Decimal")>, ArrowType arrowType) { - ValueVector vector = typeId < 0 ? null : childVectors[typeId]; - if (vector == null) { - int vectorCount = internalStruct.size(); - vector = addOrGet(typeId, MinorType.${name?upper_case}<#if minor.class?starts_with("Decimal")>, arrowType, ${name}Vector.class); - childVectors[typeId] = vector; - if (internalStruct.size() > vectorCount) { - vector.allocateNew(); - if (callBack != null) { - callBack.doWork(); - } - } - } - return (${name}Vector) vector; - } - - - - - public ListVector getList(byte typeId) { - ListVector listVector = typeId < 0 ? null : (ListVector) childVectors[typeId]; - if (listVector == null) { - int vectorCount = internalStruct.size(); - listVector = addOrGet(typeId, MinorType.LIST, ListVector.class); - if (internalStruct.size() > vectorCount) { - listVector.allocateNew(); - childVectors[typeId] = listVector; - if (callBack != null) { - callBack.doWork(); - } - } - } - return listVector; - } - - public MapVector getMap(byte typeId) { - MapVector mapVector = typeId < 0 ? 
null : (MapVector) childVectors[typeId]; - if (mapVector == null) { - int vectorCount = internalStruct.size(); - mapVector = addOrGet(typeId, MinorType.MAP, MapVector.class); - if (internalStruct.size() > vectorCount) { - mapVector.allocateNew(); - childVectors[typeId] = mapVector; - if (callBack != null) { - callBack.doWork(); - } - } - } - return mapVector; - } - - public byte getTypeId(int index) { - return typeBuffer.getByte(index * TYPE_WIDTH); - } - - public ValueVector getVectorByType(byte typeId) { - return typeId < 0 ? null : childVectors[typeId]; - } - - @Override - public void allocateNew() throws OutOfMemoryException { - /* new allocation -- clear the current buffers */ - clear(); - internalStruct.allocateNew(); - try { - allocateTypeBuffer(); - allocateOffsetBuffer(); - } catch (Exception e) { - clear(); - throw e; - } - } - - @Override - public boolean allocateNewSafe() { - /* new allocation -- clear the current buffers */ - clear(); - boolean safe = internalStruct.allocateNewSafe(); - if (!safe) { return false; } - try { - allocateTypeBuffer(); - allocateOffsetBuffer(); - } catch (Exception e) { - clear(); - return false; - } - - return true; - } - - private void allocateTypeBuffer() { - typeBuffer = allocator.buffer(typeBufferAllocationSizeInBytes); - typeBuffer.readerIndex(0); - setNegative(0, typeBuffer.capacity()); - } - - private void allocateOffsetBuffer() { - offsetBuffer = allocator.buffer(offsetBufferAllocationSizeInBytes); - offsetBuffer.readerIndex(0); - offsetBuffer.setZero(0, offsetBuffer.capacity()); - } - - - @Override - public void reAlloc() { - internalStruct.reAlloc(); - reallocTypeBuffer(); - reallocOffsetBuffer(); - } - - public int getOffset(int index) { - return offsetBuffer.getInt((long) index * OFFSET_WIDTH); - } - - private void reallocTypeBuffer() { - final long currentBufferCapacity = typeBuffer.capacity(); - long newAllocationSize = currentBufferCapacity * 2; - if (newAllocationSize == 0) { - if 
(typeBufferAllocationSizeInBytes > 0) { - newAllocationSize = typeBufferAllocationSizeInBytes; - } else { - newAllocationSize = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH * 2; - } - } - - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - assert newAllocationSize >= 1; - - if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - - final ArrowBuf newBuf = allocator.buffer((int)newAllocationSize); - newBuf.setBytes(0, typeBuffer, 0, currentBufferCapacity); - typeBuffer.getReferenceManager().release(1); - typeBuffer = newBuf; - typeBufferAllocationSizeInBytes = (int)newAllocationSize; - setNegative(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - } - - private void reallocOffsetBuffer() { - final long currentBufferCapacity = offsetBuffer.capacity(); - long newAllocationSize = currentBufferCapacity * 2; - if (newAllocationSize == 0) { - if (offsetBufferAllocationSizeInBytes > 0) { - newAllocationSize = offsetBufferAllocationSizeInBytes; - } else { - newAllocationSize = BaseValueVector.INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2; - } - } - - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - assert newAllocationSize >= 1; - - if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - - final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize); - newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - offsetBuffer.getReferenceManager().release(1); - offsetBuffer = newBuf; - offsetBufferAllocationSizeInBytes = (int) newAllocationSize; - } - - @Override - public void setInitialCapacity(int numRecords) { } - - @Override - public int getValueCapacity() { - long capacity = getTypeBufferValueCapacity(); - long offsetCapacity = 
getOffsetBufferValueCapacity(); - if (offsetCapacity < capacity) { - capacity = offsetCapacity; - } - long structCapacity = internalStruct.getValueCapacity(); - if (structCapacity < capacity) { - structCapacity = capacity; - } - return (int) capacity; - } - - @Override - public void close() { - clear(); - } - - @Override - public void clear() { - valueCount = 0; - typeBuffer.getReferenceManager().release(); - typeBuffer = allocator.getEmpty(); - offsetBuffer.getReferenceManager().release(); - offsetBuffer = allocator.getEmpty(); - internalStruct.clear(); - } - - @Override - public void reset() { - valueCount = 0; - setNegative(0, typeBuffer.capacity()); - offsetBuffer.setZero(0, offsetBuffer.capacity()); - internalStruct.reset(); - } - - @Override - public Field getField() { - int childCount = (int) Arrays.stream(typeFields).filter(field -> field != null).count(); - List childFields = new ArrayList<>(childCount); - int[] typeIds = new int[childCount]; - for (int i = 0; i < typeFields.length; i++) { - if (typeFields[i] != null) { - int curIdx = childFields.size(); - typeIds[curIdx] = i; - childFields.add(typeFields[i]); - } - } - - FieldType fieldType; - if (this.fieldType == null) { - fieldType = FieldType.nullable(new ArrowType.Union(Dense, typeIds)); - } else { - final UnionMode mode = UnionMode.Dense; - fieldType = new FieldType(this.fieldType.isNullable(), new ArrowType.Union(mode, typeIds), - this.fieldType.getDictionary(), this.fieldType.getMetadata()); - } - - return new Field(name, fieldType, childFields); - } - - @Override - public TransferPair getTransferPair(BufferAllocator allocator) { - return getTransferPair(name, allocator); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return getTransferPair(ref, allocator, null); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return new 
org.apache.arrow.vector.complex.DenseUnionVector.TransferImpl(ref, allocator, callBack); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return getTransferPair(field, allocator, null); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return new org.apache.arrow.vector.complex.DenseUnionVector.TransferImpl(field, allocator, callBack); - } - - @Override - public TransferPair makeTransferPair(ValueVector target) { - return new TransferImpl((DenseUnionVector) target); - } - - @Override - public void copyFrom(int inIndex, int outIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - DenseUnionVector fromCast = (DenseUnionVector) from; - int inOffset = fromCast.offsetBuffer.getInt((long) inIndex * OFFSET_WIDTH); - fromCast.getReader().setPosition(inOffset); - int outOffset = offsetBuffer.getInt((long) outIndex * OFFSET_WIDTH); - getWriter().setPosition(outOffset); - ComplexCopier.copy(fromCast.reader, writer); - } - - @Override - public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { - copyFrom(inIndex, outIndex, from); - } - - public FieldVector addVector(byte typeId, FieldVector v) { - final String name = v.getName().isEmpty() ? 
fieldName(typeId, v.getMinorType()) : v.getName(); - Preconditions.checkState(internalStruct.getChild(name) == null, String.format("%s vector already exists", name)); - final FieldVector newVector = internalStruct.addOrGet(name, v.getField().getFieldType(), v.getClass()); - v.makeTransferPair(newVector).transfer(); - internalStruct.putChild(name, newVector); - childVectors[typeId] = newVector; - if (callBack != null) { - callBack.doWork(); - } - return newVector; - } - - private class TransferImpl implements TransferPair { - private final TransferPair[] internalTransferPairs = new TransferPair[nextTypeId]; - private final DenseUnionVector to; - - public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { - to = new DenseUnionVector(name, allocator, null, callBack); - internalStruct.makeTransferPair(to.internalStruct); - createTransferPairs(); - } - - public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { - to = new DenseUnionVector(field.getName(), allocator, null, callBack); - internalStruct.makeTransferPair(to.internalStruct); - createTransferPairs(); - } - - public TransferImpl(DenseUnionVector to) { - this.to = to; - internalStruct.makeTransferPair(to.internalStruct); - createTransferPairs(); - } - - private void createTransferPairs() { - for (int i = 0; i < nextTypeId; i++) { - ValueVector srcVec = internalStruct.getVectorById(i); - ValueVector dstVec = to.internalStruct.getVectorById(i); - to.typeFields[i] = typeFields[i]; - to.typeMapFields[i] = typeMapFields[i]; - to.childVectors[i] = dstVec; - internalTransferPairs[i] = srcVec.makeTransferPair(dstVec); - } - } - - @Override - public void transfer() { - to.clear(); - - ReferenceManager refManager = typeBuffer.getReferenceManager(); - to.typeBuffer = refManager.transferOwnership(typeBuffer, to.allocator).getTransferredBuffer(); - - refManager = offsetBuffer.getReferenceManager(); - to.offsetBuffer = refManager.transferOwnership(offsetBuffer, 
to.allocator).getTransferredBuffer(); - - for (int i = 0; i < nextTypeId; i++) { - if (internalTransferPairs[i] != null) { - internalTransferPairs[i].transfer(); - to.childVectors[i] = internalTransferPairs[i].getTo(); - } - } - to.valueCount = valueCount; - clear(); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - to.clear(); - - // transfer type buffer - int startPoint = startIndex * TYPE_WIDTH; - int sliceLength = length * TYPE_WIDTH; - ArrowBuf slicedBuffer = typeBuffer.slice(startPoint, sliceLength); - ReferenceManager refManager = slicedBuffer.getReferenceManager(); - to.typeBuffer = refManager.transferOwnership(slicedBuffer, to.allocator).getTransferredBuffer(); - - // transfer offset buffer - while (to.offsetBuffer.capacity() < (long) length * OFFSET_WIDTH) { - to.reallocOffsetBuffer(); - } - - int [] typeCounts = new int[nextTypeId]; - int [] typeStarts = new int[nextTypeId]; - for (int i = 0; i < typeCounts.length; i++) { - typeCounts[i] = 0; - typeStarts[i] = -1; - } - - for (int i = startIndex; i < startIndex + length; i++) { - byte typeId = typeBuffer.getByte(i); - if (typeId >= 0) { - to.offsetBuffer.setInt((long) (i - startIndex) * OFFSET_WIDTH, typeCounts[typeId]); - typeCounts[typeId] += 1; - if (typeStarts[typeId] == -1) { - typeStarts[typeId] = offsetBuffer.getInt((long) i * OFFSET_WIDTH); - } - } - } - - // transfer vector values - for (int i = 0; i < nextTypeId; i++) { - if (typeCounts[i] > 0 && typeStarts[i] != -1) { - internalTransferPairs[i].splitAndTransfer(typeStarts[i], typeCounts[i]); - to.childVectors[i] = internalTransferPairs[i].getTo(); - } - } - - to.setValueCount(length); - } - - @Override - public ValueVector getTo() { - return to; - } - - @Override - public void copyValueSafe(int from, int to) { - this.to.copyFrom(from, to, DenseUnionVector.this); - } - } - - @Override - public FieldReader getReader() { - if (reader == null) { - reader = new DenseUnionReader(this); - } - return reader; - } - - 
public FieldWriter getWriter() { - if (writer == null) { - writer = new DenseUnionWriter(this); - } - return writer; - } - - @Override - public int getBufferSize() { - return this.getBufferSizeFor(this.valueCount); - } - - @Override - public int getBufferSizeFor(final int count) { - if (count == 0) { - return 0; - } - - int[] counts = new int[Byte.MAX_VALUE + 1]; - for (int i = 0; i < count; i++) { - byte typeId = getTypeId(i); - if (typeId != -1) { - counts[typeId] += 1; - } - } - - long childBytes = 0; - for (int typeId = 0; typeId < childVectors.length; typeId++) { - ValueVector childVector = childVectors[typeId]; - if (childVector != null) { - childBytes += childVector.getBufferSizeFor(counts[typeId]); - } - } - - return (int) (count * TYPE_WIDTH + (long) count * OFFSET_WIDTH + childBytes); - } - - @Override - public ArrowBuf[] getBuffers(boolean clear) { - List list = new java.util.ArrayList<>(); - setReaderAndWriterIndex(); - if (getBufferSize() != 0) { - list.add(typeBuffer); - list.add(offsetBuffer); - list.addAll(java.util.Arrays.asList(internalStruct.getBuffers(clear))); - } - if (clear) { - valueCount = 0; - typeBuffer.getReferenceManager().retain(); - typeBuffer.close(); - typeBuffer = allocator.getEmpty(); - offsetBuffer.getReferenceManager().retain(); - offsetBuffer.close(); - offsetBuffer = allocator.getEmpty(); - } - return list.toArray(new ArrowBuf[list.size()]); - } - - @Override - public Iterator iterator() { - return internalStruct.iterator(); - } - - private ValueVector getVector(int index) { - byte typeId = typeBuffer.getByte(index * TYPE_WIDTH); - return getVectorByType(typeId); - } - - public Object getObject(int index) { - ValueVector vector = getVector(index); - if (vector != null) { - int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); - return vector.isNull(offset) ? 
null : vector.getObject(offset); - } - return null; - } - - public void get(int index, DenseUnionHolder holder) { - FieldReader reader = new DenseUnionReader(DenseUnionVector.this); - reader.setPosition(index); - holder.reader = reader; - } - - public int getValueCount() { - return valueCount; - } - - /** - * IMPORTANT: Union types always return non null as there is no validity buffer. - * - * To check validity correctly you must check the underlying vector. - */ - public boolean isNull(int index) { - return false; - } - - @Override - public int getNullCount() { - return 0; - } - - public int isSet(int index) { - return isNull(index) ? 0 : 1; - } - - DenseUnionWriter writer; - - public void setValueCount(int valueCount) { - this.valueCount = valueCount; - while (valueCount > getTypeBufferValueCapacity()) { - reallocTypeBuffer(); - reallocOffsetBuffer(); - } - setChildVectorValueCounts(); - } - - private void setChildVectorValueCounts() { - int [] counts = new int[Byte.MAX_VALUE + 1]; - for (int i = 0; i < this.valueCount; i++) { - byte typeId = getTypeId(i); - if (typeId != -1) { - counts[typeId] += 1; - } - } - for (int i = 0; i < nextTypeId; i++) { - childVectors[typeMapFields[i]].setValueCount(counts[typeMapFields[i]]); - } - } - - public void setSafe(int index, DenseUnionHolder holder) { - FieldReader reader = holder.reader; - if (writer == null) { - writer = new DenseUnionWriter(DenseUnionVector.this); - } - int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); - MinorType type = reader.getMinorType(); - writer.setPosition(offset); - byte typeId = holder.typeId; - switch (type) { - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if !minor.typeParams?? 
|| minor.class?starts_with("Decimal")> - case ${name?upper_case}: - Nullable${name}Holder ${uncappedName}Holder = new Nullable${name}Holder(); - reader.read(${uncappedName}Holder); - setSafe(index, ${uncappedName}Holder); - break; - - - - case STRUCT: - case LIST: { - setTypeId(index, typeId); - ComplexCopier.copy(reader, writer); - break; - } - default: - throw new UnsupportedOperationException(); - } - } - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if !minor.typeParams?? || minor.class?starts_with("Decimal")> - public void setSafe(int index, Nullable${name}Holder holder) { - while (index >= getOffsetBufferValueCapacity()) { - reallocOffsetBuffer(); - } - byte typeId = getTypeId(index); - ${name}Vector vector = get${name}Vector(typeId<#if minor.class?starts_with("Decimal")>, new ArrowType.Decimal(holder.precision, holder.scale, holder.WIDTH * 8)); - int offset = vector.getValueCount(); - vector.setValueCount(offset + 1); - vector.setSafe(offset, holder); - offsetBuffer.setInt((long) index * OFFSET_WIDTH, offset); - } - - - - - public void setTypeId(int index, byte typeId) { - while (index >= getTypeBufferValueCapacity()) { - reallocTypeBuffer(); - } - typeBuffer.setByte(index * TYPE_WIDTH , typeId); - } - - private int getTypeBufferValueCapacity() { - return (int) typeBuffer.capacity() / TYPE_WIDTH; - } - - public void setOffset(int index, int offset) { - while (index >= getOffsetBufferValueCapacity()) { - reallocOffsetBuffer(); - } - - offsetBuffer.setInt((long) index * OFFSET_WIDTH, offset); - } - - private long getOffsetBufferValueCapacity() { - return offsetBuffer.capacity() / OFFSET_WIDTH; - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - if (isNull(index)) { - return 0; - } - int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); - return getVector(index).hashCode(offset, 
hasher); - } - - @Override - public int hashCode(int index) { - return hashCode(index, SimpleHasher.INSTANCE); - } - - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } - - @Override - public String getName() { - return name; - } - - private void setNegative(long start, long end) { - for (long i = start;i < end; i++) { - typeBuffer.setByte(i, -1); - } - } - - @Override - public T addOrGet(String name, FieldType fieldType, Class clazz) { - return internalStruct.addOrGet(name, fieldType, clazz); - } - - @Override - public T getChild(String name, Class clazz) { - return internalStruct.getChild(name, clazz); - } - - @Override - public VectorWithOrdinal getChildVectorWithOrdinal(String name) { - return internalStruct.getChildVectorWithOrdinal(name); - } - - @Override - public int size() { - return internalStruct.size(); - } - - @Override - public void setInitialCapacity(int valueCount, double density) { - for (final ValueVector vector : internalStruct) { - if (vector instanceof DensityAwareVector) { - ((DensityAwareVector) vector).setInitialCapacity(valueCount, density); - } else { - vector.setInitialCapacity(valueCount); - } - } - } - - /** - * Set the element at the given index to null. For DenseUnionVector, it throws an UnsupportedOperationException - * as nulls are not supported at the top level and isNull() always returns false. 
- * - * @param index position of element - * @throws UnsupportedOperationException whenever invoked - */ - @Override - public void setNull(int index) { - throw new UnsupportedOperationException("The method setNull() is not supported on DenseUnionVector."); - } -} diff --git a/java/vector/src/main/codegen/templates/DenseUnionWriter.java b/java/vector/src/main/codegen/templates/DenseUnionWriter.java deleted file mode 100644 index 8515b759e669e..0000000000000 --- a/java/vector/src/main/codegen/templates/DenseUnionWriter.java +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.impl.NullableStructWriterFactory; -import org.apache.arrow.vector.types.Types; - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/DenseUnionWriter.java" /> - - -<#include "/@includes/license.ftl" /> - - package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> - import org.apache.arrow.vector.complex.writer.BaseWriter; - import org.apache.arrow.vector.types.Types.MinorType; - -/* - * This class is generated using freemarker and the ${.template_name} template. - */ -@SuppressWarnings("unused") -public class DenseUnionWriter extends AbstractFieldWriter implements FieldWriter { - - DenseUnionVector data; - - private BaseWriter[] writers = new BaseWriter[Byte.MAX_VALUE + 1]; - private final NullableStructWriterFactory nullableStructWriterFactory; - - public DenseUnionWriter(DenseUnionVector vector) { - this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); - } - - public DenseUnionWriter(DenseUnionVector vector, NullableStructWriterFactory nullableStructWriterFactory) { - data = vector; - this.nullableStructWriterFactory = nullableStructWriterFactory; - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - for (BaseWriter writer : writers) { - writer.setPosition(index); - } - } - - @Override - public void start() { - byte typeId = data.getTypeId(idx()); - getStructWriter((byte) idx()).start(); - } - - @Override - public void end() { - byte typeId = data.getTypeId(idx()); - getStructWriter(typeId).end(); - } - - @Override - public void startList() { - byte typeId = data.getTypeId(idx()); - getListWriter(typeId).startList(); - } - - @Override - public void endList() { - byte typeId = data.getTypeId(idx()); - getListWriter(typeId).endList(); - } - - @Override - public void startListView() { - byte typeId = data.getTypeId(idx()); - 
getListViewWriter(typeId).startList(); - } - - @Override - public void endListView() { - byte typeId = data.getTypeId(idx()); - getListViewWriter(typeId).endList(); - } - - private StructWriter getStructWriter(byte typeId) { - StructWriter structWriter = (StructWriter) writers[typeId]; - if (structWriter == null) { - structWriter = nullableStructWriterFactory.build((StructVector) data.getVectorByType(typeId)); - writers[typeId] = structWriter; - } - return structWriter; - } - - public StructWriter asStruct(byte typeId) { - data.setTypeId(idx(), typeId); - return getStructWriter(typeId); - } - - private ListWriter getListWriter(byte typeId) { - ListWriter listWriter = (ListWriter) writers[typeId]; - if (listWriter == null) { - listWriter = new UnionListWriter((ListVector) data.getVectorByType(typeId), nullableStructWriterFactory); - writers[typeId] = listWriter; - } - return listWriter; - } - - private ListWriter getListViewWriter(byte typeId) { - ListWriter listWriter = (ListWriter) writers[typeId]; - if (listWriter == null) { - listWriter = new UnionListViewWriter((ListViewVector) data.getVectorByType(typeId), nullableStructWriterFactory); - writers[typeId] = listWriter; - } - return listWriter; - } - - public ListWriter asList(byte typeId) { - data.setTypeId(idx(), typeId); - return getListWriter(typeId); - } - - private MapWriter getMapWriter(byte typeId) { - MapWriter mapWriter = (MapWriter) writers[typeId]; - if (mapWriter == null) { - mapWriter = new UnionMapWriter((MapVector) data.getVectorByType(typeId)); - writers[typeId] = mapWriter; - } - return mapWriter; - } - - public MapWriter asMap(byte typeId) { - data.setTypeId(idx(), typeId); - return getMapWriter(typeId); - } - - BaseWriter getWriter(byte typeId) { - MinorType minorType = data.getVectorByType(typeId).getMinorType(); - switch (minorType) { - case STRUCT: - return getStructWriter(typeId); - case LIST: - return getListWriter(typeId); - case MAP: - return getMapWriter(typeId); - <#list vv.types as 
type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if !minor.typeParams?? || minor.class?starts_with("Decimal")> - case ${name?upper_case}: - return get${name}Writer(typeId); - - - - default: - throw new UnsupportedOperationException("Unknown type: " + minorType); - } - } - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if !minor.typeParams?? || minor.class?starts_with("Decimal")> - - private ${name}Writer get${name}Writer(byte typeId) { - ${name}Writer writer = (${name}Writer) writers[typeId]; - if (writer == null) { - writer = new ${name}WriterImpl((${name}Vector) data.getVectorByType(typeId)); - writers[typeId] = writer; - } - return writer; - } - - public ${name}Writer as${name}(byte typeId) { - data.setTypeId(idx(), typeId); - return get${name}Writer(typeId); - } - - @Override - public void write(${name}Holder holder) { - throw new UnsupportedOperationException(); - } - - public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, , byte typeId<#if minor.class?starts_with("Decimal")>, ArrowType arrowType) { - data.setTypeId(idx(), typeId); - get${name}Writer(typeId).setPosition(data.getOffset(idx())); - get${name}Writer(typeId).write${name}(<#list fields as field>${field.name}<#if field_has_next>, <#if minor.class?starts_with("Decimal")>, arrowType); - } - - - - - public void writeNull() { - } - - @Override - public StructWriter struct() { - byte typeId = data.getTypeId(idx()); - data.setTypeId(idx(), typeId); - getListWriter(typeId).setPosition(data.getOffset(idx())); - return getListWriter(typeId).struct(); - } - - @Override - public ListWriter list() { - byte typeId = data.getTypeId(idx()); - data.setTypeId(idx(), typeId); - 
getListWriter(typeId).setPosition(data.getOffset(idx())); - return getListWriter(typeId).list(); - } - - @Override - public ListWriter list(String name) { - byte typeId = data.getTypeId(idx()); - data.setTypeId(idx(), typeId); - getStructWriter(typeId).setPosition(data.getOffset(idx())); - return getStructWriter(typeId).list(name); - } - - @Override - public MapWriter map() { - byte typeId = data.getTypeId(idx()); - data.setTypeId(idx(), typeId); - getListWriter(typeId).setPosition(data.getOffset(idx())); - return getMapWriter(typeId).map(); - } - - @Override - public MapWriter map(String name) { - byte typeId = data.getTypeId(idx()); - data.setTypeId(idx(), typeId); - getStructWriter(typeId).setPosition(data.getOffset(idx())); - return getStructWriter(typeId).map(name); - } - - @Override - public MapWriter map(String name, boolean keysSorted) { - byte typeId = data.getTypeId(idx()); - data.setTypeId(idx(), typeId); - getStructWriter(typeId).setPosition(data.getOffset(idx())); - return getStructWriter(typeId).map(name, keysSorted); - } - - @Override - public StructWriter struct(String name) { - byte typeId = data.getTypeId(idx()); - data.setTypeId(idx(), typeId); - getStructWriter(typeId).setPosition(data.getOffset(idx())); - return getStructWriter(typeId).struct(name); - } - - <#list vv.types as type><#list type.minor as minor> - <#assign lowerName = minor.class?uncap_first /> - <#if lowerName == "int" ><#assign lowerName = "integer" /> - <#assign upperName = minor.class?upper_case /> - <#assign capName = minor.class?cap_first /> - <#if !minor.typeParams?? 
|| minor.class?starts_with("Decimal") > - @Override - public ${capName}Writer ${lowerName}(String name) { - byte typeId = data.getTypeId(idx()); - data.setTypeId(idx(), typeId); - getStructWriter(typeId).setPosition(data.getOffset(idx())); - return getStructWriter(typeId).${lowerName}(name); - } - - @Override - public ${capName}Writer ${lowerName}() { - byte typeId = data.getTypeId(idx()); - data.setTypeId(idx(), typeId); - getListWriter(typeId).setPosition(data.getOffset(idx())); - return getListWriter(typeId).${lowerName}(); - } - - <#if minor.class?starts_with("Decimal")> - public ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}) { - byte typeId = data.getTypeId(idx()); - data.setTypeId(idx(), typeId); - getStructWriter(typeId).setPosition(data.getOffset(idx())); - return getStructWriter(typeId).${lowerName}(name<#list minor.typeParams as typeParam>, ${typeParam.name}); - } - - - - @Override - public void allocate() { - data.allocateNew(); - } - - @Override - public void clear() { - data.clear(); - } - - @Override - public void close() throws Exception { - data.close(); - } - - @Override - public Field getField() { - return data.getField(); - } - - @Override - public int getValueCapacity() { - return data.getValueCapacity(); - } -} diff --git a/java/vector/src/main/codegen/templates/HolderReaderImpl.java b/java/vector/src/main/codegen/templates/HolderReaderImpl.java deleted file mode 100644 index 1151ea5d39dda..0000000000000 --- a/java/vector/src/main/codegen/templates/HolderReaderImpl.java +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -<@pp.dropOutputFile /> -<#list vv.types as type> -<#list type.minor as minor> -<#list ["", "Nullable"] as holderMode> -<#assign nullMode = holderMode /> - -<#assign lowerName = minor.class?uncap_first /> -<#if lowerName == "int" ><#assign lowerName = "integer" /> -<#assign name = minor.class?cap_first /> -<#assign javaType = (minor.javaType!type.javaType) /> -<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) /> -<#assign safeType=friendlyType /> -<#if safeType=="byte[]"><#assign safeType="ByteArray" /> -<#assign fields = (minor.fields!type.fields) + minor.typeParams![]/> - -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${holderMode}${name}HolderReaderImpl.java" /> -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> - -// Source code generated using FreeMarker template ${.template_name} - -@SuppressWarnings("unused") -public class ${holderMode}${name}HolderReaderImpl extends AbstractFieldReader { - - private ${nullMode}${name}Holder holder; - public ${holderMode}${name}HolderReaderImpl(${holderMode}${name}Holder holder) { - this.holder = holder; - } - - @Override - public int size() { - throw new UnsupportedOperationException("You can't call size on a Holder value reader."); - } - - @Override - public boolean next() { - throw new 
UnsupportedOperationException("You can't call next on a single value reader."); - - } - - @Override - public void setPosition(int index) { - throw new UnsupportedOperationException("You can't call next on a single value reader."); - } - - @Override - public MinorType getMinorType() { - return MinorType.${name?upper_case}; - } - - @Override - public boolean isSet() { - <#if holderMode == "Nullable"> - return this.holder.isSet == 1; - <#else> - return true; - - } - - @Override - public void read(${name}Holder h) { - <#list fields as field> - h.${field.name} = holder.${field.name}; - - } - - @Override - public void read(Nullable${name}Holder h) { - <#list fields as field> - h.${field.name} = holder.${field.name}; - - h.isSet = isSet() ? 1 : 0; - } - - // read friendly type - @Override - public ${friendlyType} read${safeType}() { - <#if nullMode == "Nullable"> - if (!isSet()) { - return null; - } - - - <#if type.major == "VarLen"> - <#if type.width == 4> - int length = holder.end - holder.start; - <#elseif type.width == 8> - int length = (int) (holder.end - holder.start); - - byte[] value = new byte [length]; - holder.buffer.getBytes(holder.start, value, 0, length); - <#if minor.class == "VarBinary" || minor.class == "LargeVarBinary" || minor.class == "ViewVarBinary"> - return value; - <#elseif minor.class == "VarChar" || minor.class == "LargeVarChar" || minor.class == "ViewVarChar"> - Text text = new Text(); - text.set(value); - return text; - - <#elseif minor.class == "IntervalDay"> - return Duration.ofDays(holder.days).plusMillis(holder.milliseconds); - <#elseif minor.class == "IntervalYear"> - return Period.ofMonths(holder.value); - <#elseif minor.class == "IntervalMonthDayNano"> - return new PeriodDuration(Period.ofMonths(holder.months).plusDays(holder.days), - Duration.ofNanos(holder.nanoseconds)); - <#elseif minor.class == "Duration"> - return DurationVector.toDuration(holder.value, holder.unit); - <#elseif minor.class == "Bit" > - return new 
Boolean(holder.value != 0); - <#elseif minor.class == "Decimal"> - byte[] bytes = new byte[${type.width}]; - holder.buffer.getBytes(holder.start, bytes, 0, ${type.width}); - ${friendlyType} value = new BigDecimal(new BigInteger(bytes), holder.scale); - return value; - <#elseif minor.class == "Decimal256"> - byte[] bytes = new byte[${type.width}]; - holder.buffer.getBytes(holder.start, bytes, 0, ${type.width}); - ${friendlyType} value = new BigDecimal(new BigInteger(bytes), holder.scale); - return value; - <#elseif minor.class == "FixedSizeBinary"> - byte[] value = new byte [holder.byteWidth]; - holder.buffer.getBytes(0, value, 0, holder.byteWidth); - return value; - <#elseif minor.class == "TimeStampSec"> - final long millis = java.util.concurrent.TimeUnit.SECONDS.toMillis(holder.value); - return DateUtility.getLocalDateTimeFromEpochMilli(millis); - <#elseif minor.class == "TimeStampMilli" || minor.class == "DateMilli" || minor.class == "TimeMilli"> - return DateUtility.getLocalDateTimeFromEpochMilli(holder.value); - <#elseif minor.class == "TimeStampMicro"> - return DateUtility.getLocalDateTimeFromEpochMicro(holder.value); - <#elseif minor.class == "TimeStampNano"> - return DateUtility.getLocalDateTimeFromEpochNano(holder.value); - <#else> - ${friendlyType} value = new ${friendlyType}(this.holder.value); - return value; - - } - - @Override - public Object readObject() { - return read${safeType}(); - } - - <#if nullMode != "Nullable"> - public void copyAsValue(${minor.class?cap_first}Writer writer){ - writer.write(holder); - } - -} - - - - diff --git a/java/vector/src/main/codegen/templates/NullReader.java b/java/vector/src/main/codegen/templates/NullReader.java deleted file mode 100644 index 0c65f9a56bfaa..0000000000000 --- a/java/vector/src/main/codegen/templates/NullReader.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.arrow.vector.types.pojo.ArrowType.Null; -import org.apache.arrow.vector.types.pojo.Field; - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/NullReader.java" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> - -/** - * Source code generated using FreeMarker template ${.template_name} - */ -@SuppressWarnings("unused") -public class NullReader extends AbstractBaseReader implements FieldReader{ - - public static final NullReader INSTANCE = new NullReader(); - public static final NullReader EMPTY_LIST_INSTANCE = new NullReader(MinorType.NULL); - public static final NullReader EMPTY_STRUCT_INSTANCE = new NullReader(MinorType.STRUCT); - private MinorType type; - - private NullReader(){ - super(); - type = MinorType.NULL; - } - - private NullReader(MinorType type){ - super(); - this.type = type; - } - - @Override - public MinorType getMinorType() { - return type; - } - - @Override - public Field getField() { - return new Field("", FieldType.nullable(new Null()), null); - } - - public void copyAsValue(StructWriter writer) {} - - public void copyAsValue(ListWriter writer) {} - - public void copyAsValue(UnionWriter writer) {} - - <#list 
vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> - public void read(${name}Holder holder){ - throw new UnsupportedOperationException("NullReader cannot write into non-nullable holder"); - } - - public void read(Nullable${name}Holder holder){ - holder.isSet = 0; - } - - public void read(int arrayIndex, ${name}Holder holder){ - throw new ArrayIndexOutOfBoundsException(); - } - - public void copyAsValue(${minor.class}Writer writer){} - public void copyAsField(String name, ${minor.class}Writer writer){} - - public void read(int arrayIndex, Nullable${name}Holder holder){ - throw new ArrayIndexOutOfBoundsException(); - } - - - public int size(){ - return 0; - } - - public boolean isSet(){ - return false; - } - - public boolean next(){ - return false; - } - - public RepeatedStructReader struct(){ - return this; - } - - public RepeatedListReader list(){ - return this; - } - - public StructReader struct(String name){ - return this; - } - - public ListReader list(String name){ - return this; - } - - public FieldReader reader(String name){ - return this; - } - - public FieldReader reader(){ - return this; - } - - private void fail(String name){ - throw new IllegalArgumentException(String.format("You tried to read a %s type when you are using a ValueReader of type %s.", name, this.getClass().getSimpleName())); - } - - <#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean", - "LocalDateTime", "Duration", "Period", "Double", "Float", - "Character", "Text", "String", "Byte", "byte[]", "PeriodDuration"] as friendlyType> - <#assign safeType=friendlyType /> - <#if safeType=="byte[]"><#assign safeType="ByteArray" /> - - public ${friendlyType} read${safeType}(int arrayIndex){ - return null; - } - - public ${friendlyType} read${safeType}(){ - return null; - } - - -} - - - diff --git a/java/vector/src/main/codegen/templates/PromotableViewWriter.java b/java/vector/src/main/codegen/templates/PromotableViewWriter.java deleted file mode 
100644 index a40901e295557..0000000000000 --- a/java/vector/src/main/codegen/templates/PromotableViewWriter.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/PromotableViewWriter.java" /> - -<#include "/@includes/license.ftl" /> - - package org.apache.arrow.vector.complex.impl; - -import java.util.Locale; -<#include "/@includes/vv_imports.ftl" /> - -/** - * This FieldWriter implementation delegates all FieldWriter API calls to an inner FieldWriter. This - * inner field writer can start as a specific type, and this class will promote the writer to a - * UnionWriter if a call is made that the specifically typed writer cannot handle. A new UnionVector - * is created, wrapping the original vector, and replaces the original vector in the parent vector, - * which can be either an AbstractStructVector or a ListViewVector. - * - *

    The writer used can either be for single elements (struct) or lists. - */ -public class PromotableViewWriter extends PromotableWriter { - - public PromotableViewWriter(ValueVector v, FixedSizeListVector fixedListVector) { - super(v, fixedListVector); - } - - public PromotableViewWriter(ValueVector v, FixedSizeListVector fixedListVector, - NullableStructWriterFactory nullableStructWriterFactory) { - super(v, fixedListVector, nullableStructWriterFactory); - } - - public PromotableViewWriter(ValueVector v, LargeListVector largeListVector) { - super(v, largeListVector); - } - - public PromotableViewWriter(ValueVector v, LargeListVector largeListVector, - NullableStructWriterFactory nullableStructWriterFactory) { - super(v, largeListVector, nullableStructWriterFactory); - } - - public PromotableViewWriter(ValueVector v, ListVector listVector) { - super(v, listVector); - } - - public PromotableViewWriter(ValueVector v, ListVector listVector, - NullableStructWriterFactory nullableStructWriterFactory) { - super(v, listVector, nullableStructWriterFactory); - } - - public PromotableViewWriter(ValueVector v, ListViewVector listViewVector, - NullableStructWriterFactory nullableStructWriterFactory) { - super(v, listViewVector, nullableStructWriterFactory); - } - - public PromotableViewWriter(ValueVector v, LargeListViewVector largeListViewVector) { - super(v, largeListViewVector); - } - - public PromotableViewWriter(ValueVector v, LargeListViewVector largeListViewVector, - NullableStructWriterFactory nullableStructWriterFactory) { - super(v, largeListViewVector, nullableStructWriterFactory); - } - - public PromotableViewWriter(ValueVector v, AbstractStructVector parentContainer) { - super(v, parentContainer); - } - - public PromotableViewWriter(ValueVector v, AbstractStructVector parentContainer, - NullableStructWriterFactory nullableStructWriterFactory) { - super(v, parentContainer, nullableStructWriterFactory); - } - - @Override - protected FieldWriter getWriter(MinorType 
type, ArrowType arrowType) { - if (state == State.UNION) { - if (requiresArrowType(type)) { - writer = ((UnionWriter) writer).toViewWriter(); - ((UnionViewWriter) writer).getWriter(type, arrowType); - } else { - writer = ((UnionWriter) writer).toViewWriter(); - ((UnionViewWriter) writer).getWriter(type); - } - } else if (state == State.UNTYPED) { - if (type == null) { - // ??? - return null; - } - if (arrowType == null) { - arrowType = type.getType(); - } - FieldType fieldType = new FieldType(addVectorAsNullable, arrowType, null, null); - ValueVector v; - if (listVector != null) { - v = listVector.addOrGetVector(fieldType).getVector(); - } else if (fixedListVector != null) { - v = fixedListVector.addOrGetVector(fieldType).getVector(); - } else if (listViewVector != null) { - v = listViewVector.addOrGetVector(fieldType).getVector(); - } else if (largeListVector != null) { - v = largeListVector.addOrGetVector(fieldType).getVector(); - } else { - v = largeListViewVector.addOrGetVector(fieldType).getVector(); - } - v.allocateNew(); - setWriter(v); - writer.setPosition(position); - } else if (type != this.type) { - promoteToUnion(); - if (requiresArrowType(type)) { - writer = ((UnionWriter) writer).toViewWriter(); - ((UnionViewWriter) writer).getWriter(type, arrowType); - } else { - writer = ((UnionWriter) writer).toViewWriter(); - ((UnionViewWriter) writer).getWriter(type); - } - } - return writer; - } - - @Override - public StructWriter struct() { - return getWriter(MinorType.LISTVIEW).struct(); - } - - <#list vv.types as type><#list type.minor as minor> - <#assign lowerName = minor.class?uncap_first /> - <#if lowerName == "int" ><#assign lowerName = "integer" /> - <#assign upperName = minor.class?upper_case /> - <#assign capName = minor.class?cap_first /> - - @Override - public ${capName}Writer ${lowerName}() { - return getWriter(MinorType.LISTVIEW).${lowerName}(); - } - - - - @Override - public void allocate() { - getWriter().allocate(); - } - - @Override - public 
void clear() { - getWriter().clear(); - } - - @Override - public Field getField() { - return getWriter().getField(); - } - - @Override - public int getValueCapacity() { - return getWriter().getValueCapacity(); - } - - @Override - public void close() throws Exception { - getWriter().close(); - } -} diff --git a/java/vector/src/main/codegen/templates/PromotableWriter.java b/java/vector/src/main/codegen/templates/PromotableWriter.java deleted file mode 100644 index c0e686f3178a4..0000000000000 --- a/java/vector/src/main/codegen/templates/PromotableWriter.java +++ /dev/null @@ -1,578 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/PromotableWriter.java" /> - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -import java.util.Locale; -<#include "/@includes/vv_imports.ftl" /> - -/** - * This FieldWriter implementation delegates all FieldWriter API calls to an inner FieldWriter. This - * inner field writer can start as a specific type, and this class will promote the writer to a - * UnionWriter if a call is made that the specifically typed writer cannot handle. 
A new UnionVector - * is created, wrapping the original vector, and replaces the original vector in the parent vector, - * which can be either an AbstractStructVector or a ListVector. - * - *

    The writer used can either be for single elements (struct) or lists. - */ -public class PromotableWriter extends AbstractPromotableFieldWriter { - - protected final AbstractStructVector parentContainer; - protected final ListVector listVector; - protected final ListViewVector listViewVector; - protected final FixedSizeListVector fixedListVector; - protected final LargeListVector largeListVector; - protected final LargeListViewVector largeListViewVector; - protected final NullableStructWriterFactory nullableStructWriterFactory; - protected int position; - protected static final int MAX_DECIMAL_PRECISION = 38; - protected static final int MAX_DECIMAL256_PRECISION = 76; - - protected enum State { - UNTYPED, - SINGLE, - UNION - } - - protected MinorType type; - protected ValueVector vector; - protected UnionVector unionVector; - protected State state; - protected FieldWriter writer; - - /** - * Constructs a new instance. - * - * @param v The vector to write. - * @param parentContainer The parent container for the vector. - */ - public PromotableWriter(ValueVector v, AbstractStructVector parentContainer) { - this(v, parentContainer, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); - } - - /** - * Constructs a new instance. - * - * @param v The vector to initialize the writer with. - * @param parentContainer The parent container for the vector. - * @param nullableStructWriterFactory The factory to create the delegate writer. - */ - public PromotableWriter( - ValueVector v, - AbstractStructVector parentContainer, - NullableStructWriterFactory nullableStructWriterFactory) { - this.parentContainer = parentContainer; - this.listVector = null; - this.listViewVector = null; - this.fixedListVector = null; - this.largeListVector = null; - this.largeListViewVector = null; - this.nullableStructWriterFactory = nullableStructWriterFactory; - init(v); - } - - /** - * Constructs a new instance. - * - * @param v The vector to initialize the writer with. 
- * @param listVector The vector that serves as a parent of v. - */ - public PromotableWriter(ValueVector v, ListVector listVector) { - this(v, listVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); - } - - /** - * Constructs a new instance. - * - * @param v The vector to initialize the writer with. - * @param fixedListVector The vector that serves as a parent of v. - */ - public PromotableWriter(ValueVector v, FixedSizeListVector fixedListVector) { - this(v, fixedListVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); - } - - /** - * Constructs a new instance. - * - * @param v The vector to initialize the writer with. - * @param largeListVector The vector that serves as a parent of v. - */ - public PromotableWriter(ValueVector v, LargeListVector largeListVector) { - this(v, largeListVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); - } - - /** - * Constructs a new instance. - * - * @param v The vector to initialize the writer with. - * @param listViewVector The vector that serves as a parent of v. - */ - public PromotableWriter(ValueVector v, ListViewVector listViewVector) { - this(v, listViewVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); - } - - /** - * Constructs a new instance. - * - * @param v The vector to initialize the writer with. - * @param largeListViewVector The vector that serves as a parent of v. - */ - public PromotableWriter(ValueVector v, LargeListViewVector largeListViewVector) { - this(v, largeListViewVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); - } - - /** - * Constructs a new instance. - * - * @param v The vector to initialize the writer with. - * @param listVector The vector that serves as a parent of v. - * @param nullableStructWriterFactory The factory to create the delegate writer. 
- */ - public PromotableWriter( - ValueVector v, - ListVector listVector, - NullableStructWriterFactory nullableStructWriterFactory) { - this.listVector = listVector; - this.listViewVector = null; - this.parentContainer = null; - this.fixedListVector = null; - this.largeListVector = null; - this.largeListViewVector = null; - this.nullableStructWriterFactory = nullableStructWriterFactory; - init(v); - } - - /** - * Constructs a new instance. - * - * @param v The vector to initialize the writer with. - * @param listViewVector The vector that serves as a parent of v. - * @param nullableStructWriterFactory The factory to create the delegate writer. - */ - public PromotableWriter( - ValueVector v, - ListViewVector listViewVector, - NullableStructWriterFactory nullableStructWriterFactory) { - this.listViewVector = listViewVector; - this.listVector = null; - this.parentContainer = null; - this.fixedListVector = null; - this.largeListVector = null; - this.largeListViewVector = null; - this.nullableStructWriterFactory = nullableStructWriterFactory; - init(v); - } - - /** - * Constructs a new instance. - * - * @param v The vector to initialize the writer with. - * @param fixedListVector The vector that serves as a parent of v. - * @param nullableStructWriterFactory The factory to create the delegate writer. - */ - public PromotableWriter( - ValueVector v, - FixedSizeListVector fixedListVector, - NullableStructWriterFactory nullableStructWriterFactory) { - this.fixedListVector = fixedListVector; - this.parentContainer = null; - this.listVector = null; - this.listViewVector = null; - this.largeListVector = null; - this.largeListViewVector = null; - this.nullableStructWriterFactory = nullableStructWriterFactory; - init(v); - } - - /** - * Constructs a new instance. - * - * @param v The vector to initialize the writer with. - * @param largeListVector The vector that serves as a parent of v. - * @param nullableStructWriterFactory The factory to create the delegate writer. 
- */ - public PromotableWriter( - ValueVector v, - LargeListVector largeListVector, - NullableStructWriterFactory nullableStructWriterFactory) { - this.largeListVector = largeListVector; - this.fixedListVector = null; - this.parentContainer = null; - this.listVector = null; - this.listViewVector = null; - this.largeListViewVector = null; - this.nullableStructWriterFactory = nullableStructWriterFactory; - init(v); - } - - /** - * Constructs a new instance. - * - * @param v The vector to initialize the writer with. - * @param largeListViewVector The vector that serves as a parent of v. - * @param nullableStructWriterFactory The factory to create the delegate writer. - */ - public PromotableWriter( - ValueVector v, - LargeListViewVector largeListViewVector, - NullableStructWriterFactory nullableStructWriterFactory) { - this.largeListViewVector = largeListViewVector; - this.fixedListVector = null; - this.parentContainer = null; - this.listVector = null; - this.listViewVector = null; - this.largeListVector = null; - this.nullableStructWriterFactory = nullableStructWriterFactory; - init(v); - } - - private void init(ValueVector v) { - if (v instanceof UnionVector) { - state = State.UNION; - unionVector = (UnionVector) v; - writer = new UnionWriter(unionVector, nullableStructWriterFactory); - } else if (v instanceof NullVector) { - state = State.UNTYPED; - } else { - setWriter(v); - } - } - - @Override - public void setAddVectorAsNullable(boolean nullable) { - super.setAddVectorAsNullable(nullable); - if (writer instanceof AbstractFieldWriter) { - ((AbstractFieldWriter) writer).setAddVectorAsNullable(nullable); - } - } - - protected void setWriter(ValueVector v) { - state = State.SINGLE; - vector = v; - type = v.getMinorType(); - switch (type) { - case STRUCT: - writer = nullableStructWriterFactory.build((StructVector) vector); - break; - case LIST: - writer = new UnionListWriter((ListVector) vector, nullableStructWriterFactory); - break; - case LISTVIEW: - writer = new 
UnionListViewWriter((ListViewVector) vector, nullableStructWriterFactory); - break; - case MAP: - writer = new UnionMapWriter((MapVector) vector); - break; - case UNION: - writer = new UnionWriter((UnionVector) vector, nullableStructWriterFactory); - break; - default: - writer = type.getNewFieldWriter(vector); - break; - } - } - - @Override - public void writeNull() { - FieldWriter w = getWriter(); - if (w != null) { - w.writeNull(); - } - setPosition(idx() + 1); - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - FieldWriter w = getWriter(); - if (w == null) { - position = index; - } else { - w.setPosition(index); - } - } - - protected boolean requiresArrowType(MinorType type) { - return type == MinorType.DECIMAL - || type == MinorType.MAP - || type == MinorType.DURATION - || type == MinorType.FIXEDSIZEBINARY - || (type.name().startsWith("TIMESTAMP") && type.name().endsWith("TZ")); - } - - @Override - protected FieldWriter getWriter(MinorType type, ArrowType arrowType) { - if (state == State.UNION) { - if (requiresArrowType(type)) { - ((UnionWriter) writer).getWriter(type, arrowType); - } else { - ((UnionWriter) writer).getWriter(type); - } - } else if (state == State.UNTYPED) { - if (type == null) { - // ??? 
- return null; - } - if (arrowType == null) { - arrowType = type.getType(); - } - FieldType fieldType = new FieldType(addVectorAsNullable, arrowType, null, null); - ValueVector v; - if (listVector != null) { - v = listVector.addOrGetVector(fieldType).getVector(); - } else if (fixedListVector != null) { - v = fixedListVector.addOrGetVector(fieldType).getVector(); - } else if (listViewVector != null) { - v = listViewVector.addOrGetVector(fieldType).getVector(); - } else { - v = largeListVector.addOrGetVector(fieldType).getVector(); - } - v.allocateNew(); - setWriter(v); - writer.setPosition(position); - } else if (type != this.type) { - promoteToUnion(); - if (requiresArrowType(type)) { - ((UnionWriter) writer).getWriter(type, arrowType); - } else { - ((UnionWriter) writer).getWriter(type); - } - } - return writer; - } - - @Override - public boolean isEmptyStruct() { - return writer.isEmptyStruct(); - } - - @Override - protected FieldWriter getWriter() { - return writer; - } - - protected FieldWriter promoteToUnion() { - String name = vector.getField().getName(); - TransferPair tp = - vector.getTransferPair( - vector.getMinorType().name().toLowerCase(Locale.ROOT), vector.getAllocator()); - tp.transfer(); - if (parentContainer != null) { - // TODO allow dictionaries in complex types - unionVector = parentContainer.addOrGetUnion(name); - unionVector.allocateNew(); - } else if (listVector != null) { - unionVector = listVector.promoteToUnion(); - } else if (fixedListVector != null) { - unionVector = fixedListVector.promoteToUnion(); - } else if (largeListVector != null) { - unionVector = largeListVector.promoteToUnion(); - } else if (listViewVector != null) { - unionVector = listViewVector.promoteToUnion(); - } - unionVector.addVector((FieldVector) tp.getTo()); - writer = new UnionWriter(unionVector, nullableStructWriterFactory); - writer.setPosition(idx()); - for (int i = 0; i <= idx(); i++) { - unionVector.setType(i, vector.getMinorType()); - } - vector = null; - state 
= State.UNION; - return writer; - } - - @Override - public void write(DecimalHolder holder) { - getWriter( - MinorType.DECIMAL, - new ArrowType.Decimal(MAX_DECIMAL_PRECISION, holder.scale, /*bitWidth=*/ 128)) - .write(holder); - } - - @Override - public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType) { - getWriter( - MinorType.DECIMAL, - new ArrowType.Decimal( - MAX_DECIMAL_PRECISION, - ((ArrowType.Decimal) arrowType).getScale(), - /*bitWidth=*/ 128)) - .writeDecimal(start, buffer, arrowType); - } - - @Override - public void writeDecimal(BigDecimal value) { - getWriter( - MinorType.DECIMAL, - new ArrowType.Decimal(MAX_DECIMAL_PRECISION, value.scale(), /*bitWidth=*/ 128)) - .writeDecimal(value); - } - - @Override - public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType) { - getWriter( - MinorType.DECIMAL, - new ArrowType.Decimal( - MAX_DECIMAL_PRECISION, - ((ArrowType.Decimal) arrowType).getScale(), - /*bitWidth=*/ 128)) - .writeBigEndianBytesToDecimal(value, arrowType); - } - - @Override - public void write(Decimal256Holder holder) { - getWriter( - MinorType.DECIMAL256, - new ArrowType.Decimal(MAX_DECIMAL256_PRECISION, holder.scale, /*bitWidth=*/ 256)) - .write(holder); - } - - @Override - public void writeDecimal256(long start, ArrowBuf buffer, ArrowType arrowType) { - getWriter( - MinorType.DECIMAL256, - new ArrowType.Decimal( - MAX_DECIMAL256_PRECISION, - ((ArrowType.Decimal) arrowType).getScale(), - /*bitWidth=*/ 256)) - .writeDecimal256(start, buffer, arrowType); - } - - @Override - public void writeDecimal256(BigDecimal value) { - getWriter( - MinorType.DECIMAL256, - new ArrowType.Decimal(MAX_DECIMAL256_PRECISION, value.scale(), /*bitWidth=*/ 256)) - .writeDecimal256(value); - } - - @Override - public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) { - getWriter( - MinorType.DECIMAL256, - new ArrowType.Decimal( - MAX_DECIMAL256_PRECISION, - ((ArrowType.Decimal) arrowType).getScale(), - 
/*bitWidth=*/ 256)) - .writeBigEndianBytesToDecimal256(value, arrowType); - } - - @Override - public void writeVarBinary(byte[] value) { - getWriter(MinorType.VARBINARY).writeVarBinary(value); - } - - @Override - public void writeVarBinary(byte[] value, int offset, int length) { - getWriter(MinorType.VARBINARY).writeVarBinary(value, offset, length); - } - - @Override - public void writeVarBinary(ByteBuffer value) { - getWriter(MinorType.VARBINARY).writeVarBinary(value); - } - - @Override - public void writeVarBinary(ByteBuffer value, int offset, int length) { - getWriter(MinorType.VARBINARY).writeVarBinary(value, offset, length); - } - - @Override - public void writeLargeVarBinary(byte[] value) { - getWriter(MinorType.LARGEVARBINARY).writeLargeVarBinary(value); - } - - @Override - public void writeLargeVarBinary(byte[] value, int offset, int length) { - getWriter(MinorType.LARGEVARBINARY).writeLargeVarBinary(value, offset, length); - } - - @Override - public void writeLargeVarBinary(ByteBuffer value) { - getWriter(MinorType.LARGEVARBINARY).writeLargeVarBinary(value); - } - - @Override - public void writeLargeVarBinary(ByteBuffer value, int offset, int length) { - getWriter(MinorType.LARGEVARBINARY).writeLargeVarBinary(value, offset, length); - } - - @Override - public void writeVarChar(Text value) { - getWriter(MinorType.VARCHAR).writeVarChar(value); - } - - @Override - public void writeVarChar(String value) { - getWriter(MinorType.VARCHAR).writeVarChar(value); - } - - @Override - public void writeLargeVarChar(Text value) { - getWriter(MinorType.LARGEVARCHAR).writeLargeVarChar(value); - } - - @Override - public void writeLargeVarChar(String value) { - getWriter(MinorType.LARGEVARCHAR).writeLargeVarChar(value); - } - - @Override - public void allocate() { - getWriter().allocate(); - } - - @Override - public void clear() { - getWriter().clear(); - } - - @Override - public Field getField() { - return getWriter().getField(); - } - - @Override - public int 
getValueCapacity() { - return getWriter().getValueCapacity(); - } - - @Override - public void close() throws Exception { - getWriter().close(); - } - - /** - * Convert the writer to a PromotableViewWriter. - * - * @return The writer as a PromotableViewWriter. - */ - public PromotableViewWriter toViewWriter() { - PromotableViewWriter promotableViewWriter = new PromotableViewWriter(unionVector, parentContainer, nullableStructWriterFactory); - promotableViewWriter.position = position; - promotableViewWriter.writer = writer; - promotableViewWriter.state = state; - promotableViewWriter.unionVector = unionVector; - promotableViewWriter.type = MinorType.LISTVIEW; - return promotableViewWriter; - } -} diff --git a/java/vector/src/main/codegen/templates/StructWriters.java b/java/vector/src/main/codegen/templates/StructWriters.java deleted file mode 100644 index 3e6258a0c6c0e..0000000000000 --- a/java/vector/src/main/codegen/templates/StructWriters.java +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -<@pp.dropOutputFile /> -<#list ["Nullable", "Single"] as mode> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${mode}StructWriter.java" /> -<#assign index = "idx()"> -<#if mode == "Single"> -<#assign containerClass = "NonNullableStructVector" /> -<#else> -<#assign containerClass = "StructVector" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> -import java.util.Map; -import java.util.HashMap; - -import org.apache.arrow.vector.holders.RepeatedStructHolder; -import org.apache.arrow.vector.AllocationHelper; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.FieldWriter; - -<#function is_timestamp_tz type> - <#return type?starts_with("TimeStamp") && type?ends_with("TZ")> - - -/* - * This class is generated using FreeMarker and the ${.template_name} template. - */ -@SuppressWarnings("unused") -public class ${mode}StructWriter extends AbstractFieldWriter { - - protected final ${containerClass} container; - private int initialCapacity; - private final Map fields = new HashMap<>(); - public ${mode}StructWriter(${containerClass} container) { - <#if mode == "Single"> - if (container instanceof StructVector) { - throw new IllegalArgumentException("Invalid container: " + container); - } - - this.container = container; - this.initialCapacity = 0; - for (Field child : container.getField().getChildren()) { - MinorType minorType = Types.getMinorTypeForArrowType(child.getType()); - addVectorAsNullable = child.isNullable(); - switch (minorType) { - case STRUCT: - struct(child.getName()); - break; - case LIST: - list(child.getName()); - break; - case LISTVIEW: - listView(child.getName()); - break; - case MAP: { - ArrowType.Map arrowType = (ArrowType.Map) child.getType(); - map(child.getName(), arrowType.getKeysSorted()); - break; - } - case DENSEUNION: { - FieldType fieldType = new FieldType(addVectorAsNullable, 
MinorType.DENSEUNION.getType(), null, null); - DenseUnionWriter writer = new DenseUnionWriter(container.addOrGet(child.getName(), fieldType, DenseUnionVector.class), getNullableStructWriterFactory()); - fields.put(handleCase(child.getName()), writer); - break; - } - case UNION: - FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.UNION.getType(), null, null); - UnionWriter writer = new UnionWriter(container.addOrGet(child.getName(), fieldType, UnionVector.class), getNullableStructWriterFactory()); - fields.put(handleCase(child.getName()), writer); - break; -<#list vv.types as type><#list type.minor as minor> -<#assign lowerName = minor.class?uncap_first /> -<#if lowerName == "int" ><#assign lowerName = "integer" /> -<#assign upperName = minor.class?upper_case /> - case ${upperName}: { - <#if minor.typeParams?? > - ${minor.arrowType} arrowType = (${minor.arrowType})child.getType(); - ${lowerName}(child.getName()<#list minor.typeParams as typeParam>, arrowType.get${typeParam.name?cap_first}()); - <#else> - ${lowerName}(child.getName()); - - break; - } - - default: - throw new UnsupportedOperationException("Unknown type: " + minorType); - } - } - } - - protected String handleCase(final String input) { - return input.toLowerCase(); - } - - protected NullableStructWriterFactory getNullableStructWriterFactory() { - return NullableStructWriterFactory.getNullableStructWriterFactoryInstance(); - } - - @Override - public int getValueCapacity() { - return container.getValueCapacity(); - } - - public void setInitialCapacity(int initialCapacity) { - this.initialCapacity = initialCapacity; - container.setInitialCapacity(initialCapacity); - } - - @Override - public boolean isEmptyStruct() { - return 0 == container.size(); - } - - @Override - public Field getField() { - return container.getField(); - } - - @Override - public StructWriter struct(String name) { - String finalName = handleCase(name); - FieldWriter writer = fields.get(finalName); - if(writer == null){ 
- int vectorCount=container.size(); - FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.STRUCT.getType(), null, null); - StructVector vector = container.addOrGet(name, fieldType, StructVector.class); - writer = new PromotableWriter(vector, container, getNullableStructWriterFactory()); - if(vectorCount != container.size()) { - writer.allocate(); - } - writer.setPosition(idx()); - fields.put(finalName, writer); - } else { - if (writer instanceof PromotableWriter) { - // ensure writers are initialized - ((PromotableWriter)writer).getWriter(MinorType.STRUCT); - } - } - return writer; - } - - @Override - public void close() throws Exception { - clear(); - container.close(); - } - - @Override - public void allocate() { - container.allocateNew(); - for(final FieldWriter w : fields.values()) { - w.allocate(); - } - } - - @Override - public void clear() { - container.clear(); - for(final FieldWriter w : fields.values()) { - w.clear(); - } - } - - @Override - public ListWriter list(String name) { - String finalName = handleCase(name); - FieldWriter writer = fields.get(finalName); - int vectorCount = container.size(); - if(writer == null) { - FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.LIST.getType(), null, null); - writer = new PromotableWriter(container.addOrGet(name, fieldType, ListVector.class), container, getNullableStructWriterFactory()); - if (container.size() > vectorCount) { - writer.allocate(); - } - writer.setPosition(idx()); - fields.put(finalName, writer); - } else { - if (writer instanceof PromotableWriter) { - // ensure writers are initialized - ((PromotableWriter)writer).getWriter(MinorType.LIST); - } - } - return writer; - } - - @Override - public ListWriter listView(String name) { - String finalName = handleCase(name); - FieldWriter writer = fields.get(finalName); - int vectorCount = container.size(); - if(writer == null) { - FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.LISTVIEW.getType(), null, 
null); - writer = new PromotableViewWriter(container.addOrGet(name, fieldType, ListViewVector.class), container, getNullableStructWriterFactory()); - if (container.size() > vectorCount) { - writer.allocate(); - } - writer.setPosition(idx()); - fields.put(finalName, writer); - } else { - if (writer instanceof PromotableViewWriter) { - // ensure writers are initialized - ((PromotableViewWriter) writer).getWriter(MinorType.LISTVIEW); - } else { - writer = ((PromotableWriter) writer).toViewWriter(); - ((PromotableViewWriter) writer).getWriter(MinorType.LISTVIEW); - } - } - return writer; - } - - @Override - public MapWriter map(String name) { - return map(name, false); - } - - @Override - public MapWriter map(String name, boolean keysSorted) { - FieldWriter writer = fields.get(handleCase(name)); - if(writer == null) { - ValueVector vector; - ValueVector currentVector = container.getChild(name); - MapVector v = container.addOrGet(name, - new FieldType(addVectorAsNullable, - new ArrowType.Map(keysSorted) - ,null, null), - MapVector.class); - writer = new PromotableWriter(v, container, getNullableStructWriterFactory()); - vector = v; - if (currentVector == null || currentVector != vector) { - if(this.initialCapacity > 0) { - vector.setInitialCapacity(this.initialCapacity); - } - vector.allocateNewSafe(); - } - writer.setPosition(idx()); - fields.put(handleCase(name), writer); - } else { - if (writer instanceof PromotableWriter) { - // ensure writers are initialized - ((PromotableWriter)writer).getWriter(MinorType.MAP, new ArrowType.Map(keysSorted)); - } - } - return writer; - } - - public void setValueCount(int count) { - container.setValueCount(count); - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - for(final FieldWriter w: fields.values()) { - w.setPosition(index); - } - } - - <#if mode="Nullable"> - @Override - public void writeNull() { - container.setNull(idx()); - setValueCount(idx()+1); - super.setPosition(idx()+1); - } - - - 
@Override - public void start() { - <#if mode == "Single"> - <#else> - container.setIndexDefined(idx()); - - } - - @Override - public void end() { - setPosition(idx()+1); - } - - <#list vv.types as type><#list type.minor as minor> - <#assign lowerName = minor.class?uncap_first /> - <#if lowerName == "int" ><#assign lowerName = "integer" /> - <#assign upperName = minor.class?upper_case /> - <#assign capName = minor.class?cap_first /> - <#assign vectName = capName /> - - <#if minor.typeParams?? > - @Override - public ${minor.class}Writer ${lowerName}(String name) { - // returns existing writer - final FieldWriter writer = fields.get(handleCase(name)); - Preconditions.checkNotNull(writer); - return writer; - } - - @Override - public ${minor.class}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}) { - <#else> - @Override - public ${minor.class}Writer ${lowerName}(String name) { - - FieldWriter writer = fields.get(handleCase(name)); - if(writer == null) { - ValueVector vector; - ValueVector currentVector = container.getChild(name); - ${vectName}Vector v = container.addOrGet(name, - new FieldType(addVectorAsNullable, - <#if minor.typeParams??> - <#if minor.arrowTypeConstructorParams??> - <#assign constructorParams = minor.arrowTypeConstructorParams /> - <#else> - <#assign constructorParams = [] /> - <#list minor.typeParams?reverse as typeParam> - <#assign constructorParams = constructorParams + [ typeParam.name ] /> - - - new ${minor.arrowType}(${constructorParams?join(", ")}<#if minor.class?starts_with("Decimal")>, ${vectName}Vector.TYPE_WIDTH * 8) - <#else> - MinorType.${upperName}.getType() - - ,null, null), - ${vectName}Vector.class); - writer = new PromotableWriter(v, container, getNullableStructWriterFactory()); - vector = v; - if (currentVector == null || currentVector != vector) { - if(this.initialCapacity > 0) { - vector.setInitialCapacity(this.initialCapacity); - } - vector.allocateNewSafe(); - } - 
writer.setPosition(idx()); - fields.put(handleCase(name), writer); - } else { - if (writer instanceof PromotableWriter) { - // ensure writers are initialized - <#if minor.class?starts_with("Decimal")> - ((PromotableWriter)writer).getWriter(MinorType.${upperName}<#if minor.class?starts_with("Decimal")>, new ${minor.arrowType}(precision, scale, ${vectName}Vector.TYPE_WIDTH * 8)); - <#elseif is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - <#if minor.arrowTypeConstructorParams??> - <#assign constructorParams = minor.arrowTypeConstructorParams /> - <#else> - <#assign constructorParams = [] /> - <#list minor.typeParams?reverse as typeParam> - <#assign constructorParams = constructorParams + [ typeParam.name ] /> - - - ArrowType arrowType = new ${minor.arrowType}(${constructorParams?join(", ")}); - ((PromotableWriter)writer).getWriter(MinorType.${upperName}, arrowType); - <#else> - ((PromotableWriter)writer).getWriter(MinorType.${upperName}); - - } - } - return writer; - } - - - -} - diff --git a/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java b/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java deleted file mode 100644 index 3436e3a967651..0000000000000 --- a/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.complex.writer.Decimal256Writer; -import org.apache.arrow.vector.complex.writer.DecimalWriter; -import org.apache.arrow.vector.holders.Decimal256Holder; -import org.apache.arrow.vector.holders.DecimalHolder; - - -import java.lang.UnsupportedOperationException; -import java.math.BigDecimal; - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionFixedSizeListWriter.java" /> - - -<#include "/@includes/license.ftl" /> - - package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> - -/* - * This class is generated using freemarker and the ${.template_name} template. 
- */ - -@SuppressWarnings("unused") -public class UnionFixedSizeListWriter extends AbstractFieldWriter { - - protected FixedSizeListVector vector; - protected PromotableWriter writer; - private boolean inStruct = false; - private String structName; - private final int listSize; - - public UnionFixedSizeListWriter(FixedSizeListVector vector) { - this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); - } - - public UnionFixedSizeListWriter(FixedSizeListVector vector, NullableStructWriterFactory nullableStructWriterFactory) { - this.vector = vector; - this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableStructWriterFactory); - this.listSize = vector.getListSize(); - } - - public UnionFixedSizeListWriter(FixedSizeListVector vector, AbstractFieldWriter parent) { - this(vector); - } - - @Override - public void allocate() { - vector.allocateNew(); - } - - @Override - public void clear() { - vector.clear(); - } - - @Override - public Field getField() { - return vector.getField(); - } - - public void setValueCount(int count) { - vector.setValueCount(count); - } - - @Override - public int getValueCapacity() { - return vector.getValueCapacity(); - } - - @Override - public void close() throws Exception { - vector.close(); - writer.close(); - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - } - <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if uncappedName == "int" ><#assign uncappedName = "integer" /> - <#if !minor.typeParams?? 
> - - @Override - public ${name}Writer ${uncappedName}() { - return this; - } - - @Override - public ${name}Writer ${uncappedName}(String name) { - structName = name; - return writer.${uncappedName}(name); - } - - - - @Override - public DecimalWriter decimal() { - return this; - } - - @Override - public DecimalWriter decimal(String name, int scale, int precision) { - return writer.decimal(name, scale, precision); - } - - @Override - public DecimalWriter decimal(String name) { - return writer.decimal(name); - } - - - @Override - public Decimal256Writer decimal256() { - return this; - } - - @Override - public Decimal256Writer decimal256(String name, int scale, int precision) { - return writer.decimal256(name, scale, precision); - } - - @Override - public Decimal256Writer decimal256(String name) { - return writer.decimal256(name); - } - - @Override - public StructWriter struct() { - inStruct = true; - return this; - } - - @Override - public ListWriter list() { - return writer; - } - - @Override - public ListWriter list(String name) { - ListWriter listWriter = writer.list(name); - return listWriter; - } - - @Override - public StructWriter struct(String name) { - StructWriter structWriter = writer.struct(name); - return structWriter; - } - - @Override - public MapWriter map() { - return writer; - } - - @Override - public MapWriter map(String name) { - MapWriter mapWriter = writer.map(name); - return mapWriter; - } - - @Override - public MapWriter map(boolean keysSorted) { - writer.map(keysSorted); - return writer; - } - - @Override - public MapWriter map(String name, boolean keysSorted) { - MapWriter mapWriter = writer.map(name, keysSorted); - return mapWriter; - } - - @Override - public void startList() { - int start = vector.startNewValue(idx()); - writer.setPosition(start); - } - - @Override - public void endList() { - setPosition(idx() + 1); - } - - @Override - public void start() { - writer.start(); - } - - @Override - public void end() { - writer.end(); - inStruct 
= false; - } - - @Override - public void write(DecimalHolder holder) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.write(holder); - writer.setPosition(writer.idx() + 1); - } - - @Override - public void write(Decimal256Holder holder) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.write(holder); - writer.setPosition(writer.idx() + 1); - } - - - @Override - public void writeNull() { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.writeNull(); - } - - public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.writeDecimal(start, buffer, arrowType); - writer.setPosition(writer.idx() + 1); - } - - public void writeDecimal(BigDecimal value) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.writeDecimal(value); - writer.setPosition(writer.idx() + 1); - } - - public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.writeBigEndianBytesToDecimal(value, arrowType); - writer.setPosition(writer.idx() + 1); - } - - public void writeDecimal256(long start, ArrowBuf buffer, ArrowType arrowType) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new 
IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.writeDecimal256(start, buffer, arrowType); - writer.setPosition(writer.idx() + 1); - } - - public void writeDecimal256(BigDecimal value) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.writeDecimal256(value); - writer.setPosition(writer.idx() + 1); - } - - public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.writeBigEndianBytesToDecimal256(value, arrowType); - writer.setPosition(writer.idx() + 1); - } - - - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if minor.class?ends_with("VarBinary")> - @Override - public void write${minor.class}(byte[] value) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.write${minor.class}(value); - writer.setPosition(writer.idx() + 1); - } - - @Override - public void write${minor.class}(byte[] value, int offset, int length) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.write${minor.class}(value, offset, length); - writer.setPosition(writer.idx() + 1); - } - - @Override - public void write${minor.class}(ByteBuffer value) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); 
- } - writer.write${minor.class}(value); - writer.setPosition(writer.idx() + 1); - } - - @Override - public void write${minor.class}(ByteBuffer value, int offset, int length) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.write${minor.class}(value, offset, length); - writer.setPosition(writer.idx() + 1); - } - <#elseif minor.class?ends_with("VarChar")> - @Override - public void write${minor.class}(Text value) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.write${minor.class}(value); - writer.setPosition(writer.idx() + 1); - } - - @Override - public void write${minor.class}(String value) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.write${minor.class}(value); - writer.setPosition(writer.idx() + 1); - } - - - <#if !minor.typeParams?? 
> - @Override - public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.write${name}(<#list fields as field>${field.name}<#if field_has_next>, ); - writer.setPosition(writer.idx() + 1); - } - - public void write(${name}Holder holder) { - if (writer.idx() >= (idx() + 1) * listSize) { - throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); - } - writer.write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, ); - writer.setPosition(writer.idx() + 1); - } - - - - -} diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java deleted file mode 100644 index 3962e1d0731f3..0000000000000 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.complex.writer.Decimal256Writer; -import org.apache.arrow.vector.complex.writer.DecimalWriter; -import org.apache.arrow.vector.holders.Decimal256Holder; -import org.apache.arrow.vector.holders.DecimalHolder; - - -import java.lang.UnsupportedOperationException; -import java.math.BigDecimal; - -<@pp.dropOutputFile /> -<#list ["List", "ListView", "LargeList", "LargeListView"] as listName> - -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/Union${listName}Writer.java" /> - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -<#include "/@includes/vv_imports.ftl" /> - -<#function is_timestamp_tz type> - <#return type?starts_with("TimeStamp") && type?ends_with("TZ")> - - -/* - * This class is generated using freemarker and the ${.template_name} template. - */ - -@SuppressWarnings("unused") -public class Union${listName}Writer extends AbstractFieldWriter { - - protected ${listName}Vector vector; - protected PromotableWriter writer; - private boolean inStruct = false; - private boolean listStarted = false; - private String structName; - <#if listName == "LargeList" || listName == "LargeListView"> - private static final long OFFSET_WIDTH = 8; - <#else> - private static final int OFFSET_WIDTH = 4; - - - <#if listName == "ListView"> - private static final long SIZE_WIDTH = 4; - - <#if listName == "LargeListView"> - private static final long SIZE_WIDTH = 8; - - - public Union${listName}Writer(${listName}Vector vector) { - this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); - } - - public Union${listName}Writer(${listName}Vector vector, NullableStructWriterFactory nullableStructWriterFactory) { - this.vector = vector; - <#if listName = "ListView" || listName = "LargeListView"> - this.writer = new 
PromotableViewWriter(vector.getDataVector(), vector, nullableStructWriterFactory); - <#else> - this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableStructWriterFactory); - - } - - public Union${listName}Writer(${listName}Vector vector, AbstractFieldWriter parent) { - this(vector); - } - - @Override - public void allocate() { - vector.allocateNew(); - } - - @Override - public void clear() { - vector.clear(); - } - - @Override - public Field getField() { - return vector.getField(); - } - - public void setValueCount(int count) { - vector.setValueCount(count); - } - - @Override - public int getValueCapacity() { - return vector.getValueCapacity(); - } - - @Override - public void close() throws Exception { - vector.close(); - writer.close(); - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - } - - <#list vv.types as type><#list type.minor as minor> - <#assign lowerName = minor.class?uncap_first /> - <#if lowerName == "int" ><#assign lowerName = "integer" /> - <#assign upperName = minor.class?upper_case /> - <#assign capName = minor.class?cap_first /> - <#assign vectName = capName /> - @Override - public ${minor.class}Writer ${lowerName}() { - return this; - } - - <#if minor.typeParams?? 
> - @Override - public ${minor.class}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}) { - return writer.${lowerName}(name<#list minor.typeParams as typeParam>, ${typeParam.name}); - } - - - @Override - public ${minor.class}Writer ${lowerName}(String name) { - structName = name; - return writer.${lowerName}(name); - } - - - - @Override - public StructWriter struct() { - inStruct = true; - return this; - } - - @Override - public ListWriter list() { - return writer; - } - - @Override - public ListWriter list(String name) { - ListWriter listWriter = writer.list(name); - return listWriter; - } - - @Override - public ListWriter listView() { - return writer; - } - - @Override - public ListWriter listView(String name) { - ListWriter listWriter = writer.listView(name); - return listWriter; - } - - @Override - public StructWriter struct(String name) { - StructWriter structWriter = writer.struct(name); - return structWriter; - } - - @Override - public MapWriter map() { - return writer; - } - - @Override - public MapWriter map(String name) { - MapWriter mapWriter = writer.map(name); - return mapWriter; - } - - @Override - public MapWriter map(boolean keysSorted) { - writer.map(keysSorted); - return writer; - } - - @Override - public MapWriter map(String name, boolean keysSorted) { - MapWriter mapWriter = writer.map(name, keysSorted); - return mapWriter; - } - - <#if listName == "LargeList"> - @Override - public void startList() { - vector.startNewValue(idx()); - writer.setPosition(checkedCastToInt(vector.getOffsetBuffer().getLong((idx() + 1L) * OFFSET_WIDTH))); - listStarted = true; - } - - @Override - public void endList() { - vector.getOffsetBuffer().setLong((idx() + 1L) * OFFSET_WIDTH, writer.idx()); - setPosition(idx() + 1); - listStarted = false; - } - <#elseif listName == "ListView"> - @Override - public void startList() { - vector.startNewValue(idx()); - writer.setPosition(vector.getOffsetBuffer().getInt((idx()) 
* OFFSET_WIDTH)); - listStarted = true; - } - - @Override - public void endList() { - int sizeUptoIdx = 0; - for (int i = 0; i < idx(); i++) { - sizeUptoIdx += vector.getSizeBuffer().getInt(i * SIZE_WIDTH); - } - vector.getSizeBuffer().setInt(idx() * SIZE_WIDTH, writer.idx() - sizeUptoIdx); - setPosition(idx() + 1); - listStarted = false; - } - - @Override - public void startListView() { - vector.startNewValue(idx()); - writer.setPosition(vector.getOffsetBuffer().getInt((idx()) * OFFSET_WIDTH)); - listStarted = true; - } - - @Override - public void endListView() { - int sizeUptoIdx = 0; - for (int i = 0; i < idx(); i++) { - sizeUptoIdx += vector.getSizeBuffer().getInt(i * SIZE_WIDTH); - } - vector.getSizeBuffer().setInt(idx() * SIZE_WIDTH, writer.idx() - sizeUptoIdx); - setPosition(idx() + 1); - listStarted = false; - } - <#elseif listName == "LargeListView"> - @Override - public void startList() { - vector.startNewValue(idx()); - writer.setPosition(vector.getOffsetBuffer().getInt((idx()) * OFFSET_WIDTH)); - listStarted = true; - } - - @Override - public void endList() { - int sizeUptoIdx = 0; - for (int i = 0; i < idx(); i++) { - sizeUptoIdx += vector.getSizeBuffer().getInt(i * SIZE_WIDTH); - } - vector.getSizeBuffer().setInt(idx() * SIZE_WIDTH, writer.idx() - sizeUptoIdx); - setPosition(idx() + 1); - listStarted = false; - } - - @Override - public void startListView() { - vector.startNewValue(idx()); - writer.setPosition(checkedCastToInt(vector.getOffsetBuffer().getInt((idx()) * OFFSET_WIDTH))); - listStarted = true; - } - - @Override - public void endListView() { - int sizeUptoIdx = 0; - for (int i = 0; i < idx(); i++) { - sizeUptoIdx += vector.getSizeBuffer().getInt(i * SIZE_WIDTH); - } - vector.getSizeBuffer().setInt(idx() * SIZE_WIDTH, writer.idx() - sizeUptoIdx); - setPosition(idx() + 1); - listStarted = false; - } - <#else> - @Override - public void startList() { - vector.startNewValue(idx()); - writer.setPosition(vector.getOffsetBuffer().getInt((idx() + 
1L) * OFFSET_WIDTH)); - listStarted = true; - } - - @Override - public void endList() { - vector.getOffsetBuffer().setInt((idx() + 1L) * OFFSET_WIDTH, writer.idx()); - setPosition(idx() + 1); - listStarted = false; - } - - - @Override - public void start() { - writer.start(); - } - - @Override - public void end() { - writer.end(); - inStruct = false; - } - - @Override - public void writeNull() { - if (!listStarted){ - vector.setNull(idx()); - } else { - writer.writeNull(); - } - } - - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - @Override - public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { - writer.write${name}(<#list fields as field>${field.name}<#if field_has_next>, ); - writer.setPosition(writer.idx()+1); - } - - <#if is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - @Override - public void write(${name}Holder holder) { - writer.write(holder); - writer.setPosition(writer.idx()+1); - } - - <#elseif minor.class?starts_with("Decimal")> - public void write${name}(long start, ArrowBuf buffer, ArrowType arrowType) { - writer.write${name}(start, buffer, arrowType); - writer.setPosition(writer.idx()+1); - } - - @Override - public void write(${name}Holder holder) { - writer.write(holder); - writer.setPosition(writer.idx()+1); - } - - public void write${name}(BigDecimal value) { - writer.write${name}(value); - writer.setPosition(writer.idx()+1); - } - - public void writeBigEndianBytesTo${name}(byte[] value, ArrowType arrowType){ - writer.writeBigEndianBytesTo${name}(value, arrowType); - writer.setPosition(writer.idx() + 1); - } - <#else> - @Override - public void write(${name}Holder holder) { - writer.write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, ); - writer.setPosition(writer.idx()+1); - } - - - 
<#if minor.class?ends_with("VarBinary")> - @Override - public void write${minor.class}(byte[] value) { - writer.write${minor.class}(value); - writer.setPosition(writer.idx() + 1); - } - - @Override - public void write${minor.class}(byte[] value, int offset, int length) { - writer.write${minor.class}(value, offset, length); - writer.setPosition(writer.idx() + 1); - } - - @Override - public void write${minor.class}(ByteBuffer value) { - writer.write${minor.class}(value); - writer.setPosition(writer.idx() + 1); - } - - @Override - public void write${minor.class}(ByteBuffer value, int offset, int length) { - writer.write${minor.class}(value, offset, length); - writer.setPosition(writer.idx() + 1); - } - <#elseif minor.class?ends_with("VarChar")> - @Override - public void write${minor.class}(Text value) { - writer.write${minor.class}(value); - writer.setPosition(writer.idx() + 1); - } - - public void write${minor.class}(String value) { - writer.write${minor.class}(value); - writer.setPosition(writer.idx() + 1); - } - - - - -} - diff --git a/java/vector/src/main/codegen/templates/UnionMapWriter.java b/java/vector/src/main/codegen/templates/UnionMapWriter.java deleted file mode 100644 index 90b55cb65e6ef..0000000000000 --- a/java/vector/src/main/codegen/templates/UnionMapWriter.java +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.complex.writer.Decimal256Writer; -import org.apache.arrow.vector.complex.writer.DecimalWriter; -import org.apache.arrow.vector.holders.Decimal256Holder; -import org.apache.arrow.vector.holders.DecimalHolder; - -import java.lang.UnsupportedOperationException; -import java.math.BigDecimal; - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionMapWriter.java" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> - -/* - * This class is generated using freemarker and the ${.template_name} template. - */ - -/** - *

    Writer for MapVectors. This extends UnionListWriter to simplify writing map entries to a list - * of struct elements, with "key" and "value" fields. The procedure for writing a map begin with - * {@link #startMap()} followed by {@link #startEntry()}. An entry is written by using the - * {@link #key()} writer to write the key, then the {@link #value()} writer to write a value. After - * writing the value, call {@link #endEntry()} to complete the entry. Each map can have 1 or more - * entries. When done writing entries, call {@link #endMap()} to complete the map. - * - *

    NOTE: the MapVector can have NULL values by not writing to position. If a map is started with - * {@link #startMap()}, then it must have a key written. The value of a map entry can be NULL by - * not using the {@link #value()} writer. - * - *

    Example to write the following map to position 5 of a vector - *

    {@code
    - *   // {
    - *   //   1 -> 3,
    - *   //   2 -> 4,
    - *   //   3 -> NULL
    - *   // }
    - *
    - *   UnionMapWriter writer = ...
    - *
    - *   writer.setPosition(5);
    - *   writer.startMap();
    - *   writer.startEntry();
    - *   writer.key().integer().writeInt(1);
    - *   writer.value().integer().writeInt(3);
    - *   writer.endEntry();
    - *   writer.startEntry();
    - *   writer.key().integer().writeInt(2);
    - *   writer.value().integer().writeInt(4);
    - *   writer.endEntry();
    - *   writer.startEntry();
    - *   writer.key().integer().writeInt(3);
    - *   writer.endEntry();
    - *   writer.endMap();
    - * 
    - *

    - */ -@SuppressWarnings("unused") -public class UnionMapWriter extends UnionListWriter { - - /** - * Current mode for writing map entries, set by calling {@link #key()} or {@link #value()} - * and reset with a call to {@link #endEntry()}. With KEY mode, a struct writer with field - * named "key" is returned. With VALUE mode, a struct writer with field named "value" is - * returned. In OFF mode, the writer will behave like a standard UnionListWriter - */ - private enum MapWriteMode { - OFF, - KEY, - VALUE, - } - - private MapWriteMode mode = MapWriteMode.OFF; - private StructWriter entryWriter; - - public UnionMapWriter(MapVector vector) { - super(vector); - entryWriter = struct(); - } - - /** Start writing a map that consists of 1 or more entries. */ - public void startMap() { - startList(); - } - - /** Complete the map. */ - public void endMap() { - endList(); - } - - /** - * Start a map entry that should be followed by calls to {@link #key()} and {@link #value()} - * writers. Call {@link #endEntry()} to complete the entry. - */ - public void startEntry() { - writer.setAddVectorAsNullable(false); - entryWriter.start(); - } - - /** Complete the map entry. */ - public void endEntry() { - entryWriter.end(); - mode = MapWriteMode.OFF; - writer.setAddVectorAsNullable(true); - } - - /** Return the key writer that is used to write to the "key" field. */ - public UnionMapWriter key() { - writer.setAddVectorAsNullable(false); - mode = MapWriteMode.KEY; - return this; - } - - /** Return the value writer that is used to write to the "value" field. */ - public UnionMapWriter value() { - writer.setAddVectorAsNullable(true); - mode = MapWriteMode.VALUE; - return this; - } - - <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if uncappedName == "int" ><#assign uncappedName = "integer" /> - <#if !minor.typeParams?? 
> - @Override - public ${name}Writer ${uncappedName}() { - switch (mode) { - case KEY: - return entryWriter.${uncappedName}(MapVector.KEY_NAME); - case VALUE: - return entryWriter.${uncappedName}(MapVector.VALUE_NAME); - default: - return this; - } - } - - - - @Override - public DecimalWriter decimal() { - switch (mode) { - case KEY: - return entryWriter.decimal(MapVector.KEY_NAME); - case VALUE: - return entryWriter.decimal(MapVector.VALUE_NAME); - default: - return this; - } - } - - @Override - public Decimal256Writer decimal256() { - switch (mode) { - case KEY: - return entryWriter.decimal256(MapVector.KEY_NAME); - case VALUE: - return entryWriter.decimal256(MapVector.VALUE_NAME); - default: - return this; - } - } - - - @Override - public StructWriter struct() { - switch (mode) { - case KEY: - return entryWriter.struct(MapVector.KEY_NAME); - case VALUE: - return entryWriter.struct(MapVector.VALUE_NAME); - default: - return super.struct(); - } - } - - @Override - public ListWriter list() { - switch (mode) { - case KEY: - return entryWriter.list(MapVector.KEY_NAME); - case VALUE: - return entryWriter.list(MapVector.VALUE_NAME); - default: - return super.list(); - } - } - - @Override - public MapWriter map(boolean keysSorted) { - switch (mode) { - case KEY: - return entryWriter.map(MapVector.KEY_NAME, keysSorted); - case VALUE: - return entryWriter.map(MapVector.VALUE_NAME, keysSorted); - default: - return super.map(); - } - } - - @Override - public MapWriter map() { - switch (mode) { - case KEY: - return entryWriter.map(MapVector.KEY_NAME); - case VALUE: - return entryWriter.map(MapVector.VALUE_NAME); - default: - return super.map(); - } - } -} diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java deleted file mode 100644 index 68e30ef48846b..0000000000000 --- a/java/vector/src/main/codegen/templates/UnionReader.java +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Licensed to the Apache Software 
Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionReader.java" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> - -<#function is_timestamp_tz type> - <#return type?starts_with("TimeStamp") && type?ends_with("TZ")> - - -/** - * Source code generated using FreeMarker template ${.template_name} - */ -@SuppressWarnings("unused") -public class UnionReader extends AbstractFieldReader { - - private static final int NUM_SUPPORTED_TYPES = 51; - - private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES]; - public UnionVector data; - - public UnionReader(UnionVector data) { - this.data = data; - } - - public MinorType getMinorType() { - return TYPES[data.getTypeValue(idx())]; - } - - private static MinorType[] TYPES = new MinorType[NUM_SUPPORTED_TYPES]; - - static { - for (MinorType minorType : MinorType.values()) { - TYPES[minorType.ordinal()] = minorType; - } - } - - @Override - public Field getField() { - return data.getField(); - } - - public boolean isSet(){ - 
return !data.isNull(idx()); - } - - public void read(UnionHolder holder) { - holder.reader = this; - holder.isSet = this.isSet() ? 1 : 0; - } - - public void read(int index, UnionHolder holder) { - getList().read(index, holder); - } - - private FieldReader getReaderForIndex(int index) { - int typeValue = data.getTypeValue(index); - FieldReader reader = (FieldReader) readers[typeValue]; - if (reader != null) { - return reader; - } - switch (MinorType.values()[typeValue]) { - case NULL: - return NullReader.INSTANCE; - case STRUCT: - return (FieldReader) getStruct(); - case LIST: - return (FieldReader) getList(); - case LISTVIEW: - return (FieldReader) getListView(); - case MAP: - return (FieldReader) getMap(); - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign uncappedName = name?uncap_first/> - <#if !minor.typeParams?? || minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - case ${name?upper_case}: - return (FieldReader) get${name}(); - - - - default: - throw new UnsupportedOperationException("Unsupported type: " + MinorType.values()[typeValue]); - } - } - - private SingleStructReaderImpl structReader; - - private StructReader getStruct() { - if (structReader == null) { - structReader = (SingleStructReaderImpl) data.getStruct().getReader(); - structReader.setPosition(idx()); - readers[MinorType.STRUCT.ordinal()] = structReader; - } - return structReader; - } - - private UnionListReader listReader; - - private FieldReader getList() { - if (listReader == null) { - listReader = new UnionListReader(data.getList()); - listReader.setPosition(idx()); - readers[MinorType.LIST.ordinal()] = listReader; - } - return listReader; - } - - private UnionListViewReader listViewReader; - - private FieldReader getListView() { - if (listViewReader == null) { - listViewReader = new UnionListViewReader(data.getListView()); - 
listViewReader.setPosition(idx()); - readers[MinorType.LISTVIEW.ordinal()] = listViewReader; - } - return listViewReader; - } - - private UnionMapReader mapReader; - - private FieldReader getMap() { - if (mapReader == null) { - mapReader = new UnionMapReader(data.getMap()); - mapReader.setPosition(idx()); - readers[MinorType.MAP.ordinal()] = mapReader; - } - return mapReader; - } - - @Override - public java.util.Iterator iterator() { - return getStruct().iterator(); - } - - @Override - public void copyAsValue(UnionWriter writer) { - writer.data.copyFrom(idx(), writer.idx(), data); - } - - <#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean", - "LocalDateTime", "Duration", "Period", "Double", "Float", - "Character", "Text", "Byte", "byte[]", "PeriodDuration"] as friendlyType> - <#assign safeType=friendlyType /> - <#if safeType=="byte[]"><#assign safeType="ByteArray" /> - - @Override - public ${friendlyType} read${safeType}() { - return getReaderForIndex(idx()).read${safeType}(); - } - - - - public int size() { - return getReaderForIndex(idx()).size(); - } - - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign uncappedName = name?uncap_first/> - <#assign boxedType = (minor.boxedType!type.boxedType) /> - <#assign javaType = (minor.javaType!type.javaType) /> - <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) /> - <#assign safeType=friendlyType /> - <#if safeType=="byte[]"><#assign safeType="ByteArray" /> - <#if !minor.typeParams?? 
|| minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - - private ${name}ReaderImpl ${uncappedName}Reader; - - private ${name}ReaderImpl get${name}() { - if (${uncappedName}Reader == null) { - ${uncappedName}Reader = new ${name}ReaderImpl(data.get${name}Vector()); - ${uncappedName}Reader.setPosition(idx()); - readers[MinorType.${name?upper_case}.ordinal()] = ${uncappedName}Reader; - } - return ${uncappedName}Reader; - } - - public void read(Nullable${name}Holder holder){ - getReaderForIndex(idx()).read(holder); - } - - public void copyAsValue(${name}Writer writer){ - getReaderForIndex(idx()).copyAsValue(writer); - } - - - - - @Override - public void copyAsValue(ListWriter writer) { - ComplexCopier.copy(this, (FieldWriter) writer); - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - for (BaseReader reader : readers) { - if (reader != null) { - reader.setPosition(index); - } - } - } - - public FieldReader reader(String name){ - return getStruct().reader(name); - } - - public FieldReader reader() { - return getList().reader(); - } - - public boolean next() { - return getReaderForIndex(idx()).next(); - } -} diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java deleted file mode 100644 index e0fd0e4644313..0000000000000 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ /dev/null @@ -1,928 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReferenceManager; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.AbstractStructVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.compare.RangeEqualsVisitor; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.DataSizeRoundingUtil; - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/UnionVector.java" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex; - -<#include "/@includes/vv_imports.ftl" /> -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.util.CommonUtil; -import 
org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.impl.ComplexCopier; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.ValueVectorUtility; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.util.Preconditions; - -import static org.apache.arrow.vector.types.UnionMode.Sparse; -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; - -<#function is_timestamp_tz type> - <#return type?starts_with("TimeStamp") && type?ends_with("TZ")> - - -/* - * This class is generated using freemarker and the ${.template_name} template. - */ -@SuppressWarnings("unused") - - -/** - * A vector which can hold values of different types. It does so by using a StructVector which contains a vector for each - * primitive type that is stored. StructVector is used in order to take advantage of its serialization/deserialization methods, - * as well as the addOrGet method. - * - * For performance reasons, UnionVector stores a cached reference to each subtype vector, to avoid having to do the struct lookup - * each time the vector is accessed. 
- * Source code generated using FreeMarker template ${.template_name} - */ -public class UnionVector extends AbstractContainerVector implements FieldVector { - int valueCount; - - NonNullableStructVector internalStruct; - protected ArrowBuf typeBuffer; - - private StructVector structVector; - private ListVector listVector; - private ListViewVector listViewVector; - private MapVector mapVector; - - private FieldReader reader; - - private int singleType = 0; - private ValueVector singleVector; - - private int typeBufferAllocationSizeInBytes; - - private final FieldType fieldType; - private final Field[] typeIds = new Field[Byte.MAX_VALUE + 1]; - - public static final byte TYPE_WIDTH = 1; - private static final FieldType INTERNAL_STRUCT_TYPE = new FieldType(false /*nullable*/, - ArrowType.Struct.INSTANCE, null /*dictionary*/, null /*metadata*/); - - public static UnionVector empty(String name, BufferAllocator allocator) { - FieldType fieldType = FieldType.nullable(new ArrowType.Union( - UnionMode.Sparse, null)); - return new UnionVector(name, allocator, fieldType, null); - } - - public UnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - super(name, allocator, callBack); - this.fieldType = fieldType; - this.internalStruct = new NonNullableStructVector( - "internal", - allocator, - INTERNAL_STRUCT_TYPE, - callBack, - AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE, - false); - this.typeBuffer = allocator.getEmpty(); - this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH; - } - - public BufferAllocator getAllocator() { - return allocator; - } - - @Override - public MinorType getMinorType() { - return MinorType.UNION; - } - - @Override - public void initializeChildrenFromFields(List children) { - int count = 0; - for (Field child: children) { - int typeId = Types.getMinorTypeForArrowType(child.getType()).ordinal(); - if (this.fieldType != null) { - int[] typeIds = 
((ArrowType.Union)this.fieldType.getType()).getTypeIds(); - if (typeIds != null) { - typeId = typeIds[count++]; - } - } - typeIds[typeId] = child; - } - internalStruct.initializeChildrenFromFields(children); - } - - @Override - public List getChildrenFromFields() { - return internalStruct.getChildrenFromFields(); - } - - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - if (ownBuffers.size() != 1) { - throw new IllegalArgumentException("Illegal buffer count, expected 1, got: " + ownBuffers.size()); - } - ArrowBuf buffer = ownBuffers.get(0); - typeBuffer.getReferenceManager().release(); - typeBuffer = buffer.getReferenceManager().retain(buffer, allocator); - typeBufferAllocationSizeInBytes = checkedCastToInt(typeBuffer.capacity()); - this.valueCount = fieldNode.getLength(); - } - - @Override - public List getFieldBuffers() { - List result = new ArrayList<>(1); - setReaderAndWriterIndex(); - result.add(typeBuffer); - - return result; - } - - private void setReaderAndWriterIndex() { - typeBuffer.readerIndex(0); - typeBuffer.writerIndex(valueCount * TYPE_WIDTH); - } - - /** - * Get the inner vectors. - * - * @deprecated This API will be removed as the current implementations no longer support inner vectors. - * - * @return the inner vectors for this field as defined by the TypeLayout - */ - @Deprecated - @Override - public List getFieldInnerVectors() { - throw new UnsupportedOperationException("There are no inner vectors. Use geFieldBuffers"); - } - - private String fieldName(MinorType type) { - return type.name().toLowerCase(); - } - - private FieldType fieldType(MinorType type) { - return FieldType.nullable(type.getType()); - } - - private T addOrGet(Types.MinorType minorType, Class c) { - return addOrGet(null, minorType, c); - } - - private T addOrGet(String name, Types.MinorType minorType, ArrowType arrowType, Class c) { - return internalStruct.addOrGet(name == null ? 
fieldName(minorType) : name, FieldType.nullable(arrowType), c); - } - - private T addOrGet(String name, Types.MinorType minorType, Class c) { - return internalStruct.addOrGet(name == null ? fieldName(minorType) : name, fieldType(minorType), c); - } - - - @Override - public long getValidityBufferAddress() { - throw new UnsupportedOperationException(); - } - - public long getTypeBufferAddress() { - return typeBuffer.memoryAddress(); - } - - @Override - public long getDataBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public long getOffsetBufferAddress() { - throw new UnsupportedOperationException(); - } - - public ArrowBuf getTypeBuffer() { - return typeBuffer; - } - - @Override - public ArrowBuf getValidityBuffer() { throw new UnsupportedOperationException(); } - - @Override - public ArrowBuf getDataBuffer() { throw new UnsupportedOperationException(); } - - @Override - public ArrowBuf getOffsetBuffer() { throw new UnsupportedOperationException(); } - - public StructVector getStruct() { - if (structVector == null) { - int vectorCount = internalStruct.size(); - structVector = addOrGet(MinorType.STRUCT, StructVector.class); - if (internalStruct.size() > vectorCount) { - structVector.allocateNew(); - if (callBack != null) { - callBack.doWork(); - } - } - } - return structVector; - } - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#assign lowerCaseName = name?lower_case/> - <#if !minor.typeParams?? 
|| minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - - private ${name}Vector ${uncappedName}Vector; - - <#if minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - public ${name}Vector get${name}Vector() { - if (${uncappedName}Vector == null) { - throw new IllegalArgumentException("No ${name} present. Provide ArrowType argument to create a new vector"); - } - return ${uncappedName}Vector; - } - public ${name}Vector get${name}Vector(ArrowType arrowType) { - return get${name}Vector(null, arrowType); - } - public ${name}Vector get${name}Vector(String name, ArrowType arrowType) { - if (${uncappedName}Vector == null) { - int vectorCount = internalStruct.size(); - ${uncappedName}Vector = addOrGet(name, MinorType.${name?upper_case}, arrowType, ${name}Vector.class); - if (internalStruct.size() > vectorCount) { - ${uncappedName}Vector.allocateNew(); - if (callBack != null) { - callBack.doWork(); - } - } - } - return ${uncappedName}Vector; - } - <#else> - public ${name}Vector get${name}Vector() { - return get${name}Vector(null); - } - - public ${name}Vector get${name}Vector(String name) { - if (${uncappedName}Vector == null) { - int vectorCount = internalStruct.size(); - ${uncappedName}Vector = addOrGet(name, MinorType.${name?upper_case}, ${name}Vector.class); - if (internalStruct.size() > vectorCount) { - ${uncappedName}Vector.allocateNew(); - if (callBack != null) { - callBack.doWork(); - } - } - } - return ${uncappedName}Vector; - } - - - - - - public ListVector getList() { - if (listVector == null) { - int vectorCount = internalStruct.size(); - listVector = addOrGet(MinorType.LIST, ListVector.class); - if (internalStruct.size() > vectorCount) { - listVector.allocateNew(); - if (callBack != null) { - callBack.doWork(); - } - } - } - return listVector; - } - - public ListViewVector getListView() { - if 
(listViewVector == null) { - int vectorCount = internalStruct.size(); - listViewVector = addOrGet(MinorType.LISTVIEW, ListViewVector.class); - if (internalStruct.size() > vectorCount) { - listViewVector.allocateNew(); - if (callBack != null) { - callBack.doWork(); - } - } - } - return listViewVector; - } - - public MapVector getMap() { - if (mapVector == null) { - throw new IllegalArgumentException("No map present. Provide ArrowType argument to create a new vector"); - } - return mapVector; - } - - public MapVector getMap(ArrowType arrowType) { - return getMap(null, arrowType); - } - - public MapVector getMap(String name, ArrowType arrowType) { - if (mapVector == null) { - int vectorCount = internalStruct.size(); - mapVector = addOrGet(name, MinorType.MAP, arrowType, MapVector.class); - if (internalStruct.size() > vectorCount) { - mapVector.allocateNew(); - if (callBack != null) { - callBack.doWork(); - } - } - } - return mapVector; - } - - public int getTypeValue(int index) { - return typeBuffer.getByte(index * TYPE_WIDTH); - } - - @Override - public void allocateNew() throws OutOfMemoryException { - /* new allocation -- clear the current buffers */ - clear(); - internalStruct.allocateNew(); - try { - allocateTypeBuffer(); - } catch (Exception e) { - clear(); - throw e; - } - } - - @Override - public boolean allocateNewSafe() { - /* new allocation -- clear the current buffers */ - clear(); - boolean safe = internalStruct.allocateNewSafe(); - if (!safe) { return false; } - try { - allocateTypeBuffer(); - } catch (Exception e) { - clear(); - return false; - } - - return true; - } - - private void allocateTypeBuffer() { - typeBuffer = allocator.buffer(typeBufferAllocationSizeInBytes); - typeBuffer.readerIndex(0); - typeBuffer.setZero(0, typeBuffer.capacity()); - } - - @Override - public void reAlloc() { - internalStruct.reAlloc(); - reallocTypeBuffer(); - } - - private void reallocTypeBuffer() { - final long currentBufferCapacity = typeBuffer.capacity(); - long 
newAllocationSize = currentBufferCapacity * 2; - if (newAllocationSize == 0) { - if (typeBufferAllocationSizeInBytes > 0) { - newAllocationSize = typeBufferAllocationSizeInBytes; - } else { - newAllocationSize = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH * 2; - } - } - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - assert newAllocationSize >= 1; - - if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - - final ArrowBuf newBuf = allocator.buffer(checkedCastToInt(newAllocationSize)); - newBuf.setBytes(0, typeBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - typeBuffer.getReferenceManager().release(1); - typeBuffer = newBuf; - typeBufferAllocationSizeInBytes = (int)newAllocationSize; - } - - @Override - public void setInitialCapacity(int numRecords) { } - - @Override - public int getValueCapacity() { - return Math.min(getTypeBufferValueCapacity(), internalStruct.getValueCapacity()); - } - - @Override - public void close() { - clear(); - } - - @Override - public void clear() { - valueCount = 0; - typeBuffer.getReferenceManager().release(); - typeBuffer = allocator.getEmpty(); - internalStruct.clear(); - } - - @Override - public void reset() { - valueCount = 0; - typeBuffer.setZero(0, typeBuffer.capacity()); - internalStruct.reset(); - } - - @Override - public Field getField() { - List childFields = new ArrayList<>(); - List children = internalStruct.getChildren(); - int[] typeIds = new int[children.size()]; - for (ValueVector v : children) { - typeIds[childFields.size()] = v.getMinorType().ordinal(); - childFields.add(v.getField()); - } - - FieldType fieldType; - if (this.fieldType == null) { - fieldType = FieldType.nullable(new ArrowType.Union(Sparse, typeIds)); - } else { - final UnionMode mode = ((ArrowType.Union)this.fieldType.getType()).getMode(); - fieldType = new 
FieldType(this.fieldType.isNullable(), new ArrowType.Union(mode, typeIds), - this.fieldType.getDictionary(), this.fieldType.getMetadata()); - } - - return new Field(name, fieldType, childFields); - } - - @Override - public TransferPair getTransferPair(BufferAllocator allocator) { - return getTransferPair(name, allocator); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return getTransferPair(ref, allocator, null); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return new org.apache.arrow.vector.complex.UnionVector.TransferImpl(ref, allocator, callBack); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return getTransferPair(field, allocator, null); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return new org.apache.arrow.vector.complex.UnionVector.TransferImpl(field, allocator, callBack); - } - - @Override - public TransferPair makeTransferPair(ValueVector target) { - return new TransferImpl((UnionVector) target); - } - - @Override - public void copyFrom(int inIndex, int outIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - UnionVector fromCast = (UnionVector) from; - fromCast.getReader().setPosition(inIndex); - getWriter().setPosition(outIndex); - ComplexCopier.copy(fromCast.reader, writer); - } - - @Override - public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { - copyFrom(inIndex, outIndex, from); - } - - public FieldVector addVector(FieldVector v) { - final String name = v.getName().isEmpty() ? 
fieldName(v.getMinorType()) : v.getName(); - Preconditions.checkState(internalStruct.getChild(name) == null, String.format("%s vector already exists", name)); - final FieldVector newVector = internalStruct.addOrGet(name, v.getField().getFieldType(), v.getClass()); - v.makeTransferPair(newVector).transfer(); - internalStruct.putChild(name, newVector); - if (callBack != null) { - callBack.doWork(); - } - return newVector; - } - - /** - * Directly put a vector to internalStruct without creating a new one with same type. - */ - public void directAddVector(FieldVector v) { - String name = fieldName(v.getMinorType()); - Preconditions.checkState(internalStruct.getChild(name) == null, String.format("%s vector already exists", name)); - internalStruct.putChild(name, v); - if (callBack != null) { - callBack.doWork(); - } - } - - private class TransferImpl implements TransferPair { - private final TransferPair internalStructVectorTransferPair; - private final UnionVector to; - - public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { - to = new UnionVector(name, allocator, /* field type */ null, callBack); - internalStructVectorTransferPair = internalStruct.makeTransferPair(to.internalStruct); - } - - public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { - to = new UnionVector(field.getName(), allocator, null, callBack); - internalStructVectorTransferPair = internalStruct.makeTransferPair(to.internalStruct); - } - - public TransferImpl(UnionVector to) { - this.to = to; - internalStructVectorTransferPair = internalStruct.makeTransferPair(to.internalStruct); - } - - @Override - public void transfer() { - to.clear(); - ReferenceManager refManager = typeBuffer.getReferenceManager(); - to.typeBuffer = refManager.transferOwnership(typeBuffer, to.allocator).getTransferredBuffer(); - internalStructVectorTransferPair.transfer(); - to.valueCount = valueCount; - clear(); - } - - @Override - public void splitAndTransfer(int startIndex, 
int length) { - Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, - "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount); - to.clear(); - - internalStructVectorTransferPair.splitAndTransfer(startIndex, length); - final int startPoint = startIndex * TYPE_WIDTH; - final int sliceLength = length * TYPE_WIDTH; - final ArrowBuf slicedBuffer = typeBuffer.slice(startPoint, sliceLength); - final ReferenceManager refManager = slicedBuffer.getReferenceManager(); - to.typeBuffer = refManager.transferOwnership(slicedBuffer, to.allocator).getTransferredBuffer(); - to.setValueCount(length); - } - - @Override - public ValueVector getTo() { - return to; - } - - @Override - public void copyValueSafe(int from, int to) { - this.to.copyFrom(from, to, UnionVector.this); - } - } - - @Override - public FieldReader getReader() { - if (reader == null) { - reader = new UnionReader(this); - } - return reader; - } - - public FieldWriter getWriter() { - if (writer == null) { - writer = new UnionWriter(this); - } - return writer; - } - - @Override - public int getBufferSize() { - if (valueCount == 0) { return 0; } - - return (valueCount * TYPE_WIDTH) + internalStruct.getBufferSize(); - } - - @Override - public int getBufferSizeFor(final int valueCount) { - if (valueCount == 0) { - return 0; - } - - long bufferSize = 0; - for (final ValueVector v : (Iterable) this) { - bufferSize += v.getBufferSizeFor(valueCount); - } - - return (int) bufferSize + (valueCount * TYPE_WIDTH); - } - - @Override - public ArrowBuf[] getBuffers(boolean clear) { - List list = new java.util.ArrayList<>(); - setReaderAndWriterIndex(); - if (getBufferSize() != 0) { - list.add(typeBuffer); - list.addAll(java.util.Arrays.asList(internalStruct.getBuffers(clear))); - } - if (clear) { - valueCount = 0; - typeBuffer.getReferenceManager().retain(); - typeBuffer.getReferenceManager().release(); - typeBuffer = allocator.getEmpty(); - } - 
return list.toArray(new ArrowBuf[list.size()]); - } - - @Override - public Iterator iterator() { - return internalStruct.iterator(); - } - - public ValueVector getVector(int index) { - return getVector(index, null); - } - - public ValueVector getVector(int index, ArrowType arrowType) { - int type = typeBuffer.getByte(index * TYPE_WIDTH); - return getVectorByType(type, arrowType); - } - - public ValueVector getVectorByType(int typeId) { - return getVectorByType(typeId, null); - } - - public ValueVector getVectorByType(int typeId, ArrowType arrowType) { - Field type = typeIds[typeId]; - Types.MinorType minorType; - String name = null; - if (type == null) { - minorType = Types.MinorType.values()[typeId]; - } else { - minorType = Types.getMinorTypeForArrowType(type.getType()); - name = type.getName(); - } - switch (minorType) { - case NULL: - return null; - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if !minor.typeParams?? || minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - case ${name?upper_case}: - return get${name}Vector(name<#if minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary">, arrowType); - - - - case STRUCT: - return getStruct(); - case LIST: - return getList(); - case LISTVIEW: - return getListView(); - case MAP: - return getMap(name, arrowType); - default: - throw new UnsupportedOperationException("Cannot support type: " + MinorType.values()[typeId]); - } - } - - public Object getObject(int index) { - ValueVector vector = getVector(index); - if (vector != null) { - return vector.isNull(index) ? 
null : vector.getObject(index); - } - return null; - } - - public byte[] get(int index) { - return null; - } - - public void get(int index, ComplexHolder holder) { - } - - public void get(int index, UnionHolder holder) { - FieldReader reader = new UnionReader(UnionVector.this); - reader.setPosition(index); - holder.reader = reader; - } - - public int getValueCount() { - return valueCount; - } - - /** - * IMPORTANT: Union types always return non null as there is no validity buffer. - * - * To check validity correctly you must check the underlying vector. - */ - public boolean isNull(int index) { - return false; - } - - @Override - public int getNullCount() { - return 0; - } - - public int isSet(int index) { - return isNull(index) ? 0 : 1; - } - - UnionWriter writer; - - public void setValueCount(int valueCount) { - this.valueCount = valueCount; - while (valueCount > getTypeBufferValueCapacity()) { - reallocTypeBuffer(); - } - internalStruct.setValueCount(valueCount); - } - - public void setSafe(int index, UnionHolder holder) { - setSafe(index, holder, null); - } - - public void setSafe(int index, UnionHolder holder, ArrowType arrowType) { - FieldReader reader = holder.reader; - if (writer == null) { - writer = new UnionWriter(UnionVector.this); - } - writer.setPosition(index); - MinorType type = reader.getMinorType(); - switch (type) { - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if !minor.typeParams?? 
|| minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - case ${name?upper_case}: - Nullable${name}Holder ${uncappedName}Holder = new Nullable${name}Holder(); - reader.read(${uncappedName}Holder); - <#if minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - setSafe(index, ${uncappedName}Holder, arrowType); - <#else> - setSafe(index, ${uncappedName}Holder); - - break; - - - - case STRUCT: { - ComplexCopier.copy(reader, writer); - break; - } - case LIST: { - ComplexCopier.copy(reader, writer); - break; - } - default: - throw new UnsupportedOperationException(); - } - } - - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if !minor.typeParams?? || minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - <#if minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - public void setSafe(int index, Nullable${name}Holder holder, ArrowType arrowType) { - setType(index, MinorType.${name?upper_case}); - get${name}Vector(null, arrowType).setSafe(index, holder); - } - <#else> - public void setSafe(int index, Nullable${name}Holder holder) { - setType(index, MinorType.${name?upper_case}); - get${name}Vector(null).setSafe(index, holder); - } - - - - - - public void setType(int index, MinorType type) { - while (index >= getTypeBufferValueCapacity()) { - reallocTypeBuffer(); - } - typeBuffer.setByte(index * TYPE_WIDTH , (byte) type.ordinal()); - } - - private int getTypeBufferValueCapacity() { - return capAtMaxInt(typeBuffer.capacity() / TYPE_WIDTH); - } - - @Override - public int hashCode(int index) { - return hashCode(index, 
null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - ValueVector vec = getVector(index); - if (vec == null) { - return ArrowBufPointer.NULL_HASH_CODE; - } - return vec.hashCode(index, hasher); - } - - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } - - @Override - public String getName() { - return name; - } - - @Override - public String toString() { - return ValueVectorUtility.getToString(this, 0, getValueCount()); - } - - @Override - public T addOrGet(String name, FieldType fieldType, Class clazz) { - return internalStruct.addOrGet(name, fieldType, clazz); - } - - @Override - public T getChild(String name, Class clazz) { - return internalStruct.getChild(name, clazz); - } - - @Override - public VectorWithOrdinal getChildVectorWithOrdinal(String name) { - return internalStruct.getChildVectorWithOrdinal(name); - } - - @Override - public int size() { - return internalStruct.size(); - } - - @Override - public void setInitialCapacity(int valueCount, double density) { - for (final ValueVector vector : internalStruct) { - if (vector instanceof DensityAwareVector) { - ((DensityAwareVector) vector).setInitialCapacity(valueCount, density); - } else { - vector.setInitialCapacity(valueCount); - } - } - } - - /** - * Set the element at the given index to null. For UnionVector, it throws an UnsupportedOperationException - * as nulls are not supported at the top level and isNull() always returns false. 
- * - * @param index position of element - * @throws UnsupportedOperationException whenever invoked - */ - @Override - public void setNull(int index) { - throw new UnsupportedOperationException("The method setNull() is not supported on UnionVector."); - } -} diff --git a/java/vector/src/main/codegen/templates/UnionViewWriter.java b/java/vector/src/main/codegen/templates/UnionViewWriter.java deleted file mode 100644 index 7b834d8b6cd86..0000000000000 --- a/java/vector/src/main/codegen/templates/UnionViewWriter.java +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionViewWriter.java" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> -import org.apache.arrow.vector.complex.writer.BaseWriter; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.complex.impl.NullableStructWriterFactory; -import org.apache.arrow.vector.types.Types; - -<#function is_timestamp_tz type> - <#return type?starts_with("TimeStamp") && type?ends_with("TZ")> - - -/* - * This class is generated using freemarker and the ${.template_name} template. - */ -@SuppressWarnings("unused") -public class UnionViewWriter extends UnionWriter { - - public UnionViewWriter(UnionVector vector) { - this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); - } - - public UnionViewWriter(UnionVector vector, NullableStructWriterFactory nullableStructWriterFactory) { - super(vector, nullableStructWriterFactory); - } - - @Override - public StructWriter struct() { - data.setType(idx(), MinorType.LISTVIEW); - getListWriter().setPosition(idx()); - return getListWriter().struct(); - } - - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) /> - <#if !minor.typeParams?? 
|| minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - - private ${name}Writer ${name?uncap_first}Writer; - - <#if minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - private ${name}Writer get${name}Writer(ArrowType arrowType) { - if (${uncappedName}Writer == null) { - ${uncappedName}Writer = new ${name}WriterImpl(data.get${name}Vector(arrowType)); - ${uncappedName}Writer.setPosition(idx()); - writers.add(${uncappedName}Writer); - } - return ${uncappedName}Writer; - } - - public ${name}Writer as${name}(ArrowType arrowType) { - data.setType(idx(), MinorType.${name?upper_case}); - return get${name}Writer(arrowType); - } - <#else> - private ${name}Writer get${name}Writer() { - if (${uncappedName}Writer == null) { - ${uncappedName}Writer = new ${name}WriterImpl(data.get${name}Vector()); - ${uncappedName}Writer.setPosition(idx()); - writers.add(${uncappedName}Writer); - } - return ${uncappedName}Writer; - } - - public ${name}Writer as${name}() { - data.setType(idx(), MinorType.${name?upper_case}); - return get${name}Writer(); - } - - - @Override - public void write(${name}Holder holder) { - data.setType(idx(), MinorType.${name?upper_case}); - <#if minor.class?starts_with("Decimal")> - ArrowType arrowType = new ArrowType.Decimal(holder.precision, holder.scale, ${name}Holder.WIDTH * 8); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, , arrowType); - <#elseif is_timestamp_tz(minor.class)> - ArrowType.Timestamp arrowTypeWithoutTz = (ArrowType.Timestamp) MinorType.${name?upper_case?remove_ending("TZ")}.getType(); - ArrowType arrowType = new ArrowType.Timestamp(arrowTypeWithoutTz.getUnit(), holder.timezone); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write(holder); - 
<#elseif minor.class == "Duration"> - ArrowType arrowType = new ArrowType.Duration(holder.unit); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write(holder); - <#elseif minor.class == "FixedSizeBinary"> - ArrowType arrowType = new ArrowType.FixedSizeBinary(holder.byteWidth); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write(holder); - <#else> - get${name}Writer().setPosition(idx()); - get${name}Writer().write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, ); - - } - - public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, <#if minor.class?starts_with("Decimal")>, ArrowType arrowType) { - data.setType(idx(), MinorType.${name?upper_case}); - <#if minor.class?starts_with("Decimal")> - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write${name}(<#list fields as field>${field.name}<#if field_has_next>, , arrowType); - <#elseif is_timestamp_tz(minor.class)> - ArrowType.Timestamp arrowTypeWithoutTz = (ArrowType.Timestamp) MinorType.${name?upper_case?remove_ending("TZ")}.getType(); - ArrowType arrowType = new ArrowType.Timestamp(arrowTypeWithoutTz.getUnit(), "UTC"); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write${name}(<#list fields as field>${field.name}<#if field_has_next>, ); - <#elseif minor.class == "Duration" || minor.class == "FixedSizeBinary"> - // This is expected to throw. There's nothing more that we can do here since we can't infer any - // sort of default unit for the Duration or a default width for the FixedSizeBinary types. 
- ArrowType arrowType = MinorType.${name?upper_case}.getType(); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write${name}(<#list fields as field>${field.name}<#if field_has_next>, ); - <#else> - get${name}Writer().setPosition(idx()); - get${name}Writer().write${name}(<#list fields as field>${field.name}<#if field_has_next>, ); - - } - <#if minor.class?starts_with("Decimal")> - public void write${name}(${friendlyType} value) { - data.setType(idx(), MinorType.${name?upper_case}); - ArrowType arrowType = new ArrowType.Decimal(value.precision(), value.scale(), ${name}Vector.TYPE_WIDTH * 8); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write${name}(value); - } - - public void writeBigEndianBytesTo${name}(byte[] value, ArrowType arrowType) { - data.setType(idx(), MinorType.${name?upper_case}); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).writeBigEndianBytesTo${name}(value, arrowType); - } - <#elseif minor.class?ends_with("VarBinary")> - @Override - public void write${minor.class}(byte[] value) { - get${name}Writer().setPosition(idx()); - get${name}Writer().write${minor.class}(value); - } - - @Override - public void write${minor.class}(byte[] value, int offset, int length) { - get${name}Writer().setPosition(idx()); - get${name}Writer().write${minor.class}(value, offset, length); - } - - @Override - public void write${minor.class}(ByteBuffer value) { - get${name}Writer().setPosition(idx()); - get${name}Writer().write${minor.class}(value); - } - - @Override - public void write${minor.class}(ByteBuffer value, int offset, int length) { - get${name}Writer().setPosition(idx()); - get${name}Writer().write${minor.class}(value, offset, length); - } - <#elseif minor.class?ends_with("VarChar")> - @Override - public void write${minor.class}(${friendlyType} value) { - get${name}Writer().setPosition(idx()); - get${name}Writer().write${minor.class}(value); - } - - @Override - public 
void write${minor.class}(String value) { - get${name}Writer().setPosition(idx()); - get${name}Writer().write${minor.class}(value); - } - - - - - - <#list vv.types as type><#list type.minor as minor> - <#assign lowerName = minor.class?uncap_first /> - <#if lowerName == "int" ><#assign lowerName = "integer" /> - <#assign upperName = minor.class?upper_case /> - <#assign capName = minor.class?cap_first /> - <#if !minor.typeParams?? || minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - - @Override - public ${capName}Writer ${lowerName}() { - data.setType(idx(), MinorType.LISTVIEW); - getListViewWriter().setPosition(idx()); - return getListViewWriter().${lowerName}(); - } - - -} diff --git a/java/vector/src/main/codegen/templates/UnionWriter.java b/java/vector/src/main/codegen/templates/UnionWriter.java deleted file mode 100644 index bfe97e2770553..0000000000000 --- a/java/vector/src/main/codegen/templates/UnionWriter.java +++ /dev/null @@ -1,517 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.complex.impl.NullableStructWriterFactory; -import org.apache.arrow.vector.types.Types; - -<@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionWriter.java" /> - - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.complex.impl; - -<#include "/@includes/vv_imports.ftl" /> -import org.apache.arrow.vector.complex.writer.BaseWriter; -import org.apache.arrow.vector.types.Types.MinorType; - -<#function is_timestamp_tz type> - <#return type?starts_with("TimeStamp") && type?ends_with("TZ")> - - - -/* - * This class is generated using freemarker and the ${.template_name} template. - */ -@SuppressWarnings("unused") -public class UnionWriter extends AbstractFieldWriter implements FieldWriter { - - protected UnionVector data; - protected StructWriter structWriter; - protected UnionListWriter listWriter; - protected UnionListViewWriter listViewWriter; - protected UnionMapWriter mapWriter; - protected List writers = new java.util.ArrayList<>(); - protected final NullableStructWriterFactory nullableStructWriterFactory; - - public UnionWriter(UnionVector vector) { - this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); - } - - public UnionWriter(UnionVector vector, NullableStructWriterFactory nullableStructWriterFactory) { - data = vector; - this.nullableStructWriterFactory = nullableStructWriterFactory; - } - - /** - * Convert the UnionWriter to a UnionViewWriter. 
- * - * @return the converted UnionViewWriter - */ - public UnionViewWriter toViewWriter() { - UnionViewWriter unionViewWriter = new UnionViewWriter(data, nullableStructWriterFactory); - unionViewWriter.structWriter = structWriter; - unionViewWriter.listWriter = listWriter; - unionViewWriter.listViewWriter = listViewWriter; - unionViewWriter.mapWriter = mapWriter; - unionViewWriter.writers = writers; - unionViewWriter.setPosition(this.getPosition()); - return unionViewWriter; - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - for (BaseWriter writer : writers) { - writer.setPosition(index); - } - } - - - @Override - public void start() { - data.setType(idx(), MinorType.STRUCT); - getStructWriter().start(); - } - - @Override - public void end() { - getStructWriter().end(); - } - - @Override - public void startList() { - getListWriter().startList(); - data.setType(idx(), MinorType.LIST); - } - - @Override - public void endList() { - getListWriter().endList(); - } - - @Override - public void startListView() { - getListViewWriter().startListView(); - data.setType(idx(), MinorType.LISTVIEW); - } - - @Override - public void endListView() { - getListViewWriter().endListView(); - } - - @Override - public void startMap() { - getMapWriter().startMap(); - data.setType(idx(), MinorType.MAP); - } - - @Override - public void endMap() { - getMapWriter().endMap(); - } - - @Override - public void startEntry() { - getMapWriter().startEntry(); - } - - @Override - public MapWriter key() { - return getMapWriter().key(); - } - - @Override - public MapWriter value() { - return getMapWriter().value(); - } - - @Override - public void endEntry() { - getMapWriter().endEntry(); - } - - private StructWriter getStructWriter() { - if (structWriter == null) { - structWriter = nullableStructWriterFactory.build(data.getStruct()); - structWriter.setPosition(idx()); - writers.add(structWriter); - } - return structWriter; - } - - public StructWriter asStruct() { - 
data.setType(idx(), MinorType.STRUCT); - return getStructWriter(); - } - - protected ListWriter getListWriter() { - if (listWriter == null) { - listWriter = new UnionListWriter(data.getList(), nullableStructWriterFactory); - listWriter.setPosition(idx()); - writers.add(listWriter); - } - return listWriter; - } - - protected ListWriter getListViewWriter() { - if (listViewWriter == null) { - listViewWriter = new UnionListViewWriter(data.getListView(), nullableStructWriterFactory); - listViewWriter.setPosition(idx()); - writers.add(listViewWriter); - } - return listViewWriter; - } - - public ListWriter asList() { - data.setType(idx(), MinorType.LIST); - return getListWriter(); - } - - public ListWriter asListView() { - data.setType(idx(), MinorType.LISTVIEW); - return getListViewWriter(); - } - - private MapWriter getMapWriter() { - if (mapWriter == null) { - mapWriter = new UnionMapWriter(data.getMap(new ArrowType.Map(false))); - mapWriter.setPosition(idx()); - writers.add(mapWriter); - } - return mapWriter; - } - - private MapWriter getMapWriter(ArrowType arrowType) { - if (mapWriter == null) { - mapWriter = new UnionMapWriter(data.getMap(arrowType)); - mapWriter.setPosition(idx()); - writers.add(mapWriter); - } - return mapWriter; - } - - public MapWriter asMap(ArrowType arrowType) { - data.setType(idx(), MinorType.MAP); - return getMapWriter(arrowType); - } - - BaseWriter getWriter(MinorType minorType) { - return getWriter(minorType, null); - } - - BaseWriter getWriter(MinorType minorType, ArrowType arrowType) { - switch (minorType) { - case STRUCT: - return getStructWriter(); - case LIST: - return getListWriter(); - case LISTVIEW: - return getListViewWriter(); - case MAP: - return getMapWriter(arrowType); - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#if !minor.typeParams?? 
|| minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - case ${name?upper_case}: - <#if minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - return get${name}Writer(arrowType); - <#else> - return get${name}Writer(); - - - - - default: - throw new UnsupportedOperationException("Unknown type: " + minorType); - } - } - <#list vv.types as type> - <#list type.minor as minor> - <#assign name = minor.class?cap_first /> - <#assign fields = minor.fields!type.fields /> - <#assign uncappedName = name?uncap_first/> - <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) /> - <#if !minor.typeParams?? || minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - - private ${name}Writer ${name?uncap_first}Writer; - - <#if minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - private ${name}Writer get${name}Writer(ArrowType arrowType) { - if (${uncappedName}Writer == null) { - ${uncappedName}Writer = new ${name}WriterImpl(data.get${name}Vector(arrowType)); - ${uncappedName}Writer.setPosition(idx()); - writers.add(${uncappedName}Writer); - } - return ${uncappedName}Writer; - } - - public ${name}Writer as${name}(ArrowType arrowType) { - data.setType(idx(), MinorType.${name?upper_case}); - return get${name}Writer(arrowType); - } - <#else> - private ${name}Writer get${name}Writer() { - if (${uncappedName}Writer == null) { - ${uncappedName}Writer = new ${name}WriterImpl(data.get${name}Vector()); - ${uncappedName}Writer.setPosition(idx()); - writers.add(${uncappedName}Writer); - } - return ${uncappedName}Writer; - } - - public ${name}Writer as${name}() { - data.setType(idx(), MinorType.${name?upper_case}); - return get${name}Writer(); - } - - - 
@Override - public void write(${name}Holder holder) { - data.setType(idx(), MinorType.${name?upper_case}); - <#if minor.class?starts_with("Decimal")> - ArrowType arrowType = new ArrowType.Decimal(holder.precision, holder.scale, ${name}Holder.WIDTH * 8); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, , arrowType); - <#elseif is_timestamp_tz(minor.class)> - ArrowType.Timestamp arrowTypeWithoutTz = (ArrowType.Timestamp) MinorType.${name?upper_case?remove_ending("TZ")}.getType(); - ArrowType arrowType = new ArrowType.Timestamp(arrowTypeWithoutTz.getUnit(), holder.timezone); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write(holder); - <#elseif minor.class == "Duration"> - ArrowType arrowType = new ArrowType.Duration(holder.unit); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write(holder); - <#elseif minor.class == "FixedSizeBinary"> - ArrowType arrowType = new ArrowType.FixedSizeBinary(holder.byteWidth); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write(holder); - <#else> - get${name}Writer().setPosition(idx()); - get${name}Writer().write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, ); - - } - - public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, <#if minor.class?starts_with("Decimal")>, ArrowType arrowType) { - data.setType(idx(), MinorType.${name?upper_case}); - <#if minor.class?starts_with("Decimal")> - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write${name}(<#list fields as field>${field.name}<#if field_has_next>, , arrowType); - <#elseif is_timestamp_tz(minor.class)> - ArrowType.Timestamp arrowTypeWithoutTz = (ArrowType.Timestamp) MinorType.${name?upper_case?remove_ending("TZ")}.getType(); - ArrowType arrowType = new 
ArrowType.Timestamp(arrowTypeWithoutTz.getUnit(), "UTC"); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write${name}(<#list fields as field>${field.name}<#if field_has_next>, ); - <#elseif minor.class == "Duration" || minor.class == "FixedSizeBinary"> - // This is expected to throw. There's nothing more that we can do here since we can't infer any - // sort of default unit for the Duration or a default width for the FixedSizeBinary types. - ArrowType arrowType = MinorType.${name?upper_case}.getType(); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write${name}(<#list fields as field>${field.name}<#if field_has_next>, ); - <#else> - get${name}Writer().setPosition(idx()); - get${name}Writer().write${name}(<#list fields as field>${field.name}<#if field_has_next>, ); - - } - <#if minor.class?starts_with("Decimal")> - public void write${name}(${friendlyType} value) { - data.setType(idx(), MinorType.${name?upper_case}); - ArrowType arrowType = new ArrowType.Decimal(value.precision(), value.scale(), ${name}Vector.TYPE_WIDTH * 8); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).write${name}(value); - } - - public void writeBigEndianBytesTo${name}(byte[] value, ArrowType arrowType) { - data.setType(idx(), MinorType.${name?upper_case}); - get${name}Writer(arrowType).setPosition(idx()); - get${name}Writer(arrowType).writeBigEndianBytesTo${name}(value, arrowType); - } - <#elseif minor.class?ends_with("VarBinary")> - @Override - public void write${minor.class}(byte[] value) { - get${name}Writer().setPosition(idx()); - get${name}Writer().write${minor.class}(value); - } - - @Override - public void write${minor.class}(byte[] value, int offset, int length) { - get${name}Writer().setPosition(idx()); - get${name}Writer().write${minor.class}(value, offset, length); - } - - @Override - public void write${minor.class}(ByteBuffer value) { - get${name}Writer().setPosition(idx()); - 
get${name}Writer().write${minor.class}(value); - } - - @Override - public void write${minor.class}(ByteBuffer value, int offset, int length) { - get${name}Writer().setPosition(idx()); - get${name}Writer().write${minor.class}(value, offset, length); - } - <#elseif minor.class?ends_with("VarChar")> - @Override - public void write${minor.class}(${friendlyType} value) { - get${name}Writer().setPosition(idx()); - get${name}Writer().write${minor.class}(value); - } - - @Override - public void write${minor.class}(String value) { - get${name}Writer().setPosition(idx()); - get${name}Writer().write${minor.class}(value); - } - - - - - - public void writeNull() { - } - - @Override - public StructWriter struct() { - data.setType(idx(), MinorType.LIST); - getListWriter().setPosition(idx()); - return getListWriter().struct(); - } - - @Override - public ListWriter list() { - data.setType(idx(), MinorType.LIST); - getListWriter().setPosition(idx()); - return getListWriter().list(); - } - - @Override - public ListWriter list(String name) { - data.setType(idx(), MinorType.STRUCT); - getStructWriter().setPosition(idx()); - return getStructWriter().list(name); - } - - @Override - public ListWriter listView() { - data.setType(idx(), MinorType.LISTVIEW); - getListViewWriter().setPosition(idx()); - return getListViewWriter().listView(); - } - - @Override - public ListWriter listView(String name) { - data.setType(idx(), MinorType.STRUCT); - getStructWriter().setPosition(idx()); - return getStructWriter().listView(name); - } - - @Override - public StructWriter struct(String name) { - data.setType(idx(), MinorType.STRUCT); - getStructWriter().setPosition(idx()); - return getStructWriter().struct(name); - } - - @Override - public MapWriter map() { - data.setType(idx(), MinorType.MAP); - getListWriter().setPosition(idx()); - return getListWriter().map(); - } - - @Override - public MapWriter map(boolean keysSorted) { - data.setType(idx(), MinorType.MAP); - getListWriter().setPosition(idx()); - 
return getListWriter().map(keysSorted); - } - - @Override - public MapWriter map(String name) { - data.setType(idx(), MinorType.MAP); - getStructWriter().setPosition(idx()); - return getStructWriter().map(name); - } - - @Override - public MapWriter map(String name, boolean keysSorted) { - data.setType(idx(), MinorType.MAP); - getStructWriter().setPosition(idx()); - return getStructWriter().map(name, keysSorted); - } - - <#list vv.types as type><#list type.minor as minor> - <#assign lowerName = minor.class?uncap_first /> - <#if lowerName == "int" ><#assign lowerName = "integer" /> - <#assign upperName = minor.class?upper_case /> - <#assign capName = minor.class?cap_first /> - <#if !minor.typeParams?? || minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - @Override - public ${capName}Writer ${lowerName}(String name) { - data.setType(idx(), MinorType.STRUCT); - getStructWriter().setPosition(idx()); - return getStructWriter().${lowerName}(name); - } - - @Override - public ${capName}Writer ${lowerName}() { - data.setType(idx(), MinorType.LIST); - getListWriter().setPosition(idx()); - return getListWriter().${lowerName}(); - } - - <#if minor.class?starts_with("Decimal") || is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> - @Override - public ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}) { - data.setType(idx(), MinorType.STRUCT); - getStructWriter().setPosition(idx()); - return getStructWriter().${lowerName}(name<#list minor.typeParams as typeParam>, ${typeParam.name}); - } - - - - @Override - public void allocate() { - data.allocateNew(); - } - - @Override - public void clear() { - data.clear(); - } - - @Override - public void close() throws Exception { - data.close(); - } - - @Override - public Field getField() { - return data.getField(); - } - - @Override - public int 
getValueCapacity() { - return data.getValueCapacity(); - } -} diff --git a/java/vector/src/main/codegen/templates/ValueHolders.java b/java/vector/src/main/codegen/templates/ValueHolders.java deleted file mode 100644 index 2a2bbe81b2e74..0000000000000 --- a/java/vector/src/main/codegen/templates/ValueHolders.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -<@pp.dropOutputFile /> -<#list vv.modes as mode> -<#list vv.types as type> -<#list type.minor as minor> - -<#assign className="${mode.prefix}${minor.class}Holder" /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/holders/${className}.java" /> - -<#include "/@includes/license.ftl" /> - -package org.apache.arrow.vector.holders; - -<#include "/@includes/vv_imports.ftl" /> -/** - * Source code generated using FreeMarker template ${.template_name} - */ -public final class ${className} implements ValueHolder{ - - <#if mode.name == "Repeated"> - - /** The first index (inclusive) into the Vector. **/ - public int start; - - /** The last index (exclusive) into the Vector. **/ - public int end; - - /** The Vector holding the actual values. 
**/ - public ${minor.class}Vector vector; - - <#else> - - public static final int WIDTH = ${type.width}; - - <#if mode.name == "Optional">public int isSet; - <#else>public final int isSet = 1; - <#assign fields = (minor.fields!type.fields) + (minor.typeParams![]) /> - <#list fields as field> - public ${field.type} ${field.name}; - - - /** - * Reason for not supporting the operation is that ValueHolders are potential scalar - * replacements and hence we don't want any methods to be invoked on them. - */ - public int hashCode(){ - throw new UnsupportedOperationException(); - } - - /** - * Reason for not supporting the operation is that ValueHolders are potential scalar - * replacements and hence we don't want any methods to be invoked on them. - */ - public String toString(){ - throw new UnsupportedOperationException(); - } - -} - - - - \ No newline at end of file diff --git a/java/vector/src/main/java/module-info.java b/java/vector/src/main/java/module-info.java deleted file mode 100644 index 8ba1b3579e0e1..0000000000000 --- a/java/vector/src/main/java/module-info.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -module org.apache.arrow.vector { - exports org.apache.arrow.vector; - exports org.apache.arrow.vector.compare; - exports org.apache.arrow.vector.compare.util; - exports org.apache.arrow.vector.complex; - exports org.apache.arrow.vector.complex.impl; - exports org.apache.arrow.vector.complex.reader; - exports org.apache.arrow.vector.complex.writer; - exports org.apache.arrow.vector.compression; - exports org.apache.arrow.vector.dictionary; - exports org.apache.arrow.vector.extension; - exports org.apache.arrow.vector.holders; - exports org.apache.arrow.vector.ipc; - exports org.apache.arrow.vector.ipc.message; - exports org.apache.arrow.vector.table; - exports org.apache.arrow.vector.types; - exports org.apache.arrow.vector.types.pojo; - exports org.apache.arrow.vector.util; - exports org.apache.arrow.vector.validate; - - opens org.apache.arrow.vector.types.pojo to - com.fasterxml.jackson.databind; - - requires com.fasterxml.jackson.annotation; - requires com.fasterxml.jackson.core; - requires com.fasterxml.jackson.databind; - requires com.fasterxml.jackson.datatype.jsr310; - requires flatbuffers.java; - requires jdk.unsupported; - requires org.apache.arrow.format; - requires org.apache.arrow.memory.core; - requires org.apache.commons.codec; - requires org.slf4j; - - uses org.apache.arrow.vector.compression.CompressionCodec.Factory; -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java b/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java deleted file mode 100644 index 0b392e14a8aa5..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import org.apache.arrow.util.Preconditions; - -/** - * Tuple class containing a vector and whether it was created. - * - * @param The type of vector the result is for. - */ -public class AddOrGetResult { - private final V vector; - private final boolean created; - - /** Constructs a new object. */ - public AddOrGetResult(V vector, boolean created) { - this.vector = Preconditions.checkNotNull(vector); - this.created = created; - } - - /** Returns the vector. */ - public V getVector() { - return vector; - } - - /** Returns whether the vector is created. */ - public boolean isCreated() { - return created; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java b/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java deleted file mode 100644 index aae833c7addbc..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import org.apache.arrow.vector.complex.RepeatedFixedWidthVectorLike; -import org.apache.arrow.vector.complex.RepeatedVariableWidthVectorLike; - -/** Helper utility methods for allocating storage for Vectors. */ -public class AllocationHelper { - private AllocationHelper() {} - - /** - * Allocates the vector. - * - * @param v The vector to allocate. - * @param valueCount Number of values to allocate. - * @param bytesPerValue bytes per value. - * @throws org.apache.arrow.memory.OutOfMemoryException if it can't allocate the memory. - */ - public static void allocate(ValueVector v, int valueCount, int bytesPerValue) { - allocate(v, valueCount, bytesPerValue, 5); - } - - /** - * Allocates memory for a vector assuming given number of values and their width. - * - * @param v The vector the allocate. - * @param valueCount The number of elements to allocate. - * @param bytesPerValue The bytes per value to use for allocating underlying storage - * @param childValCount If v is a repeated vector, this is number of child elements - * to allocate. - * @throws org.apache.arrow.memory.OutOfMemoryException if it can't allocate the memory. 
- */ - public static void allocatePrecomputedChildCount( - ValueVector v, int valueCount, int bytesPerValue, int childValCount) { - if (v instanceof FixedWidthVector) { - ((FixedWidthVector) v).allocateNew(valueCount); - } else if (v instanceof VariableWidthVector) { - ((VariableWidthVector) v).allocateNew(valueCount * bytesPerValue, valueCount); - } else if (v instanceof RepeatedFixedWidthVectorLike) { - ((RepeatedFixedWidthVectorLike) v).allocateNew(valueCount, childValCount); - } else if (v instanceof RepeatedVariableWidthVectorLike) { - ((RepeatedVariableWidthVectorLike) v) - .allocateNew(childValCount * bytesPerValue, valueCount, childValCount); - } else { - v.allocateNew(); - } - } - - /** - * Allocates memory for a vector assuming given number of values and their width. - * - * @param v The vector the allocate. - * @param valueCount The number of elements to allocate. - * @param bytesPerValue The bytes per value to use for allocating underlying storage - * @param repeatedPerTop If v is a repeated vector, this is assumed number of - * elements per child. - * @throws org.apache.arrow.memory.OutOfMemoryException if it can't allocate the memory - */ - public static void allocate( - ValueVector v, int valueCount, int bytesPerValue, int repeatedPerTop) { - allocatePrecomputedChildCount(v, valueCount, bytesPerValue, repeatedPerTop * valueCount); - } - - /** - * Allocates the exact amount if v is fixed width, otherwise falls back to dynamic allocation. 
- * - * @param v value vector we are trying to allocate - * @param valueCount size we are trying to allocate - * @throws org.apache.arrow.memory.OutOfMemoryException if it can't allocate the memory - */ - public static void allocateNew(ValueVector v, int valueCount) { - if (v instanceof FixedWidthVector) { - ((FixedWidthVector) v).allocateNew(valueCount); - } else if (v instanceof VariableWidthVector) { - ((VariableWidthVector) v).allocateNew(valueCount); - } else { - v.allocateNew(); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java deleted file mode 100644 index 4be55396b7492..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java +++ /dev/null @@ -1,960 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.ByteFunctionHelpers; -import org.apache.arrow.memory.util.MemoryUtil; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.TransferPair; - -/** - * BaseFixedWidthVector provides an abstract interface for implementing vectors of fixed width - * values. The vectors are nullable implying that zero or more elements in the vector could be NULL. - */ -public abstract class BaseFixedWidthVector extends BaseValueVector - implements FixedWidthVector, FieldVector, VectorDefinitionSetter { - private final int typeWidth; - - protected int lastValueCapacity; - protected int actualValueCapacity; - - protected final Field field; - private int allocationMonitor; - protected ArrowBuf validityBuffer; - protected ArrowBuf valueBuffer; - protected int valueCount; - - /** - * Constructs a new instance. - * - * @param field field materialized by this vector - * @param allocator The allocator to use for allocating memory for the vector. - * @param typeWidth The width in bytes of the type. 
- */ - public BaseFixedWidthVector(Field field, final BufferAllocator allocator, final int typeWidth) { - super(allocator); - this.typeWidth = typeWidth; - this.field = field; - valueCount = 0; - allocationMonitor = 0; - validityBuffer = allocator.getEmpty(); - valueBuffer = allocator.getEmpty(); - lastValueCapacity = INITIAL_VALUE_ALLOCATION; - refreshValueCapacity(); - } - - public int getTypeWidth() { - return typeWidth; - } - - @Override - public String getName() { - return field.getName(); - } - - /* TODO: - * see if getNullCount() can be made faster -- O(1) - */ - - /* TODO: - * Once the entire hierarchy has been refactored, move common functions - * like getNullCount(), splitAndTransferValidityBuffer to top level - * base class BaseValueVector. - * - * Along with this, some class members (validityBuffer) can also be - * abstracted out to top level base class. - * - * Right now BaseValueVector is the top level base class for other - * vector types in ValueVector hierarchy (non-nullable) and those - * vectors have not yet been refactored/removed so moving things to - * the top class as of now is not a good idea. - */ - - /** - * Get the memory address of buffer that manages the validity (NULL or NON-NULL nature) of - * elements in the vector. - * - * @return starting address of the buffer - */ - @Override - public long getValidityBufferAddress() { - return validityBuffer.memoryAddress(); - } - - /** - * Get the memory address of buffer that stores the data for elements in the vector. - * - * @return starting address of the buffer - */ - @Override - public long getDataBufferAddress() { - return valueBuffer.memoryAddress(); - } - - /** - * Get the memory address of buffer that stores the offsets for elements in the vector. This - * operation is not supported for fixed-width vectors. 
- * - * @return starting address of the buffer - * @throws UnsupportedOperationException for fixed width vectors - */ - @Override - public long getOffsetBufferAddress() { - throw new UnsupportedOperationException("not supported for fixed-width vectors"); - } - - /** - * Get buffer that manages the validity (NULL or NON-NULL nature) of elements in the vector. - * Consider it as a buffer for internal bit vector data structure. - * - * @return buffer - */ - @Override - public ArrowBuf getValidityBuffer() { - return validityBuffer; - } - - /** - * Get the buffer that stores the data for elements in the vector. - * - * @return buffer - */ - @Override - public ArrowBuf getDataBuffer() { - return valueBuffer; - } - - /** - * buffer that stores the offsets for elements in the vector. This operation is not supported for - * fixed-width vectors. - * - * @return buffer - * @throws UnsupportedOperationException for fixed width vectors - */ - @Override - public ArrowBuf getOffsetBuffer() { - throw new UnsupportedOperationException("not supported for fixed-width vectors"); - } - - /** - * Sets the desired value capacity for the vector. This function doesn't allocate any memory for - * the vector. - * - * @param valueCount desired number of elements in the vector - */ - @Override - public void setInitialCapacity(int valueCount) { - computeAndCheckBufferSize(valueCount); - lastValueCapacity = valueCount; - } - - /** - * Get the current value capacity for the vector. - * - * @return number of elements that vector can hold. - */ - @Override - public int getValueCapacity() { - return actualValueCapacity; - } - - /** Call this if you change the capacity of valueBuffer or validityBuffer. 
*/ - protected void refreshValueCapacity() { - actualValueCapacity = Math.min(getValueBufferValueCapacity(), getValidityBufferValueCapacity()); - } - - protected int getValueBufferValueCapacity() { - return capAtMaxInt(valueBuffer.capacity() / typeWidth); - } - - protected int getValidityBufferValueCapacity() { - return capAtMaxInt(validityBuffer.capacity() * 8); - } - - /** zero out the vector and the data in associated buffers. */ - @Override - public void zeroVector() { - initValidityBuffer(); - initValueBuffer(); - } - - /* zero out the validity buffer */ - private void initValidityBuffer() { - validityBuffer.setZero(0, validityBuffer.capacity()); - } - - /* zero out the data buffer */ - private void initValueBuffer() { - valueBuffer.setZero(0, valueBuffer.capacity()); - } - - /** - * Reset the vector to initial state. Same as {@link #zeroVector()}. Note that this method doesn't - * release any memory. - */ - @Override - public void reset() { - valueCount = 0; - zeroVector(); - } - - /** Close the vector and release the associated buffers. */ - @Override - public void close() { - clear(); - } - - /** Same as {@link #close()}. */ - @Override - public void clear() { - valueCount = 0; - validityBuffer = releaseBuffer(validityBuffer); - valueBuffer = releaseBuffer(valueBuffer); - refreshValueCapacity(); - } - - /* used to step down the memory allocation */ - protected void incrementAllocationMonitor() { - if (allocationMonitor < 0) { - allocationMonitor = 0; - } - allocationMonitor++; - } - - /* used to step up the memory allocation */ - protected void decrementAllocationMonitor() { - if (allocationMonitor > 0) { - allocationMonitor = 0; - } - allocationMonitor--; - } - - /** Same as {@link #allocateNewSafe()}. */ - @Override - public void allocateNew() { - allocateNew(lastValueCapacity); - } - - /** - * Allocate memory for the vector. We internally use a default value count of 4096 to allocate - * memory for at least these many elements in the vector. 
See {@link #allocateNew(int)} for - * allocating memory for specific number of elements in the vector. - * - * @return false if memory allocation fails, true otherwise. - */ - @Override - public boolean allocateNewSafe() { - try { - allocateNew(lastValueCapacity); - return true; - } catch (Exception e) { - return false; - } - } - - /** - * Allocate memory for the vector to support storing at least the provided number of elements in - * the vector. This method must be called prior to using the ValueVector. - * - * @param valueCount the desired number of elements in the vector - * @throws org.apache.arrow.memory.OutOfMemoryException on error - */ - @Override - public void allocateNew(int valueCount) { - computeAndCheckBufferSize(valueCount); - - /* we are doing a new allocation -- release the current buffers */ - clear(); - - try { - allocateBytes(valueCount); - } catch (Exception e) { - clear(); - throw e; - } - } - - /* - * Compute the buffer size required for 'valueCount', and check if it's within bounds. - */ - private long computeAndCheckBufferSize(int valueCount) { - final long size = computeCombinedBufferSize(valueCount, typeWidth); - if (size > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException( - "Memory required for vector capacity " - + valueCount - + " is (" - + size - + "), which is more than max allowed (" - + MAX_ALLOCATION_SIZE - + ")"); - } - return size; - } - - /** - * Actual memory allocation is done by this function. All the calculations and knowledge about - * what size to allocate is upto the callers of this method. Callers appropriately handle errors - * if memory allocation fails here. Callers should also take care of determining that desired size - * is within the bounds of max allocation allowed and any other error conditions. 
- */ - private void allocateBytes(int valueCount) { - DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount, typeWidth); - valueBuffer = buffers.getDataBuf(); - validityBuffer = buffers.getValidityBuf(); - zeroVector(); - - refreshValueCapacity(); - lastValueCapacity = getValueCapacity(); - } - - /** - * During splitAndTransfer, if we splitting from a random position within a byte, we can't just - * slice the source buffer so we have to explicitly allocate the validityBuffer of the target - * vector. This is unlike the databuffer which we can always slice for the target vector. - */ - private void allocateValidityBuffer(final int validityBufferSize) { - validityBuffer = allocator.buffer(validityBufferSize); - validityBuffer.readerIndex(0); - refreshValueCapacity(); - } - - /** - * Get the potential buffer size for a particular number of records. - * - * @param count desired number of elements in the vector - * @return estimated size of underlying buffers if the vector holds a given number of elements - */ - @Override - public int getBufferSizeFor(final int count) { - if (count == 0) { - return 0; - } - return (count * typeWidth) + getValidityBufferSizeFromCount(count); - } - - /** - * Get the size (number of bytes) of underlying buffers used by this vector. - * - * @return size of underlying buffers. - */ - @Override - public int getBufferSize() { - if (valueCount == 0) { - return 0; - } - return (valueCount * typeWidth) + getValidityBufferSizeFromCount(valueCount); - } - - /** - * Get information about how this field is materialized. - * - * @return the field corresponding to this vector - */ - @Override - public Field getField() { - return field; - } - - /** - * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer so it only should be used for in-context access. 
Also note - * that this buffer changes regularly thus external classes shouldn't hold a reference to it - * (unless they change it). - * - * @param clear Whether to clear vector before returning; the buffers will still be refcounted but - * the returned array will be the only reference to them - * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. - */ - @Override - public ArrowBuf[] getBuffers(boolean clear) { - final ArrowBuf[] buffers; - setReaderAndWriterIndex(); - if (getBufferSize() == 0) { - buffers = new ArrowBuf[0]; - } else { - buffers = new ArrowBuf[2]; - buffers[0] = validityBuffer; - buffers[1] = valueBuffer; - } - if (clear) { - for (final ArrowBuf buffer : buffers) { - buffer.getReferenceManager().retain(1); - } - clear(); - } - return buffers; - } - - /** - * Resize the vector to increase the capacity. The internal behavior is to double the current - * value capacity. - */ - @Override - public void reAlloc() { - int targetValueCount = getValueCapacity() * 2; - if (targetValueCount == 0) { - if (lastValueCapacity > 0) { - targetValueCount = lastValueCapacity; - } else { - targetValueCount = INITIAL_VALUE_ALLOCATION * 2; - } - } - computeAndCheckBufferSize(targetValueCount); - - DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetValueCount, typeWidth); - final ArrowBuf newValueBuffer = buffers.getDataBuf(); - newValueBuffer.setBytes(0, valueBuffer, 0, valueBuffer.capacity()); - newValueBuffer.setZero( - valueBuffer.capacity(), newValueBuffer.capacity() - valueBuffer.capacity()); - valueBuffer.getReferenceManager().release(); - valueBuffer = newValueBuffer; - - final ArrowBuf newValidityBuffer = buffers.getValidityBuf(); - newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity()); - newValidityBuffer.setZero( - validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity()); - validityBuffer.getReferenceManager().release(); - validityBuffer = newValidityBuffer; 
- - refreshValueCapacity(); - lastValueCapacity = getValueCapacity(); - } - - /** - * Get the inner vectors. - * - * @return the inner vectors for this field as defined by the TypeLayout - * @deprecated This API will be removed as the current implementations no longer support inner - * vectors. - */ - @Deprecated - @Override - public List getFieldInnerVectors() { - throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); - } - - /** - * Initialize the children in schema for this Field. This operation is a NO-OP for scalar types - * since they don't have any children. - * - * @param children the schema - * @throws IllegalArgumentException if children is a non-empty list for scalar types. - */ - @Override - public void initializeChildrenFromFields(List children) { - if (!children.isEmpty()) { - throw new IllegalArgumentException("primitive type vector cannot have children"); - } - } - - /** - * Get the inner child vectors. - * - * @return list of child vectors for complex types, empty list for scalar vector types - */ - @Override - public List getChildrenFromFields() { - return Collections.emptyList(); - } - - /** - * Load the buffers of this vector with provided source buffers. The caller manages the source - * buffers and populates them before invoking this method. 
- * - * @param fieldNode the fieldNode indicating the value count - * @param ownBuffers the buffers for this Field (own buffers only, children not included) - */ - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - if (ownBuffers.size() != 2) { - throw new IllegalArgumentException( - "Illegal buffer count, expected " + 2 + ", got: " + ownBuffers.size()); - } - - ArrowBuf bitBuffer = ownBuffers.get(0); - ArrowBuf dataBuffer = ownBuffers.get(1); - - validityBuffer.getReferenceManager().release(); - validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); - valueBuffer.getReferenceManager().release(); - valueBuffer = dataBuffer.getReferenceManager().retain(dataBuffer, allocator); - refreshValueCapacity(); - - valueCount = fieldNode.getLength(); - } - - /** - * Get the buffers belonging to this vector. - * - * @return the inner buffers. - */ - @Override - public List getFieldBuffers() { - List result = new ArrayList<>(2); - setReaderAndWriterIndex(); - result.add(validityBuffer); - result.add(valueBuffer); - - return result; - } - - /** Set the reader and writer indexes for the inner buffers. */ - private void setReaderAndWriterIndex() { - validityBuffer.readerIndex(0); - valueBuffer.readerIndex(0); - if (valueCount == 0) { - validityBuffer.writerIndex(0); - valueBuffer.writerIndex(0); - } else { - validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - if (typeWidth == 0) { - /* specialized handling for BitVector */ - valueBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - } else { - valueBuffer.writerIndex((long) valueCount * typeWidth); - } - } - } - - /** Validate the scalar values held by this vector. */ - public void validateScalars() { - // No validation by default. - } - - /** - * Construct a transfer pair of this vector and another vector of same type. 
- * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @param callBack not used - * @return TransferPair - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return getTransferPair(ref, allocator); - } - - /** - * Construct a transfer pair of this vector and another vector of same type. - * - * @param field The field materialized by this vector. - * @param allocator allocator for the target vector - * @param callBack not used - * @return TransferPair - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return getTransferPair(field, allocator); - } - - /** - * Construct a transfer pair of this vector and another vector of same type. - * - * @param allocator allocator for the target vector - * @return TransferPair - */ - @Override - public TransferPair getTransferPair(BufferAllocator allocator) { - return getTransferPair(getName(), allocator); - } - - /** - * Construct a transfer pair of this vector and another vector of same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return TransferPair - */ - @Override - public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator); - - /** - * Construct a transfer pair of this vector and another vector of same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return TransferPair - */ - @Override - public abstract TransferPair getTransferPair(Field field, BufferAllocator allocator); - - /** - * Transfer this vector's data to another vector. The memory associated with this vector is - * transferred to the allocator of target vector for accounting and management purposes. 
- * - * @param target destination vector for transfer - */ - public void transferTo(BaseFixedWidthVector target) { - compareTypes(target, "transferTo"); - target.clear(); - target.validityBuffer = transferBuffer(validityBuffer, target.allocator); - target.valueBuffer = transferBuffer(valueBuffer, target.allocator); - target.valueCount = valueCount; - target.refreshValueCapacity(); - clear(); - } - - /** - * Slice this vector at desired index and length and transfer the corresponding data to the target - * vector. - * - * @param startIndex start position of the split in source vector. - * @param length length of the split. - * @param target destination vector - */ - public void splitAndTransferTo(int startIndex, int length, BaseFixedWidthVector target) { - Preconditions.checkArgument( - startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, - "Invalid parameters startIndex: %s, length: %s for valueCount: %s", - startIndex, - length, - valueCount); - compareTypes(target, "splitAndTransferTo"); - target.clear(); - splitAndTransferValidityBuffer(startIndex, length, target); - splitAndTransferValueBuffer(startIndex, length, target); - target.setValueCount(length); - } - - /** Data buffer can always be split and transferred using slicing. */ - private void splitAndTransferValueBuffer( - int startIndex, int length, BaseFixedWidthVector target) { - final int startPoint = startIndex * typeWidth; - final int sliceLength = length * typeWidth; - final ArrowBuf slicedBuffer = valueBuffer.slice(startPoint, sliceLength); - target.valueBuffer = transferBuffer(slicedBuffer, target.allocator); - target.refreshValueCapacity(); - } - - /** - * Validity buffer has multiple cases of split and transfer depending on the starting position of - * the source index. 
- */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, BaseFixedWidthVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - /* slice */ - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); - target.refreshValueCapacity(); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. - */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - this.validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. 
- */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - this.validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - - /*----------------------------------------------------------------* - | | - | common getters and setters | - | | - *----------------------------------------------------------------*/ - - /** - * Get the number of elements that are null in the vector. - * - * @return the number of null elements. - */ - @Override - public int getNullCount() { - return BitVectorHelper.getNullCount(validityBuffer, valueCount); - } - - /** - * Get the value count of vector. This will always be zero unless {@link #setValueCount(int)} has - * been called prior to calling this. - * - * @return valueCount for the vector - */ - @Override - public int getValueCount() { - return valueCount; - } - - /** - * Set value count for the vector. - * - * @param valueCount value count to set - */ - @Override - public void setValueCount(int valueCount) { - this.valueCount = valueCount; - final int currentValueCapacity = getValueCapacity(); - while (valueCount > getValueCapacity()) { - reAlloc(); - } - /* - * We are trying to understand the pattern of memory allocation. - * If initially, the user did vector.allocateNew(), we would have - * allocated memory of default size (4096 * type width). - * Later on user invokes setValueCount(count). 
- * - * If the existing value capacity is twice as large as the - * valueCount, we know that we over-provisioned memory in the - * first place when default memory allocation was done because user - * really needs a much less value count in the vector. - * - * We record this by bumping up the allocationMonitor. If this pattern - * happens for certain number of times and allocationMonitor - * reaches the threshold (internal hardcoded) value, subsequent - * call to allocateNew() will take care of stepping down the - * default memory allocation size. - * - * Another case would be under-provisioning the initial memory and - * thus going through a lot of realloc(). Here the goal is to - * see if we can minimize the number of reallocations. Again the - * state is recorded in allocationMonitor by decrementing it - * (negative value). If a threshold is hit, realloc will try to - * allocate more memory in order to possibly avoid a future realloc. - * This case is also applicable to setSafe() methods which can trigger - * a realloc() and thus we record the state there as well. - */ - if (valueCount > 0) { - if (currentValueCapacity >= (valueCount * 2)) { - incrementAllocationMonitor(); - } else if (currentValueCapacity <= (valueCount / 2)) { - decrementAllocationMonitor(); - } - } - setReaderAndWriterIndex(); - } - - /** - * Check if the given index is within the current value capacity of the vector. - * - * @param index position to check - * @return true if index is within the current value capacity - */ - public boolean isSafe(int index) { - return index < getValueCapacity(); - } - - /** - * Check if element at given index is null. - * - * @param index position of element - * @return true if element at given index is null, false otherwise - */ - @Override - public boolean isNull(int index) { - return (isSet(index) == 0); - } - - /** - * Same as {@link #isNull(int)}. 
- * - * @param index position of element - * @return 1 if element at given index is not null, 0 otherwise - */ - public int isSet(int index) { - final int byteIndex = index >> 3; - final byte b = validityBuffer.getByte(byteIndex); - final int bitIndex = index & 7; - return (b >> bitIndex) & 0x01; - } - - /** - * Mark the particular position in the vector as non-null. - * - * @param index position of the element. - */ - @Override - public void setIndexDefined(int index) { - handleSafe(index); - BitVectorHelper.setBit(validityBuffer, index); - } - - public void set(int index, byte[] value, int start, int length) { - throw new UnsupportedOperationException(); - } - - public void setSafe(int index, byte[] value, int start, int length) { - throw new UnsupportedOperationException(); - } - - public void set(int index, ByteBuffer value, int start, int length) { - throw new UnsupportedOperationException(); - } - - public void setSafe(int index, ByteBuffer value, int start, int length) { - throw new UnsupportedOperationException(); - } - - /*----------------------------------------------------------------* - | | - | helper methods for setters | - | | - *----------------------------------------------------------------*/ - - protected void handleSafe(int index) { - while (index >= getValueCapacity()) { - decrementAllocationMonitor(); - reAlloc(); - } - } - - /** - * Copy a cell value from a particular index in source vector to a particular position in this - * vector. The source vector should be of the same type as this one. 
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - @Override - public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - if (from.isNull(fromIndex)) { - BitVectorHelper.unsetBit(this.getValidityBuffer(), thisIndex); - } else { - BitVectorHelper.setBit(this.getValidityBuffer(), thisIndex); - MemoryUtil.copyMemory( - from.getDataBuffer().memoryAddress() + (long) fromIndex * typeWidth, - this.getDataBuffer().memoryAddress() + (long) thisIndex * typeWidth, - typeWidth); - } - } - - /** - * Same as {@link #copyFrom(int, int, ValueVector)} except that it handles the case when the - * capacity of the vector needs to be expanded before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - @Override - public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - - /** - * Set the element at the given index to null. - * - * @param index position of element - */ - @Override - public void setNull(int index) { - handleSafe(index); - // not really needed to set the bit to 0 as long as - // the buffer always starts from 0. 
- BitVectorHelper.unsetBit(validityBuffer, index); - } - - @Override - public ArrowBufPointer getDataPointer(int index) { - return getDataPointer(index, new ArrowBufPointer()); - } - - @Override - public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) { - if (isNull(index)) { - reuse.set(null, 0, 0); - } else { - reuse.set(valueBuffer, (long) index * typeWidth, typeWidth); - } - return reuse; - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - if (isNull(index)) { - return ArrowBufPointer.NULL_HASH_CODE; - } - long start = (long) typeWidth * index; - long end = (long) typeWidth * (index + 1); - return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end); - } - - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java deleted file mode 100644 index 0adba35437296..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -/** Interface for all int type vectors. */ -public interface BaseIntVector extends FieldVector { - - /** - * Sets the value at index, note this value may need to be truncated. Note this is safe version - * (i.e. call setSafe(int, ...) method in vector) - */ - void setWithPossibleTruncate(int index, long value); - - /** - * Sets the value at index, note this value may need to be truncated. Note this is unsafe version - * (i.e. call set(int, ...) method in vector) - */ - void setUnsafeWithPossibleTruncate(int index, long value); - - /** - * Gets the value at index. This value may have been extended to long and will throw {@link - * NullPointerException} if the value is null. Note null check could be turned off via {@link - * NullCheckingForGet}. - */ - long getValueAsLong(int index); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java deleted file mode 100644 index 552a896ea8c36..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java +++ /dev/null @@ -1,1450 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.ByteFunctionHelpers; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.TransferPair; - -/** - * BaseLargeVariableWidthVector is a base class providing functionality for large strings/large - * bytes types. 
- */ -public abstract class BaseLargeVariableWidthVector extends BaseValueVector - implements VariableWidthFieldVector { - private static final int DEFAULT_RECORD_BYTE_COUNT = 12; - private static final int INITIAL_BYTE_COUNT = - INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT; - private int lastValueCapacity; - private long lastValueAllocationSizeInBytes; - - /* protected members */ - public static final int OFFSET_WIDTH = 8; /* 8 byte unsigned int to track offsets */ - protected static final byte[] emptyByteArray = new byte[] {}; - protected ArrowBuf validityBuffer; - protected ArrowBuf valueBuffer; - protected ArrowBuf offsetBuffer; - protected int valueCount; - protected int lastSet; - protected final Field field; - - /** - * Constructs a new instance. - * - * @param field The field materialized by this vector. - * @param allocator The allocator to use for creating/resizing buffers - */ - public BaseLargeVariableWidthVector(Field field, final BufferAllocator allocator) { - super(allocator); - this.field = field; - lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT; - // -1 because we require one extra slot for the offset array. - lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1; - valueCount = 0; - lastSet = -1; - offsetBuffer = allocator.getEmpty(); - validityBuffer = allocator.getEmpty(); - valueBuffer = allocator.getEmpty(); - } - - @Override - public String getName() { - return field.getName(); - } - - /** - * Get buffer that manages the validity (NULL or NON-NULL nature) of elements in the vector. - * Consider it as a buffer for internal bit vector data structure. - * - * @return buffer - */ - @Override - public ArrowBuf getValidityBuffer() { - return validityBuffer; - } - - /** - * Get the buffer that stores the data for elements in the vector. - * - * @return buffer - */ - @Override - public ArrowBuf getDataBuffer() { - return valueBuffer; - } - - /** - * buffer that stores the offsets for elements in the vector. 
This operation is not supported for - * fixed-width vectors. - * - * @return buffer - */ - @Override - public ArrowBuf getOffsetBuffer() { - return offsetBuffer; - } - - /** - * Get the memory address of buffer that stores the offsets for elements in the vector. - * - * @return starting address of the buffer - */ - @Override - public long getOffsetBufferAddress() { - return offsetBuffer.memoryAddress(); - } - - /** - * Get the memory address of buffer that manages the validity (NULL or NON-NULL nature) of - * elements in the vector. - * - * @return starting address of the buffer - */ - @Override - public long getValidityBufferAddress() { - return validityBuffer.memoryAddress(); - } - - /** - * Get the memory address of buffer that stores the data for elements in the vector. - * - * @return starting address of the buffer - */ - @Override - public long getDataBufferAddress() { - return valueBuffer.memoryAddress(); - } - - /** - * Sets the desired value capacity for the vector. This function doesn't allocate any memory for - * the vector. - * - * @param valueCount desired number of elements in the vector - */ - @Override - public void setInitialCapacity(int valueCount) { - final long size = (long) valueCount * DEFAULT_RECORD_BYTE_COUNT; - checkDataBufferSize(size); - computeAndCheckOffsetsBufferSize(valueCount); - lastValueAllocationSizeInBytes = size; - lastValueCapacity = valueCount; - } - - /** - * Sets the desired value capacity for the vector. This function doesn't allocate any memory for - * the vector. 
- * - * @param valueCount desired number of elements in the vector - * @param density average number of bytes per variable width element - */ - @Override - public void setInitialCapacity(int valueCount, double density) { - long size = Math.max((long) (valueCount * density), 1L); - checkDataBufferSize(size); - computeAndCheckOffsetsBufferSize(valueCount); - lastValueAllocationSizeInBytes = size; - lastValueCapacity = valueCount; - } - - /** - * Get the density of this ListVector. - * - * @return density - */ - public double getDensity() { - if (valueCount == 0) { - return 0.0D; - } - final long startOffset = getStartOffset(0); - final long endOffset = getStartOffset(valueCount); - final double totalListSize = endOffset - startOffset; - return totalListSize / valueCount; - } - - /** - * Get the current capacity which does not exceed either validity buffer or offset buffer. Note: - * Here the `getValueCapacity` has no relationship with the value buffer. - * - * @return number of elements that vector can hold. - */ - @Override - public int getValueCapacity() { - final long offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0); - return capAtMaxInt(Math.min(offsetValueCapacity, getValidityBufferValueCapacity())); - } - - private long getValidityBufferValueCapacity() { - return validityBuffer.capacity() * 8; - } - - private long getOffsetBufferValueCapacity() { - return offsetBuffer.capacity() / OFFSET_WIDTH; - } - - /** zero out the vector and the data in associated buffers. */ - public void zeroVector() { - initValidityBuffer(); - initOffsetBuffer(); - valueBuffer.setZero(0, valueBuffer.capacity()); - } - - /* zero out the validity buffer */ - private void initValidityBuffer() { - validityBuffer.setZero(0, validityBuffer.capacity()); - } - - /* zero out the offset buffer */ - private void initOffsetBuffer() { - offsetBuffer.setZero(0, offsetBuffer.capacity()); - } - - /** - * Reset the vector to initial state. Same as {@link #zeroVector()}. 
Note that this method doesn't - * release any memory. - */ - @Override - public void reset() { - zeroVector(); - lastSet = -1; - valueCount = 0; - } - - /** Close the vector and release the associated buffers. */ - @Override - public void close() { - clear(); - } - - /** Same as {@link #close()}. */ - @Override - public void clear() { - validityBuffer = releaseBuffer(validityBuffer); - valueBuffer = releaseBuffer(valueBuffer); - offsetBuffer = releaseBuffer(offsetBuffer); - lastSet = -1; - valueCount = 0; - } - - /** - * Get the inner vectors. - * - * @return the inner vectors for this field as defined by the TypeLayout - * @deprecated This API will be removed as the current implementations no longer support inner - * vectors. - */ - @Override - @Deprecated - public List getFieldInnerVectors() { - throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); - } - - /** - * Initialize the children in schema for this Field. This operation is a NO-OP for scalar types - * since they don't have any children. - * - * @param children the schema - * @throws IllegalArgumentException if children is a non-empty list for scalar types. - */ - @Override - public void initializeChildrenFromFields(List children) { - if (!children.isEmpty()) { - throw new IllegalArgumentException("primitive type vector cannot have children"); - } - } - - /** - * Get the inner child vectors. - * - * @return list of child vectors for complex types, empty list for scalar vector types - */ - @Override - public List getChildrenFromFields() { - return Collections.emptyList(); - } - - /** - * Load the buffers of this vector with provided source buffers. The caller manages the source - * buffers and populates them before invoking this method. 
- * - * @param fieldNode the fieldNode indicating the value count - * @param ownBuffers the buffers for this Field (own buffers only, children not included) - */ - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - ArrowBuf bitBuffer = ownBuffers.get(0); - ArrowBuf offBuffer = ownBuffers.get(1); - ArrowBuf dataBuffer = ownBuffers.get(2); - - validityBuffer.getReferenceManager().release(); - validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); - offsetBuffer.getReferenceManager().release(); - offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); - valueBuffer.getReferenceManager().release(); - valueBuffer = dataBuffer.getReferenceManager().retain(dataBuffer, allocator); - - lastSet = fieldNode.getLength() - 1; - valueCount = fieldNode.getLength(); - } - - /** - * Get the buffers belonging to this vector. - * - * @return the inner buffers. - */ - @Override - public List getFieldBuffers() { - // before flight/IPC, we must bring the vector to a consistent state. - // this is because, it is possible that the offset buffers of some trailing values - // are not updated. this may cause some data in the data buffer being lost. - // for details, please see TestValueVector#testUnloadVariableWidthVector. - fillHoles(valueCount); - - List result = new ArrayList<>(3); - setReaderAndWriterIndex(); - result.add(validityBuffer); - result.add(offsetBuffer); - result.add(valueBuffer); - - return result; - } - - /** - * Export the buffers of the fields for C Data Interface. This method traverse the buffers and - * export buffer and buffer's memory address into a list of buffers and a pointer to the list of - * buffers. - */ - @Override - public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long nullValue) { - // before flight/IPC, we must bring the vector to a consistent state. 
- // this is because, it is possible that the offset buffers of some trailing values - // are not updated. this may cause some data in the data buffer being lost. - // for details, please see TestValueVector#testUnloadVariableWidthVector. - fillHoles(valueCount); - - exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true); - - if (offsetBuffer.capacity() == 0) { - // Empty offset buffer is allowed for historical reason. - // To export it through C Data interface, we need to allocate a buffer with one offset. - // We set `retain = false` to explicitly not increase the ref count for the exported buffer. - // The ref count of the newly created buffer (i.e., 1) already represents the usage - // at imported side. - exportBuffer(allocateOffsetBuffer(OFFSET_WIDTH), buffers, buffersPtr, nullValue, false); - } else { - exportBuffer(offsetBuffer, buffers, buffersPtr, nullValue, true); - } - - exportBuffer(valueBuffer, buffers, buffersPtr, nullValue, true); - } - - /** Set the reader and writer indexes for the inner buffers. */ - private void setReaderAndWriterIndex() { - validityBuffer.readerIndex(0); - offsetBuffer.readerIndex(0); - valueBuffer.readerIndex(0); - if (valueCount == 0) { - validityBuffer.writerIndex(0); - offsetBuffer.writerIndex(0); - valueBuffer.writerIndex(0); - } else { - final long lastDataOffset = getStartOffset(valueCount); - validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH); - valueBuffer.writerIndex(lastDataOffset); - } - } - - /** Same as {@link #allocateNewSafe()}. */ - @Override - public void allocateNew() { - allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); - } - - /** - * Allocate memory for the vector. We internally use a default value count of 4096 to allocate - * memory for at least these many elements in the vector. See {@link #allocateNew(long, int)} for - * allocating memory for specific number of elements in the vector. 
- * - * @return false if memory allocation fails, true otherwise. - */ - @Override - public boolean allocateNewSafe() { - try { - allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); - return true; - } catch (Exception e) { - return false; - } - } - - /** - * Allocate memory for the vector to support storing at least the provided number of elements in - * the vector. This method must be called prior to using the ValueVector. - * - * @param totalBytes desired total memory capacity - * @param valueCount the desired number of elements in the vector - * @throws org.apache.arrow.memory.OutOfMemoryException if memory allocation fails - */ - @Override - public void allocateNew(long totalBytes, int valueCount) { - assert totalBytes >= 0; - - checkDataBufferSize(totalBytes); - computeAndCheckOffsetsBufferSize(valueCount); - - /* we are doing a new allocation -- release the current buffers */ - clear(); - - try { - allocateBytes(totalBytes, valueCount); - } catch (Exception e) { - clear(); - throw e; - } - } - - @Override - public void allocateNew(int valueCount) { - allocateNew(lastValueAllocationSizeInBytes, valueCount); - } - - /* Check if the data buffer size is within bounds. */ - private void checkDataBufferSize(long size) { - if (size > MAX_ALLOCATION_SIZE || size < 0) { - throw new OversizedAllocationException( - "Memory required for vector " - + " is (" - + size - + "), which is more than max allowed (" - + MAX_ALLOCATION_SIZE - + ")"); - } - } - - /** - * Compute the buffer size required for 'valueCount' offsets and validity, and check if it's - * within bounds. - */ - private long computeAndCheckOffsetsBufferSize(int valueCount) { - /* to track the end offset of last data element in vector, we need - * an additional slot in offset buffer. 
- */ - final long size = computeCombinedBufferSize(valueCount + 1, OFFSET_WIDTH); - if (size > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException( - "Memory required for vector capacity " - + valueCount - + " is (" - + size - + "), which is more than max allowed (" - + MAX_ALLOCATION_SIZE - + ")"); - } - return size; - } - - /* allocate the inner buffers */ - private void allocateBytes(final long valueBufferSize, final int valueCount) { - /* allocate data buffer */ - long curSize = valueBufferSize; - valueBuffer = allocator.buffer(curSize); - valueBuffer.readerIndex(0); - - /* allocate offset buffer and validity buffer */ - DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount + 1, OFFSET_WIDTH); - offsetBuffer = buffers.getDataBuf(); - validityBuffer = buffers.getValidityBuf(); - initOffsetBuffer(); - initValidityBuffer(); - - lastValueCapacity = getValueCapacity(); - lastValueAllocationSizeInBytes = capAtMaxInt(valueBuffer.capacity()); - } - - /* allocate offset buffer */ - private ArrowBuf allocateOffsetBuffer(final long size) { - ArrowBuf offsetBuffer = allocator.buffer(size); - offsetBuffer.readerIndex(0); - initOffsetBuffer(); - return offsetBuffer; - } - - /* allocate validity buffer */ - private void allocateValidityBuffer(final long size) { - validityBuffer = allocator.buffer(size); - validityBuffer.readerIndex(0); - initValidityBuffer(); - } - - /** - * Resize the vector to increase the capacity. The internal behavior is to double the current - * value capacity. - */ - @Override - public void reAlloc() { - reallocDataBuffer(); - reallocValidityAndOffsetBuffers(); - } - - /** - * Reallocate the data buffer. Data Buffer stores the actual data for LARGEVARCHAR or - * LARGEVARBINARY elements in the vector. The behavior is to double the size of buffer. 
- * - * @throws OversizedAllocationException if the desired new size is more than max allowed - * @throws OutOfMemoryException if the internal memory allocation fails - */ - public void reallocDataBuffer() { - final long currentBufferCapacity = valueBuffer.capacity(); - long newAllocationSize = currentBufferCapacity * 2; - if (newAllocationSize == 0) { - if (lastValueAllocationSizeInBytes > 0) { - newAllocationSize = lastValueAllocationSizeInBytes; - } else { - newAllocationSize = INITIAL_BYTE_COUNT * 2; - } - } - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - assert newAllocationSize >= 1; - - checkDataBufferSize(newAllocationSize); - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, valueBuffer, 0, currentBufferCapacity); - valueBuffer.getReferenceManager().release(); - valueBuffer = newBuf; - lastValueAllocationSizeInBytes = valueBuffer.capacity(); - } - - /** - * Reallocate the validity and offset buffers for this vector. Validity buffer is used to track - * the NULL or NON-NULL nature of elements in the vector and offset buffer is used to store the - * lengths of variable width elements in the vector. - * - *

    Note that data buffer for variable length vectors moves independent of the companion - * validity and offset buffers. This is in contrast to what we have for fixed width vectors. - * - *

    So even though we may have setup an initial capacity of 1024 elements in the vector, it is - * quite possible that we need to reAlloc() the data buffer when we are setting the 5th element in - * the vector simply because previous variable length elements have exhausted the buffer capacity. - * However, we really don't need to reAlloc() validity and offset buffers until we try to set the - * 1025th element This is why we do a separate check for safe methods to determine which buffer - * needs reallocation. - * - * @throws OversizedAllocationException if the desired new size is more than max allowed - * @throws OutOfMemoryException if the internal memory allocation fails - */ - public void reallocValidityAndOffsetBuffers() { - int targetOffsetCount = capAtMaxInt((offsetBuffer.capacity() / OFFSET_WIDTH) * 2); - if (targetOffsetCount == 0) { - if (lastValueCapacity > 0) { - targetOffsetCount = (lastValueCapacity + 1); - } else { - targetOffsetCount = 2 * (INITIAL_VALUE_ALLOCATION + 1); - } - } - computeAndCheckOffsetsBufferSize(targetOffsetCount); - - DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetOffsetCount, OFFSET_WIDTH); - final ArrowBuf newOffsetBuffer = buffers.getDataBuf(); - newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity()); - newOffsetBuffer.setZero( - offsetBuffer.capacity(), newOffsetBuffer.capacity() - offsetBuffer.capacity()); - offsetBuffer.getReferenceManager().release(); - offsetBuffer = newOffsetBuffer; - - final ArrowBuf newValidityBuffer = buffers.getValidityBuf(); - newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity()); - newValidityBuffer.setZero( - validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity()); - validityBuffer.getReferenceManager().release(); - validityBuffer = newValidityBuffer; - - lastValueCapacity = getValueCapacity(); - } - - /** - * Get the size (number of bytes) of underlying data buffer. 
- * - * @return number of bytes in the data buffer - */ - @Override - public int getByteCapacity() { - return capAtMaxInt(valueBuffer.capacity()); - } - - @Override - public int sizeOfValueBuffer() { - if (valueCount == 0) { - return 0; - } - return capAtMaxInt(getStartOffset(valueCount)); - } - - /** - * Get the size (number of bytes) of underlying buffers used by this vector. - * - * @return size of underlying buffers. - */ - @Override - public int getBufferSize() { - return getBufferSizeFor(this.valueCount); - } - - /** - * Get the potential buffer size for a particular number of records. - * - * @param valueCount desired number of elements in the vector - * @return estimated size of underlying buffers if the vector holds a given number of elements - */ - @Override - public int getBufferSizeFor(final int valueCount) { - if (valueCount == 0) { - return 0; - } - - final long validityBufferSize = getValidityBufferSizeFromCount(valueCount); - final long offsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH; - /* get the end offset for this valueCount */ - final long dataBufferSize = getStartOffset(valueCount); - return capAtMaxInt(validityBufferSize + offsetBufferSize + dataBufferSize); - } - - /** - * Get information about how this field is materialized. - * - * @return the field corresponding to this vector - */ - @Override - public Field getField() { - return field; - } - - /** - * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer so it only should be used for in-context access. Also note - * that this buffer changes regularly thus external classes shouldn't hold a reference to it - * (unless they change it). - * - * @param clear Whether to clear vector before returning; the buffers will still be refcounted but - * the returned array will be the only reference to them - * @return The underlying {@link io.netty.buffer.ArrowBuf buffers} that is used by this vector - * instance. 
- */ - @Override - public ArrowBuf[] getBuffers(boolean clear) { - final ArrowBuf[] buffers; - setReaderAndWriterIndex(); - if (getBufferSize() == 0) { - buffers = new ArrowBuf[0]; - } else { - buffers = new ArrowBuf[3]; - buffers[0] = validityBuffer; - buffers[1] = offsetBuffer; - buffers[2] = valueBuffer; - } - if (clear) { - for (final ArrowBuf buffer : buffers) { - buffer.getReferenceManager().retain(); - } - clear(); - } - return buffers; - } - - /** Validate the scalar values held by this vector. */ - public void validateScalars() { - // No validation by default. - } - - /** - * Construct a transfer pair of this vector and another vector of same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @param callBack not used - * @return TransferPair - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return getTransferPair(ref, allocator); - } - - /** - * Construct a transfer pair of this vector and another vector of same type. - * - * @param field The field materialized by this vector - * @param allocator allocator for the target vector - * @param callBack not used - * @return TransferPair - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return getTransferPair(field, allocator); - } - - /** - * Construct a transfer pair of this vector and another vector of same type. - * - * @param allocator allocator for the target vector - * @return TransferPair - */ - @Override - public TransferPair getTransferPair(BufferAllocator allocator) { - return getTransferPair(getName(), allocator); - } - - /** - * Construct a transfer pair of this vector and another vector of same type. 
- * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return TransferPair - */ - @Override - public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator); - - /** - * Construct a transfer pair of this vector and another vector of same type. - * - * @param field The field materialized by this vector - * @param allocator allocator for the target vector - * @return TransferPair - */ - @Override - public abstract TransferPair getTransferPair(Field field, BufferAllocator allocator); - - /** - * Transfer this vector's data to another vector. The memory associated with this vector is - * transferred to the allocator of target vector for accounting and management purposes. - * - * @param target destination vector for transfer - */ - public void transferTo(BaseLargeVariableWidthVector target) { - compareTypes(target, "transferTo"); - target.clear(); - target.validityBuffer = transferBuffer(validityBuffer, target.allocator); - target.valueBuffer = transferBuffer(valueBuffer, target.allocator); - target.offsetBuffer = transferBuffer(offsetBuffer, target.allocator); - target.setLastSet(this.lastSet); - if (this.valueCount > 0) { - target.setValueCount(this.valueCount); - } - clear(); - } - - /** - * Slice this vector at desired index and length and transfer the corresponding data to the target - * vector. - * - * @param startIndex start position of the split in source vector. - * @param length length of the split. 
- * @param target destination vector - */ - public void splitAndTransferTo(int startIndex, int length, BaseLargeVariableWidthVector target) { - Preconditions.checkArgument( - startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, - "Invalid parameters startIndex: %s, length: %s for valueCount: %s", - startIndex, - length, - valueCount); - compareTypes(target, "splitAndTransferTo"); - target.clear(); - if (length > 0) { - splitAndTransferValidityBuffer(startIndex, length, target); - splitAndTransferOffsetBuffer(startIndex, length, target); - target.setLastSet(length - 1); - target.setValueCount(length); - } - } - - /** - * Transfer the offsets along with data. Unlike the data buffer, we cannot simply slice the offset - * buffer for split and transfer. The reason is that offsets in the target vector have to be - * adjusted and made relative to the staring offset in source vector from the start index of - * split. This is why, we need to explicitly allocate the offset buffer and set the adjusted - * offsets in the target vector. - */ - private void splitAndTransferOffsetBuffer( - int startIndex, int length, BaseLargeVariableWidthVector target) { - final long start = getStartOffset(startIndex); - final long end = getStartOffset(startIndex + length); - final long dataLength = end - start; - target.offsetBuffer = target.allocateOffsetBuffer((long) (length + 1) * OFFSET_WIDTH); - for (int i = 0; i < length + 1; i++) { - final long relativeSourceOffset = getStartOffset(startIndex + i) - start; - target.offsetBuffer.setLong((long) i * OFFSET_WIDTH, relativeSourceOffset); - } - final ArrowBuf slicedBuffer = valueBuffer.slice(start, dataLength); - target.valueBuffer = transferBuffer(slicedBuffer, target.allocator); - } - - /* - * Transfer the validity. 
- */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, BaseLargeVariableWidthVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. - */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - this.validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. 
- */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - this.validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - - /*----------------------------------------------------------------* - | | - | common getters and setters | - | | - *----------------------------------------------------------------*/ - - /** - * Get the number of elements that are null in the vector. - * - * @return the number of null elements. - */ - @Override - public int getNullCount() { - return BitVectorHelper.getNullCount(validityBuffer, valueCount); - } - - /** - * Check if the given index is within the current value capacity of the vector. - * - * @param index position to check - * @return true if index is within the current value capacity - */ - public boolean isSafe(int index) { - return index < getValueCapacity(); - } - - /** - * Check if element at given index is null. - * - * @param index position of element - * @return true if element at given index is null - */ - @Override - public boolean isNull(int index) { - return (isSet(index) == 0); - } - - /** - * Same as {@link #isNull(int)}. - * - * @param index position of element - * @return 1 if element at given index is not null, 0 otherwise - */ - public int isSet(int index) { - final int byteIndex = index >> 3; - final byte b = validityBuffer.getByte(byteIndex); - final int bitIndex = index & 7; - return (b >> bitIndex) & 0x01; - } - - /** - * Get the value count of vector. This will always be zero unless setValueCount(int) has been - * called prior to calling this. 
- * - * @return valueCount for the vector - */ - @Override - public int getValueCount() { - return valueCount; - } - - /** - * Sets the value count for the vector. - * - * @param valueCount value count - */ - @Override - public void setValueCount(int valueCount) { - assert valueCount >= 0; - this.valueCount = valueCount; - while (valueCount > getValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - fillHoles(valueCount); - lastSet = valueCount - 1; - setReaderAndWriterIndex(); - } - - /** - * Create holes in the vector upto the given index (exclusive). Holes will be created from the - * current last set position in the vector. - * - * @param index target index - */ - @Override - public void fillEmpties(int index) { - handleSafe(index, emptyByteArray.length); - fillHoles(index); - lastSet = index - 1; - } - - /** - * Set the index of last non-null element in the vector. It is important to call this method with - * appropriate value before calling {@link #setValueCount(int)}. - * - * @param value desired index of last non-null element. - */ - @Override - public void setLastSet(int value) { - lastSet = value; - } - - /** - * Get the index of last non-null element in the vector. - * - * @return index of the last non-null element - */ - @Override - public int getLastSet() { - return lastSet; - } - - /** - * Mark the particular position in the vector as non-null. - * - * @param index position of the element. - */ - @Override - public void setIndexDefined(int index) { - // We need to check and realloc both validity and offset buffer - while (index >= getValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - BitVectorHelper.setBit(validityBuffer, index); - } - - /** - * Sets the value length for an element. 
- * - * @param index position of the element to set - * @param length length of the element - */ - public void setValueLengthSafe(int index, int length) { - assert index >= 0; - handleSafe(index, length); - fillHoles(index); - final long startOffset = getStartOffset(index); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length); - lastSet = index; - } - - /** - * Get the variable length element at specified index as Text. - * - * @param index position of element to get - * @return greater than 0 length for non-null element, 0 otherwise - */ - @Override - public int getValueLength(int index) { - assert index >= 0; - if (isSet(index) == 0) { - return 0; - } - final long startOffset = getStartOffset(index); - final int dataLength = (int) (getEndOffset(index) - startOffset); - return dataLength; - } - - /** - * Set the variable length element at the specified index to the supplied byte array. This is same - * as using {@link #set(int, byte[], int, int)} with start as 0 and length as value.length - * - * @param index position of the element to set - * @param value array of bytes to write - */ - @Override - public void set(int index, byte[] value) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value, 0, value.length); - lastSet = index; - } - - /** - * Same as {@link #set(int, byte[])} except that it handles the case where index and length of new - * element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param value array of bytes to write - */ - @Override - public void setSafe(int index, byte[] value) { - assert index >= 0; - handleSafe(index, value.length); - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value, 0, value.length); - lastSet = index; - } - - /** - * Set the variable length element at the specified index to the supplied byte array. 
- * - * @param index position of the element to set - * @param value array of bytes to write - * @param start start index in array of bytes - * @param length length of data in array of bytes - */ - @Override - public void set(int index, byte[] value, int start, int length) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value, start, length); - lastSet = index; - } - - /** - * Same as {@link #set(int, byte[], int, int)} except that it handles the case where index and - * length of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param value array of bytes to write - * @param start start index in array of bytes - * @param length length of data in array of bytes - */ - public void setSafe(int index, byte[] value, int start, int length) { - assert index >= 0; - handleSafe(index, length); - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value, start, length); - lastSet = index; - } - - /** - * Set the variable length element at the specified index to the content in supplied ByteBuffer. - * - * @param index position of the element to set - * @param value ByteBuffer with data - * @param start start index in ByteBuffer - * @param length length of data in ByteBuffer - */ - @Override - public void set(int index, ByteBuffer value, int start, int length) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final long startOffset = getStartOffset(index); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length); - valueBuffer.setBytes(startOffset, value, start, length); - lastSet = index; - } - - /** - * Same as {@link #set(int, ByteBuffer, int, int)} except that it handles the case where index and - * length of new element are beyond the existing capacity of the vector. 
- * - * @param index position of the element to set - * @param value ByteBuffer with data - * @param start start index in ByteBuffer - * @param length length of data in ByteBuffer - */ - public void setSafe(int index, ByteBuffer value, int start, int length) { - assert index >= 0; - handleSafe(index, length); - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final long startOffset = getStartOffset(index); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length); - valueBuffer.setBytes(startOffset, value, start, length); - lastSet = index; - } - - /** - * Set the element at the given index to null. - * - * @param index position of element - */ - @Override - public void setNull(int index) { - // We need to check and realloc both validity and offset buffer - while (index >= getValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - BitVectorHelper.unsetBit(validityBuffer, index); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param start start position of data in buffer - * @param end end position of data in buffer - * @param buffer data buffer containing the variable width element to be stored in the vector - */ - public void set(int index, int isSet, long start, long end, ArrowBuf buffer) { - assert index >= 0; - final long dataLength = end - start; - fillHoles(index); - BitVectorHelper.setValidityBit(validityBuffer, index, isSet); - final long startOffset = offsetBuffer.getLong((long) index * OFFSET_WIDTH); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, end); - valueBuffer.setBytes(startOffset, buffer, start, dataLength); - lastSet = index; - } - - /** - * Same as {@link #set(int, int, long, long, ArrowBuf)} except that it handles the case when index - * is greater than or equal to current value capacity of the vector. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param start start position of data in buffer - * @param end end position of data in buffer - * @param buffer data buffer containing the variable width element to be stored in the vector - */ - public void setSafe(int index, int isSet, long start, long end, ArrowBuf buffer) { - assert index >= 0; - final long dataLength = end - start; - handleSafe(index, (int) dataLength); - fillHoles(index); - BitVectorHelper.setValidityBit(validityBuffer, index, isSet); - final long startOffset = offsetBuffer.getLong((long) index * OFFSET_WIDTH); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, buffer, start, dataLength); - lastSet = index; - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param start start position of data in buffer - * @param length length of data in buffer - * @param buffer data buffer containing the variable width element to be stored in the vector - */ - public void set(int index, long start, int length, ArrowBuf buffer) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final long startOffset = offsetBuffer.getLong((long) index * OFFSET_WIDTH); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length); - final ArrowBuf bb = buffer.slice(start, length); - valueBuffer.setBytes(startOffset, bb); - lastSet = index; - } - - /** - * Same as {@link #set(int, int, long, long, ArrowBuf)} except that it handles the case when index - * is greater than or equal to current value capacity of the vector. 
- * - * @param index position of the new value - * @param start start position of data in buffer - * @param length length of data in buffer - * @param buffer data buffer containing the variable width element to be stored in the vector - */ - public void setSafe(int index, long start, int length, ArrowBuf buffer) { - assert index >= 0; - handleSafe(index, length); - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final long startOffset = offsetBuffer.getLong((long) index * OFFSET_WIDTH); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length); - final ArrowBuf bb = buffer.slice(start, length); - valueBuffer.setBytes(startOffset, bb); - lastSet = index; - } - - /*----------------------------------------------------------------* - | | - | helper methods for setters | - | | - *----------------------------------------------------------------*/ - - protected final void fillHoles(int index) { - for (int i = lastSet + 1; i < index; i++) { - setBytes(i, emptyByteArray, 0, emptyByteArray.length); - } - lastSet = index - 1; - } - - protected final void setBytes(int index, byte[] value, int start, int length) { - /* end offset of current last element in the vector. this will - * be the start offset of new element we are trying to store. - */ - final long startOffset = getStartOffset(index); - /* set new end offset */ - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length); - /* store the var length data in value buffer */ - valueBuffer.setBytes(startOffset, value, start, length); - } - - /** - * Gets the starting offset of a record, given its index. - * - * @param index index of the record. - * @return the starting offset of the record. 
- */ - protected final long getStartOffset(int index) { - return offsetBuffer.getLong((long) index * OFFSET_WIDTH); - } - - protected final void handleSafe(int index, int dataLength) { - /* - * IMPORTANT: - * value buffer for variable length vectors moves independent - * of the companion validity and offset buffers. This is in - * contrast to what we have for fixed width vectors. - * - * Here there is no concept of getValueCapacity() in the - * data stream. getValueCapacity() is applicable only to validity - * and offset buffers. - * - * So even though we may have setup an initial capacity of 1024 - * elements in the vector, it is quite possible - * that we need to reAlloc() the data buffer when we are setting - * the 5th element in the vector simply because previous - * variable length elements have exhausted the buffer capacity. - * However, we really don't need to reAlloc() validity and - * offset buffers until we try to set the 1025th element - * This is why we do a separate check for safe methods to - * determine which buffer needs reallocation. - */ - while (index >= getValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - final long startOffset = lastSet < 0 ? 0L : getStartOffset(lastSet + 1); - while (valueBuffer.capacity() < (startOffset + dataLength)) { - reallocDataBuffer(); - } - } - - /** - * Method used by Json Writer to read a variable width element from the variable width vector and - * write to Json. - * - *

    This method should not be used externally. - * - * @param data buffer storing the variable width vector elements - * @param offset buffer storing the offsets of variable width vector elements - * @param index position of the element in the vector - * @return array of bytes - */ - public static byte[] get(final ArrowBuf data, final ArrowBuf offset, int index) { - final long currentStartOffset = offset.getLong((long) index * OFFSET_WIDTH); - final int dataLength = - (int) (offset.getLong((long) (index + 1) * OFFSET_WIDTH) - currentStartOffset); - final byte[] result = new byte[dataLength]; - data.getBytes(currentStartOffset, result, 0, dataLength); - return result; - } - - /** - * Method used by Json Reader to explicitly set the offsets of the variable width vector data. The - * method takes care of allocating the memory for offsets if the caller hasn't done so. - * - *

    This method should not be used externally. - * - * @param buffer ArrowBuf to store offsets for variable width elements - * @param allocator memory allocator - * @param valueCount number of elements - * @param index position of the element - * @param value offset of the element - * @return buffer holding the offsets - */ - public static ArrowBuf set( - ArrowBuf buffer, BufferAllocator allocator, int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer((long) valueCount * OFFSET_WIDTH); - } - buffer.setLong((long) index * OFFSET_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex((long) valueCount * OFFSET_WIDTH); - } - - return buffer; - } - - /** - * Copy a cell value from a particular index in source vector to a particular position in this - * vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - @Override - public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - if (from.isNull(fromIndex)) { - fillHoles(thisIndex); - BitVectorHelper.unsetBit(this.validityBuffer, thisIndex); - final long copyStart = offsetBuffer.getLong((long) thisIndex * OFFSET_WIDTH); - offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart); - } else { - final long start = from.getOffsetBuffer().getLong((long) fromIndex * OFFSET_WIDTH); - final long end = from.getOffsetBuffer().getLong((long) (fromIndex + 1) * OFFSET_WIDTH); - final long length = end - start; - fillHoles(thisIndex); - BitVectorHelper.setBit(this.validityBuffer, thisIndex); - final long copyStart = getStartOffset(thisIndex); - from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, (int) length); - offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length); - } - lastSet = thisIndex; - } - - /** - * Same as {@link 
#copyFrom(int, int, ValueVector)} except that it handles the case when the - * capacity of the vector needs to be expanded before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - @Override - public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - if (from.isNull(fromIndex)) { - handleSafe(thisIndex, 0); - fillHoles(thisIndex); - BitVectorHelper.unsetBit(this.validityBuffer, thisIndex); - final long copyStart = offsetBuffer.getLong((long) thisIndex * OFFSET_WIDTH); - offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart); - } else { - final long start = from.getOffsetBuffer().getLong((long) fromIndex * OFFSET_WIDTH); - final long end = from.getOffsetBuffer().getLong((long) (fromIndex + 1) * OFFSET_WIDTH); - final int length = (int) (end - start); - handleSafe(thisIndex, length); - fillHoles(thisIndex); - BitVectorHelper.setBit(this.validityBuffer, thisIndex); - final long copyStart = getStartOffset(thisIndex); - from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, length); - offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length); - } - lastSet = thisIndex; - } - - @Override - public ArrowBufPointer getDataPointer(int index) { - return getDataPointer(index, new ArrowBufPointer()); - } - - @Override - public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) { - if (isNull(index)) { - reuse.set(null, 0, 0); - } else { - long offset = getStartOffset(index); - int length = (int) (getEndOffset(index) - offset); - reuse.set(valueBuffer, offset, length); - } - return reuse; - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - if (isNull(index)) { - return ArrowBufPointer.NULL_HASH_CODE; - } - 
final long start = getStartOffset(index); - final long end = getEndOffset(index); - return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end); - } - - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } - - protected final long getEndOffset(int index) { - return offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java deleted file mode 100644 index 9befcb890f011..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import java.util.Collections; -import java.util.Iterator; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReferenceManager; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.util.DataSizeRoundingUtil; -import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.ValueVectorUtility; - -/** - * Base class for other Arrow Vector Types. Provides basic functionality around memory management. - */ -public abstract class BaseValueVector implements ValueVector { - - public static final String MAX_ALLOCATION_SIZE_PROPERTY = "arrow.vector.max_allocation_bytes"; - public static final long MAX_ALLOCATION_SIZE = - Long.getLong(MAX_ALLOCATION_SIZE_PROPERTY, Long.MAX_VALUE); - /* - * For all fixed width vectors, the value and validity buffers are sliced from a single buffer. - * Similarly, for variable width vectors, the offsets and validity buffers are sliced from a - * single buffer. To ensure the single buffer is power-of-2 size, the initial value allocation - * should be less than power-of-2. For IntVectors, this comes to 3970*4 (15880) for the data - * buffer and 504 bytes for the validity buffer, totalling to 16384 (2^16). - */ - public static final int INITIAL_VALUE_ALLOCATION = 3970; - - protected final BufferAllocator allocator; - - protected volatile FieldReader fieldReader; - - protected BaseValueVector(BufferAllocator allocator) { - this.allocator = Preconditions.checkNotNull(allocator, "allocator cannot be null"); - } - - @Override - public abstract String getName(); - - /** Representation of vector suitable for debugging. 
*/ - @Override - public String toString() { - return ValueVectorUtility.getToString(this, 0, getValueCount()); - } - - @Override - public void clear() {} - - @Override - public void close() { - clear(); - } - - @Override - public TransferPair getTransferPair(BufferAllocator allocator) { - return getTransferPair(getName(), allocator); - } - - @Override - public Iterator iterator() { - return Collections.emptyIterator(); - } - - /** - * Checks to ensure that every buffer vv uses has a positive reference count, throws - * if this precondition isn't met. Returns true otherwise. - */ - public static boolean checkBufRefs(final ValueVector vv) { - for (final ArrowBuf buffer : vv.getBuffers(false)) { - if (buffer.refCnt() <= 0) { - throw new IllegalStateException("zero refcount"); - } - } - - return true; - } - - @Override - public BufferAllocator getAllocator() { - return allocator; - } - - void compareTypes(BaseValueVector target, String caller) { - if (this.getMinorType() != target.getMinorType()) { - throw new UnsupportedOperationException(caller + " should have vectors of exact same type"); - } - } - - protected ArrowBuf releaseBuffer(ArrowBuf buffer) { - buffer.getReferenceManager().release(); - buffer = allocator.getEmpty(); - return buffer; - } - - /* number of bytes for the validity buffer for the given valueCount */ - protected static int getValidityBufferSizeFromCount(final int valueCount) { - return DataSizeRoundingUtil.divideBy8Ceil(valueCount); - } - - /* round up bytes for the validity buffer for the given valueCount */ - private static long roundUp8ForValidityBuffer(long valueCount) { - return ((valueCount + 63) >> 6) << 3; - } - - long computeCombinedBufferSize(int valueCount, int typeWidth) { - Preconditions.checkArgument(valueCount >= 0, "valueCount must be >= 0"); - Preconditions.checkArgument(typeWidth >= 0, "typeWidth must be >= 0"); - - // compute size of validity buffer. 
- long bufferSize = roundUp8ForValidityBuffer(valueCount); - - // add the size of the value buffer. - if (typeWidth == 0) { - // for boolean type, value-buffer and validity-buffer are of same size. - bufferSize *= 2; - } else { - bufferSize += DataSizeRoundingUtil.roundUpTo8Multiple((long) valueCount * typeWidth); - } - return allocator.getRoundingPolicy().getRoundedSize(bufferSize); - } - - /** - * Each vector has a different reader that implements the FieldReader interface. Overridden - * methods must make sure to return the correct concrete reader implementation. - * - * @return Returns a lambda that initializes a reader when called. - */ - protected abstract FieldReader getReaderImpl(); - - /** - * Default implementation to create a reader for the vector. Depends on the individual vector - * class' implementation of {@link #getReaderImpl} to initialize the reader appropriately. - * - * @return Concrete instance of FieldReader by using double-checked locking. - */ - @Override - public FieldReader getReader() { - FieldReader reader = fieldReader; - - if (reader != null) { - return reader; - } - synchronized (this) { - if (fieldReader == null) { - fieldReader = getReaderImpl(); - } - - return fieldReader; - } - } - - /** Container for primitive vectors (1 for the validity bit-mask and one to hold the values). 
*/ - static class DataAndValidityBuffers { - private ArrowBuf dataBuf; - private ArrowBuf validityBuf; - - DataAndValidityBuffers(ArrowBuf dataBuf, ArrowBuf validityBuf) { - this.dataBuf = dataBuf; - this.validityBuf = validityBuf; - } - - ArrowBuf getDataBuf() { - return dataBuf; - } - - ArrowBuf getValidityBuf() { - return validityBuf; - } - } - - DataAndValidityBuffers allocFixedDataAndValidityBufs(int valueCount, int typeWidth) { - long bufferSize = computeCombinedBufferSize(valueCount, typeWidth); - assert bufferSize <= MAX_ALLOCATION_SIZE; - - long validityBufferSize; - long dataBufferSize; - if (typeWidth == 0) { - validityBufferSize = dataBufferSize = bufferSize / 2; - } else { - // Due to the rounding policy, the bufferSize could be greater than the - // requested size. Utilize the allocated buffer fully.; - long actualCount = (long) ((bufferSize * 8.0) / (8 * typeWidth + 1)); - do { - validityBufferSize = roundUp8ForValidityBuffer(actualCount); - dataBufferSize = DataSizeRoundingUtil.roundUpTo8Multiple(actualCount * typeWidth); - if (validityBufferSize + dataBufferSize <= bufferSize) { - break; - } - --actualCount; - } while (true); - } - - /* allocate combined buffer */ - ArrowBuf combinedBuffer = allocator.buffer(bufferSize); - - /* slice into requested lengths */ - ArrowBuf dataBuf = null; - ArrowBuf validityBuf = null; - long bufferOffset = 0; - for (int numBuffers = 0; numBuffers < 2; ++numBuffers) { - long len = (numBuffers == 0 ? 
dataBufferSize : validityBufferSize); - ArrowBuf buf = combinedBuffer.slice(bufferOffset, len); - buf.getReferenceManager().retain(); - buf.readerIndex(0); - buf.writerIndex(0); - - bufferOffset += len; - if (numBuffers == 0) { - dataBuf = buf; - } else { - validityBuf = buf; - } - } - combinedBuffer.getReferenceManager().release(); - return new DataAndValidityBuffers(dataBuf, validityBuf); - } - - public static ArrowBuf transferBuffer( - final ArrowBuf srcBuffer, final BufferAllocator targetAllocator) { - final ReferenceManager referenceManager = srcBuffer.getReferenceManager(); - return referenceManager.transferOwnership(srcBuffer, targetAllocator).getTransferredBuffer(); - } - - @Override - public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { - throw new UnsupportedOperationException(); - } - - @Override - public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { - throw new UnsupportedOperationException(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java deleted file mode 100644 index aaccec602f292..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ /dev/null @@ -1,1500 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.ByteFunctionHelpers; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.TransferPair; - -/** BaseVariableWidthVector is a base class providing functionality for strings/bytes types. 
*/ -public abstract class BaseVariableWidthVector extends BaseValueVector - implements VariableWidthFieldVector { - private static final int DEFAULT_RECORD_BYTE_COUNT = 8; - private static final int INITIAL_BYTE_COUNT = - INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT; - private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE); - private int lastValueCapacity; - private long lastValueAllocationSizeInBytes; - - /* protected members */ - public static final int OFFSET_WIDTH = 4; /* 4 byte unsigned int to track offsets */ - protected static final byte[] emptyByteArray = new byte[] {}; - protected ArrowBuf validityBuffer; - protected ArrowBuf valueBuffer; - protected ArrowBuf offsetBuffer; - protected int valueCount; - protected int lastSet; - protected final Field field; - - /** - * Constructs a new instance. - * - * @param field The field materialized by this vector. - * @param allocator The allocator to use for creating/resizing buffers - */ - public BaseVariableWidthVector(Field field, final BufferAllocator allocator) { - super(allocator); - this.field = field; - lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT; - // -1 because we require one extra slot for the offset array. - lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1; - valueCount = 0; - lastSet = -1; - offsetBuffer = allocator.getEmpty(); - validityBuffer = allocator.getEmpty(); - valueBuffer = allocator.getEmpty(); - } - - @Override - public String getName() { - return field.getName(); - } - - /* TODO: - * see if getNullCount() can be made faster -- O(1) - */ - - /* TODO: - * Once the entire hierarchy has been refactored, move common functions - * like getNullCount(), splitAndTransferValidityBuffer to top level - * base class BaseValueVector. - * - * Along with this, some class members (validityBuffer) can also be - * abstracted out to top level base class. 
- * - * Right now BaseValueVector is the top level base class for other - * vector types in ValueVector hierarchy (non-nullable) and those - * vectors have not yet been refactored/removed so moving things to - * the top class as of now is not a good idea. - */ - - /** - * Get buffer that manages the validity (NULL or NON-NULL nature) of elements in the vector. - * Consider it as a buffer for internal bit vector data structure. - * - * @return buffer - */ - @Override - public ArrowBuf getValidityBuffer() { - return validityBuffer; - } - - /** - * Get the buffer that stores the data for elements in the vector. - * - * @return buffer - */ - @Override - public ArrowBuf getDataBuffer() { - return valueBuffer; - } - - /** - * buffer that stores the offsets for elements in the vector. This operation is not supported for - * fixed-width vectors. - * - * @return buffer - */ - @Override - public ArrowBuf getOffsetBuffer() { - return offsetBuffer; - } - - /** - * Get the memory address of buffer that stores the offsets for elements in the vector. - * - * @return starting address of the buffer - */ - @Override - public long getOffsetBufferAddress() { - return offsetBuffer.memoryAddress(); - } - - /** - * Get the memory address of buffer that manages the validity (NULL or NON-NULL nature) of - * elements in the vector. - * - * @return starting address of the buffer - */ - @Override - public long getValidityBufferAddress() { - return validityBuffer.memoryAddress(); - } - - /** - * Get the memory address of buffer that stores the data for elements in the vector. - * - * @return starting address of the buffer - */ - @Override - public long getDataBufferAddress() { - return valueBuffer.memoryAddress(); - } - - /** - * Sets the desired value capacity for the vector. This function doesn't allocate any memory for - * the vector. 
- * - * @param valueCount desired number of elements in the vector - */ - @Override - public void setInitialCapacity(int valueCount) { - final long size = (long) valueCount * DEFAULT_RECORD_BYTE_COUNT; - checkDataBufferSize(size); - computeAndCheckOffsetsBufferSize(valueCount); - lastValueAllocationSizeInBytes = (int) size; - lastValueCapacity = valueCount; - } - - /** - * Sets the desired value capacity for the vector. This function doesn't allocate any memory for - * the vector. - * - * @param valueCount desired number of elements in the vector - * @param density average number of bytes per variable width element - */ - @Override - public void setInitialCapacity(int valueCount, double density) { - long size = Math.max((long) (valueCount * density), 1L); - checkDataBufferSize(size); - computeAndCheckOffsetsBufferSize(valueCount); - lastValueAllocationSizeInBytes = (int) size; - lastValueCapacity = valueCount; - } - - /** - * Get the density of this ListVector. - * - * @return density - */ - public double getDensity() { - if (valueCount == 0) { - return 0.0D; - } - final int startOffset = getStartOffset(0); - final int endOffset = getStartOffset(valueCount); - final double totalListSize = endOffset - startOffset; - return totalListSize / valueCount; - } - - /** - * Get the current capacity which does not exceed either validity buffer or offset buffer. Note: - * Here the `getValueCapacity` has no relationship with the value buffer. - * - * @return number of elements that vector can hold. 
- */ - @Override - public int getValueCapacity() { - final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0); - return Math.min(offsetValueCapacity, getValidityBufferValueCapacity()); - } - - private int getValidityBufferValueCapacity() { - return capAtMaxInt(validityBuffer.capacity() * 8); - } - - private int getOffsetBufferValueCapacity() { - return capAtMaxInt(offsetBuffer.capacity() / OFFSET_WIDTH); - } - - /** zero out the vector and the data in associated buffers. */ - public void zeroVector() { - initValidityBuffer(); - initOffsetBuffer(); - valueBuffer.setZero(0, valueBuffer.capacity()); - } - - /* zero out the validity buffer */ - private void initValidityBuffer() { - validityBuffer.setZero(0, validityBuffer.capacity()); - } - - /* zero out the offset buffer */ - private void initOffsetBuffer() { - offsetBuffer.setZero(0, offsetBuffer.capacity()); - } - - /** - * Reset the vector to initial state. Same as {@link #zeroVector()}. Note that this method doesn't - * release any memory. - */ - @Override - public void reset() { - zeroVector(); - lastSet = -1; - valueCount = 0; - } - - /** Close the vector and release the associated buffers. */ - @Override - public void close() { - clear(); - } - - /** Same as {@link #close()}. */ - @Override - public void clear() { - validityBuffer = releaseBuffer(validityBuffer); - valueBuffer = releaseBuffer(valueBuffer); - offsetBuffer = releaseBuffer(offsetBuffer); - lastSet = -1; - valueCount = 0; - } - - /** - * Get the inner vectors. - * - * @return the inner vectors for this field as defined by the TypeLayout - * @deprecated This API will be removed as the current implementations no longer support inner - * vectors. - */ - @Deprecated - @Override - public List getFieldInnerVectors() { - throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); - } - - /** - * Initialize the children in schema for this Field. 
This operation is a NO-OP for scalar types - * since they don't have any children. - * - * @param children the schema - * @throws IllegalArgumentException if children is a non-empty list for scalar types. - */ - @Override - public void initializeChildrenFromFields(List children) { - if (!children.isEmpty()) { - throw new IllegalArgumentException("primitive type vector cannot have children"); - } - } - - /** - * Get the inner child vectors. - * - * @return list of child vectors for complex types, empty list for scalar vector types - */ - @Override - public List getChildrenFromFields() { - return Collections.emptyList(); - } - - /** - * Load the buffers of this vector with provided source buffers. The caller manages the source - * buffers and populates them before invoking this method. - * - * @param fieldNode the fieldNode indicating the value count - * @param ownBuffers the buffers for this Field (own buffers only, children not included) - */ - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - ArrowBuf bitBuffer = ownBuffers.get(0); - ArrowBuf offBuffer = ownBuffers.get(1); - ArrowBuf dataBuffer = ownBuffers.get(2); - - validityBuffer.getReferenceManager().release(); - validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); - offsetBuffer.getReferenceManager().release(); - offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); - valueBuffer.getReferenceManager().release(); - valueBuffer = dataBuffer.getReferenceManager().retain(dataBuffer, allocator); - - lastSet = fieldNode.getLength() - 1; - valueCount = fieldNode.getLength(); - } - - /** - * Get the buffers belonging to this vector. - * - * @return the inner buffers. - */ - @Override - public List getFieldBuffers() { - // before flight/IPC, we must bring the vector to a consistent state. - // this is because, it is possible that the offset buffers of some trailing values - // are not updated. 
this may cause some data in the data buffer being lost. - // for details, please see TestValueVector#testUnloadVariableWidthVector. - fillHoles(valueCount); - - List result = new ArrayList<>(3); - setReaderAndWriterIndex(); - result.add(validityBuffer); - result.add(offsetBuffer); - result.add(valueBuffer); - - return result; - } - - /** - * Export the buffers of the fields for C Data Interface. This method traverse the buffers and - * export buffer and buffer's memory address into a list of buffers and a pointer to the list of - * buffers. - */ - @Override - public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long nullValue) { - // before flight/IPC, we must bring the vector to a consistent state. - // this is because, it is possible that the offset buffers of some trailing values - // are not updated. this may cause some data in the data buffer being lost. - // for details, please see TestValueVector#testUnloadVariableWidthVector. - fillHoles(valueCount); - - exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true); - - if (offsetBuffer.capacity() == 0) { - // Empty offset buffer is allowed for historical reason. - // To export it through C Data interface, we need to allocate a buffer with one offset. - // We set `retain = false` to explicitly not increase the ref count for the exported buffer. - // The ref count of the newly created buffer (i.e., 1) already represents the usage - // at imported side. - exportBuffer(allocateOffsetBuffer(OFFSET_WIDTH), buffers, buffersPtr, nullValue, false); - } else { - exportBuffer(offsetBuffer, buffers, buffersPtr, nullValue, true); - } - - exportBuffer(valueBuffer, buffers, buffersPtr, nullValue, true); - } - - /** Set the reader and writer indexes for the inner buffers. 
*/ - private void setReaderAndWriterIndex() { - validityBuffer.readerIndex(0); - offsetBuffer.readerIndex(0); - valueBuffer.readerIndex(0); - if (valueCount == 0) { - validityBuffer.writerIndex(0); - offsetBuffer.writerIndex(0); - valueBuffer.writerIndex(0); - } else { - final int lastDataOffset = getStartOffset(valueCount); - validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH); - valueBuffer.writerIndex(lastDataOffset); - } - } - - /** Same as {@link #allocateNewSafe()}. */ - @Override - public void allocateNew() { - allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); - } - - /** - * Allocate memory for the vector. We internally use a default value count of 4096 to allocate - * memory for at least these many elements in the vector. See {@link #allocateNew(long, int)} for - * allocating memory for specific number of elements in the vector. - * - * @return false if memory allocation fails, true otherwise. - */ - @Override - public boolean allocateNewSafe() { - try { - allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); - return true; - } catch (Exception e) { - return false; - } - } - - /** - * Allocate memory for the vector to support storing at least the provided number of elements in - * the vector. This method must be called prior to using the ValueVector. 
- * - * @param totalBytes desired total memory capacity - * @param valueCount the desired number of elements in the vector - * @throws org.apache.arrow.memory.OutOfMemoryException if memory allocation fails - */ - @Override - public void allocateNew(long totalBytes, int valueCount) { - assert totalBytes >= 0; - - checkDataBufferSize(totalBytes); - computeAndCheckOffsetsBufferSize(valueCount); - - /* we are doing a new allocation -- release the current buffers */ - clear(); - - try { - allocateBytes(totalBytes, valueCount); - } catch (Exception e) { - clear(); - throw e; - } - } - - @Override - public void allocateNew(int valueCount) { - allocateNew(lastValueAllocationSizeInBytes, valueCount); - } - - /* Check if the data buffer size is within bounds. */ - private void checkDataBufferSize(long size) { - if (size > MAX_BUFFER_SIZE || size < 0) { - throw new OversizedAllocationException( - "Memory required for vector " - + "is (" - + size - + "), which is overflow or more than max allowed (" - + MAX_BUFFER_SIZE - + "). " - + "You could consider using LargeVarCharVector/LargeVarBinaryVector for large strings/large bytes types"); - } - } - - /* - * Compute the buffer size required for 'valueCount' offsets and validity, and check if it's - * within bounds. - */ - private long computeAndCheckOffsetsBufferSize(int valueCount) { - /* to track the end offset of last data element in vector, we need - * an additional slot in offset buffer. 
- */ - final long size = computeCombinedBufferSize(valueCount + 1, OFFSET_WIDTH); - if (size > MAX_BUFFER_SIZE) { - throw new OversizedAllocationException( - "Memory required for vector capacity " - + valueCount - + " is (" - + size - + "), which is more than max allowed (" - + MAX_BUFFER_SIZE - + ")"); - } - return size; - } - - /* allocate the inner buffers */ - private void allocateBytes(final long valueBufferSize, final int valueCount) { - /* allocate data buffer */ - long curSize = valueBufferSize; - valueBuffer = allocator.buffer(curSize); - valueBuffer.readerIndex(0); - - /* allocate offset buffer and validity buffer */ - DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount + 1, OFFSET_WIDTH); - offsetBuffer = buffers.getDataBuf(); - validityBuffer = buffers.getValidityBuf(); - initOffsetBuffer(); - initValidityBuffer(); - - lastValueCapacity = getValueCapacity(); - lastValueAllocationSizeInBytes = capAtMaxInt(valueBuffer.capacity()); - } - - /* allocate offset buffer */ - private ArrowBuf allocateOffsetBuffer(final long size) { - final int curSize = (int) size; - ArrowBuf offsetBuffer = allocator.buffer(curSize); - offsetBuffer.readerIndex(0); - initOffsetBuffer(); - return offsetBuffer; - } - - /* allocate validity buffer */ - private void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - initValidityBuffer(); - } - - /** - * Resize the vector to increase the capacity. The internal behavior is to double the current - * value capacity. - */ - @Override - public void reAlloc() { - reallocDataBuffer(); - reallocValidityAndOffsetBuffers(); - } - - /** - * Reallocate the data buffer. Data Buffer stores the actual data for VARCHAR or VARBINARY - * elements in the vector. The behavior is to double the size of buffer. 
- * - * @throws OversizedAllocationException if the desired new size is more than max allowed - * @throws OutOfMemoryException if the internal memory allocation fails - */ - public void reallocDataBuffer() { - final long currentBufferCapacity = valueBuffer.capacity(); - long newAllocationSize = currentBufferCapacity * 2; - if (newAllocationSize == 0) { - if (lastValueAllocationSizeInBytes > 0) { - newAllocationSize = lastValueAllocationSizeInBytes; - } else { - newAllocationSize = INITIAL_BYTE_COUNT * 2L; - } - } - - reallocDataBuffer(newAllocationSize); - } - - /** - * Reallocate the data buffer to given size. Data Buffer stores the actual data for VARCHAR or - * VARBINARY elements in the vector. The actual allocate size may be larger than the request one - * because it will round up the provided value to the nearest power of two. - * - * @param desiredAllocSize the desired new allocation size - * @throws OversizedAllocationException if the desired new size is more than max allowed - * @throws OutOfMemoryException if the internal memory allocation fails - */ - public void reallocDataBuffer(long desiredAllocSize) { - if (desiredAllocSize == 0) { - return; - } - - final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); - assert newAllocationSize >= 1; - - checkDataBufferSize(newAllocationSize); - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, valueBuffer, 0, valueBuffer.capacity()); - valueBuffer.getReferenceManager().release(); - valueBuffer = newBuf; - lastValueAllocationSizeInBytes = valueBuffer.capacity(); - } - - /** - * Reallocate the validity and offset buffers for this vector. Validity buffer is used to track - * the NULL or NON-NULL nature of elements in the vector and offset buffer is used to store the - * lengths of variable width elements in the vector. - * - *

    Note that data buffer for variable length vectors moves independent of the companion - * validity and offset buffers. This is in contrast to what we have for fixed width vectors. - * - *

    So even though we may have setup an initial capacity of 1024 elements in the vector, it is - * quite possible that we need to reAlloc() the data buffer when we are setting the 5th element in - * the vector simply because previous variable length elements have exhausted the buffer capacity. - * However, we really don't need to reAlloc() validity and offset buffers until we try to set the - * 1025th element This is why we do a separate check for safe methods to determine which buffer - * needs reallocation. - * - * @throws OversizedAllocationException if the desired new size is more than max allowed - * @throws OutOfMemoryException if the internal memory allocation fails - */ - public void reallocValidityAndOffsetBuffers() { - int targetOffsetCount = capAtMaxInt((offsetBuffer.capacity() / OFFSET_WIDTH) * 2); - if (targetOffsetCount == 0) { - if (lastValueCapacity > 0) { - targetOffsetCount = (lastValueCapacity + 1); - } else { - targetOffsetCount = 2 * (INITIAL_VALUE_ALLOCATION + 1); - } - } - computeAndCheckOffsetsBufferSize(targetOffsetCount); - - DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetOffsetCount, OFFSET_WIDTH); - final ArrowBuf newOffsetBuffer = buffers.getDataBuf(); - newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity()); - newOffsetBuffer.setZero( - offsetBuffer.capacity(), newOffsetBuffer.capacity() - offsetBuffer.capacity()); - offsetBuffer.getReferenceManager().release(); - offsetBuffer = newOffsetBuffer; - - final ArrowBuf newValidityBuffer = buffers.getValidityBuf(); - newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity()); - newValidityBuffer.setZero( - validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity()); - validityBuffer.getReferenceManager().release(); - validityBuffer = newValidityBuffer; - - lastValueCapacity = getValueCapacity(); - } - - /** - * Get the size (number of bytes) of underlying data buffer. 
- * - * @return number of bytes in the data buffer - */ - @Override - public int getByteCapacity() { - return capAtMaxInt(valueBuffer.capacity()); - } - - @Override - public int sizeOfValueBuffer() { - if (valueCount == 0) { - return 0; - } - return offsetBuffer.getInt((long) valueCount * OFFSET_WIDTH); - } - - /** - * Get the size (number of bytes) of underlying buffers used by this vector. - * - * @return size of underlying buffers. - */ - @Override - public int getBufferSize() { - return getBufferSizeFor(this.valueCount); - } - - /** - * Get the potential buffer size for a particular number of records. - * - * @param valueCount desired number of elements in the vector - * @return estimated size of underlying buffers if the vector holds a given number of elements - */ - @Override - public int getBufferSizeFor(final int valueCount) { - if (valueCount == 0) { - return 0; - } - - final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); - final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH; - /* get the end offset for this valueCount */ - final int dataBufferSize = offsetBuffer.getInt((long) valueCount * OFFSET_WIDTH); - return validityBufferSize + offsetBufferSize + dataBufferSize; - } - - /** - * Get information about how this field is materialized. - * - * @return the field corresponding to this vector - */ - @Override - public Field getField() { - return field; - } - - /** - * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer so it only should be used for in-context access. Also note - * that this buffer changes regularly thus external classes shouldn't hold a reference to it - * (unless they change it). - * - * @param clear Whether to clear vector before returning; the buffers will still be refcounted but - * the returned array will be the only reference to them - * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. 
- */ - @Override - public ArrowBuf[] getBuffers(boolean clear) { - final ArrowBuf[] buffers; - setReaderAndWriterIndex(); - if (getBufferSize() == 0) { - buffers = new ArrowBuf[0]; - } else { - buffers = new ArrowBuf[3]; - buffers[0] = validityBuffer; - buffers[1] = offsetBuffer; - buffers[2] = valueBuffer; - } - if (clear) { - for (final ArrowBuf buffer : buffers) { - buffer.getReferenceManager().retain(); - } - clear(); - } - return buffers; - } - - /** Validate the scalar values held by this vector. */ - public void validateScalars() { - // No validation by default. - } - - /** - * Construct a transfer pair of this vector and another vector of same type. - * - * @param field The field materialized by this vector. - * @param allocator allocator for the target vector - * @param callBack not used - * @return TransferPair - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return getTransferPair(field, allocator); - } - - /** - * Construct a transfer pair of this vector and another vector of same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @param callBack not used - * @return TransferPair - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return getTransferPair(ref, allocator); - } - - /** - * Construct a transfer pair of this vector and another vector of same type. - * - * @param allocator allocator for the target vector - * @return TransferPair - */ - @Override - public TransferPair getTransferPair(BufferAllocator allocator) { - return getTransferPair(getName(), allocator); - } - - /** - * Construct a transfer pair of this vector and another vector of same type. 
- * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return TransferPair - */ - @Override - public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator); - - /** - * Construct a transfer pair of this vector and another vector of same type. - * - * @param field The field materialized by this vector. - * @param allocator allocator for the target vector - * @return TransferPair - */ - @Override - public abstract TransferPair getTransferPair(Field field, BufferAllocator allocator); - - /** - * Transfer this vector'data to another vector. The memory associated with this vector is - * transferred to the allocator of target vector for accounting and management purposes. - * - * @param target destination vector for transfer - */ - public void transferTo(BaseVariableWidthVector target) { - compareTypes(target, "transferTo"); - target.clear(); - target.validityBuffer = transferBuffer(validityBuffer, target.allocator); - target.valueBuffer = transferBuffer(valueBuffer, target.allocator); - target.offsetBuffer = transferBuffer(offsetBuffer, target.allocator); - target.setLastSet(this.lastSet); - if (this.valueCount > 0) { - target.setValueCount(this.valueCount); - } - clear(); - } - - /** - * Slice this vector at desired index and length and transfer the corresponding data to the target - * vector. - * - * @param startIndex start position of the split in source vector. - * @param length length of the split. 
- * @param target destination vector - */ - public void splitAndTransferTo(int startIndex, int length, BaseVariableWidthVector target) { - Preconditions.checkArgument( - startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, - "Invalid parameters startIndex: %s, length: %s for valueCount: %s", - startIndex, - length, - valueCount); - compareTypes(target, "splitAndTransferTo"); - target.clear(); - if (length > 0) { - splitAndTransferValidityBuffer(startIndex, length, target); - splitAndTransferOffsetBuffer(startIndex, length, target); - target.setLastSet(length - 1); - target.setValueCount(length); - } - } - - /** - * Transfer the offsets along with data. Unlike the data buffer, we cannot simply slice the offset - * buffer for split and transfer. The reason is that offsets in the target vector have to be - * adjusted and made relative to the staring offset in source vector from the start index of - * split. This is why, we need to explicitly allocate the offset buffer and set the adjusted - * offsets in the target vector. 
- */ - private void splitAndTransferOffsetBuffer( - int startIndex, int length, BaseVariableWidthVector target) { - final int start = getStartOffset(startIndex); - final int end = getStartOffset(startIndex + length); - final int dataLength = end - start; - - if (start == 0) { - final ArrowBuf slicedOffsetBuffer = - offsetBuffer.slice( - startIndex * ((long) OFFSET_WIDTH), (1 + length) * ((long) OFFSET_WIDTH)); - target.offsetBuffer = transferBuffer(slicedOffsetBuffer, target.allocator); - } else { - target.offsetBuffer = target.allocateOffsetBuffer((long) (length + 1) * OFFSET_WIDTH); - for (int i = 0; i < length + 1; i++) { - final int relativeSourceOffset = getStartOffset(startIndex + i) - start; - target.offsetBuffer.setInt((long) i * OFFSET_WIDTH, relativeSourceOffset); - } - } - final ArrowBuf slicedBuffer = valueBuffer.slice(start, dataLength); - target.valueBuffer = transferBuffer(slicedBuffer, target.allocator); - } - - /* - * Transfer the validity. - */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, BaseVariableWidthVector target) { - if (length <= 0) { - return; - } - - final int firstByteSource = BitVectorHelper.byteIndex(startIndex); - final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - final int byteSizeTarget = getValidityBufferSizeFromCount(length); - final int offset = startIndex % 8; - - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - final ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); - return; - } - - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. 
- */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. - */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - this.validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - - /*----------------------------------------------------------------* - | | - | common getters and setters | - | | - *----------------------------------------------------------------*/ - - /** - * Get the number of elements that are null in the vector. - * - * @return the number of null elements. - */ - @Override - public int getNullCount() { - return BitVectorHelper.getNullCount(validityBuffer, valueCount); - } - - /** - * Check if the given index is within the current value capacity of the vector. 
- * - * @param index position to check - * @return true if index is within the current value capacity - */ - public boolean isSafe(int index) { - return index < getValueCapacity(); - } - - /** - * Check if element at given index is null. - * - * @param index position of element - * @return true if element at given index is null - */ - @Override - public boolean isNull(int index) { - return (isSet(index) == 0); - } - - /** - * Same as {@link #isNull(int)}. - * - * @param index position of element - * @return 1 if element at given index is not null, 0 otherwise - */ - public int isSet(int index) { - final int byteIndex = index >> 3; - final byte b = validityBuffer.getByte(byteIndex); - final int bitIndex = index & 7; - return (b >> bitIndex) & 0x01; - } - - /** - * Get the value count of vector. This will always be zero unless setValueCount(int) has been - * called prior to calling this. - * - * @return valueCount for the vector - */ - @Override - public int getValueCount() { - return valueCount; - } - - /** - * Sets the value count for the vector. - * - * @param valueCount value count - */ - @Override - public void setValueCount(int valueCount) { - assert valueCount >= 0; - this.valueCount = valueCount; - while (valueCount > getValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - fillHoles(valueCount); - lastSet = valueCount - 1; - setReaderAndWriterIndex(); - } - - /** - * Create holes in the vector upto the given index (exclusive). Holes will be created from the - * current last set position in the vector. - * - * @param index target index - */ - @Override - public void fillEmpties(int index) { - handleSafe(index, emptyByteArray.length); - fillHoles(index); - lastSet = index - 1; - } - - /** - * Set the index of last non-null element in the vector. It is important to call this method with - * appropriate value before calling {@link #setValueCount(int)}. - * - * @param value desired index of last non-null element. 
- */ - @Override - public void setLastSet(int value) { - lastSet = value; - } - - /** - * Get the index of last non-null element in the vector. - * - * @return index of the last non-null element - */ - @Override - public int getLastSet() { - return lastSet; - } - - /** - * Get the starting position (offset) in the data stream for a given element in the vector. - * - * @param index position of the element in the vector - * @return starting offset for the element - */ - public long getStartEnd(int index) { - return offsetBuffer.getLong((long) index * OFFSET_WIDTH); - } - - /** - * Mark the particular position in the vector as non-null. - * - * @param index position of the element. - */ - @Override - public void setIndexDefined(int index) { - // We need to check and realloc both validity and offset buffer - while (index >= getValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - BitVectorHelper.setBit(validityBuffer, index); - } - - /** - * Sets the value length for an element. - * - * @param index position of the element to set - * @param length length of the element - */ - @Override - public void setValueLengthSafe(int index, int length) { - assert index >= 0; - handleSafe(index, length); - fillHoles(index); - final int startOffset = getStartOffset(index); - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + length); - lastSet = index; - } - - /** - * Get the variable length element at specified index as Text. - * - * @param index position of element to get - * @return greater than 0 length for non-null element, 0 otherwise - */ - @Override - public int getValueLength(int index) { - assert index >= 0; - if (isSet(index) == 0) { - return 0; - } - final int startOffset = getStartOffset(index); - final int dataLength = getEndOffset(index) - startOffset; - return dataLength; - } - - /** - * Set the variable length element at the specified index to the supplied byte array. 
This is same - * as using {@link #set(int, byte[], int, int)} with start as 0 and length as value.length - * - * @param index position of the element to set - * @param value array of bytes to write - */ - @Override - public void set(int index, byte[] value) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value, 0, value.length); - lastSet = index; - } - - /** - * Same as {@link #set(int, byte[])} except that it handles the case where index and length of new - * element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param value array of bytes to write - */ - @Override - public void setSafe(int index, byte[] value) { - assert index >= 0; - handleSafe(index, value.length); - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value, 0, value.length); - lastSet = index; - } - - /** - * Set the variable length element at the specified index to the supplied byte array. - * - * @param index position of the element to set - * @param value array of bytes to write - * @param start start index in array of bytes - * @param length length of data in array of bytes - */ - public void set(int index, byte[] value, int start, int length) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value, start, length); - lastSet = index; - } - - /** - * Same as {@link #set(int, byte[], int, int)} except that it handles the case where index and - * length of new element are beyond the existing capacity of the vector. 
- * - * @param index position of the element to set - * @param value array of bytes to write - * @param start start index in array of bytes - * @param length length of data in array of bytes - */ - public void setSafe(int index, byte[] value, int start, int length) { - assert index >= 0; - handleSafe(index, length); - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value, start, length); - lastSet = index; - } - - /** - * Set the variable length element at the specified index to the content in supplied ByteBuffer. - * - * @param index position of the element to set - * @param value ByteBuffer with data - * @param start start index in ByteBuffer - * @param length length of data in ByteBuffer - */ - @Override - public void set(int index, ByteBuffer value, int start, int length) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final int startOffset = getStartOffset(index); - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + length); - valueBuffer.setBytes(startOffset, value, start, length); - lastSet = index; - } - - /** - * Same as {@link #set(int, ByteBuffer, int, int)} except that it handles the case where index and - * length of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param value ByteBuffer with data - * @param start start index in ByteBuffer - * @param length length of data in ByteBuffer - */ - public void setSafe(int index, ByteBuffer value, int start, int length) { - assert index >= 0; - handleSafe(index, length); - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final int startOffset = getStartOffset(index); - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + length); - valueBuffer.setBytes(startOffset, value, start, length); - lastSet = index; - } - - /** - * Set the element at the given index to null. 
- * - * @param index position of element - */ - @Override - public void setNull(int index) { - // We need to check and realloc both validity and offset buffer - while (index >= getValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - BitVectorHelper.unsetBit(validityBuffer, index); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param start start position of data in buffer - * @param end end position of data in buffer - * @param buffer data buffer containing the variable width element to be stored in the vector - */ - public void set(int index, int isSet, int start, int end, ArrowBuf buffer) { - assert index >= 0; - final int dataLength = end - start; - fillHoles(index); - BitVectorHelper.setValidityBit(validityBuffer, index, isSet); - final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + dataLength); - valueBuffer.setBytes(startOffset, buffer, start, dataLength); - lastSet = index; - } - - /** - * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case when index - * is greater than or equal to current value capacity of the vector. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param start start position of data in buffer - * @param end end position of data in buffer - * @param buffer data buffer containing the variable width element to be stored in the vector - */ - public void setSafe(int index, int isSet, int start, int end, ArrowBuf buffer) { - assert index >= 0; - final int dataLength = end - start; - handleSafe(index, dataLength); - fillHoles(index); - BitVectorHelper.setValidityBit(validityBuffer, index, isSet); - final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); - offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, buffer, start, dataLength); - lastSet = index; - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param start start position of data in buffer - * @param length length of data in buffer - * @param buffer data buffer containing the variable width element to be stored in the vector - */ - public void set(int index, int start, int length, ArrowBuf buffer) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH); - offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length); - final ArrowBuf bb = buffer.slice(start, length); - valueBuffer.setBytes(startOffset, bb); - lastSet = index; - } - - /** - * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case when index - * is greater than or equal to current value capacity of the vector. 
- * - * @param index position of the new value - * @param start start position of data in buffer - * @param length length of data in buffer - * @param buffer data buffer containing the variable width element to be stored in the vector - */ - public void setSafe(int index, int start, int length, ArrowBuf buffer) { - assert index >= 0; - handleSafe(index, length); - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final int startOffset = getStartOffset(index); - offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length); - final ArrowBuf bb = buffer.slice(start, length); - valueBuffer.setBytes(startOffset, bb); - lastSet = index; - } - - /*----------------------------------------------------------------* - | | - | helper methods for setters | - | | - *----------------------------------------------------------------*/ - - protected final void fillHoles(int index) { - for (int i = lastSet + 1; i < index; i++) { - setBytes(i, emptyByteArray, 0, emptyByteArray.length); - } - lastSet = index - 1; - } - - protected final void setBytes(int index, byte[] value, int start, int length) { - /* end offset of current last element in the vector. this will - * be the start offset of new element we are trying to store. - */ - final int startOffset = getStartOffset(index); - /* set new end offset */ - offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length); - /* store the var length data in value buffer */ - valueBuffer.setBytes(startOffset, value, start, length); - } - - public final int getStartOffset(int index) { - return offsetBuffer.getInt((long) index * OFFSET_WIDTH); - } - - protected final void handleSafe(int index, int dataLength) { - /* - * IMPORTANT: - * value buffer for variable length vectors moves independent - * of the companion validity and offset buffers. This is in - * contrast to what we have for fixed width vectors. - * - * Here there is no concept of getValueCapacity() in the - * data stream. 
getValueCapacity() is applicable only to validity - * and offset buffers. - * - * So even though we may have setup an initial capacity of 1024 - * elements in the vector, it is quite possible - * that we need to reAlloc() the data buffer when we are setting - * the 5th element in the vector simply because previous - * variable length elements have exhausted the buffer capacity. - * However, we really don't need to reAlloc() validity and - * offset buffers until we try to set the 1025th element - * This is why we do a separate check for safe methods to - * determine which buffer needs reallocation. - */ - while (index >= getValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - final long startOffset = lastSet < 0 ? 0 : getStartOffset(lastSet + 1); - final long targetCapacity = startOffset + dataLength; - if (valueBuffer.capacity() < targetCapacity) { - reallocDataBuffer(targetCapacity); - } - } - - /** - * Method used by Json Writer to read a variable width element from the variable width vector and - * write to Json. - * - *

    This method should not be used externally. - * - * @param data buffer storing the variable width vector elements - * @param offset buffer storing the offsets of variable width vector elements - * @param index position of the element in the vector - * @return array of bytes - */ - public static byte[] get(final ArrowBuf data, final ArrowBuf offset, int index) { - final int currentStartOffset = offset.getInt((long) index * OFFSET_WIDTH); - final int dataLength = offset.getInt((long) (index + 1) * OFFSET_WIDTH) - currentStartOffset; - final byte[] result = new byte[dataLength]; - data.getBytes(currentStartOffset, result, 0, dataLength); - return result; - } - - /** - * Method used by Json Reader to explicitly set the offsets of the variable width vector data. The - * method takes care of allocating the memory for offsets if the caller hasn't done so. - * - *

    This method should not be used externally. - * - * @param buffer ArrowBuf to store offsets for variable width elements - * @param allocator memory allocator - * @param valueCount number of elements - * @param index position of the element - * @param value offset of the element - * @return buffer holding the offsets - */ - public static ArrowBuf set( - ArrowBuf buffer, BufferAllocator allocator, int valueCount, int index, int value) { - if (buffer == null) { - buffer = allocator.buffer((long) valueCount * OFFSET_WIDTH); - } - buffer.setInt((long) index * OFFSET_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex((long) valueCount * OFFSET_WIDTH); - } - - return buffer; - } - - /** - * Copy a cell value from a particular index in source vector to a particular position in this - * vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - @Override - public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - if (from.isNull(fromIndex)) { - fillHoles(thisIndex); - BitVectorHelper.unsetBit(this.validityBuffer, thisIndex); - final int copyStart = offsetBuffer.getInt((long) thisIndex * OFFSET_WIDTH); - offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart); - } else { - final int start = from.getOffsetBuffer().getInt((long) fromIndex * OFFSET_WIDTH); - final int end = from.getOffsetBuffer().getInt((long) (fromIndex + 1) * OFFSET_WIDTH); - final int length = end - start; - fillHoles(thisIndex); - BitVectorHelper.setBit(this.validityBuffer, thisIndex); - final int copyStart = getStartOffset(thisIndex); - from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, length); - offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length); - } - lastSet = thisIndex; - } - - /** - * Same as {@link #copyFrom(int, int, 
ValueVector)} except that it handles the case when the - * capacity of the vector needs to be expanded before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - @Override - public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - if (from.isNull(fromIndex)) { - handleSafe(thisIndex, 0); - fillHoles(thisIndex); - BitVectorHelper.unsetBit(this.validityBuffer, thisIndex); - final int copyStart = getStartOffset(thisIndex); - offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart); - } else { - final int start = from.getOffsetBuffer().getInt((long) fromIndex * OFFSET_WIDTH); - final int end = from.getOffsetBuffer().getInt((long) (fromIndex + 1) * OFFSET_WIDTH); - final int length = end - start; - handleSafe(thisIndex, length); - fillHoles(thisIndex); - BitVectorHelper.setBit(this.validityBuffer, thisIndex); - final int copyStart = getStartOffset(thisIndex); - from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, length); - offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length); - } - lastSet = thisIndex; - } - - @Override - public ArrowBufPointer getDataPointer(int index) { - return getDataPointer(index, new ArrowBufPointer()); - } - - @Override - public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) { - if (isNull(index)) { - reuse.set(null, 0, 0); - } else { - int offset = getStartOffset(index); - int length = getEndOffset(index) - offset; - reuse.set(valueBuffer, offset, length); - } - return reuse; - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - if (isNull(index)) { - return ArrowBufPointer.NULL_HASH_CODE; - } - final int start = getStartOffset(index); - final int end = 
getEndOffset(index); - return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end); - } - - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } - - /** Gets the ending offset of a record, given its index. */ - public final int getEndOffset(int index) { - return offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java deleted file mode 100644 index 15d21827839e2..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java +++ /dev/null @@ -1,1698 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; -import static org.apache.arrow.vector.util.DataSizeRoundingUtil.roundUpToMultipleOf16; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.ReusableBuffer; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.ByteFunctionHelpers; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.TransferPair; - -/** - * BaseVariableWidthViewVector is a base class providing functionality for strings/bytes types in - * view format. 
- */ -public abstract class BaseVariableWidthViewVector extends BaseValueVector - implements VariableWidthFieldVector { - // A single element of a view comprises 16 bytes - public static final int ELEMENT_SIZE = 16; - public static final int INITIAL_VIEW_VALUE_ALLOCATION = 4096; - private static final int INITIAL_BYTE_COUNT = INITIAL_VIEW_VALUE_ALLOCATION * ELEMENT_SIZE; - private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE); - private int lastValueCapacity; - private long lastValueAllocationSizeInBytes; - - /* - * Variable Width View Vector comprises the following format - * - * Short strings, length <= 12 - * | Bytes 0-3 | Bytes 4-15 | - * |------------|---------------------------------------| - * | length | data (padded with 0) | - * |------------|---------------------------------------| - * - * Long strings, length > 12 - * | Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | Bytes 12-15 | - * |------------|------------|------------|-------------| - * | length | prefix | buf.index | offset | - * |------------|------------|------------|-------------| - * - * */ - // 12 byte unsigned int to track inline views - public static final int INLINE_SIZE = 12; - // The first 4 bytes of view are allocated for length - public static final int LENGTH_WIDTH = 4; - // The second 4 bytes of view are allocated for prefix width - public static final int PREFIX_WIDTH = 4; - // The third 4 bytes of view are allocated for buffer index - public static final int BUF_INDEX_WIDTH = 4; - public static final byte[] EMPTY_BYTE_ARRAY = new byte[] {}; - protected ArrowBuf validityBuffer; - // The view buffer is used to store the variable width view elements - protected ArrowBuf viewBuffer; - // The external buffer which stores the long strings - protected List dataBuffers; - protected int initialDataBufferSize; - protected int valueCount; - protected int lastSet; - protected final Field field; - - /** - * Constructs a new instance. 
- * - * @param field The field materialized by this vector - * @param allocator The allocator to use for creating/resizing buffers - */ - public BaseVariableWidthViewVector(Field field, final BufferAllocator allocator) { - super(allocator); - this.field = field; - lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT; - lastValueCapacity = INITIAL_VIEW_VALUE_ALLOCATION; - valueCount = 0; - lastSet = -1; - validityBuffer = allocator.getEmpty(); - viewBuffer = allocator.getEmpty(); - dataBuffers = new ArrayList<>(); - } - - @Override - public String getName() { - return field.getName(); - } - - /* TODO: - * see if getNullCount() can be made faster -- O(1) - */ - - /* TODO: - * Once the entire hierarchy has been refactored, move common functions - * like getNullCount(), splitAndTransferValidityBuffer to top level - * base class BaseValueVector. - * - * Along with this, some class members (validityBuffer) can also be - * abstracted out to top level base class. - * - * Right now BaseValueVector is the top level base class for other - * vector types in ValueVector hierarchy (non-nullable) and those - * vectors have not yet been refactored/removed so moving things to - * the top class as of now is not a good idea. - */ - - /* TODO: - * Implement TransferPair functionality - * https://github.com/apache/arrow/issues/40932 - * - */ - - /** - * Get buffer that manages the validity (NULL or NON-NULL nature) of elements in the vector. - * Consider it as a buffer for internal bit vector data structure. - * - * @return buffer - */ - @Override - public ArrowBuf getValidityBuffer() { - return validityBuffer; - } - - /** - * Get the buffer that stores the data for elements in the vector. - * - * @return buffer - */ - @Override - public ArrowBuf getDataBuffer() { - return viewBuffer; - } - - /** - * Get the buffers that store the data for views in the vector. 
- * - * @return list of ArrowBuf - */ - public List getDataBuffers() { - return dataBuffers; - } - - /** - * BaseVariableWidthViewVector doesn't support offset buffer. - * - * @return throws UnsupportedOperationException - */ - @Override - public ArrowBuf getOffsetBuffer() { - throw new UnsupportedOperationException( - "Offset buffer is not supported in BaseVariableWidthViewVector"); - } - - /** - * BaseVariableWidthViewVector doesn't support offset buffer. - * - * @return throws UnsupportedOperationException - */ - @Override - public long getOffsetBufferAddress() { - throw new UnsupportedOperationException( - "Offset buffer is not supported in BaseVariableWidthViewVector"); - } - - /** - * Get the memory address of buffer that manages the validity (NULL or NON-NULL nature) of - * elements in the vector. - * - * @return starting address of the buffer - */ - @Override - public long getValidityBufferAddress() { - return validityBuffer.memoryAddress(); - } - - /** - * Get the memory address of buffer that stores the data for elements in the vector. - * - * @return starting address of the buffer - */ - @Override - public long getDataBufferAddress() { - return viewBuffer.memoryAddress(); - } - - /** - * Sets the desired value capacity for the vector. This function doesn't allocate any memory for - * the vector. - * - * @param valueCount desired number of elements in the vector - */ - @Override - public void setInitialCapacity(int valueCount) { - final long size = (long) valueCount * ELEMENT_SIZE; - checkDataBufferSize(size); - lastValueAllocationSizeInBytes = (int) size; - lastValueCapacity = valueCount; - } - - /** - * Sets the desired value capacity for the vector. This function doesn't allocate any memory for - * the vector. 
- * - * @param valueCount desired number of elements in the vector - * @param density average number of bytes per variable width view element - */ - @Override - public void setInitialCapacity(int valueCount, double density) { - final long size = (long) valueCount * ELEMENT_SIZE; - initialDataBufferSize = (int) (valueCount * density); - checkDataBufferSize(size); - lastValueAllocationSizeInBytes = (int) size; - lastValueCapacity = valueCount; - } - - /** - * Get the density of this ListVector. - * - * @return density - */ - public double getDensity() { - if (valueCount == 0) { - return 0.0D; - } - final double totalListSize = getTotalValueLengthUpToIndex(valueCount); - return totalListSize / valueCount; - } - - /** - * Get the current capacity which does not exceed either validity buffer or value buffer. Note: - * Here the `getValueCapacity` has a relationship with the value buffer. - * - * @return number of elements that vector can hold. - */ - @Override - public int getValueCapacity() { - final int validityCapacity = getValidityBufferValueCapacity(); - final int valueBufferCapacity = Math.max(capAtMaxInt(viewBuffer.capacity() / ELEMENT_SIZE), 0); - return Math.min(valueBufferCapacity, validityCapacity); - } - - private int getValidityBufferValueCapacity() { - return capAtMaxInt(validityBuffer.capacity() * 8); - } - - /** zero out the vector and the data in associated buffers. */ - public void zeroVector() { - initValidityBuffer(); - viewBuffer.setZero(0, viewBuffer.capacity()); - clearDataBuffers(); - } - - /* zero out the validity buffer */ - private void initValidityBuffer() { - validityBuffer.setZero(0, validityBuffer.capacity()); - } - - /** Reset the vector to initial state. Note that this method doesn't release any memory. */ - @Override - public void reset() { - zeroVector(); - lastSet = -1; - valueCount = 0; - } - - /** Close the vector and release the associated buffers. 
*/ - @Override - public void close() { - clear(); - } - - /** Same as {@link #close()}. */ - @Override - public void clear() { - validityBuffer = releaseBuffer(validityBuffer); - viewBuffer = releaseBuffer(viewBuffer); - clearDataBuffers(); - lastSet = -1; - valueCount = 0; - } - - /** Release the data buffers and clear the list. */ - public void clearDataBuffers() { - for (ArrowBuf buffer : dataBuffers) { - releaseBuffer(buffer); - } - dataBuffers.clear(); - } - - /** - * Get the inner vectors. - * - * @return the inner vectors for this field as defined by the TypeLayout - * @deprecated This API will be removed as the current implementations no longer support inner - * vectors. - */ - @Deprecated - @Override - public List getFieldInnerVectors() { - throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); - } - - /** - * Initialize the children in schema for this Field. This operation is a NO-OP for scalar types - * since they don't have any children. - * - * @param children the schema - * @throws IllegalArgumentException if children is a non-empty list for scalar types. - */ - @Override - public void initializeChildrenFromFields(List children) { - if (!children.isEmpty()) { - throw new IllegalArgumentException("primitive type vector cannot have children"); - } - } - - /** - * Get the inner child vectors. - * - * @return list of child vectors for complex types, empty list for scalar vector types - */ - @Override - public List getChildrenFromFields() { - return Collections.emptyList(); - } - - /** - * Load the buffers of this vector with provided source buffers. The caller manages the source - * buffers and populates them before invoking this method. 
- * - * @param fieldNode the fieldNode indicating the value count - * @param ownBuffers the buffers for this Field (own buffers only, children not included) - */ - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - ArrowBuf bitBuf = ownBuffers.get(0); - ArrowBuf viewBuf = ownBuffers.get(1); - List dataBufs = ownBuffers.subList(2, ownBuffers.size()); - - this.clear(); - - this.viewBuffer = viewBuf.getReferenceManager().retain(viewBuf, allocator); - this.validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuf, allocator); - - for (ArrowBuf dataBuf : dataBufs) { - this.dataBuffers.add(dataBuf.getReferenceManager().retain(dataBuf, allocator)); - } - - lastSet = fieldNode.getLength() - 1; - valueCount = fieldNode.getLength(); - } - - /** - * Get the buffers belonging to this vector. - * - * @return the inner buffers. - */ - @Override - public List getFieldBuffers() { - List result = new ArrayList<>(2 + dataBuffers.size()); - setReaderAndWriterIndex(); - result.add(validityBuffer); - result.add(viewBuffer); - // append data buffers - result.addAll(dataBuffers); - - return result; - } - - /** Set the reader and writer indexes for the inner buffers. */ - private void setReaderAndWriterIndex() { - validityBuffer.readerIndex(0); - viewBuffer.readerIndex(0); - if (valueCount == 0) { - validityBuffer.writerIndex(0); - viewBuffer.writerIndex(0); - } else { - validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - viewBuffer.writerIndex(valueCount * ELEMENT_SIZE); - } - } - - /** Same as {@link #allocateNewSafe()}. */ - @Override - public void allocateNew() { - allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); - } - - /** - * Allocate memory for the vector. We internally use a default value count of 4096 to allocate - * memory for at least these many elements in the vector. See {@link #allocateNew(long, int)} for - * allocating memory for specific number of elements in the vector. 
- * - * @return false if memory allocation fails, true otherwise. - */ - @Override - public boolean allocateNewSafe() { - try { - allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); - return true; - } catch (Exception e) { - return false; - } - } - - /** - * Allocate memory for the vector to support storing at least the provided number of elements in - * the vector. This method must be called prior to using the ValueVector. - * - * @param totalBytes desired total memory capacity - * @param valueCount the desired number of elements in the vector - * @throws OutOfMemoryException if memory allocation fails - */ - @Override - public void allocateNew(long totalBytes, int valueCount) { - assert totalBytes >= 0; - - checkDataBufferSize(totalBytes); - - /* we are doing a new allocation -- release the current buffers */ - clear(); - - try { - allocateBytes(totalBytes, valueCount); - } catch (Exception e) { - clear(); - throw e; - } - } - - @Override - public void allocateNew(int valueCount) { - allocateNew(lastValueAllocationSizeInBytes, valueCount); - } - - /* Check if the data buffer size is within bounds. */ - private void checkDataBufferSize(long size) { - if (size > MAX_BUFFER_SIZE || size < 0) { - throw new OversizedAllocationException( - "Memory required for vector " - + "is (" - + size - + "), which is overflow or more than max allowed (" - + MAX_BUFFER_SIZE - + "). 
" - + "You could consider using LargeVarCharVector/LargeVarBinaryVector for large strings/large bytes types"); - } - } - - /* allocate the inner buffers */ - private void allocateBytes(final long valueBufferSize, final int valueCount) { - /* allocate data buffer */ - viewBuffer = allocator.buffer(valueBufferSize); - viewBuffer.readerIndex(0); - - validityBuffer = allocator.buffer((valueCount + 7) / 8); - initValidityBuffer(); - - lastValueCapacity = getValueCapacity(); - lastValueAllocationSizeInBytes = capAtMaxInt(viewBuffer.capacity()); - } - - /** - * Resize the vector to increase the capacity. The internal behavior is to double the current - * value capacity. - */ - @Override - public void reAlloc() { - reallocViewBuffer(); - reallocViewDataBuffer(); - reallocValidityBuffer(); - } - - /** - * Reallocate the view buffer. View Buffer stores the views for VIEWVARCHAR or VIEWVARBINARY - * elements in the vector. The behavior is to double the size of buffer. - * - * @throws OversizedAllocationException if the desired new size is more than max allowed - * @throws OutOfMemoryException if the internal memory allocation fails - */ - public void reallocViewBuffer() { - long currentViewBufferCapacity = viewBuffer.capacity(); - - long newAllocationSize = currentViewBufferCapacity * 2; - if (newAllocationSize == 0) { - if (lastValueAllocationSizeInBytes > 0) { - newAllocationSize = lastValueAllocationSizeInBytes; - } else { - newAllocationSize = INITIAL_BYTE_COUNT * 2L; - } - } - - reallocViewBuffer(newAllocationSize); - } - - /** Reallocate the data buffer associated with view buffer. 
*/ - public void reallocViewDataBuffer() { - long currentDataBufferCapacity = 0; - if (!dataBuffers.isEmpty()) { - currentDataBufferCapacity = dataBuffers.get(dataBuffers.size() - 1).capacity(); - } - - long newAllocationSize = currentDataBufferCapacity * 2; - if (newAllocationSize == 0) { - if (lastValueAllocationSizeInBytes > 0) { - newAllocationSize = lastValueAllocationSizeInBytes; - } else { - newAllocationSize = INITIAL_BYTE_COUNT * 2L; - } - } - - reallocViewDataBuffer(newAllocationSize); - } - - /** - * Reallocate the view buffer to given size. View Buffer stores the views for VIEWVARCHAR or - * VIEWVARBINARY elements in the vector. The actual allocated size may be larger than the request - * one because it will round up the provided value to the nearest power of two. - * - * @param desiredAllocSize the desired new allocation size - * @throws OversizedAllocationException if the desired new size is more than max allowed - * @throws OutOfMemoryException if the internal memory allocation fails - */ - public void reallocViewBuffer(long desiredAllocSize) { - if (desiredAllocSize == 0) { - return; - } - long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); - assert newAllocationSize >= 1; - - checkDataBufferSize(newAllocationSize); - // for each set operation, we have to allocate 16 bytes - // here we are adjusting the desired allocation-based allocation size - // to align with the 16bytes requirement. - newAllocationSize = roundUpToMultipleOf16(newAllocationSize); - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, viewBuffer, 0, viewBuffer.capacity()); - - viewBuffer.getReferenceManager().release(); - viewBuffer = newBuf; - lastValueAllocationSizeInBytes = viewBuffer.capacity(); - lastValueCapacity = getValueCapacity(); - } - - /** - * Reallocate the data buffer for views. 
- * - * @param desiredAllocSize allocation size in bytes - */ - public void reallocViewDataBuffer(long desiredAllocSize) { - if (desiredAllocSize == 0) { - return; - } - - if (dataBuffers.isEmpty()) { - return; - } - - ArrowBuf currentBuf = dataBuffers.get(dataBuffers.size() - 1); - if (currentBuf.capacity() - currentBuf.writerIndex() >= desiredAllocSize) { - return; - } - - final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); - assert newAllocationSize >= 1; - - checkDataBufferSize(newAllocationSize); - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - dataBuffers.add(newBuf); - } - - /** Reallocate Validity buffer. */ - public void reallocValidityBuffer() { - int targetValidityCount = capAtMaxInt((validityBuffer.capacity() * 8) * 2); - if (targetValidityCount == 0) { - if (lastValueCapacity > 0) { - targetValidityCount = lastValueCapacity; - } else { - targetValidityCount = 2 * INITIAL_VALUE_ALLOCATION; - } - } - - long validityBufferSize = computeValidityBufferSize(targetValidityCount); - - final ArrowBuf newValidityBuffer = allocator.buffer(validityBufferSize); - newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity()); - newValidityBuffer.setZero( - validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity()); - validityBuffer.getReferenceManager().release(); - validityBuffer = newValidityBuffer; - - lastValueCapacity = getValueCapacity(); - } - - private long computeValidityBufferSize(int valueCount) { - return (valueCount + 7) / 8; - } - - /** - * Get the size (number of bytes) of underlying view buffer. 
- * - * @return number of bytes in the view buffer - */ - @Override - public int getByteCapacity() { - return capAtMaxInt(viewBuffer.capacity()); - } - - @Override - public int sizeOfValueBuffer() { - throw new UnsupportedOperationException( - "sizeOfValueBuffer is not supported for BaseVariableWidthViewVector"); - } - - /** - * Get the size (number of bytes) of underlying elements in the view buffer. - * - * @return number of bytes used by data in the view buffer - */ - public int sizeOfViewBufferElements() { - if (valueCount == 0) { - return 0; - } - int totalSize = 0; - for (int i = 0; i < valueCount; i++) { - totalSize += getValueLength(i); - } - return totalSize; - } - - /** - * Get the size (number of bytes) of underlying buffers used by this vector. - * - * @return size of underlying buffers. - */ - @Override - public int getBufferSize() { - return getBufferSizeFor(this.valueCount); - } - - /** - * Get the potential buffer size for a particular number of records. - * - * @param valueCount desired number of elements in the vector - * @return estimated size of underlying buffers if the vector holds a given number of elements - */ - @Override - public int getBufferSizeFor(final int valueCount) { - if (valueCount == 0) { - return 0; - } - - final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); - final int viewBufferSize = valueCount * ELEMENT_SIZE; - final int dataBufferSize = getDataBufferSize(); - return validityBufferSize + viewBufferSize + dataBufferSize; - } - - private int getDataBufferSize() { - int dataBufferSize = 0; - for (ArrowBuf buf : dataBuffers) { - dataBufferSize += (int) buf.writerIndex(); - } - return dataBufferSize; - } - - /** - * Get information about how this field is materialized. - * - * @return the field corresponding to this vector - */ - @Override - public Field getField() { - return field; - } - - /** - * Return the underlying buffers associated with this vector. 
Note that this doesn't impact the - * reference counts for this buffer, so it only should be used for in-context access. Also note - * that this buffer changes regularly, thus external classes shouldn't hold a reference to it - * (unless they change it). - * - * @param clear Whether to clear vector before returning, the buffers will still be refcounted but - * the returned array will be the only reference to them - * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. - */ - @Override - public ArrowBuf[] getBuffers(boolean clear) { - final ArrowBuf[] buffers; - setReaderAndWriterIndex(); - if (getBufferSize() == 0) { - buffers = new ArrowBuf[0]; - } else { - final int dataBufferSize = dataBuffers.size(); - // validity and view buffers - final int fixedBufferSize = 2; - buffers = new ArrowBuf[fixedBufferSize + dataBufferSize]; - buffers[0] = validityBuffer; - buffers[1] = viewBuffer; - for (int i = fixedBufferSize; i < fixedBufferSize + dataBufferSize; i++) { - buffers[i] = dataBuffers.get(i - fixedBufferSize); - } - } - if (clear) { - for (final ArrowBuf buffer : buffers) { - buffer.getReferenceManager().retain(); - } - clear(); - } - return buffers; - } - - /** Validate the scalar values held by this vector. */ - public void validateScalars() { - // No validation by default. - } - - /** - * Construct a transfer pair of this vector and another vector of the same type. - * - * @param field The field materialized by this vector. - * @param allocator allocator for the target vector - * @param callBack not used - * @return TransferPair - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return getTransferPair(field, allocator); - } - - /** - * Construct a transfer pair of this vector and another vector of the same type. 
- * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @param callBack not used - * @return TransferPair - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return getTransferPair(ref, allocator); - } - - /** - * Construct a transfer pair of this vector and another vector of the same type. - * - * @param allocator allocator for the target vector - * @return TransferPair - */ - @Override - public TransferPair getTransferPair(BufferAllocator allocator) { - return getTransferPair(getName(), allocator); - } - - /** - * Construct a transfer pair of this vector and another vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return TransferPair - */ - @Override - public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator); - - /** - * Construct a transfer pair of this vector and another vector of the same type. - * - * @param field The field materialized by this vector. - * @param allocator allocator for the target vector - * @return TransferPair - */ - @Override - public abstract TransferPair getTransferPair(Field field, BufferAllocator allocator); - - /** - * Transfer this vector's data to another vector. The memory associated with this vector is - * transferred to the allocator of target vector for accounting and management purposes. 
- * - * @param target destination vector for transfer - */ - public void transferTo(BaseVariableWidthViewVector target) { - compareTypes(target, "transferTo"); - target.clear(); - target.validityBuffer = transferBuffer(validityBuffer, target.allocator); - target.viewBuffer = transferBuffer(viewBuffer, target.allocator); - target.dataBuffers = new ArrayList<>(dataBuffers.size()); - for (int i = 0; i < dataBuffers.size(); i++) { - target.dataBuffers.add(transferBuffer(dataBuffers.get(i), target.allocator)); - } - - target.setLastSet(this.lastSet); - if (this.valueCount > 0) { - target.setValueCount(this.valueCount); - } - clear(); - } - - /** - * Slice this vector at desired index and length and transfer the corresponding data to the target - * vector. - * - * @param startIndex start position of the split in source vector. - * @param length length of the split. - * @param target destination vector - */ - public void splitAndTransferTo(int startIndex, int length, BaseVariableWidthViewVector target) { - Preconditions.checkArgument( - startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, - "Invalid parameters startIndex: %s, length: %s for valueCount: %s", - startIndex, - length, - valueCount); - compareTypes(target, "splitAndTransferTo"); - target.clear(); - if (length > 0) { - splitAndTransferValidityBuffer(startIndex, length, target); - splitAndTransferViewBufferAndDataBuffer(startIndex, length, target); - target.setLastSet(length - 1); - target.setValueCount(length); - } - } - - /* allocate validity buffer */ - private void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - initValidityBuffer(); - } - - /* - * Transfer the validity. 
- */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, BaseVariableWidthViewVector target) { - if (length <= 0) { - return; - } - - final int firstByteSource = BitVectorHelper.byteIndex(startIndex); - final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - final int byteSizeTarget = getValidityBufferSizeFromCount(length); - final int offset = startIndex % 8; - - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - final ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); - return; - } - - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. - */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. 
- */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - this.validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - - /** - * In split and transfer, the view buffer and the data buffer will be allocated. Then the values - * will be copied from the source vector to the target vector. Allocation and setting are - * preferred over transfer since the buf index and buf offset needs to be overwritten when large - * strings are added. - * - * @param startIndex starting index - * @param length number of elements to be copied - * @param target target vector - */ - private void splitAndTransferViewBufferAndDataBuffer( - int startIndex, int length, BaseVariableWidthViewVector target) { - if (length == 0) { - return; - } - - if (target.viewBuffer != null) { - target.viewBuffer.getReferenceManager().release(); - } - - // allocate target view buffer - target.viewBuffer = target.allocator.buffer(length * ELEMENT_SIZE); - - for (int i = startIndex; i < startIndex + length; i++) { - final int stringLength = getValueLength(i); - - // keeping track of writing index in the target view buffer - int writePosition = (i - startIndex) * ELEMENT_SIZE; - // keeping track of reading index in the source view buffer - int readPosition = i * ELEMENT_SIZE; - - // set length - target.viewBuffer.setInt(writePosition, stringLength); - - if (stringLength <= INLINE_SIZE) { - // handle inline buffer - writePosition += LENGTH_WIDTH; - readPosition += LENGTH_WIDTH; - // set data by copying the required portion from the source buffer - 
target.viewBuffer.setBytes(writePosition, viewBuffer, readPosition, stringLength); - } else { - // handle non-inline buffer - final int readBufIndex = - viewBuffer.getInt(((long) i * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH); - final int readBufOffset = - viewBuffer.getInt( - ((long) i * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); - final ArrowBuf dataBuf = dataBuffers.get(readBufIndex); - - // allocate data buffer - ArrowBuf currentDataBuf = target.allocateOrGetLastDataBuffer(stringLength); - final long currentOffSet = currentDataBuf.writerIndex(); - - writePosition += LENGTH_WIDTH; - readPosition += LENGTH_WIDTH; - // set prefix - target.viewBuffer.setBytes(writePosition, viewBuffer, readPosition, PREFIX_WIDTH); - writePosition += PREFIX_WIDTH; - // set buf id - target.viewBuffer.setInt(writePosition, target.dataBuffers.size() - 1); - writePosition += BUF_INDEX_WIDTH; - // set offset - target.viewBuffer.setInt(writePosition, (int) currentOffSet); - - currentDataBuf.setBytes(currentOffSet, dataBuf, readBufOffset, stringLength); - currentDataBuf.writerIndex(currentOffSet + stringLength); - } - } - } - - /*----------------------------------------------------------------* - | | - | common getters and setters | - | | - *----------------------------------------------------------------*/ - - /** - * Get the number of elements that are null in the vector. - * - * @return the number of null elements. - */ - @Override - public int getNullCount() { - return BitVectorHelper.getNullCount(validityBuffer, valueCount); - } - - /** - * Check if the given index is within the current value capacity of the vector. - * - * @param index position to check - * @return true if the index is within the current value capacity - */ - public boolean isSafe(int index) { - return index < getValueCapacity(); - } - - /** - * Check if an element at given index is null. 
- * - * @param index position of an element - * @return true if an element at given index is null - */ - @Override - public boolean isNull(int index) { - return (isSet(index) == 0); - } - - /** - * Same as {@link #isNull(int)}. - * - * @param index position of an element - * @return 1 if element at given index is not null, 0 otherwise - */ - public int isSet(int index) { - final int byteIndex = index >> 3; - final byte b = validityBuffer.getByte(byteIndex); - final int bitIndex = index & 7; - return (b >> bitIndex) & 0x01; - } - - /** - * Get the value count of vector. This will always be zero unless setValueCount(int) has been - * called prior to calling this. - * - * @return valueCount for the vector - */ - @Override - public int getValueCount() { - return valueCount; - } - - /** - * Sets the value count for the vector. - * - * @param valueCount value count - */ - @Override - public void setValueCount(int valueCount) { - assert valueCount >= 0; - this.valueCount = valueCount; - while (valueCount > getValueCapacity()) { - reallocViewBuffer(); - reallocValidityBuffer(); - } - lastSet = valueCount - 1; - setReaderAndWriterIndex(); - } - - /** - * Create holes in the vector upto the given index (exclusive). Holes will be created from the - * current last-set position in the vector. - * - * @param index target index - */ - @Override - public void fillEmpties(int index) { - handleSafe(index, EMPTY_BYTE_ARRAY.length); - lastSet = index - 1; - } - - /** - * Set the index of the last non-null element in the vector. It is important to call this method - * with appropriate value before calling {@link #setValueCount(int)}. - * - * @param value desired index of last non-null element. - */ - @Override - public void setLastSet(int value) { - lastSet = value; - } - - /** - * Get the index of the last non-null element in the vector. 
- * - * @return index of the last non-null element - */ - @Override - public int getLastSet() { - return lastSet; - } - - /** - * Mark the particular position in the vector as non-null. - * - * @param index position of the element. - */ - @Override - public void setIndexDefined(int index) { - // We need to check and reallocate the validity buffer - while (index >= getValueCapacity()) { - reallocValidityBuffer(); - } - BitVectorHelper.setBit(validityBuffer, index); - } - - /** - * Sets the value length for an element. - * - * @param index position of the element to set - * @param length length of the element - */ - @Override - public void setValueLengthSafe(int index, int length) { - assert index >= 0; - handleSafe(index, length); - lastSet = index; - } - - /** - * Get the length of the element at specified index. - * - * @param index position of an element to get - * @return greater than length 0 for a non-null element, 0 otherwise - */ - @Override - public int getValueLength(int index) { - assert index >= 0; - if (index < 0 || index >= viewBuffer.capacity() / ELEMENT_SIZE) { - throw new IndexOutOfBoundsException("Index out of bounds: " + index); - } - if (isSet(index) == 0) { - return 0; - } - return viewBuffer.getInt(((long) index * ELEMENT_SIZE)); - } - - /** - * Set the variable length element at the specified index to the supplied byte array. This is same - * as using {@link #set(int, byte[], int, int)} with start as Zero and length as #value.length - * - * @param index position of the element to set - * @param value array of bytes to write - */ - public void set(int index, byte[] value) { - assert index >= 0; - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value, 0, value.length); - lastSet = index; - } - - /** - * Same as {@link #set(int, byte[])} except that it handles the case where index and length of a - * new element are beyond the existing capacity of the vector. 
- * - * @param index position of the element to set - * @param value array of bytes to write - */ - @Override - public void setSafe(int index, byte[] value) { - assert index >= 0; - // check if the current index can be populated - handleSafe(index, value.length); - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value, 0, value.length); - lastSet = index; - } - - /** - * Set the variable length element at the specified index to the supplied byte array. - * - * @param index position of the element to set - * @param value array of bytes to write - * @param start start index in an array of bytes - * @param length length of data in an array of bytes - */ - public void set(int index, byte[] value, int start, int length) { - assert index >= 0; - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value, start, length); - lastSet = index; - } - - /** - * Same as {@link #set(int, byte[], int, int)} except that it handles the case where index and - * length of a new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param value array of bytes to write - * @param start start index in an array of bytes - * @param length length of data in an array of bytes - */ - public void setSafe(int index, byte[] value, int start, int length) { - assert index >= 0; - handleSafe(index, length); - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value, start, length); - lastSet = index; - } - - /** - * Set the variable length element at the specified index to the content in supplied ByteBuffer. 
- * - * @param index position of the element to set - * @param value ByteBuffer with data - * @param start start index in ByteBuffer - * @param length length of data in ByteBuffer - */ - public void set(int index, ByteBuffer value, int start, int length) { - assert index >= 0; - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value.array(), start, length); - lastSet = index; - } - - /** - * Same as {@link #set(int, ByteBuffer, int, int)} except that it handles the case where index and - * length of a new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param value ByteBuffer with data - * @param start start index in ByteBuffer - * @param length length of data in ByteBuffer - */ - public void setSafe(int index, ByteBuffer value, int start, int length) { - assert index >= 0; - handleSafe(index, length); - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, value.array(), start, length); - lastSet = index; - } - - /** - * Set the element at the given index to null. - * - * @param index position of an element - */ - @Override - public void setNull(int index) { - handleSafe(index, 0); - BitVectorHelper.unsetBit(validityBuffer, index); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. 
- * - * @param index position of the new value - * @param isSet Zero for NULL value, 1 otherwise - * @param start start position of data in buffer - * @param end end position of data in buffer - * @param buffer data buffer containing the variable width element to be stored in the vector - */ - public void set(int index, int isSet, int start, int end, ArrowBuf buffer) { - assert index >= 0; - final int dataLength = end - start; - BitVectorHelper.setValidityBit(validityBuffer, index, isSet); - setBytes(index, buffer, start, dataLength); - lastSet = index; - } - - /** - * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case when index - * is greater than or equal to current value capacity of the vector. - * - * @param index position of the new value - * @param isSet Zero for NULL value, 1 otherwise - * @param start start position of data in buffer - * @param end end position of data in buffer - * @param buffer data buffer containing the variable width element to be stored in the vector - */ - public void setSafe(int index, int isSet, int start, int end, ArrowBuf buffer) { - assert index >= 0; - final int dataLength = end - start; - handleSafe(index, dataLength); - BitVectorHelper.setValidityBit(validityBuffer, index, isSet); - setBytes(index, buffer, start, dataLength); - lastSet = index; - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. 
- * - * @param index position of the new value - * @param start start position of data in buffer - * @param length length of data in buffer - * @param buffer data buffer containing the variable width element to be stored in the vector - */ - public void set(int index, int start, int length, ArrowBuf buffer) { - assert index >= 0; - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, buffer, start, length); - lastSet = index; - } - - /** - * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case when index - * is greater than or equal to current value capacity of the vector. - * - * @param index position of the new value - * @param start start position of data in buffer - * @param length length of data in buffer - * @param buffer data buffer containing the variable width element to be stored in the vector - */ - public void setSafe(int index, int start, int length, ArrowBuf buffer) { - assert index >= 0; - handleSafe(index, length); - BitVectorHelper.setBit(validityBuffer, index); - setBytes(index, buffer, start, length); - lastSet = index; - } - - /*----------------------------------------------------------------* - | | - | helper methods for setters | - | | - *----------------------------------------------------------------*/ - - protected ArrowBuf allocateOrGetLastDataBuffer(int length) { - long dataBufferSize; - if (initialDataBufferSize > 0) { - dataBufferSize = Math.max(initialDataBufferSize, length); - } else { - dataBufferSize = Math.max(lastValueAllocationSizeInBytes, length); - } - - if (dataBuffers.isEmpty() - || dataBuffers.get(dataBuffers.size() - 1).capacity() - - dataBuffers.get(dataBuffers.size() - 1).writerIndex() - < length) { - ArrowBuf newBuf = allocator.buffer(dataBufferSize); - dataBuffers.add(newBuf); - } - - return dataBuffers.get(dataBuffers.size() - 1); - } - - /** - * This method is used to create a view buffer for a variable width vector. It handles both inline - * and data buffers. - * - *

    If the length of the value is less than or equal to {@link #INLINE_SIZE}, the value is - * stored in the valueBuffer directly as an inline buffer. The valueBuffer stores the length of - * the value followed by the value itself. If the length of the value is greater than {@link - * #INLINE_SIZE}, a new buffer is allocated and added to dataBuffers to hold the value. The - * viewBuffer in this case stores the length of the value, a prefix of the value, the index of the - * new buffer in dataBuffers, and the offset of the value in the new buffer. - * - * @param index The index at which the new value will be inserted. - * @param value The byte array that contains the data to be inserted. - * @param start The start index in the byte array from where the data for the new value begins. - * @param length The length of the data in the byte array that belongs to the new value. - */ - protected final void setBytes(int index, byte[] value, int start, int length) { - int writePosition = index * ELEMENT_SIZE; - - // to clear the memory segment of view being written to - // this is helpful in case of overwriting the value - viewBuffer.setZero(writePosition, ELEMENT_SIZE); - - if (length <= INLINE_SIZE) { - // allocate inline buffer - // set length - viewBuffer.setInt(writePosition, length); - writePosition += LENGTH_WIDTH; - // set data - viewBuffer.setBytes(writePosition, value, start, length); - } else { - // allocate data buffer - ArrowBuf currentBuf = allocateOrGetLastDataBuffer(length); - - // set length - viewBuffer.setInt(writePosition, length); - writePosition += LENGTH_WIDTH; - // set prefix - viewBuffer.setBytes(writePosition, value, start, PREFIX_WIDTH); - writePosition += PREFIX_WIDTH; - // set buf id - viewBuffer.setInt(writePosition, dataBuffers.size() - 1); - writePosition += BUF_INDEX_WIDTH; - // set offset - viewBuffer.setInt(writePosition, (int) currentBuf.writerIndex()); - - currentBuf.setBytes(currentBuf.writerIndex(), value, start, length); - 
currentBuf.writerIndex(currentBuf.writerIndex() + length); - } - } - - /** - * This method is used to create a view buffer for a variable width vector. Similar to {@link - * #setBytes(int index, byte[] value, int start, int length)} - * - * @param index The index at which the new value will be inserted. - * @param valueBuf The byte array that contains the data to be inserted. - * @param start The start index in the byte array from where the data for the new value begins. - * @param length The length of the data in the byte array that belongs to the new value. - */ - protected final void setBytes(int index, ArrowBuf valueBuf, int start, int length) { - int writePosition = index * ELEMENT_SIZE; - - // to clear the memory segment of view being written to - // this is helpful in case of overwriting the value - viewBuffer.setZero(writePosition, ELEMENT_SIZE); - - if (length <= INLINE_SIZE) { - // allocate inline buffer - // set length - viewBuffer.setInt(writePosition, length); - writePosition += LENGTH_WIDTH; - // set data - viewBuffer.setBytes(writePosition, valueBuf, start, length); - } else { - // allocate data buffer - ArrowBuf currentBuf = allocateOrGetLastDataBuffer(length); - - // set length - viewBuffer.setInt(writePosition, length); - writePosition += LENGTH_WIDTH; - // set prefix - viewBuffer.setBytes(writePosition, valueBuf, start, PREFIX_WIDTH); - writePosition += PREFIX_WIDTH; - // set buf id - viewBuffer.setInt(writePosition, dataBuffers.size() - 1); - writePosition += BUF_INDEX_WIDTH; - // set offset - viewBuffer.setInt(writePosition, (int) currentBuf.writerIndex()); - - currentBuf.setBytes(currentBuf.writerIndex(), valueBuf, start, length); - currentBuf.writerIndex(currentBuf.writerIndex() + length); - } - } - - /** - * Get the total length of the elements up to the given index. - * - * @param index The index of the element in the vector. - * @return The total length up to the element at the given index. 
- */ - public final int getTotalValueLengthUpToIndex(int index) { - int totalLength = 0; - for (int i = 0; i < index - 1; i++) { - totalLength += getValueLength(i); - } - return totalLength; - } - - protected final void handleSafe(int index, int dataLength) { - final long targetCapacity = roundUpToMultipleOf16((long) index * ELEMENT_SIZE + dataLength); - if (viewBuffer.capacity() < targetCapacity) { - reallocViewBuffer(targetCapacity); - } - - while (index >= getValidityBufferValueCapacity()) { - reallocValidityBuffer(); - } - } - - /** - * Copy a cell value from a particular index in source vector to a particular position in this - * vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - @Override - public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { - Preconditions.checkArgument(getMinorType() == from.getMinorType()); - if (from.isNull(fromIndex)) { - BitVectorHelper.unsetBit(validityBuffer, thisIndex); - } else { - final int viewLength = from.getDataBuffer().getInt((long) fromIndex * ELEMENT_SIZE); - copyFromNotNull(fromIndex, thisIndex, from, viewLength); - } - lastSet = thisIndex; - } - - /** - * Same as {@link #copyFrom(int, int, ValueVector)} except that it handles the case when the - * capacity of the vector needs to be expanded before copy. 
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - @Override - public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { - Preconditions.checkArgument(getMinorType() == from.getMinorType()); - if (from.isNull(fromIndex)) { - handleSafe(thisIndex, 0); - BitVectorHelper.unsetBit(validityBuffer, thisIndex); - } else { - final int viewLength = from.getDataBuffer().getInt((long) fromIndex * ELEMENT_SIZE); - handleSafe(thisIndex, viewLength); - copyFromNotNull(fromIndex, thisIndex, from, viewLength); - } - lastSet = thisIndex; - } - - private void copyFromNotNull(int fromIndex, int thisIndex, ValueVector from, int viewLength) { - BitVectorHelper.setBit(validityBuffer, thisIndex); - final int start = thisIndex * ELEMENT_SIZE; - final int copyStart = fromIndex * ELEMENT_SIZE; - if (viewLength > INLINE_SIZE) { - final int bufIndex = - from.getDataBuffer() - .getInt(((long) fromIndex * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH); - final int dataOffset = - from.getDataBuffer() - .getInt( - ((long) fromIndex * ELEMENT_SIZE) - + LENGTH_WIDTH - + PREFIX_WIDTH - + BUF_INDEX_WIDTH); - final ArrowBuf dataBuf = ((BaseVariableWidthViewVector) from).dataBuffers.get(bufIndex); - final ArrowBuf thisDataBuf = allocateOrGetLastDataBuffer(viewLength); - - viewBuffer.setBytes(start, from.getDataBuffer(), copyStart, LENGTH_WIDTH + PREFIX_WIDTH); - int writePosition = start + LENGTH_WIDTH + PREFIX_WIDTH; - // set buf id - viewBuffer.setInt(writePosition, dataBuffers.size() - 1); - writePosition += BUF_INDEX_WIDTH; - // set offset - viewBuffer.setInt(writePosition, (int) thisDataBuf.writerIndex()); - - thisDataBuf.setBytes(thisDataBuf.writerIndex(), dataBuf, dataOffset, viewLength); - thisDataBuf.writerIndex(thisDataBuf.writerIndex() + viewLength); - } else { - from.getDataBuffer().getBytes(copyStart, viewBuffer, start, ELEMENT_SIZE); - } - } - - @Override - public 
ArrowBufPointer getDataPointer(int index) { - return getDataPointer(index, new ArrowBufPointer()); - } - - @Override - public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) { - if (isNull(index)) { - reuse.set(null, 0, 0); - } else { - int length = getValueLength(index); - if (length < INLINE_SIZE) { - int start = index * ELEMENT_SIZE + LENGTH_WIDTH; - reuse.set(viewBuffer, start, length); - } else { - final int bufIndex = - viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH); - ArrowBuf dataBuf = dataBuffers.get(bufIndex); - reuse.set(dataBuf, 0, length); - } - } - return reuse; - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - if (isNull(index)) { - return ArrowBufPointer.NULL_HASH_CODE; - } - final int length = getValueLength(index); - if (length < INLINE_SIZE) { - int start = index * ELEMENT_SIZE + LENGTH_WIDTH; - return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, start + length); - } else { - final int bufIndex = - viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH); - final int dataOffset = - viewBuffer.getInt( - ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); - ArrowBuf dataBuf = dataBuffers.get(bufIndex); - return ByteFunctionHelpers.hash(hasher, dataBuf, dataOffset, dataOffset + length); - } - } - - /** - * Retrieves the data of a variable-width element at a given index in the vector. - * - *

    If the length of the data is greater than {@link #INLINE_SIZE}, the data is stored in an - * inline buffer. The method retrieves the buffer index and data offset from the viewBuffer, and - * then retrieves the data from the corresponding buffer in the dataBuffers list. - * - *

    If the length of the data is less than or equal to {@link #INLINE_SIZE}, the data is stored - * directly in the viewBuffer. The method retrieves the data directly from the viewBuffer. - * - * @param index position of the element in the vector - * @return byte array containing the data of the element - */ - protected byte[] getData(int index) { - final int dataLength = getValueLength(index); - byte[] result = new byte[dataLength]; - if (dataLength > INLINE_SIZE) { - // data is in the data buffer - // get buffer index - final int bufferIndex = - viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH); - // get data offset - final int dataOffset = - viewBuffer.getInt( - ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); - dataBuffers.get(bufferIndex).getBytes(dataOffset, result, 0, dataLength); - } else { - // data is in the view buffer - viewBuffer.getBytes((long) index * ELEMENT_SIZE + BUF_INDEX_WIDTH, result, 0, dataLength); - } - return result; - } - - protected void getData(int index, ReusableBuffer buffer) { - final int dataLength = getValueLength(index); - if (dataLength > INLINE_SIZE) { - // data is in the data buffer - // get buffer index - final int bufferIndex = - viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH); - // get data offset - final int dataOffset = - viewBuffer.getInt( - ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); - ArrowBuf dataBuf = dataBuffers.get(bufferIndex); - buffer.set(dataBuf, dataOffset, dataLength); - } else { - // data is in the value buffer - buffer.set(viewBuffer, ((long) index * ELEMENT_SIZE) + BUF_INDEX_WIDTH, dataLength); - } - } - - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } - - /** - * Retrieves the export buffer count for the C Data Interface. - * - *

    For Variadic types, an additional buffer is kept to store the size of each variadic buffer - * since that information cannot be retrieved in the C Data import. - * - *

    In the C Data Interface, the binary view import expects at least three buffers. The variadic - * size buffer is merely allocated to determine the number of elements per each variadic buffer, - * and it is not part of the imported data. - * - *

    The count is set to 3 + dataBuffers.size(). Three is formed by validity, view, and variadic - * size buffer. - * - * @return the number of buffers to be exported - */ - @Override - public int getExportedCDataBufferCount() { - return 3 + dataBuffers.size(); - } - - /** - * Get the data buffer of the vector. Note that an additional buffer is appended to store the size - * of each variadic buffer's size. - * - * @param buffers list of buffers to be exported - * @param buffersPtr buffer to store the pointers to the exported buffers - * @param nullValue null value - */ - @Override - public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long nullValue) { - exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true); - exportBuffer(viewBuffer, buffers, buffersPtr, nullValue, true); - - // allocating additional space to keep the number of variadic buffers - ArrowBuf variadicSizeBuffer = allocator.buffer((long) Long.BYTES * dataBuffers.size()); - // variadicSizeBuffer.setZero(0, variadicSizeBuffer.capacity()); - // export data buffers - for (int i = 0; i < dataBuffers.size(); i++) { - ArrowBuf dataBuf = dataBuffers.get(i); - // calculate sizes for variadic size buffer - variadicSizeBuffer.setLong((long) i * Long.BYTES, dataBuf.capacity()); - exportBuffer(dataBuf, buffers, buffersPtr, nullValue, true); - } - // export variadic size buffer - exportBuffer(variadicSizeBuffer, buffers, buffersPtr, nullValue, false); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java deleted file mode 100644 index 21fa39af8bf2a..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.BigIntReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.BigIntHolder; -import org.apache.arrow.vector.holders.NullableBigIntHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * BigIntVector implements a fixed width vector (8 bytes) of integer values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class BigIntVector extends BaseFixedWidthVector - implements BaseIntVector, ValueIterableVector { - public static final byte TYPE_WIDTH = 8; - - /** - * Instantiate a BigIntVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public BigIntVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.BIGINT.getType()), allocator); - } - - /** - * Instantiate a BigIntVector. 
This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public BigIntVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a BigIntVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public BigIntVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new BigIntReaderImpl(BigIntVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.BIGINT; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public long get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. 
- * - * @param index position of element - */ - public void get(int index, NullableBigIntHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Long getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, long value) { - valueBuffer.setLong((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, long value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableBigIntHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, BigIntHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, long)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, long value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableBigIntHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableBigIntHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, BigIntHolder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, BigIntHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, long value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, long)} except that it handles the case when index is greater than - * or equal to current value capacity of the vector. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, long value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static long get(final ArrowBuf buffer, final int index) { - return buffer.getLong((long) index * TYPE_WIDTH); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising of this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((BigIntVector) to); - } - - @Override - public void setWithPossibleTruncate(int index, long value) { - this.setSafe(index, value); - } - - @Override - public void setUnsafeWithPossibleTruncate(int index, long value) { - this.set(index, value); - } - - @Override - public long getValueAsLong(int index) { - return this.get(index); - } - - private class TransferImpl implements TransferPair { - BigIntVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new BigIntVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new BigIntVector(field, allocator); - } - - public TransferImpl(BigIntVector to) { - this.to = to; - } - - @Override - public BigIntVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, BigIntVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java deleted file mode 100644 index f8e3342625823..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java +++ /dev/null @@ -1,596 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.complex.impl.BitReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.BitHolder; -import org.apache.arrow.vector.holders.NullableBitHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.TransferPair; - -/** - * BitVector implements a fixed width (1 bit) vector of boolean values which could be null. Each - * value in the vector corresponds to a single bit in the underlying data stream backing the vector. - */ -public final class BitVector extends BaseFixedWidthVector implements ValueIterableVector { - - private static final int HASH_CODE_FOR_ZERO = 17; - - private static final int HASH_CODE_FOR_ONE = 19; - - /** - * Instantiate a BitVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. 
- */ - public BitVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.BIT.getType()), allocator); - } - - /** - * Instantiate a BitVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public BitVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a BitVector. This doesn't allocate any memory for the data in vector. - * - * @param field the Field materialized by this vector - * @param allocator allocator for memory management. - */ - public BitVector(Field field, BufferAllocator allocator) { - super(field, allocator, 0); - } - - @Override - protected FieldReader getReaderImpl() { - return new BitReaderImpl(BitVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.BIT; - } - - /** - * Sets the desired value capacity for the vector. This function doesn't allocate any memory for - * the vector. - * - * @param valueCount desired number of elements in the vector - */ - @Override - public void setInitialCapacity(int valueCount) { - final int size = getValidityBufferSizeFromCount(valueCount); - if (size * 2L > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); - } - lastValueCapacity = valueCount; - } - - @Override - protected int getValueBufferValueCapacity() { - return capAtMaxInt(valueBuffer.capacity() * 8); - } - - /** - * Get the potential buffer size for a particular number of records. 
- * - * @param count desired number of elements in the vector - * @return estimated size of underlying buffers if the vector holds a given number of elements - */ - @Override - public int getBufferSizeFor(final int count) { - if (count == 0) { - return 0; - } - return 2 * getValidityBufferSizeFromCount(count); - } - - /** - * Get the size (number of bytes) of underlying buffers used by this vector. - * - * @return size of underlying buffers. - */ - @Override - public int getBufferSize() { - return getBufferSizeFor(valueCount); - } - - /** - * Slice this vector at desired index and length and transfer the corresponding data to the target - * vector. - * - * @param startIndex start position of the split in source vector. - * @param length length of the split. - * @param target destination vector - */ - @Override - public void splitAndTransferTo(int startIndex, int length, BaseFixedWidthVector target) { - Preconditions.checkArgument( - startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, - "Invalid parameters startIndex: %s, length: %s for valueCount: %s", - startIndex, - length, - valueCount); - compareTypes(target, "splitAndTransferTo"); - target.clear(); - target.validityBuffer = - splitAndTransferBuffer(startIndex, length, validityBuffer, target.validityBuffer); - target.valueBuffer = - splitAndTransferBuffer(startIndex, length, valueBuffer, target.valueBuffer); - target.refreshValueCapacity(); - - target.setValueCount(length); - } - - private ArrowBuf splitAndTransferBuffer( - int startIndex, int length, ArrowBuf sourceBuffer, ArrowBuf destBuffer) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - /* slice */ - if (destBuffer != null) { - destBuffer.getReferenceManager().release(); - } - destBuffer = 
sourceBuffer.slice(firstByteSource, byteSizeTarget); - destBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. - */ - destBuffer = allocator.buffer(byteSizeTarget); - destBuffer.readerIndex(0); - destBuffer.setZero(0, destBuffer.capacity()); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(sourceBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte(sourceBuffer, firstByteSource + i + 1, offset); - - destBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. 
- */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - sourceBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - sourceBuffer, firstByteSource + byteSizeTarget, offset); - - destBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - sourceBuffer, firstByteSource + byteSizeTarget - 1, offset); - destBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - - return destBuffer; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - private int getBit(int index) { - final int byteIndex = index >> 3; - final byte b = valueBuffer.getByte(byteIndex); - final int bitIndex = index & 7; - return (b >> bitIndex) & 0x01; - } - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public int get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return getBit(index); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableBitHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = getBit(index); - } - - /** - * Same as {@link #get(int)}. 
- * - * @param index position of element - * @return element at given index - */ - @Override - public Boolean getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return getBit(index) != 0; - } - } - - /** - * Copy a cell value from a particular index in source vector to a particular position in this - * vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - @Override - public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - boolean fromIsSet = BitVectorHelper.get(from.getValidityBuffer(), fromIndex) != 0; - if (fromIsSet) { - BitVectorHelper.setBit(validityBuffer, thisIndex); - BitVectorHelper.setValidityBit(valueBuffer, thisIndex, ((BitVector) from).getBit(fromIndex)); - } else { - BitVectorHelper.unsetBit(validityBuffer, thisIndex); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, int value) { - BitVectorHelper.setBit(validityBuffer, index); - if (value != 0) { - BitVectorHelper.setBit(valueBuffer, index); - } else { - BitVectorHelper.unsetBit(valueBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableBitHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - if (holder.value != 0) { - BitVectorHelper.setBit(valueBuffer, index); - } else { - BitVectorHelper.unsetBit(valueBuffer, index); - } - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, BitHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - if (holder.value != 0) { - BitVectorHelper.setBit(valueBuffer, index); - } else { - BitVectorHelper.unsetBit(valueBuffer, index); - } - } - - /** - * Same as {@link #set(int, int)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, int value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableBitHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableBitHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, BitHolder)} except that it handles the case when index is greater than - * or equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, BitHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, int value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, int)} except that it handles the case when index is greater than - * or equal to current value capacity of the vector. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, int value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Set the element at the given index to one. - * - * @param index position of element - */ - public void setToOne(int index) { - BitVectorHelper.setBit(validityBuffer, index); - BitVectorHelper.setBit(valueBuffer, index); - } - - /** - * Same as {@link #setToOne(int)} except that it handles the case when index is greater than or - * equal to current value capacity of the vector. 
- * - * @param index position of the element - */ - public void setSafeToOne(int index) { - handleSafe(index); - setToOne(index); - } - - @Override - public ArrowBufPointer getDataPointer(int index) { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) { - throw new UnsupportedOperationException(); - } - - @Override - public int hashCode(int index) { - if (isNull(index)) { - return ArrowBufPointer.NULL_HASH_CODE; - } else { - if (get(index) == 0) { - return HASH_CODE_FOR_ZERO; - } else { - return HASH_CODE_FOR_ONE; - } - } - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - return hashCode(index); - } - - /** - * Set count bits to 1 in data starting at firstBitIndex. - * - * @param firstBitIndex the index of the first bit to set - * @param count the number of bits to set - */ - public void setRangeToOne(int firstBitIndex, int count) { - int startByteIndex = BitVectorHelper.byteIndex(firstBitIndex); - final int lastBitIndex = firstBitIndex + count; - final int endByteIndex = BitVectorHelper.byteIndex(lastBitIndex); - final int startByteBitIndex = BitVectorHelper.bitIndex(firstBitIndex); - final int endBytebitIndex = BitVectorHelper.bitIndex(lastBitIndex); - if (count < 8 && startByteIndex == endByteIndex) { - // handles the case where we don't have a first and a last byte - byte bitMask = 0; - for (int i = startByteBitIndex; i < endBytebitIndex; ++i) { - bitMask |= (byte) (1L << i); - } - BitVectorHelper.setBitMaskedByte(validityBuffer, startByteIndex, bitMask); - BitVectorHelper.setBitMaskedByte(valueBuffer, startByteIndex, bitMask); - } else { - // fill in first byte (if it's not full) - if (startByteBitIndex != 0) { - final byte bitMask = (byte) (0xFFL << startByteBitIndex); - BitVectorHelper.setBitMaskedByte(validityBuffer, startByteIndex, bitMask); - BitVectorHelper.setBitMaskedByte(valueBuffer, startByteIndex, bitMask); - ++startByteIndex; - } - - // 
fill in one full byte at a time - validityBuffer.setOne(startByteIndex, endByteIndex - startByteIndex); - valueBuffer.setOne(startByteIndex, endByteIndex - startByteIndex); - - // fill in the last byte (if it's not full) - if (endBytebitIndex != 0) { - final int byteIndex = BitVectorHelper.byteIndex(lastBitIndex - endBytebitIndex); - final byte bitMask = (byte) (0xFFL >>> ((8 - endBytebitIndex) & 7)); - BitVectorHelper.setBitMaskedByte(validityBuffer, byteIndex, bitMask); - BitVectorHelper.setBitMaskedByte(valueBuffer, byteIndex, bitMask); - } - } - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((BitVector) to); - } - - private class TransferImpl implements TransferPair { - BitVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new BitVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new BitVector(field, allocator); - } - - public TransferImpl(BitVector to) { - this.to = to; - } - - @Override - public BitVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, BitVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java deleted file mode 100644 index 0ac56691a6f6c..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java +++ /dev/null @@ -1,449 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BoundsChecking; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.MemoryUtil; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.util.DataSizeRoundingUtil; - -/** - * Helper class for performing generic operations on a bit vector buffer. External use of this class - * is not recommended. - */ -public class BitVectorHelper { - - private BitVectorHelper() {} - - /** Get the index of byte corresponding to bit index in validity buffer. */ - public static long byteIndex(long absoluteBitIndex) { - return absoluteBitIndex >> 3; - } - - /** Get the relative index of bit within the byte in validity buffer. */ - public static int bitIndex(long absoluteBitIndex) { - return checkedCastToInt(absoluteBitIndex & 7); - } - - /** Get the index of byte corresponding to bit index in validity buffer. */ - public static int byteIndex(int absoluteBitIndex) { - return absoluteBitIndex >> 3; - } - - /** Get the relative index of bit within the byte in validity buffer. */ - public static int bitIndex(int absoluteBitIndex) { - return absoluteBitIndex & 7; - } - - /** - * Set the bit at provided index to 1. - * - * @param validityBuffer validity buffer of the vector - * @param index index to be set - */ - public static void setBit(ArrowBuf validityBuffer, long index) { - // it can be observed that some logic is duplicate of the logic in setValidityBit. - // this is because JIT cannot always remove the if branch in setValidityBit, - // so we give a dedicated implementation for setting bits. 
- final long byteIndex = byteIndex(index); - final int bitIndex = bitIndex(index); - - // the byte is promoted to an int, because according to Java specification, - // bytes will be promoted to ints automatically, upon expression evaluation. - // by promoting it manually, we avoid the unnecessary conversions. - int currentByte = validityBuffer.getByte(byteIndex); - final int bitMask = 1 << bitIndex; - currentByte |= bitMask; - validityBuffer.setByte(byteIndex, currentByte); - } - - /** - * Set the bit at provided index to 0. - * - * @param validityBuffer validity buffer of the vector - * @param index index to be set - */ - public static void unsetBit(ArrowBuf validityBuffer, int index) { - // it can be observed that some logic is duplicate of the logic in setValidityBit. - // this is because JIT cannot always remove the if branch in setValidityBit, - // so we give a dedicated implementation for unsetting bits. - final int byteIndex = byteIndex(index); - final int bitIndex = bitIndex(index); - - // the byte is promoted to an int, because according to Java specification, - // bytes will be promoted to ints automatically, upon expression evaluation. - // by promoting it manually, we avoid the unnecessary conversions. - int currentByte = validityBuffer.getByte(byteIndex); - final int bitMask = 1 << bitIndex; - currentByte &= ~bitMask; - validityBuffer.setByte(byteIndex, currentByte); - } - - /** - * Set the bit at a given index to provided value (1 or 0). - * - * @param validityBuffer validity buffer of the vector - * @param index index to be set - * @param value value to set - */ - public static void setValidityBit(ArrowBuf validityBuffer, int index, int value) { - final int byteIndex = byteIndex(index); - final int bitIndex = bitIndex(index); - - // the byte is promoted to an int, because according to Java specification, - // bytes will be promoted to ints automatically, upon expression evaluation. - // by promoting it manually, we avoid the unnecessary conversions. 
- int currentByte = validityBuffer.getByte(byteIndex); - final int bitMask = 1 << bitIndex; - if (value != 0) { - currentByte |= bitMask; - } else { - currentByte &= ~bitMask; - } - validityBuffer.setByte(byteIndex, currentByte); - } - - /** - * Set the bit at a given index to provided value (1 or 0). Internally takes care of allocating - * the buffer if the caller didn't do so. - * - * @param validityBuffer validity buffer of the vector - * @param allocator allocator for the buffer - * @param valueCount number of values to allocate/set - * @param index index to be set - * @param value value to set - * @return ArrowBuf - */ - public static ArrowBuf setValidityBit( - ArrowBuf validityBuffer, BufferAllocator allocator, int valueCount, int index, int value) { - if (validityBuffer == null) { - validityBuffer = allocator.buffer(getValidityBufferSize(valueCount)); - } - setValidityBit(validityBuffer, index, value); - if (index == (valueCount - 1)) { - validityBuffer.writerIndex(getValidityBufferSize(valueCount)); - } - - return validityBuffer; - } - - /** - * Check if a bit at a given index is set or not. - * - * @param buffer buffer to check - * @param index index of the buffer - * @return 1 if bit is set, 0 otherwise. - */ - public static int get(final ArrowBuf buffer, int index) { - final int byteIndex = index >> 3; - final byte b = buffer.getByte(byteIndex); - final int bitIndex = index & 7; - return (b >> bitIndex) & 0x01; - } - - /** - * Compute the size of validity buffer required to manage a given number of elements in a vector. - * - * @param valueCount number of elements in the vector - * @return buffer size - */ - public static int getValidityBufferSize(int valueCount) { - return DataSizeRoundingUtil.divideBy8Ceil(valueCount); - } - - /** - * Given a validity buffer, find the number of bits that are not set. This is used to compute the - * number of null elements in a nullable vector. 
- * - * @param validityBuffer validity buffer of the vector - * @param valueCount number of values in the vector - * @return number of bits not set. - */ - public static int getNullCount(final ArrowBuf validityBuffer, final int valueCount) { - if (valueCount == 0) { - return 0; - } - int count = 0; - final int sizeInBytes = getValidityBufferSize(valueCount); - // If value count is not a multiple of 8, then calculate number of used bits in the last byte - final int remainder = valueCount % 8; - final int fullBytesCount = remainder == 0 ? sizeInBytes : sizeInBytes - 1; - - int index = 0; - while (index + 8 <= fullBytesCount) { - long longValue = validityBuffer.getLong(index); - count += Long.bitCount(longValue); - index += 8; - } - - if (index + 4 <= fullBytesCount) { - int intValue = validityBuffer.getInt(index); - count += Integer.bitCount(intValue); - index += 4; - } - - while (index < fullBytesCount) { - byte byteValue = validityBuffer.getByte(index); - count += Integer.bitCount(byteValue & 0xFF); - index += 1; - } - - // handling with the last bits - if (remainder != 0) { - byte byteValue = validityBuffer.getByte(sizeInBytes - 1); - - // making the remaining bits all 1s if it is not fully filled - byte mask = (byte) (0xFF << remainder); - byteValue = (byte) (byteValue | mask); - count += Integer.bitCount(byteValue & 0xFF); - } - - return 8 * sizeInBytes - count; - } - - /** - * Tests if all bits in a validity buffer are equal 0 or 1, according to the specified parameter. - * - * @param validityBuffer the validity buffer. - * @param valueCount the bit count. - * @param checkOneBits if set to true, the method checks if all bits are equal to 1; otherwise, it - * checks if all bits are equal to 0. - * @return true if all bits are 0 or 1 according to the parameter, and false otherwise. 
- */ - public static boolean checkAllBitsEqualTo( - final ArrowBuf validityBuffer, final int valueCount, final boolean checkOneBits) { - if (valueCount == 0) { - return true; - } - final int sizeInBytes = getValidityBufferSize(valueCount); - - // boundary check - validityBuffer.checkBytes(0, sizeInBytes); - - // If value count is not a multiple of 8, then calculate number of used bits in the last byte - final int remainder = valueCount % 8; - final int fullBytesCount = remainder == 0 ? sizeInBytes : sizeInBytes - 1; - - // the integer number to compare against - final int intToCompare = checkOneBits ? -1 : 0; - - int index = 0; - while (index + 8 <= fullBytesCount) { - long longValue = MemoryUtil.getLong(validityBuffer.memoryAddress() + index); - if (longValue != (long) intToCompare) { - return false; - } - index += 8; - } - - if (index + 4 <= fullBytesCount) { - int intValue = MemoryUtil.getInt(validityBuffer.memoryAddress() + index); - if (intValue != intToCompare) { - return false; - } - index += 4; - } - - while (index < fullBytesCount) { - byte byteValue = MemoryUtil.getByte(validityBuffer.memoryAddress() + index); - if (byteValue != (byte) intToCompare) { - return false; - } - index += 1; - } - - // handling with the last bits - if (remainder != 0) { - byte byteValue = MemoryUtil.getByte(validityBuffer.memoryAddress() + sizeInBytes - 1); - byte mask = (byte) ((1 << remainder) - 1); - byteValue = (byte) (byteValue & mask); - if (checkOneBits) { - if ((mask & byteValue) != mask) { - return false; - } - } else { - if (byteValue != (byte) 0) { - return false; - } - } - } - return true; - } - - /** Returns the byte at index from data right-shifted by offset. */ - public static byte getBitsFromCurrentByte( - final ArrowBuf data, final int index, final int offset) { - return (byte) ((data.getByte(index) & 0xFF) >>> offset); - } - - /** Returns the byte at index from left-shifted by (8 - offset). 
*/ - public static byte getBitsFromNextByte(ArrowBuf data, int index, int offset) { - return (byte) ((data.getByte(index) << (8 - offset))); - } - - /** - * Returns a new buffer if the source validity buffer is either all null or all not-null, - * otherwise returns a buffer pointing to the same memory as source. - * - * @param fieldNode The fieldNode containing the null count - * @param sourceValidityBuffer The source validity buffer that will have its position copied if - * there is a mix of null and non-null values - * @param allocator The allocator to use for creating a new buffer if necessary. - * @return A new buffer that is either allocated or points to the same memory as - * sourceValidityBuffer. - */ - public static ArrowBuf loadValidityBuffer( - final ArrowFieldNode fieldNode, - final ArrowBuf sourceValidityBuffer, - final BufferAllocator allocator) { - final int valueCount = fieldNode.getLength(); - ArrowBuf newBuffer = null; - - // Create a new validity buffer iff both of the following are true: - // - validity buffer is not present, that is, it is either null or empty (in the case of - // IPC for instance). 
- // - values are either all NULLs or all non-NULLs - boolean isValidityBufferNull = - sourceValidityBuffer == null || sourceValidityBuffer.capacity() == 0; - if (isValidityBufferNull - && (fieldNode.getNullCount() == 0 || fieldNode.getNullCount() == valueCount)) { - newBuffer = allocator.buffer(getValidityBufferSize(valueCount)); - newBuffer.setZero(0, newBuffer.capacity()); - if (fieldNode.getNullCount() != 0) { - /* all NULLs */ - return newBuffer; - } - /* all non-NULLs */ - int fullBytesCount = valueCount / 8; - newBuffer.setOne(0, fullBytesCount); - int remainder = valueCount % 8; - if (remainder > 0) { - byte bitMask = (byte) (0xFFL >>> ((8 - remainder) & 7)); - newBuffer.setByte(fullBytesCount, bitMask); - } - } else { - /* mixed byte pattern -- create another ArrowBuf associated with the - * target allocator - */ - newBuffer = - sourceValidityBuffer.getReferenceManager().retain(sourceValidityBuffer, allocator); - } - - return newBuffer; - } - - /** - * Set the byte of the given index in the data buffer by applying a bit mask to the current byte - * at that index. - * - * @param data buffer to set - * @param byteIndex byteIndex within the buffer - * @param bitMask bit mask to be set - */ - static void setBitMaskedByte(ArrowBuf data, int byteIndex, byte bitMask) { - byte currentByte = data.getByte(byteIndex); - currentByte |= bitMask; - data.setByte(byteIndex, currentByte); - } - - /** - * Concat two validity buffers. - * - * @param input1 the first validity buffer. - * @param numBits1 the number of bits in the first validity buffer. - * @param input2 the second validity buffer. - * @param numBits2 the number of bits in the second validity buffer. - * @param output the output validity buffer. It can be the same one as the first input. The caller - * must make sure the output buffer has enough capacity. 
- */ - public static void concatBits( - ArrowBuf input1, int numBits1, ArrowBuf input2, int numBits2, ArrowBuf output) { - int numBytes1 = DataSizeRoundingUtil.divideBy8Ceil(numBits1); - int numBytes2 = DataSizeRoundingUtil.divideBy8Ceil(numBits2); - int numBytesOut = DataSizeRoundingUtil.divideBy8Ceil(numBits1 + numBits2); - - if (BoundsChecking.BOUNDS_CHECKING_ENABLED) { - output.checkBytes(0, numBytesOut); - } - - // copy the first bit set - if (input1 != output) { - MemoryUtil.copyMemory(input1.memoryAddress(), output.memoryAddress(), numBytes1); - } - - if (bitIndex(numBits1) == 0) { - // The number of bits for the first bit set is a multiple of 8, so the boundary is at byte - // boundary. - // For this case, we have a shortcut to copy all bytes from the second set after the byte - // boundary. - MemoryUtil.copyMemory(input2.memoryAddress(), output.memoryAddress() + numBytes1, numBytes2); - return; - } - - // the number of bits to fill a full byte after the first input is processed - int numBitsToFill = 8 - bitIndex(numBits1); - - // mask to clear high bits - int mask = (1 << (8 - numBitsToFill)) - 1; - - int numFullBytes = numBits2 / 8; - - int prevByte = output.getByte(numBytes1 - 1) & mask; - for (int i = 0; i < numFullBytes; i++) { - int curByte = input2.getByte(i) & 0xff; - - // first fill the bits to a full byte - int byteToFill = (curByte << (8 - numBitsToFill)) & 0xff; - output.setByte(numBytes1 + i - 1, byteToFill | prevByte); - - // fill remaining bits in the current byte - // note that it is also the previous byte for the next iteration - prevByte = curByte >>> numBitsToFill; - } - - int lastOutputByte = prevByte; - - // the number of extra bits for the second input, relative to full bytes - int numTrailingBits = bitIndex(numBits2); - - if (numTrailingBits == 0) { - output.setByte(numBytes1 + numFullBytes - 1, lastOutputByte); - return; - } - - // process remaining bits from input2 - int remByte = input2.getByte(numBytes2 - 1) & 0xff; - - int 
byteToFill = remByte << (8 - numBitsToFill); - lastOutputByte |= byteToFill; - - output.setByte(numBytes1 + numFullBytes - 1, lastOutputByte); - - if (numTrailingBits > numBitsToFill) { - // clear all bits for the last byte before writing - output.setByte(numBytes1 + numFullBytes, 0); - - // some remaining bits cannot be filled in the previous byte - int leftByte = remByte >>> numBitsToFill; - output.setByte(numBytes1 + numFullBytes, leftByte); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java b/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java deleted file mode 100644 index 9af5e5397aecc..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; - -/** Content is backed by a buffer and can be loaded/unloaded. 
*/ -public interface BufferBacked { - - void load(ArrowFieldNode fieldNode, ArrowBuf data); - - ArrowBuf unLoad(); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java deleted file mode 100644 index 6c6fd919aba43..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import org.apache.arrow.util.Preconditions; - -/** - * Metadata class that captures the "type" of an Arrow buffer. (e.g. data buffers, offset buffers - * for variable width types and validity buffers). - */ -public class BufferLayout { - - /** - * Enumeration of the different logical types a buffer can have. Data buffer is common to most of - * the layouts. Offset buffer is used for variable width types. Validity buffer is used for - * nullable types. Type buffer is used for Union types. Size buffer is used for ListView and - * LargeListView types. 
- */ - public enum BufferType { - DATA("DATA"), - OFFSET("OFFSET"), - VALIDITY("VALIDITY"), - TYPE("TYPE_ID"), - SIZE("SIZE"), - VIEWS("VIEWS"), - VARIADIC_DATA_BUFFERS("VARIADIC_DATA_BUFFERS"); - - private final String name; - - BufferType(String name) { - this.name = name; - } - - public String getName() { - return name; - } - } - - private static final BufferLayout VALIDITY_BUFFER = new BufferLayout(BufferType.VALIDITY, 1); - private static final BufferLayout OFFSET_BUFFER = new BufferLayout(BufferType.OFFSET, 32); - private static final BufferLayout LARGE_OFFSET_BUFFER = new BufferLayout(BufferType.OFFSET, 64); - private static final BufferLayout TYPE_BUFFER = new BufferLayout(BufferType.TYPE, 32); - private static final BufferLayout BIT_BUFFER = new BufferLayout(BufferType.DATA, 1); - private static final BufferLayout VALUES_256 = new BufferLayout(BufferType.DATA, 256); - private static final BufferLayout VALUES_128 = new BufferLayout(BufferType.DATA, 128); - private static final BufferLayout VALUES_64 = new BufferLayout(BufferType.DATA, 64); - private static final BufferLayout VALUES_32 = new BufferLayout(BufferType.DATA, 32); - private static final BufferLayout VALUES_16 = new BufferLayout(BufferType.DATA, 16); - private static final BufferLayout VALUES_8 = new BufferLayout(BufferType.DATA, 8); - private static final BufferLayout LARGE_SIZE_BUFFER = new BufferLayout(BufferType.SIZE, 64); - private static final BufferLayout SIZE_BUFFER = new BufferLayout(BufferType.SIZE, 32); - private static final BufferLayout VIEW_BUFFER = new BufferLayout(BufferType.VIEWS, 16); - - public static BufferLayout typeBuffer() { - return TYPE_BUFFER; - } - - public static BufferLayout offsetBuffer() { - return OFFSET_BUFFER; - } - - public static BufferLayout largeOffsetBuffer() { - return LARGE_OFFSET_BUFFER; - } - - public static BufferLayout sizeBuffer() { - return SIZE_BUFFER; - } - - public static BufferLayout largeSizeBuffer() { - return LARGE_SIZE_BUFFER; - } - - /** - * 
Returns a databuffer for the given bitwidth. Only supports powers of two between 8 and 128 - * inclusive. - */ - public static BufferLayout dataBuffer(int typeBitWidth) { - switch (typeBitWidth) { - case 8: - return VALUES_8; - case 16: - return VALUES_16; - case 32: - return VALUES_32; - case 64: - return VALUES_64; - case 128: - return VALUES_128; - case 256: - return VALUES_256; - default: - throw new IllegalArgumentException("only 8, 16, 32, 64, 128, or 256 bits supported"); - } - } - - public static BufferLayout booleanVector() { - return BIT_BUFFER; - } - - public static BufferLayout validityVector() { - return VALIDITY_BUFFER; - } - - public static BufferLayout byteVector() { - return dataBuffer(8); - } - - public static BufferLayout viewVector() { - return VIEW_BUFFER; - } - - private final short typeBitWidth; - - private final BufferType type; - - BufferLayout(BufferType type, int typeBitWidth) { - super(); - this.type = Preconditions.checkNotNull(type); - this.typeBitWidth = (short) typeBitWidth; - if (typeBitWidth <= 0) { - throw new IllegalArgumentException("bitWidth invalid: " + typeBitWidth); - } - } - - public int getTypeBitWidth() { - return typeBitWidth; - } - - public BufferType getType() { - return type; - } - - @Override - public String toString() { - return String.format("%s(%s)", type, typeBitWidth); - } - - @Override - public int hashCode() { - return 31 * (31 + type.hashCode()) + typeBitWidth; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof BufferLayout)) { - return false; - } - BufferLayout other = (BufferLayout) obj; - return type.equals(other.type) && (typeBitWidth == other.typeBitWidth); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java deleted file mode 100644 index 68e1e79bd8383..0000000000000 --- 
a/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java +++ /dev/null @@ -1,340 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.DateDayReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.DateDayHolder; -import org.apache.arrow.vector.holders.NullableDateDayHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * DateDayVector implements a fixed width (4 bytes) vector of date values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class DateDayVector extends BaseFixedWidthVector - implements ValueIterableVector { - - public static final byte TYPE_WIDTH = 4; - - /** - * Instantiate a DateDayVector. 
This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public DateDayVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.DATEDAY.getType()), allocator); - } - - /** - * Instantiate a DateDayVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public DateDayVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a DateDayVector. This doesn't allocate any memory for the data in vector. - * - * @param field Field materialized by this vector - * @param allocator allocator for memory management. - */ - public DateDayVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new DateDayReaderImpl(DateDayVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.DATEDAY; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. 
- * - * @param index position of element - * @return element at given index - */ - public int get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableDateDayHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Integer getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getInt((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, int value) { - valueBuffer.setInt((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, int value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableDateDayHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, DateDayHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, int)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, int value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableDateDayHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableDateDayHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, DateDayHolder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, DateDayHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, int value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, int)} except that it handles the case when index is greater than - * or equal to current value capacity of the vector. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, int value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static int get(final ArrowBuf buffer, final int index) { - return buffer.getInt((long) index * TYPE_WIDTH); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((DateDayVector) to); - } - - private class TransferImpl implements TransferPair { - DateDayVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new DateDayVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new DateDayVector(field, allocator); - } - - public TransferImpl(DateDayVector to) { - this.to = to; - } - - @Override - public DateDayVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, DateDayVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java deleted file mode 100644 index 70b955b8f8d2a..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java +++ /dev/null @@ -1,342 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import java.time.LocalDateTime; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.DateMilliReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.DateMilliHolder; -import org.apache.arrow.vector.holders.NullableDateMilliHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.DateUtility; -import org.apache.arrow.vector.util.TransferPair; - -/** - * DateMilliVector implements a fixed width vector (8 bytes) of date values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class DateMilliVector extends BaseFixedWidthVector - implements ValueIterableVector { - public static final byte TYPE_WIDTH = 8; - - /** - * Instantiate a DateMilliVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public DateMilliVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.DATEMILLI.getType()), allocator); - } - - /** - * Instantiate a DateMilliVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. 
- */ - public DateMilliVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a DateMilliVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public DateMilliVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new DateMilliReaderImpl(DateMilliVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.DATEMILLI; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public long get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableDateMilliHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. 
- * - * @param index position of element - * @return element at given index - */ - @Override - public LocalDateTime getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - final long millis = valueBuffer.getLong((long) index * TYPE_WIDTH); - return DateUtility.getLocalDateTimeFromEpochMilli(millis); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, long value) { - valueBuffer.setLong((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, long value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableDateMilliHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, DateMilliHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, long)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, long value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableDateMilliHolder)} except that it handles the case when index - * is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableDateMilliHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, DateMilliHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, DateMilliHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, long value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, long)} except that it handles the case when index is greater than - * or equal to current value capacity of the vector. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, long value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static long get(final ArrowBuf buffer, final int index) { - return buffer.getLong((long) index * TYPE_WIDTH); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((DateMilliVector) to); - } - - private class TransferImpl implements TransferPair { - DateMilliVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new DateMilliVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new DateMilliVector(field, allocator); - } - - public TransferImpl(DateMilliVector to) { - this.to = to; - } - - @Override - public DateMilliVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, DateMilliVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java deleted file mode 100644 index 42ad741c85f8b..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java +++ /dev/null @@ -1,602 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import java.math.BigDecimal; -import java.nio.ByteOrder; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.MemoryUtil; -import org.apache.arrow.vector.complex.impl.Decimal256ReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.Decimal256Holder; -import org.apache.arrow.vector.holders.NullableDecimal256Holder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.DecimalUtility; -import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.validate.ValidateUtil; - -/** - * Decimal256Vector implements a fixed width vector (32 bytes) of decimal values which could be - * null. A validity buffer (bit vector) is maintained to track which elements in the vector are - * null. - */ -public final class Decimal256Vector extends BaseFixedWidthVector - implements ValueIterableVector { - public static final int MAX_PRECISION = 76; - public static final byte TYPE_WIDTH = 32; - private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN; - - private final int precision; - private final int scale; - - /** - * Instantiate a Decimal256Vector. 
This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public Decimal256Vector(String name, BufferAllocator allocator, int precision, int scale) { - this( - name, - FieldType.nullable(new ArrowType.Decimal(precision, scale, /*bitWidth=*/ TYPE_WIDTH * 8)), - allocator); - } - - /** - * Instantiate a Decimal256Vector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public Decimal256Vector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a Decimal256Vector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public Decimal256Vector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - ArrowType.Decimal arrowType = (ArrowType.Decimal) field.getFieldType().getType(); - this.precision = arrowType.getPrecision(); - this.scale = arrowType.getScale(); - } - - @Override - protected FieldReader getReaderImpl() { - return new Decimal256ReaderImpl(Decimal256Vector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.DECIMAL256; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. 
- * - * @param index position of element - * @return element at given index - */ - public ArrowBuf get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableDecimal256Holder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.buffer = valueBuffer; - holder.precision = precision; - holder.scale = scale; - holder.start = ((long) index) * TYPE_WIDTH; - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public BigDecimal getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return getObjectNotNull(index); - } - } - - /** - * Same as {@link #getObject(int)}, but does not check for null. - * - * @param index position of element - * @return element at given index - */ - public BigDecimal getObjectNotNull(int index) { - return DecimalUtility.getBigDecimalFromArrowBuf(valueBuffer, index, scale, TYPE_WIDTH); - } - - /** Return precision for the decimal value. */ - public int getPrecision() { - return precision; - } - - /** Return scale for the decimal value. */ - public int getScale() { - return scale; - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param buffer ArrowBuf containing decimal value. 
- */ - public void set(int index, ArrowBuf buffer) { - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setBytes((long) index * TYPE_WIDTH, buffer, 0, TYPE_WIDTH); - } - - /** - * Set the decimal element at given index to the provided array of bytes. Decimal256 is now - * implemented as Native Endian. This API allows the user to pass a decimal value in the form of - * byte array in BE byte order. - * - *

    Consumers of Arrow code can use this API instead of first swapping the source bytes (doing a - * write and read) and then finally writing to ArrowBuf of decimal vector. - * - *

    This method takes care of adding the necessary padding if the length of byte array is less - * than 32 (length of decimal type). - * - * @param index position of element - * @param value array of bytes containing decimal in big endian byte order. - */ - public void setBigEndian(int index, byte[] value) { - BitVectorHelper.setBit(validityBuffer, index); - final int length = value.length; - - // do the bound check. - valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH); - - long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH; - if (length == 0) { - MemoryUtil.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH, (byte) 0); - return; - } - if (LITTLE_ENDIAN) { - // swap bytes to convert BE to LE - for (int byteIdx = 0; byteIdx < length; ++byteIdx) { - MemoryUtil.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]); - } - - if (length == TYPE_WIDTH) { - return; - } - - if (length < TYPE_WIDTH) { - // sign extend - final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00); - MemoryUtil.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad); - return; - } - } else { - if (length <= TYPE_WIDTH) { - // copy data from value to outAddress - MemoryUtil.copyToMemory( - value, 0, outAddress + Decimal256Vector.TYPE_WIDTH - length, length); - // sign extend - final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00); - MemoryUtil.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad); - return; - } - } - throw new IllegalArgumentException( - "Invalid decimal value length. Valid length in [1 - 32], got " + length); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param start start index of data in the buffer - * @param buffer ArrowBuf containing decimal value. 
- */ - public void set(int index, long start, ArrowBuf buffer) { - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setBytes((long) index * TYPE_WIDTH, buffer, start, TYPE_WIDTH); - } - - /** - * Sets the element at given index using the buffer whose size maybe <= 32 bytes. - * - * @param index index to write the decimal to - * @param start start of value in the buffer - * @param buffer contains the decimal in native endian bytes - * @param length length of the value in the buffer - */ - public void setSafe(int index, long start, ArrowBuf buffer, int length) { - handleSafe(index); - BitVectorHelper.setBit(validityBuffer, index); - - // do the bound checks. - buffer.checkBytes(start, start + length); - valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH); - - long inAddress = buffer.memoryAddress() + start; - long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH; - if (LITTLE_ENDIAN) { - MemoryUtil.copyMemory(inAddress, outAddress, length); - // sign extend - if (length < TYPE_WIDTH) { - byte msb = MemoryUtil.getByte(inAddress + length - 1); - final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - MemoryUtil.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad); - } - } else { - MemoryUtil.copyMemory(inAddress, outAddress + Decimal256Vector.TYPE_WIDTH - length, length); - // sign extend - if (length < TYPE_WIDTH) { - byte msb = MemoryUtil.getByte(inAddress); - final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - MemoryUtil.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad); - } - } - } - - /** - * Sets the element at given index using the buffer whose size maybe <= 32 bytes. 
- * - * @param index index to write the decimal to - * @param start start of value in the buffer - * @param buffer contains the decimal in big endian bytes - * @param length length of the value in the buffer - */ - public void setBigEndianSafe(int index, long start, ArrowBuf buffer, int length) { - handleSafe(index); - BitVectorHelper.setBit(validityBuffer, index); - - // do the bound checks. - buffer.checkBytes(start, start + length); - valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH); - - // not using buffer.getByte() to avoid boundary checks for every byte. - long inAddress = buffer.memoryAddress() + start; - long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH; - if (LITTLE_ENDIAN) { - // swap bytes to convert BE to LE - for (int byteIdx = 0; byteIdx < length; ++byteIdx) { - byte val = MemoryUtil.getByte((inAddress + length - 1) - byteIdx); - MemoryUtil.putByte(outAddress + byteIdx, val); - } - // sign extend - if (length < 32) { - byte msb = MemoryUtil.getByte(inAddress); - final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - MemoryUtil.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad); - } - } else { - MemoryUtil.copyMemory(inAddress, outAddress + Decimal256Vector.TYPE_WIDTH - length, length); - // sign extend - if (length < TYPE_WIDTH) { - byte msb = MemoryUtil.getByte(inAddress); - final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - MemoryUtil.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad); - } - } - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value BigDecimal containing decimal value. 
- */ - public void set(int index, BigDecimal value) { - BitVectorHelper.setBit(validityBuffer, index); - DecimalUtility.checkPrecisionAndScale(value, precision, scale); - DecimalUtility.writeBigDecimalToArrowBuf(value, valueBuffer, index, TYPE_WIDTH); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value long value. - */ - public void set(int index, long value) { - BitVectorHelper.setBit(validityBuffer, index); - DecimalUtility.writeLongToArrowBuf(value, valueBuffer, index, TYPE_WIDTH); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableDecimal256Holder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setBytes((long) index * TYPE_WIDTH, holder.buffer, holder.start, TYPE_WIDTH); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, Decimal256Holder holder) { - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setBytes((long) index * TYPE_WIDTH, holder.buffer, holder.start, TYPE_WIDTH); - } - - /** - * Same as {@link #set(int, ArrowBuf)} except that it handles the case when index is greater than - * or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param buffer ArrowBuf containing decimal value. 
- */ - public void setSafe(int index, ArrowBuf buffer) { - handleSafe(index); - set(index, buffer); - } - - /** - * Same as {@link #setBigEndian(int, byte[])} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - */ - public void setBigEndianSafe(int index, byte[] value) { - handleSafe(index); - setBigEndian(index, value); - } - - /** - * Same as {@link #set(int, int, ArrowBuf)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param start start index of data in the buffer - * @param buffer ArrowBuf containing decimal value. - */ - public void setSafe(int index, long start, ArrowBuf buffer) { - handleSafe(index); - set(index, start, buffer); - } - - /** - * Same as {@link #set(int, BigDecimal)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value BigDecimal containing decimal value. - */ - public void setSafe(int index, BigDecimal value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, long)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value long value. - */ - public void setSafe(int index, long value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableDecimalHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableDecimal256Holder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, Decimal256Holder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, Decimal256Holder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param start start position of the value in the buffer - * @param buffer buffer containing the value to be stored in the vector - */ - public void set(int index, int isSet, long start, ArrowBuf buffer) { - if (isSet > 0) { - set(index, start, buffer); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #setSafe(int, int, int, ArrowBuf)} except that it handles the case when the - * position of new value is beyond the current value capacity of the vector. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param start start position of the value in the buffer - * @param buffer buffer containing the value to be stored in the vector - */ - public void setSafe(int index, int isSet, long start, ArrowBuf buffer) { - handleSafe(index); - set(index, isSet, start, buffer); - } - - @Override - public void validateScalars() { - for (int i = 0; i < getValueCount(); ++i) { - BigDecimal value = getObject(i); - if (value != null) { - ValidateUtil.validateOrThrow( - DecimalUtility.checkPrecisionAndScaleNoThrow(value, getPrecision(), getScale()), - "Invalid value for Decimal256Vector at position " - + i - + ". Value does not fit in precision " - + getPrecision() - + " and scale " - + getScale() - + "."); - } - } - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((Decimal256Vector) to); - } - - private class TransferImpl implements TransferPair { - Decimal256Vector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = - new Decimal256Vector( - ref, allocator, Decimal256Vector.this.precision, Decimal256Vector.this.scale); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new Decimal256Vector(field, allocator); - } - - public TransferImpl(Decimal256Vector to) { - this.to = to; - } - - @Override - public Decimal256Vector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, Decimal256Vector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java deleted file mode 100644 index b4c55680b7305..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java +++ /dev/null @@ -1,599 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import java.math.BigDecimal; -import java.nio.ByteOrder; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.MemoryUtil; -import org.apache.arrow.vector.complex.impl.DecimalReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.DecimalHolder; -import org.apache.arrow.vector.holders.NullableDecimalHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.DecimalUtility; -import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.validate.ValidateUtil; - -/** - * DecimalVector implements a fixed width vector (16 bytes) of decimal values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class DecimalVector extends BaseFixedWidthVector - implements ValueIterableVector { - public static final int MAX_PRECISION = 38; - public static final byte TYPE_WIDTH = 16; - private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN; - - private final int precision; - private final int scale; - - /** - * Instantiate a DecimalVector. 
This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public DecimalVector(String name, BufferAllocator allocator, int precision, int scale) { - this( - name, - FieldType.nullable(new ArrowType.Decimal(precision, scale, TYPE_WIDTH * 8)), - allocator); - } - - /** - * Instantiate a DecimalVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public DecimalVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a DecimalVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public DecimalVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - ArrowType.Decimal arrowType = (ArrowType.Decimal) field.getFieldType().getType(); - this.precision = arrowType.getPrecision(); - this.scale = arrowType.getScale(); - } - - @Override - protected FieldReader getReaderImpl() { - return new DecimalReaderImpl(DecimalVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.DECIMAL; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. 
- * - * @param index position of element - * @return element at given index - */ - public ArrowBuf get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableDecimalHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.buffer = valueBuffer; - holder.precision = precision; - holder.scale = scale; - holder.start = (long) index * TYPE_WIDTH; - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public BigDecimal getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return getObjectNotNull(index); - } - } - - /** - * Same as {@link #getObject(int)} but does not check for null. - * - * @param index position of element - * @return element at given index - */ - public BigDecimal getObjectNotNull(int index) { - return DecimalUtility.getBigDecimalFromArrowBuf(valueBuffer, index, scale, TYPE_WIDTH); - } - - /** Return precision for the decimal value. */ - public int getPrecision() { - return precision; - } - - /** Return scale for the decimal value. */ - public int getScale() { - return scale; - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param buffer ArrowBuf containing decimal value. 
- */ - public void set(int index, ArrowBuf buffer) { - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setBytes((long) index * TYPE_WIDTH, buffer, 0, TYPE_WIDTH); - } - - /** - * Set the decimal element at given index to the provided array of bytes. Decimal is now - * implemented as Native Endian. This API allows the user to pass a decimal value in the form of - * byte array in BE byte order. - * - *

    Consumers of Arrow code can use this API instead of first swapping the source bytes (doing a - * write and read) and then finally writing to ArrowBuf of decimal vector. - * - *

    This method takes care of adding the necessary padding if the length of byte array is less - * than 16 (length of decimal type). - * - * @param index position of element - * @param value array of bytes containing decimal in big endian byte order. - */ - public void setBigEndian(int index, byte[] value) { - BitVectorHelper.setBit(validityBuffer, index); - final int length = value.length; - - // do the bound check. - valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH); - - long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH; - if (length == 0) { - MemoryUtil.setMemory(outAddress, DecimalVector.TYPE_WIDTH, (byte) 0); - return; - } - if (LITTLE_ENDIAN) { - // swap bytes to convert BE to LE - for (int byteIdx = 0; byteIdx < length; ++byteIdx) { - MemoryUtil.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]); - } - - if (length == TYPE_WIDTH) { - return; - } - - if (length < TYPE_WIDTH) { - // sign extend - final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00); - MemoryUtil.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad); - return; - } - } else { - if (length <= TYPE_WIDTH) { - // copy data from value to outAddress - MemoryUtil.copyToMemory(value, 0, outAddress + DecimalVector.TYPE_WIDTH - length, length); - // sign extend - final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00); - MemoryUtil.setMemory(outAddress, DecimalVector.TYPE_WIDTH - length, pad); - return; - } - } - throw new IllegalArgumentException( - "Invalid decimal value length. Valid length in [1 - 16], got " + length); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param start start index of data in the buffer - * @param buffer ArrowBuf containing decimal value. 
- */ - public void set(int index, long start, ArrowBuf buffer) { - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setBytes((long) index * TYPE_WIDTH, buffer, start, TYPE_WIDTH); - } - - /** - * Sets the element at given index using the buffer whose size maybe <= 16 bytes. - * - * @param index index to write the decimal to - * @param start start of value in the buffer - * @param buffer contains the decimal in native endian bytes - * @param length length of the value in the buffer - */ - public void setSafe(int index, long start, ArrowBuf buffer, int length) { - handleSafe(index); - BitVectorHelper.setBit(validityBuffer, index); - - // do the bound checks. - buffer.checkBytes(start, start + length); - valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH); - - long inAddress = buffer.memoryAddress() + start; - long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH; - if (LITTLE_ENDIAN) { - MemoryUtil.copyMemory(inAddress, outAddress, length); - // sign extend - if (length < TYPE_WIDTH) { - byte msb = MemoryUtil.getByte(inAddress + length - 1); - final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - MemoryUtil.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad); - } - } else { - MemoryUtil.copyMemory(inAddress, outAddress + DecimalVector.TYPE_WIDTH - length, length); - // sign extend - if (length < TYPE_WIDTH) { - byte msb = MemoryUtil.getByte(inAddress); - final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - MemoryUtil.setMemory(outAddress, DecimalVector.TYPE_WIDTH - length, pad); - } - } - } - - /** - * Sets the element at given index using the buffer whose size maybe <= 16 bytes. 
- * - * @param index index to write the decimal to - * @param start start of value in the buffer - * @param buffer contains the decimal in big endian bytes - * @param length length of the value in the buffer - */ - public void setBigEndianSafe(int index, long start, ArrowBuf buffer, int length) { - handleSafe(index); - BitVectorHelper.setBit(validityBuffer, index); - - // do the bound checks. - buffer.checkBytes(start, start + length); - valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH); - - // not using buffer.getByte() to avoid boundary checks for every byte. - long inAddress = buffer.memoryAddress() + start; - long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH; - if (LITTLE_ENDIAN) { - // swap bytes to convert BE to LE - for (int byteIdx = 0; byteIdx < length; ++byteIdx) { - byte val = MemoryUtil.getByte((inAddress + length - 1) - byteIdx); - MemoryUtil.putByte(outAddress + byteIdx, val); - } - // sign extend - if (length < TYPE_WIDTH) { - byte msb = MemoryUtil.getByte(inAddress); - final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - MemoryUtil.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad); - } - } else { - MemoryUtil.copyMemory(inAddress, outAddress + DecimalVector.TYPE_WIDTH - length, length); - // sign extend - if (length < TYPE_WIDTH) { - byte msb = MemoryUtil.getByte(inAddress); - final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - MemoryUtil.setMemory(outAddress, DecimalVector.TYPE_WIDTH - length, pad); - } - } - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value BigDecimal containing decimal value. 
- */ - public void set(int index, BigDecimal value) { - BitVectorHelper.setBit(validityBuffer, index); - DecimalUtility.checkPrecisionAndScale(value, precision, scale); - DecimalUtility.writeBigDecimalToArrowBuf(value, valueBuffer, index, TYPE_WIDTH); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value long value. - */ - public void set(int index, long value) { - BitVectorHelper.setBit(validityBuffer, index); - DecimalUtility.writeLongToArrowBuf(value, valueBuffer, index, TYPE_WIDTH); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableDecimalHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setBytes((long) index * TYPE_WIDTH, holder.buffer, holder.start, TYPE_WIDTH); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, DecimalHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setBytes((long) index * TYPE_WIDTH, holder.buffer, holder.start, TYPE_WIDTH); - } - - /** - * Same as {@link #set(int, ArrowBuf)} except that it handles the case when index is greater than - * or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param buffer ArrowBuf containing decimal value. 
- */ - public void setSafe(int index, ArrowBuf buffer) { - handleSafe(index); - set(index, buffer); - } - - /** - * Same as {@link #setBigEndian(int, byte[])} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - */ - public void setBigEndianSafe(int index, byte[] value) { - handleSafe(index); - setBigEndian(index, value); - } - - /** - * Same as {@link #set(int, long, ArrowBuf)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param start start index of data in the buffer - * @param buffer ArrowBuf containing decimal value. - */ - public void setSafe(int index, long start, ArrowBuf buffer) { - handleSafe(index); - set(index, start, buffer); - } - - /** - * Same as {@link #set(int, BigDecimal)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value BigDecimal containing decimal value. - */ - public void setSafe(int index, BigDecimal value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, long)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value long value. - */ - public void setSafe(int index, long value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableDecimalHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableDecimalHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, DecimalHolder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, DecimalHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param start start position of the value in the buffer - * @param buffer buffer containing the value to be stored in the vector - */ - public void set(int index, int isSet, long start, ArrowBuf buffer) { - if (isSet > 0) { - set(index, start, buffer); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, long, ArrowBuf)} except that it handles the case when the - * position of new value is beyond the current value capacity of the vector. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param start start position of the value in the buffer - * @param buffer buffer containing the value to be stored in the vector - */ - public void setSafe(int index, int isSet, long start, ArrowBuf buffer) { - handleSafe(index); - set(index, isSet, start, buffer); - } - - @Override - public void validateScalars() { - for (int i = 0; i < getValueCount(); ++i) { - BigDecimal value = getObject(i); - if (value != null) { - ValidateUtil.validateOrThrow( - DecimalUtility.checkPrecisionAndScaleNoThrow(value, getPrecision(), getScale()), - "Invalid value for DecimalVector at position " - + i - + ". Value does not fit in precision " - + getPrecision() - + " and scale " - + getScale() - + "."); - } - } - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((DecimalVector) to); - } - - private class TransferImpl implements TransferPair { - DecimalVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = - new DecimalVector(ref, allocator, DecimalVector.this.precision, DecimalVector.this.scale); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new DecimalVector(field, allocator); - } - - public TransferImpl(DecimalVector to) { - this.to = to; - } - - @Override - public DecimalVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, DecimalVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java deleted file mode 100644 index 59b84c47c0987..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -/** - * Vector that support density aware initial capacity settings. We use this for ListVector and - * VarCharVector as of now to control the memory allocated. - * - *

    For ListVector, we have been using a multiplier of 5 to compute the initial capacity of the - * inner data vector. For deeply nested lists and lists with lots of NULL values, this is - * over-allocation upfront. So density helps to be conservative when computing the value capacity of - * the inner vector. - * - *

    For example, a density value of 10 implies each position in the list vector has a list of 10 - * values. So we will provision an initial capacity of (valuecount * 10) for the inner vector. A - * density value of 0.1 implies out of 10 positions in the list vector, 1 position has a list of - * size 1 and remaining positions are null (no lists) or empty lists. This helps in tightly - * controlling the memory we provision for inner data vector. - * - *

    Similar analogy is applicable for VarCharVector where the capacity of the data buffer can be - * controlled using density multiplier instead of default multiplier of 8 (default size of average - * varchar length). - * - *

    Also from container vectors, we propagate the density down the inner vectors so that they can - * use it appropriately. - */ -public interface DensityAwareVector { - - /** - * Set value with density. - * - * @param valueCount the number of values in this vector - * @param density the density of the vector - */ - void setInitialCapacity(int valueCount, double density); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java deleted file mode 100644 index 817f95d9d5397..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static java.util.concurrent.TimeUnit.MICROSECONDS; -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import java.time.Duration; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.DurationReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.DurationHolder; -import org.apache.arrow.vector.holders.NullableDurationHolder; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * DurationVector implements a fixed width vector (8 bytes) of a configurable TimeUnit granularity - * duration values which could be null. A validity buffer (bit vector) is maintained to track which - * elements in the vector are null. - */ -public final class DurationVector extends BaseFixedWidthVector - implements ValueIterableVector { - public static final byte TYPE_WIDTH = 8; - - private final TimeUnit unit; - - /** - * Instantiate a DurationVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public DurationVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a DurationVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. 
- */ - public DurationVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - this.unit = ((ArrowType.Duration) field.getFieldType().getType()).getUnit(); - } - - @Override - protected FieldReader getReaderImpl() { - return new DurationReaderImpl(DurationVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.DURATION; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static long get(final ArrowBuf buffer, final int index) { - return buffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public ArrowBuf get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; - } - return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableDurationHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = get(valueBuffer, index); - holder.unit = this.unit; - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Duration getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return getObjectNotNull(index); - } - } - - /** - * Same as {@link #getObject(int)} but does not check for null. - * - * @param index position of element - * @return element at given index - */ - public Duration getObjectNotNull(int index) { - final long value = get(valueBuffer, index); - return toDuration(value, unit); - } - - /** Converts the given value and unit to the appropriate {@link Duration}. 
*/ - public static Duration toDuration(long value, TimeUnit unit) { - switch (unit) { - case SECOND: - return Duration.ofSeconds(value); - case MILLISECOND: - return Duration.ofMillis(value); - case NANOSECOND: - return Duration.ofNanos(value); - case MICROSECOND: - return Duration.ofNanos(MICROSECONDS.toNanos(value)); - default: - throw new IllegalArgumentException("Unknown timeunit: " + unit); - } - } - - /** - * Get the Interval value at a given index as a {@link StringBuilder} object. - * - * @param index position of the element - * @return String Builder object with Interval in java.time.Duration format. - */ - public StringBuilder getAsStringBuilder(int index) { - if (isSet(index) == 0) { - return null; - } else { - return getAsStringBuilderHelper(index); - } - } - - private StringBuilder getAsStringBuilderHelper(int index) { - return new StringBuilder(getObject(index).toString()); - } - - /** Gets the time unit of the duration. */ - public TimeUnit getUnit() { - return unit; - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, ArrowBuf value) { - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setBytes((long) index * TYPE_WIDTH, value, 0, TYPE_WIDTH); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value The duration value (in the timeunit associated with this vector) - */ - public void set(int index, long value) { - final long offsetIndex = (long) index * TYPE_WIDTH; - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setLong(offsetIndex, value); - } - - /** - * Set the element at the given index to the value set in data holder. 
If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableDurationHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (!this.unit.equals(holder.unit)) { - throw new IllegalArgumentException( - String.format("holder.unit: %s not equal to vector unit: %s", holder.unit, this.unit)); - } else if (holder.isSet > 0) { - set(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, DurationHolder holder) { - if (!this.unit.equals(holder.unit)) { - throw new IllegalArgumentException( - String.format("holder.unit: %s not equal to vector unit: %s", holder.unit, this.unit)); - } - set(index, holder.value); - } - - /** - * Same as {@link #set(int, ArrowBuf)} except that it handles the case when index is greater than - * or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, ArrowBuf value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, long)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param value duration in the time unit this vector was constructed with - */ - public void setSafe(int index, long value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableDurationHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableDurationHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, DurationHolder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, DurationHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value The duration value (in the TimeUnit associated with this vector). - */ - public void set(int index, int isSet, long value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, long)} except that it handles the case when index is greater than - * or equal to current value capacity of the vector. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value The duration value (in the timeunit associated with this vector) - */ - public void setSafe(int index, int isSet, long value) { - handleSafe(index); - set(index, isSet, value); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising of this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((DurationVector) to); - } - - private class TransferImpl implements TransferPair { - DurationVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new DurationVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new DurationVector(field, allocator); - } - - public TransferImpl(DurationVector to) { - this.to = to; - } - - @Override - public DurationVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, DurationVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ElementAddressableVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ElementAddressableVector.java deleted file mode 100644 index abfda6fe73a7d..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ElementAddressableVector.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import org.apache.arrow.memory.util.ArrowBufPointer; - -/** - * Vector for which each data element resides in a continuous memory region, so it can be pointed to - * by an {@link org.apache.arrow.memory.util.ArrowBufPointer}. - */ -public interface ElementAddressableVector extends ValueVector { - - /** - * Gets the pointer for the data at the given index. - * - * @param index the index for the data. - * @return the pointer to the data. - */ - ArrowBufPointer getDataPointer(int index); - - /** - * Gets the pointer for the data at the given index. - * - * @param index the index for the data. - * @param reuse the data pointer to fill, this avoids creating a new pointer object. - * @return the pointer to the data, it should be the same one as the input parameter - */ - ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java deleted file mode 100644 index 3762fecd0bdcc..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import java.util.Iterator; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.TransferPair; - -/** - * A vector that wraps an underlying vector, used to help implement extension types. - * - * @param The wrapped vector type. - */ -public abstract class ExtensionTypeVector - extends BaseValueVector implements FieldVector { - - private final T underlyingVector; - private final String name; - - /** - * Instantiate an extension type vector. 
- * - * @param name name of the vector - * @param allocator allocator for memory management - * @param underlyingVector underlying filed vector - */ - public ExtensionTypeVector(String name, BufferAllocator allocator, T underlyingVector) { - super(allocator); - Preconditions.checkNotNull(underlyingVector, "underlyingVector cannot be null."); - this.name = name; - this.underlyingVector = underlyingVector; - } - - /** - * Instantiate an extension type vector. - * - * @param field field materialized by this vector. - * @param allocator allocator for memory management - * @param underlyingVector underlying filed vector - */ - public ExtensionTypeVector(Field field, BufferAllocator allocator, T underlyingVector) { - this(field.getName(), allocator, underlyingVector); - } - - @Override - public String getName() { - return name; - } - - /** Get the underlying vector. */ - public T getUnderlyingVector() { - return underlyingVector; - } - - @Override - public void allocateNew() throws OutOfMemoryException { - this.underlyingVector.allocateNew(); - } - - @Override - public boolean allocateNewSafe() { - return this.underlyingVector.allocateNewSafe(); - } - - @Override - public void reAlloc() { - this.underlyingVector.reAlloc(); - } - - @Override - public void setInitialCapacity(int numRecords) { - this.underlyingVector.setInitialCapacity(numRecords); - } - - @Override - public int getValueCapacity() { - return this.underlyingVector.getValueCapacity(); - } - - @Override - public void reset() { - this.underlyingVector.reset(); - } - - @Override - public Field getField() { - return this.underlyingVector.getField(); - } - - @Override - public MinorType getMinorType() { - return MinorType.EXTENSIONTYPE; - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return underlyingVector.getTransferPair(ref, allocator); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return 
underlyingVector.getTransferPair(ref, allocator, callBack); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return underlyingVector.getTransferPair(field, allocator); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return underlyingVector.getTransferPair(field, allocator, callBack); - } - - @Override - public TransferPair makeTransferPair(ValueVector target) { - return underlyingVector.makeTransferPair(target); - } - - @Override - protected FieldReader getReaderImpl() { - return underlyingVector.getReader(); - } - - @Override - public int getBufferSize() { - return underlyingVector.getBufferSize(); - } - - @Override - public int getBufferSizeFor(int valueCount) { - return underlyingVector.getBufferSizeFor(valueCount); - } - - @Override - public ArrowBuf[] getBuffers(boolean clear) { - return underlyingVector.getBuffers(clear); - } - - @Override - public ArrowBuf getValidityBuffer() { - return underlyingVector.getValidityBuffer(); - } - - @Override - public ArrowBuf getDataBuffer() { - return underlyingVector.getDataBuffer(); - } - - @Override - public ArrowBuf getOffsetBuffer() { - return underlyingVector.getOffsetBuffer(); - } - - @Override - public int getValueCount() { - return underlyingVector.getValueCount(); - } - - @Override - public void setValueCount(int valueCount) { - underlyingVector.setValueCount(valueCount); - } - - /** - * Get the extension object at the specified index. - * - *

    Generally, this should access the underlying vector and construct the corresponding Java - * object from the raw data. - */ - @Override - public abstract Object getObject(int index); - - @Override - public int getNullCount() { - return underlyingVector.getNullCount(); - } - - @Override - public boolean isNull(int index) { - return underlyingVector.isNull(index); - } - - @Override - public void setNull(int index) { - underlyingVector.setNull(index); - } - - @Override - public void initializeChildrenFromFields(List children) { - underlyingVector.initializeChildrenFromFields(children); - } - - @Override - public List getChildrenFromFields() { - return underlyingVector.getChildrenFromFields(); - } - - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - underlyingVector.loadFieldBuffers(fieldNode, ownBuffers); - } - - @Override - public List getFieldBuffers() { - return underlyingVector.getFieldBuffers(); - } - - /** - * Get the inner vectors. - * - * @return the inner vectors for this field as defined by the TypeLayout - * @deprecated This API will be removed as the current implementations no longer support inner - * vectors. 
- */ - @Deprecated - @Override - public List getFieldInnerVectors() { - return underlyingVector.getFieldInnerVectors(); - } - - @Override - public long getValidityBufferAddress() { - return underlyingVector.getValidityBufferAddress(); - } - - @Override - public long getDataBufferAddress() { - return underlyingVector.getDataBufferAddress(); - } - - @Override - public long getOffsetBufferAddress() { - return underlyingVector.getOffsetBufferAddress(); - } - - @Override - public void clear() { - underlyingVector.clear(); - } - - @Override - public void close() { - underlyingVector.close(); - } - - @Override - public TransferPair getTransferPair(BufferAllocator allocator) { - return underlyingVector.getTransferPair(allocator); - } - - @Override - public Iterator iterator() { - return underlyingVector.iterator(); - } - - @Override - public BufferAllocator getAllocator() { - return underlyingVector.getAllocator(); - } - - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java b/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java deleted file mode 100644 index 391ef778169f5..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.pojo.Field; - -/** - * A vector corresponding to a Field in the schema. It has inner vectors backed by buffers - * (validity, offsets, data, ...) - */ -public interface FieldVector extends ValueVector { - - /** - * Initializes the child vectors to be later loaded with loadBuffers. - * - * @param children the schema - */ - void initializeChildrenFromFields(List children); - - /** - * The returned list is the same size as the list passed to initializeChildrenFromFields. - * - * @return the children according to schema (empty for primitive types) - */ - List getChildrenFromFields(); - - /** - * Loads data in the vectors. (ownBuffers must be the same size as getFieldVectors()) - * - * @param fieldNode the fieldNode - * @param ownBuffers the buffers for this Field (own buffers only, children not included) - */ - void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers); - - /** - * Get the buffers of the fields, (same size as getFieldVectors() since it is their content). - * - * @return the buffers containing the data for this vector (ready for reading) - */ - List getFieldBuffers(); - - /** - * Retrieves the export buffer count for the C Data Interface. 
- * - * @return the number of variadic buffers - */ - default int getExportedCDataBufferCount() { - return getFieldBuffers().size(); - } - - /** - * Export a given buffer and its memory address into a list of buffers and a pointer to the list - * of buffers. - * - * @param buffer the buffer to export - * @param buffers the list of buffers - * @param buffersPtr the pointer to the list of buffers - * @param nullValue the null value to use for null buffer - * @param retain whether to retain the buffer when exporting - */ - default void exportBuffer( - ArrowBuf buffer, - List buffers, - ArrowBuf buffersPtr, - long nullValue, - boolean retain) { - if (buffer != null) { - if (retain) { - buffer.getReferenceManager().retain(); - } - buffersPtr.writeLong(buffer.memoryAddress()); - } else { - buffersPtr.writeLong(nullValue); - } - buffers.add(buffer); - } - - /** - * Export the buffers of the fields for C Data Interface. This method traverse the buffers and - * export buffer and buffer's memory address into a list of buffers and a pointer to the list of - * buffers. - * - *

    By default, when exporting a buffer, it will increase ref count for exported buffer that - * counts the usage at imported side. - */ - default void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long nullValue) { - List fieldBuffers = getFieldBuffers(); - - for (ArrowBuf arrowBuf : fieldBuffers) { - exportBuffer(arrowBuf, buffers, buffersPtr, nullValue, true); - } - } - - /** - * Get the inner vectors. - * - * @return the inner vectors for this field as defined by the TypeLayout - * @deprecated This API will be removed as the current implementations no longer support inner - * vectors. - */ - @Deprecated - List getFieldInnerVectors(); - - /** - * Gets the starting address of the underlying buffer associated with validity vector. - * - * @return buffer address - */ - long getValidityBufferAddress(); - - /** - * Gets the starting address of the underlying buffer associated with data vector. - * - * @return buffer address - */ - long getDataBufferAddress(); - - /** - * Gets the starting address of the underlying buffer associated with offset vector. - * - * @return buffer address - */ - long getOffsetBufferAddress(); - - /** - * Set the element at the given index to null. - * - * @param index the value to change - */ - void setNull(int index); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java deleted file mode 100644 index 4add7293586ca..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReusableBuffer; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.complex.impl.FixedSizeBinaryReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; -import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.validate.ValidateUtil; - -/** - * FixedSizeBinaryVector implements a fixed width vector of binary values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public class FixedSizeBinaryVector extends BaseFixedWidthVector - implements ValueIterableVector { - private final int byteWidth; - - /** - * Instantiate a FixedSizeBinaryVector. This doesn't allocate any memory for the data in vector. 
- * - * @param name name of the vector - * @param allocator allocator for memory management. - * @param byteWidth byte width of the binary values - */ - public FixedSizeBinaryVector(String name, BufferAllocator allocator, int byteWidth) { - this(name, FieldType.nullable(new FixedSizeBinary(byteWidth)), allocator); - } - - /** - * Instantiate a FixedSizeBinaryVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public FixedSizeBinaryVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a FixedSizeBinaryVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public FixedSizeBinaryVector(Field field, BufferAllocator allocator) { - super(field, allocator, ((FixedSizeBinary) field.getFieldType().getType()).getByteWidth()); - byteWidth = ((FixedSizeBinary) field.getFieldType().getType()).getByteWidth(); - } - - @Override - protected FieldReader getReaderImpl() { - return new FixedSizeBinaryReaderImpl(FixedSizeBinaryVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.FIXEDSIZEBINARY; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. 
- * - * @param index position of element - * @return element at given index - */ - public byte[] get(int index) { - assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; - } - final byte[] dst = new byte[byteWidth]; - valueBuffer.getBytes((long) index * byteWidth, dst, 0, byteWidth); - return dst; - } - - /** - * Read the value at the given position to the given output buffer. The caller is responsible for - * checking for nullity first. - * - * @param index position of element. - * @param buffer the buffer to write into. - */ - public void read(int index, ReusableBuffer buffer) { - final int startOffset = index * byteWidth; - buffer.set(valueBuffer, startOffset, byteWidth); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - * @param holder nullable holder to carry the buffer - */ - public void get(int index, NullableFixedSizeBinaryHolder holder) { - assert index >= 0; - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.buffer = valueBuffer.slice((long) index * byteWidth, byteWidth); - holder.byteWidth = byteWidth; - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public byte[] getObject(int index) { - return get(index); - } - - public int getByteWidth() { - return byteWidth; - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** Sets the value at index to the provided one. 
*/ - public void set(int index, byte[] value) { - assert index >= 0; - Preconditions.checkNotNull(value, "expecting a valid byte array"); - assert byteWidth <= value.length; - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setBytes((long) index * byteWidth, value, 0, byteWidth); - } - - /** - * Same as {@link #set(int, byte[])} but reallocates if index is larger than - * capacity. - */ - public void setSafe(int index, byte[] value) { - handleSafe(index); - set(index, value); - } - - /** Sets the value if isSet is positive, otherwise sets the index to null/invalid. */ - public void set(int index, int isSet, byte[] value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - public void setSafe(int index, int isSet, byte[] value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param buffer ArrowBuf containing binary value. - */ - public void set(int index, ArrowBuf buffer) { - assert index >= 0; - assert byteWidth <= buffer.capacity(); - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setBytes((long) index * byteWidth, buffer, 0, byteWidth); - } - - /** - * Same as {@link #set(int, ArrowBuf)} except that it handles the case when index is greater than - * or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param buffer ArrowBuf containing binary value. - */ - public void setSafe(int index, ArrowBuf buffer) { - handleSafe(index); - set(index, buffer); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param buffer ArrowBuf containing binary value. 
- */ - public void set(int index, int isSet, ArrowBuf buffer) { - if (isSet > 0) { - set(index, buffer); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, ArrowBuf)} except that it handles the case when index is greater than - * or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param buffer ArrowBuf containing binary value. - */ - public void setSafe(int index, int isSet, ArrowBuf buffer) { - handleSafe(index); - set(index, isSet, buffer); - } - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void set(int index, FixedSizeBinaryHolder holder) { - if (this.byteWidth != holder.byteWidth) { - throw new IllegalArgumentException( - String.format( - "holder.byteWidth: %d not equal to vector byteWidth: %d", - holder.byteWidth, this.byteWidth)); - } - set(index, holder.buffer); - } - - /** - * Same as {@link #set(int, FixedSizeBinaryHolder)} except that it handles the case where index - * and length of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void setSafe(int index, FixedSizeBinaryHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. 
- */ - public void set(int index, NullableFixedSizeBinaryHolder holder) { - if (holder.isSet < 0) { - throw new IllegalArgumentException("holder has a negative isSet value"); - } else if (this.byteWidth != holder.byteWidth) { - throw new IllegalArgumentException( - String.format( - "holder.byteWidth: %d not equal to vector byteWidth: %d", - holder.byteWidth, this.byteWidth)); - } else if (holder.isSet > 0) { - set(index, holder.buffer); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, NullableFixedSizeBinaryHolder)} except that it handles the case where - * index and length of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void setSafe(int index, NullableFixedSizeBinaryHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static byte[] get(final ArrowBuf buffer, final int index, final int byteWidth) { - final byte[] dst = new byte[byteWidth]; - buffer.getBytes((long) index * byteWidth, dst, 0, byteWidth); - return dst; - } - - @Override - public void validateScalars() { - for (int i = 0; i < getValueCount(); ++i) { - byte[] value = get(i); - if (value != null) { - ValidateUtil.validateOrThrow( - value.length == byteWidth, - "Invalid value for FixedSizeBinaryVector at position " - + i - + ". The length was " - + value.length - + " but the length of each element should be " - + byteWidth - + "."); - } - } - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((FixedSizeBinaryVector) to); - } - - private class TransferImpl implements TransferPair { - FixedSizeBinaryVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new FixedSizeBinaryVector(ref, allocator, FixedSizeBinaryVector.this.byteWidth); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new FixedSizeBinaryVector(field, allocator); - } - - public TransferImpl(FixedSizeBinaryVector to) { - this.to = to; - } - - @Override - public FixedSizeBinaryVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, FixedSizeBinaryVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java deleted file mode 100644 index e22a973f3b823..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -/** - * Interface for all fixed width {@link ElementAddressableVector} (e.g. integer, fixed size binary, - * etc). - */ -public interface FixedWidthVector extends ElementAddressableVector { - - /** - * Allocate a new memory space for this vector. Must be called prior to using the ValueVector. - * - * @param valueCount Number of values in the vector. - */ - void allocateNew(int valueCount); - - /** Zero out the underlying buffer backing this vector. */ - void zeroVector(); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java deleted file mode 100644 index 021e035d57333..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java +++ /dev/null @@ -1,417 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.Float16; -import org.apache.arrow.vector.complex.impl.Float2ReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.Float2Holder; -import org.apache.arrow.vector.holders.NullableFloat2Holder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * Float2Vector implements a fixed width (2 bytes) vector of short values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class Float2Vector extends BaseFixedWidthVector - implements FloatingPointVector, ValueIterableVector { - public static final byte TYPE_WIDTH = 2; - - /** - * Instantiate a Float2Vector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public Float2Vector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.FLOAT2.getType()), allocator); - } - - /** - * Instantiate a Float2Vector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public Float2Vector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a Float2Vector. 
This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public Float2Vector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new Float2ReaderImpl(Float2Vector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.FLOAT2; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public short get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getShort((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableFloat2Holder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getShort((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Short getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getShort((long) index * TYPE_WIDTH); - } - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - static short get(final ArrowBuf buffer, final int index) { - return buffer.getShort((long) index * TYPE_WIDTH); - } - - @Override - public double getValueAsDouble(int index) { - return getValueAsFloat(index); - } - - public float getValueAsFloat(int index) { - return Float16.toFloat(this.get(index)); - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, short value) { - valueBuffer.setShort((long) index * TYPE_WIDTH, value); - } - - private void setValue(int index, float value) { - valueBuffer.setShort((long) index * TYPE_WIDTH, Float16.toFloat16(value)); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, short value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void setWithPossibleTruncate(int index, float value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableFloat2Holder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, Float2Holder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, short)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, short value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #setWithPossibleTruncate(int, float)} except that it handles the case when index - * is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafeWithPossibleTruncate(int index, float value) { - handleSafe(index); - setWithPossibleTruncate(index, value); - } - - /** - * Same as {@link #set(int, NullableFloat2Holder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableFloat2Holder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, Float2Holder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, Float2Holder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, short value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setWithPossibleTruncate(int index, int isSet, float value) { - if (isSet > 0) { - setWithPossibleTruncate(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, short)} except that it handles the case when index is greater - * than or equal to current value capacity of the vector. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, short value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Same as {@link #set(int, int, short)} except that it handles the case when index is greater - * than or equal to current value capacity of the vector. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafeWithPossibleTruncate(int index, int isSet, float value) { - handleSafe(index); - setWithPossibleTruncate(index, isSet, value); - } - - @Override - public void setWithPossibleTruncate(int index, double value) { - throw new UnsupportedOperationException( - "The operation for double data types is not supported."); - } - - @Override - public void setSafeWithPossibleTruncate(int index, double value) { - throw new UnsupportedOperationException( - "The operation for double data types is not supported."); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. 
- * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. - * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((Float2Vector) to); - } - - private class TransferImpl implements TransferPair { - Float2Vector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new Float2Vector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new Float2Vector(field, allocator); - } - - public TransferImpl(Float2Vector to) { - this.to = to; - } - - @Override - public Float2Vector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, Float2Vector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java deleted file mode 100644 index 2b81149204be0..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.Float4ReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.Float4Holder; -import org.apache.arrow.vector.holders.NullableFloat4Holder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * Float4Vector implements a fixed width vector (4 bytes) of float values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class Float4Vector extends BaseFixedWidthVector - implements FloatingPointVector, ValueIterableVector { - public static final byte TYPE_WIDTH = 4; - - /** - * Instantiate a Float4Vector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public Float4Vector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.FLOAT4.getType()), allocator); - } - - /** - * Instantiate a Float4Vector. 
This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public Float4Vector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a Float4Vector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public Float4Vector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new Float4ReaderImpl(Float4Vector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.FLOAT4; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public float get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getFloat((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. 
- * - * @param index position of element - */ - public void get(int index, NullableFloat4Holder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getFloat((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Float getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getFloat((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, float value) { - valueBuffer.setFloat((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, float value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableFloat4Holder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, Float4Holder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, float)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, float value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableFloat4Holder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableFloat4Holder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, Float4Holder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, Float4Holder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, float value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, float)} except that it handles the case when index is greater - * than or equal to current value capacity of the vector. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, float value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static float get(final ArrowBuf buffer, final int index) { - return buffer.getFloat((long) index * TYPE_WIDTH); - } - - @Override - public void setWithPossibleTruncate(int index, double value) { - set(index, (float) value); - } - - @Override - public void setSafeWithPossibleTruncate(int index, double value) { - setSafe(index, (float) value); - } - - @Override - public double getValueAsDouble(int index) { - return get(index); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((Float4Vector) to); - } - - private class TransferImpl implements TransferPair { - Float4Vector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new Float4Vector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new Float4Vector(field, allocator); - } - - public TransferImpl(Float4Vector to) { - this.to = to; - } - - @Override - public Float4Vector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, Float4Vector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java deleted file mode 100644 index 6758ffde5ce7d..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.Float8ReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.Float8Holder; -import org.apache.arrow.vector.holders.NullableFloat8Holder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * Float8Vector implements a fixed width vector (8 bytes) of double values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class Float8Vector extends BaseFixedWidthVector - implements FloatingPointVector, ValueIterableVector { - public static final byte TYPE_WIDTH = 8; - - /** - * Instantiate a Float8Vector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public Float8Vector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.FLOAT8.getType()), allocator); - } - - /** - * Instantiate a Float8Vector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public Float8Vector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a Float8Vector. This doesn't allocate any memory for the data in vector. 
- * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public Float8Vector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new Float8ReaderImpl(Float8Vector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.FLOAT8; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public double get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getDouble((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableFloat8Holder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getDouble((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. 
- * - * @param index position of element - * @return element at given index - */ - @Override - public Double getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getDouble((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, double value) { - valueBuffer.setDouble((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, double value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableFloat8Holder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, Float8Holder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, double)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, double value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableFloat8Holder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableFloat8Holder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, Float8Holder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, Float8Holder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, double value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, double)} except that it handles the case when index is greater - * than or equal to current value capacity of the vector. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, double value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static double get(final ArrowBuf buffer, final int index) { - return buffer.getDouble((long) index * TYPE_WIDTH); - } - - @Override - public void setWithPossibleTruncate(int index, double value) { - set(index, value); - } - - @Override - public void setSafeWithPossibleTruncate(int index, double value) { - setSafe(index, value); - } - - @Override - public double getValueAsDouble(int index) { - return get(index); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((Float8Vector) to); - } - - private class TransferImpl implements TransferPair { - Float8Vector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new Float8Vector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new Float8Vector(field, allocator); - } - - public TransferImpl(Float8Vector to) { - this.to = to; - } - - @Override - public Float8Vector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, Float8Vector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/FloatingPointVector.java b/java/vector/src/main/java/org/apache/arrow/vector/FloatingPointVector.java deleted file mode 100644 index 343c74e9748de..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/FloatingPointVector.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -/** The interface for vectors with floating point values. */ -public interface FloatingPointVector extends ValueVector { - - /** - * Sets the value at the given index, note this value may be truncated internally. - * - * @param index the index to set. - * @param value the value to set. - */ - void setWithPossibleTruncate(int index, double value); - - /** - * Sets the value at the given index, note this value may be truncated internally. Any - * expansion/reallocation is handled automatically. - * - * @param index the index to set. - * @param value the value to set. - */ - void setSafeWithPossibleTruncate(int index, double value); - - /** - * Gets the value at the given index. - * - * @param index the index to retrieve the value. - * @return the value at the index. - */ - double getValueAsDouble(int index); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java b/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java deleted file mode 100644 index e8250f9f57f2a..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import java.math.BigDecimal; -import java.nio.charset.Charset; - -/** - * Helper class to generate test data for Nullable fixed and variable width scalar vectors. Previous - * implementations of java vector classes provided generateTestData(now deprecated) API to populate - * the vector with sample data. This class should be used for that purpose. - */ -public class GenerateSampleData { - private GenerateSampleData() {} - - /** Populates vector with valueCount random values. 
*/ - public static void generateTestData(final ValueVector vector, final int valueCount) { - if (vector instanceof IntVector) { - writeIntData((IntVector) vector, valueCount); - } else if (vector instanceof DecimalVector) { - writeDecimalData((DecimalVector) vector, valueCount); - } else if (vector instanceof BitVector) { - writeBooleanData((BitVector) vector, valueCount); - } else if (vector instanceof VarCharVector) { - writeVarCharData((VarCharVector) vector, valueCount); - } else if (vector instanceof VarBinaryVector) { - writeVarBinaryData((VarBinaryVector) vector, valueCount); - } else if (vector instanceof BigIntVector) { - writeBigIntData((BigIntVector) vector, valueCount); - } else if (vector instanceof Float4Vector) { - writeFloatData((Float4Vector) vector, valueCount); - } else if (vector instanceof Float8Vector) { - writeDoubleData((Float8Vector) vector, valueCount); - } else if (vector instanceof DateDayVector) { - writeDateDayData((DateDayVector) vector, valueCount); - } else if (vector instanceof DateMilliVector) { - writeDateMilliData((DateMilliVector) vector, valueCount); - } else if (vector instanceof IntervalDayVector) { - writeIntervalDayData((IntervalDayVector) vector, valueCount); - } else if (vector instanceof IntervalYearVector) { - writeIntervalYearData((IntervalYearVector) vector, valueCount); - } else if (vector instanceof SmallIntVector) { - writeSmallIntData((SmallIntVector) vector, valueCount); - } else if (vector instanceof TinyIntVector) { - writeTinyIntData((TinyIntVector) vector, valueCount); - } else if (vector instanceof TimeMicroVector) { - writeTimeMicroData((TimeMicroVector) vector, valueCount); - } else if (vector instanceof TimeMilliVector) { - writeTimeMilliData((TimeMilliVector) vector, valueCount); - } else if (vector instanceof TimeNanoVector) { - writeTimeNanoData((TimeNanoVector) vector, valueCount); - } else if (vector instanceof TimeSecVector) { - writeTimeSecData((TimeSecVector) vector, valueCount); - } else if 
(vector instanceof TimeStampSecVector) { - writeTimeStampData((TimeStampSecVector) vector, valueCount); - } else if (vector instanceof TimeStampMicroVector) { - writeTimeStampData((TimeStampMicroVector) vector, valueCount); - } else if (vector instanceof TimeStampMilliVector) { - writeTimeStampData((TimeStampMilliVector) vector, valueCount); - } else if (vector instanceof TimeStampNanoVector) { - writeTimeStampData((TimeStampNanoVector) vector, valueCount); - } else if (vector instanceof TimeStampSecTZVector) { - writeTimeStampData((TimeStampSecTZVector) vector, valueCount); - } else if (vector instanceof TimeStampMicroTZVector) { - writeTimeStampData((TimeStampMicroTZVector) vector, valueCount); - } else if (vector instanceof TimeStampMilliTZVector) { - writeTimeStampData((TimeStampMilliTZVector) vector, valueCount); - } else if (vector instanceof TimeStampNanoTZVector) { - writeTimeStampData((TimeStampNanoTZVector) vector, valueCount); - } else if (vector instanceof UInt1Vector) { - writeUInt1Data((UInt1Vector) vector, valueCount); - } else if (vector instanceof UInt2Vector) { - writeUInt2Data((UInt2Vector) vector, valueCount); - } else if (vector instanceof UInt4Vector) { - writeUInt4Data((UInt4Vector) vector, valueCount); - } else if (vector instanceof UInt8Vector) { - writeUInt8Data((UInt8Vector) vector, valueCount); - } - } - - private static void writeTimeStampData(TimeStampVector vector, int valueCount) { - final long even = 100000; - final long odd = 200000; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeDecimalData(DecimalVector vector, int valueCount) { - final BigDecimal even = new BigDecimal("0.0543278923"); - final BigDecimal odd = new BigDecimal("2.0543278923"); - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - 
vector.setValueCount(valueCount); - } - - private static void writeIntData(IntVector vector, int valueCount) { - final int even = 1000; - final int odd = 2000; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeBooleanData(BitVector vector, int valueCount) { - final int even = 0; - final int odd = 1; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeIntervalYearData(IntervalYearVector vector, int valueCount) { - final int even = 1; - final int odd = 2; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeIntervalDayData(IntervalDayVector vector, int valueCount) { - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, 1, 50); - } else { - vector.setSafe(i, 2, 100); - } - } - vector.setValueCount(valueCount); - } - - private static void writeTimeSecData(TimeSecVector vector, int valueCount) { - final int even = 500; - final int odd = 900; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeTimeMilliData(TimeMilliVector vector, int valueCount) { - final int even = 1000; - final int odd = 2000; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeTimeMicroData(TimeMicroVector vector, int valueCount) { - final long even = 1000000000; - final long odd = 2000000000; - for (int i = 0; i < valueCount; i++) { - 
if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeTimeNanoData(TimeNanoVector vector, int valueCount) { - final long even = 1000000000; - final long odd = 2000000000; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeDateDayData(DateDayVector vector, int valueCount) { - final int even = 1000; - final int odd = 2000; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeDateMilliData(DateMilliVector vector, int valueCount) { - final long even = 1000000000; - final long odd = 2000000000; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeSmallIntData(SmallIntVector vector, int valueCount) { - final short even = 10; - final short odd = 20; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeTinyIntData(TinyIntVector vector, int valueCount) { - final byte even = 1; - final byte odd = 2; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeUInt1Data(UInt1Vector vector, int valueCount) { - final byte even = 1; - final byte odd = 2; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void 
writeUInt2Data(UInt2Vector vector, int valueCount) { - final short even = 10; - final short odd = 20; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeUInt4Data(UInt4Vector vector, int valueCount) { - final int even = 1000; - final int odd = 2000; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeUInt8Data(UInt8Vector vector, int valueCount) { - final long even = 1000000000; - final long odd = 2000000000; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeBigIntData(BigIntVector vector, int valueCount) { - final long even = 1000000000; - final long odd = 2000000000; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeFloatData(Float4Vector vector, int valueCount) { - final float even = 20.3f; - final float odd = 40.2f; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeDoubleData(Float8Vector vector, int valueCount) { - final double even = 20.2373; - final double odd = 40.2378; - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeVarBinaryData(VarBinaryVector vector, int valueCount) { - Charset utf8Charset = Charset.forName("UTF-8"); - final byte[] even = 
"AAAAA1".getBytes(utf8Charset); - final byte[] odd = "BBBBBBBBB2".getBytes(utf8Charset); - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } - - private static void writeVarCharData(VarCharVector vector, int valueCount) { - Charset utf8Charset = Charset.forName("UTF-8"); - final byte[] even = "AAAAA1".getBytes(utf8Charset); - final byte[] odd = "BBBBBBBBB2".getBytes(utf8Charset); - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java deleted file mode 100644 index 6058539a2d731..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.IntReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.IntHolder; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * IntVector implements a fixed width (4 bytes) vector of integer values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class IntVector extends BaseFixedWidthVector - implements BaseIntVector, ValueIterableVector { - public static final byte TYPE_WIDTH = 4; - - /** - * Instantiate a IntVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public IntVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.INT.getType()), allocator); - } - - /** - * Instantiate a IntVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public IntVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a IntVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. 
- */ - public IntVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new IntReaderImpl(IntVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.INT; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public int get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableIntHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. 
- * - * @param index position of element - * @return element at given index - */ - @Override - public Integer getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getInt((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, int value) { - valueBuffer.setInt((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, int value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableIntHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, IntHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, int)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, int value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableIntHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableIntHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, IntHolder)} except that it handles the case when index is greater than - * or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, IntHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, int value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, int)} except that it handles the case when index is greater than - * or equal to current value capacity of the vector. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, int value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static int get(final ArrowBuf buffer, final int index) { - return buffer.getInt((long) index * TYPE_WIDTH); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((IntVector) to); - } - - @Override - public void setWithPossibleTruncate(int index, long value) { - this.setSafe(index, (int) value); - } - - @Override - public void setUnsafeWithPossibleTruncate(int index, long value) { - this.set(index, (int) value); - } - - @Override - public long getValueAsLong(int index) { - return this.get(index); - } - - private class TransferImpl implements TransferPair { - IntVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new IntVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new IntVector(field, allocator); - } - - public TransferImpl(IntVector to) { - this.to = to; - } - - @Override - public IntVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, IntVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java b/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java deleted file mode 100644 index a0ee849707b90..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java +++ /dev/null @@ -1,437 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import java.time.Duration; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.IntervalDayReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.IntervalDayHolder; -import org.apache.arrow.vector.holders.NullableIntervalDayHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * IntervalDayVector implements a fixed width vector (8 bytes) of interval (days and milliseconds) - * values which could be null. A validity buffer (bit vector) is maintained to track which elements - * in the vector are null. - */ -public final class IntervalDayVector extends BaseFixedWidthVector - implements ValueIterableVector { - public static final byte TYPE_WIDTH = 8; - private static final byte MILLISECOND_OFFSET = 4; - - /** - * Instantiate a IntervalDayVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public IntervalDayVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.INTERVALDAY.getType()), allocator); - } - - /** - * Instantiate a IntervalDayVector. 
This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public IntervalDayVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a IntervalDayVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public IntervalDayVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new IntervalDayReaderImpl(IntervalDayVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.INTERVALDAY; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Given a data buffer, get the number of days stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return day value stored at the index. - */ - public static int getDays(final ArrowBuf buffer, final int index) { - return buffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Given a data buffer, get the get the number of milliseconds stored at a particular position in - * the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return milliseconds value stored at the index. - */ - public static int getMilliseconds(final ArrowBuf buffer, final int index) { - return buffer.getInt((long) index * TYPE_WIDTH + MILLISECOND_OFFSET); - } - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public ArrowBuf get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; - } - return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableIntervalDayHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - final long startIndex = (long) index * TYPE_WIDTH; - holder.isSet = 1; - holder.days = valueBuffer.getInt(startIndex); - holder.milliseconds = valueBuffer.getInt(startIndex + MILLISECOND_OFFSET); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Duration getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return getObjectNotNull(index); - } - } - - /** - * Same as {@link #getObject(int)} but does not check for null. 
- * - * @param index position of element - * @return element at given index - */ - public Duration getObjectNotNull(int index) { - final long startIndex = (long) index * TYPE_WIDTH; - final int days = valueBuffer.getInt(startIndex); - final int milliseconds = valueBuffer.getInt(startIndex + MILLISECOND_OFFSET); - return Duration.ofDays(days).plusMillis(milliseconds); - } - - /** - * Get the Interval value at a given index as a {@link StringBuilder} object. - * - * @param index position of the element - * @return String Builder object with Interval value as [days, hours, minutes, seconds, millis] - */ - public StringBuilder getAsStringBuilder(int index) { - if (isSet(index) == 0) { - return null; - } else { - return getAsStringBuilderHelper(index); - } - } - - private StringBuilder getAsStringBuilderHelper(int index) { - final long startIndex = (long) index * TYPE_WIDTH; - - final int days = valueBuffer.getInt(startIndex); - int millis = valueBuffer.getInt(startIndex + MILLISECOND_OFFSET); - - final int hours = millis / org.apache.arrow.vector.util.DateUtility.hoursToMillis; - millis = millis % org.apache.arrow.vector.util.DateUtility.hoursToMillis; - - final int minutes = millis / org.apache.arrow.vector.util.DateUtility.minutesToMillis; - millis = millis % org.apache.arrow.vector.util.DateUtility.minutesToMillis; - - final int seconds = millis / org.apache.arrow.vector.util.DateUtility.secondsToMillis; - millis = millis % org.apache.arrow.vector.util.DateUtility.secondsToMillis; - - final String dayString = (Math.abs(days) == 1) ? 
" day " : " days "; - - return new StringBuilder() - .append(days) - .append(dayString) - .append(hours) - .append(":") - .append(minutes) - .append(":") - .append(seconds) - .append(".") - .append(millis); - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, ArrowBuf value) { - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setBytes((long) index * TYPE_WIDTH, value, 0, TYPE_WIDTH); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param days days for the interval - * @param milliseconds milliseconds for the interval - */ - public void set(int index, int days, int milliseconds) { - final long offsetIndex = (long) index * TYPE_WIDTH; - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setInt(offsetIndex, days); - valueBuffer.setInt((offsetIndex + MILLISECOND_OFFSET), milliseconds); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableIntervalDayHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - set(index, holder.days, holder.milliseconds); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, IntervalDayHolder holder) { - set(index, holder.days, holder.milliseconds); - } - - /** - * Same as {@link #set(int, ArrowBuf)} except that it handles the case when index is greater than - * or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, ArrowBuf value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, int, int)} except that it handles the case when index is greater than - * or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param days days for the interval - * @param milliseconds milliseconds for the interval - */ - public void setSafe(int index, int days, int milliseconds) { - handleSafe(index); - set(index, days, milliseconds); - } - - /** - * Same as {@link #set(int, NullableIntervalDayHolder)} except that it handles the case when index - * is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableIntervalDayHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, IntervalDayHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, IntervalDayHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param days days component of interval - * @param milliseconds millisecond component of interval - */ - public void set(int index, int isSet, int days, int milliseconds) { - if (isSet > 0) { - set(index, days, milliseconds); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, int, int)} except that it handles the case when index is greater - * than or equal to current value capacity of the vector. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param days days component of interval - * @param milliseconds millisecond component of interval - */ - public void setSafe(int index, int isSet, int days, int milliseconds) { - handleSafe(index); - set(index, isSet, days, milliseconds); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((IntervalDayVector) to); - } - - private class TransferImpl implements TransferPair { - IntervalDayVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new IntervalDayVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new IntervalDayVector(field, allocator); - } - - public TransferImpl(IntervalDayVector to) { - this.to = to; - } - - @Override - public IntervalDayVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, IntervalDayVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java b/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java deleted file mode 100644 index eece3b34b85ff..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import java.time.Duration; -import java.time.Period; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.IntervalMonthDayNanoReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.IntervalMonthDayNanoHolder; -import org.apache.arrow.vector.holders.NullableIntervalMonthDayNanoHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * IntervalMonthDayNanoVector implements a fixed width vector (16 bytes) of interval (month, days - * and nanoseconds) values which could be null. A validity buffer (bit vector) is maintained to - * track which elements in the vector are null. - * - *

    Month, day and nanoseconds are independent from one another and there is no specific limits - * imposed on their values. - */ -public final class IntervalMonthDayNanoVector extends BaseFixedWidthVector - implements ValueIterableVector { - public static final byte TYPE_WIDTH = 16; - private static final byte DAY_OFFSET = 4; - private static final byte NANOSECOND_OFFSET = 8; - - /** - * Instantiate a IntervalMonthDayNanoVector. This doesn't allocate any memory for the data in - * vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public IntervalMonthDayNanoVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.INTERVALMONTHDAYNANO.getType()), allocator); - } - - /** - * Instantiate a IntervalMonthDayNanoVector. This doesn't allocate any memory for the data in - * vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public IntervalMonthDayNanoVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a IntervalMonthDayNanoVector. This doesn't allocate any memory for the data in - * vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public IntervalMonthDayNanoVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new IntervalMonthDayNanoReaderImpl(IntervalMonthDayNanoVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. 
- * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.INTERVALMONTHDAYNANO; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Given a data buffer, get the number of months stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return day value stored at the index. - */ - public static int getMonths(final ArrowBuf buffer, final int index) { - return buffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Given a data buffer, get the number of days stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return day value stored at the index. - */ - public static int getDays(final ArrowBuf buffer, final int index) { - return buffer.getInt((long) index * TYPE_WIDTH + DAY_OFFSET); - } - - /** - * Given a data buffer, get the get the number of nanoseconds stored at a particular position in - * the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return nanoseconds value stored at the index. - */ - public static long getNanoseconds(final ArrowBuf buffer, final int index) { - return buffer.getLong((long) index * TYPE_WIDTH + NANOSECOND_OFFSET); - } - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public ArrowBuf get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; - } - return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableIntervalMonthDayNanoHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - final long startIndex = (long) index * TYPE_WIDTH; - holder.isSet = 1; - holder.months = valueBuffer.getInt(startIndex); - holder.days = valueBuffer.getInt(startIndex + DAY_OFFSET); - holder.nanoseconds = valueBuffer.getLong(startIndex + NANOSECOND_OFFSET); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public PeriodDuration getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - final long startIndex = (long) index * TYPE_WIDTH; - final int months = valueBuffer.getInt(startIndex); - final int days = valueBuffer.getInt(startIndex + DAY_OFFSET); - final long nanoseconds = valueBuffer.getLong(startIndex + NANOSECOND_OFFSET); - - return new PeriodDuration( - Period.ofMonths(months).plusDays(days), Duration.ofNanos(nanoseconds)); - } - } - - /** - * Get the Interval value at a given index as a {@link StringBuilder} object. 
- * - * @param index position of the element - * @return String Builder object with Interval value as - */ - public StringBuilder getAsStringBuilder(int index) { - if (isSet(index) == 0) { - return null; - } else { - return getAsStringBuilderHelper(index); - } - } - - private StringBuilder getAsStringBuilderHelper(int index) { - return new StringBuilder().append(getObject(index).toString()).append(" "); - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, ArrowBuf value) { - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setBytes((long) index * TYPE_WIDTH, value, 0, TYPE_WIDTH); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param months months component of interval - * @param days days component of interval - * @param nanoseconds nanosecond component of interval - */ - public void set(int index, int months, int days, long nanoseconds) { - final long offsetIndex = (long) index * TYPE_WIDTH; - BitVectorHelper.setBit(validityBuffer, index); - valueBuffer.setInt(offsetIndex, months); - valueBuffer.setInt(offsetIndex + DAY_OFFSET, days); - valueBuffer.setLong((offsetIndex + NANOSECOND_OFFSET), nanoseconds); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element at the given index will be null. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableIntervalMonthDayNanoHolder holder) - throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - set(index, holder.months, holder.days, holder.nanoseconds); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, IntervalMonthDayNanoHolder holder) { - set(index, holder.months, holder.days, holder.nanoseconds); - } - - /** - * Same as {@link #set(int, ArrowBuf)} except that it handles the case when index is greater than - * or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, ArrowBuf value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, int, int, long)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param months months for the interval - * @param days days for the interval - * @param nanoseconds nanoseconds for the interval - */ - public void setSafe(int index, int months, int days, long nanoseconds) { - handleSafe(index); - set(index, months, days, nanoseconds); - } - - /** - * Same as {@link #set(int, NullableIntervalMonthDayNanoHolder)} except that it handles the case - * when index is greater than or equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableIntervalMonthDayNanoHolder holder) - throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, IntervalMonthDayNanoHolder)} except that it handles the case when - * index is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, IntervalMonthDayNanoHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param months months component of interval - * @param days days component of interval - * @param nanoseconds nanosecond component of interval - */ - public void set(int index, int isSet, int months, int days, long nanoseconds) { - if (isSet > 0) { - set(index, months, days, nanoseconds); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, int, int, long)} except that it handles the case when index is - * greater than or equal to current value capacity of the vector. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param months months component of interval - * @param days days component of interval - * @param nanoseconds nanosecond component of interval - */ - public void setSafe(int index, int isSet, int months, int days, long nanoseconds) { - handleSafe(index); - set(index, isSet, months, days, nanoseconds); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising of this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((IntervalMonthDayNanoVector) to); - } - - private class TransferImpl implements TransferPair { - IntervalMonthDayNanoVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new IntervalMonthDayNanoVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new IntervalMonthDayNanoVector(field, allocator); - } - - public TransferImpl(IntervalMonthDayNanoVector to) { - this.to = to; - } - - @Override - public IntervalMonthDayNanoVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, IntervalMonthDayNanoVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java b/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java deleted file mode 100644 index 92b46593a3ab6..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import java.time.Period; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.IntervalYearReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.IntervalYearHolder; -import org.apache.arrow.vector.holders.NullableIntervalYearHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * IntervalYearVector implements a fixed width (4 bytes) vector of interval (years and months) - * values which could be null. A validity buffer (bit vector) is maintained to track which elements - * in the vector are null. - */ -public final class IntervalYearVector extends BaseFixedWidthVector - implements ValueIterableVector { - public static final byte TYPE_WIDTH = 4; - - /** - * Instantiate a IntervalYearVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public IntervalYearVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.INTERVALYEAR.getType()), allocator); - } - - /** - * Instantiate a IntervalYearVector. This doesn't allocate any memory for the data in vector. 
- * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public IntervalYearVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a IntervalYearVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public IntervalYearVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new IntervalYearReaderImpl(IntervalYearVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.INTERVALYEAR; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static int getTotalMonths(final ArrowBuf buffer, final int index) { - return buffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public int get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableIntervalYearHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Period getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - final int interval = valueBuffer.getInt((long) index * TYPE_WIDTH); - // TODO: verify interval is in months - return Period.ofMonths(interval); - } - } - - /** - * Get the Interval value at a given index as a {@link StringBuilder} object. 
- * - * @param index position of the element - * @return String Builder object with Interval value as [years, months] - */ - public StringBuilder getAsStringBuilder(int index) { - if (isSet(index) == 0) { - return null; - } else { - return getAsStringBuilderHelper(index); - } - } - - private StringBuilder getAsStringBuilderHelper(int index) { - int value = valueBuffer.getInt((long) index * TYPE_WIDTH); - - final int years = (value / org.apache.arrow.vector.util.DateUtility.yearsToMonths); - final int months = (value % org.apache.arrow.vector.util.DateUtility.yearsToMonths); - - final String yearString = (Math.abs(years) == 1) ? " year " : " years "; - final String monthString = (Math.abs(months) == 1) ? " month " : " months "; - - return new StringBuilder().append(years).append(yearString).append(months).append(monthString); - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, int value) { - valueBuffer.setInt((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, int value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableIntervalYearHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, IntervalYearHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, int)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, int value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableIntervalYearHolder)} except that it handles the case when - * index is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableIntervalYearHolder holder) - throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, IntervalYearHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, IntervalYearHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, int value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, int)} except that it handles the case when index is greater than - * or equal to current value capacity of the vector. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, int value) { - handleSafe(index); - set(index, isSet, value); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. 
- * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. - * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((IntervalYearVector) to); - } - - private class TransferImpl implements TransferPair { - IntervalYearVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new IntervalYearVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new IntervalYearVector(field, allocator); - } - - public TransferImpl(IntervalYearVector to) { - this.to = to; - } - - @Override - public IntervalYearVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, IntervalYearVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java deleted file mode 100644 index f38627b933b6c..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java +++ /dev/null @@ -1,308 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReusableBuffer; -import org.apache.arrow.vector.complex.impl.LargeVarBinaryReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.LargeVarBinaryHolder; -import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * LargeVarBinaryVector implements a large variable width vector of binary values which could be - * NULL. A validity buffer (bit vector) is maintained to track which elements in the vector are - * null. The size of the underlying buffer can be over 2GB. - */ -public final class LargeVarBinaryVector extends BaseLargeVariableWidthVector - implements ValueIterableVector { - - /** - * Instantiate a LargeVarBinaryVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public LargeVarBinaryVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.LARGEVARBINARY.getType()), allocator); - } - - /** - * Instantiate a LargeVarBinaryVector. 
This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public LargeVarBinaryVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a LargeVarBinaryVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public LargeVarBinaryVector(Field field, BufferAllocator allocator) { - super(field, allocator); - } - - @Override - protected FieldReader getReaderImpl() { - return new LargeVarBinaryReaderImpl(LargeVarBinaryVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.LARGEVARBINARY; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the variable length element at specified index as byte array. - * - * @param index position of element to get - * @return array of bytes for non-null element, null otherwise - */ - public byte[] get(int index) { - assert index >= 0; - if (isSet(index) == 0) { - return null; - } - final long startOffset = getStartOffset(index); - final long dataLength = getEndOffset(index) - startOffset; - final byte[] result = new byte[(int) dataLength]; - valueBuffer.getBytes(startOffset, result, 0, (int) dataLength); - return result; - } - - /** - * Read the value at the given position to the given output buffer. The caller is responsible for - * checking for nullity first. - * - * @param index position of element. 
- * @param buffer the buffer to write into. - */ - @Override - public void read(int index, ReusableBuffer buffer) { - final long startOffset = getStartOffset(index); - final long dataLength = getEndOffset(index) - startOffset; - buffer.set(valueBuffer, startOffset, dataLength); - } - - /** - * Get the variable length element at specified index as Text. - * - * @param index position of element to get - * @return byte array for non-null element, null otherwise - */ - @Override - public byte[] getObject(int index) { - return get(index); - } - - /** - * Get the variable length element at specified index and sets the state in provided holder. - * - * @param index position of element to get - * @param holder data holder to be populated by this function - */ - public void get(int index, NullableLargeVarBinaryHolder holder) { - assert index >= 0; - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.start = getStartOffset(index); - holder.end = getEndOffset(index); - holder.buffer = valueBuffer; - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. 
- */ - public void set(int index, LargeVarBinaryHolder holder) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final int dataLength = (int) (holder.end - holder.start); - final long startOffset = getStartOffset(index); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - lastSet = index; - } - - /** - * Same as {@link #set(int, LargeVarBinaryHolder)} except that it handles the case where index and - * length of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void setSafe(int index, LargeVarBinaryHolder holder) { - assert index >= 0; - final int dataLength = (int) (holder.end - holder.start); - handleSafe(index, dataLength); - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final long startOffset = getStartOffset(index); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - lastSet = index; - } - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. 
- */ - public void set(int index, NullableLargeVarBinaryHolder holder) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet); - final long startOffset = getStartOffset(index); - if (holder.isSet != 0) { - final int dataLength = (int) (holder.end - holder.start); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - } else { - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset); - } - lastSet = index; - } - - /** - * Same as {@link #set(int, NullableLargeVarBinaryHolder)} except that it handles the case where - * index and length of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void setSafe(int index, NullableLargeVarBinaryHolder holder) { - assert index >= 0; - if (holder.isSet != 0) { - final int dataLength = (int) (holder.end - holder.start); - handleSafe(index, dataLength); - fillHoles(index); - final long startOffset = getStartOffset(index); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - } else { - fillEmpties(index + 1); - } - BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet); - lastSet = index; - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising of this and a target vector of the same type. 
- * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. - * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((LargeVarBinaryVector) to); - } - - private class TransferImpl implements TransferPair { - LargeVarBinaryVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new LargeVarBinaryVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new LargeVarBinaryVector(field, allocator); - } - - public TransferImpl(LargeVarBinaryVector to) { - this.to = to; - } - - @Override - public LargeVarBinaryVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, LargeVarBinaryVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java deleted file mode 100644 index 07a9a172f0b74..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReusableBuffer; -import org.apache.arrow.vector.complex.impl.LargeVarCharReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.LargeVarCharHolder; -import org.apache.arrow.vector.holders.NullableLargeVarCharHolder; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.Text; -import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.validate.ValidateUtil; - -/** - * LargeVarCharVector implements a variable width vector of VARCHAR values which could be NULL. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - * - *

    The offset width of this vector is 8, so the underlying buffer can be larger than 2GB. - */ -public final class LargeVarCharVector extends BaseLargeVariableWidthVector - implements ValueIterableVector { - - /** - * Instantiate a LargeVarCharVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public LargeVarCharVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(Types.MinorType.LARGEVARCHAR.getType()), allocator); - } - - /** - * Instantiate a LargeVarCharVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public LargeVarCharVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a LargeVarCharVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public LargeVarCharVector(Field field, BufferAllocator allocator) { - super(field, allocator); - } - - @Override - protected FieldReader getReaderImpl() { - return new LargeVarCharReaderImpl(LargeVarCharVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public Types.MinorType getMinorType() { - return Types.MinorType.LARGEVARCHAR; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the variable length element at specified index as byte array. 
- * - * @param index position of element to get - * @return array of bytes for non-null element, null otherwise - */ - @Override - public byte[] get(int index) { - assert index >= 0; - if (isSet(index) == 0) { - return null; - } - final long startOffset = getStartOffset(index); - final long dataLength = getEndOffset(index) - startOffset; - final byte[] result = new byte[(int) dataLength]; - valueBuffer.getBytes(startOffset, result, 0, (int) dataLength); - return result; - } - - /** - * Get the variable length element at specified index as Text. - * - * @param index position of element to get - * @return Text object for non-null element, null otherwise - */ - @Override - public Text getObject(int index) { - assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; - } - - final Text result = new Text(); - read(index, result); - return result; - } - - /** - * Read the value at the given position to the given output buffer. The caller is responsible for - * checking for nullity first. - * - * @param index position of element. - * @param buffer the buffer to write into. - */ - @Override - public void read(int index, ReusableBuffer buffer) { - final long startOffset = getStartOffset(index); - final long dataLength = getEndOffset(index) - startOffset; - buffer.set(valueBuffer, startOffset, dataLength); - } - - /** - * Get the variable length element at specified index and sets the state in provided holder. 
- * - * @param index position of element to get - * @param holder data holder to be populated by this function - */ - public void get(int index, NullableLargeVarCharHolder holder) { - assert index >= 0; - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.start = getStartOffset(index); - holder.end = getEndOffset(index); - holder.buffer = valueBuffer; - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void set(int index, LargeVarCharHolder holder) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final int dataLength = (int) (holder.end - holder.start); - final long startOffset = getStartOffset(index); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - lastSet = index; - } - - /** - * Same as {@link #set(int, LargeVarCharHolder)} except that it handles the case where index and - * length of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. 
- */ - public void setSafe(int index, LargeVarCharHolder holder) { - assert index >= 0; - final int dataLength = (int) (holder.end - holder.start); - handleSafe(index, dataLength); - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final long startOffset = getStartOffset(index); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - lastSet = index; - } - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void set(int index, NullableLargeVarCharHolder holder) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet); - final long startOffset = getStartOffset(index); - if (holder.isSet != 0) { - final int dataLength = (int) (holder.end - holder.start); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - } else { - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset); - } - lastSet = index; - } - - /** - * Same as {@link #set(int, NullableLargeVarCharHolder)} except that it handles the case where - * index and length of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. 
- */ - public void setSafe(int index, NullableLargeVarCharHolder holder) { - assert index >= 0; - if (holder.isSet != 0) { - final int dataLength = (int) (holder.end - holder.start); - handleSafe(index, dataLength); - fillHoles(index); - final long startOffset = getStartOffset(index); - offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - } else { - fillHoles(index + 1); - } - BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet); - lastSet = index; - } - - /** - * Set the variable length element at the specified index to the content in supplied Text. - * - * @param index position of the element to set - * @param text Text object with data - */ - public void set(int index, Text text) { - set(index, text.getBytes(), 0, (int) text.getLength()); - } - - /** - * Same as {@link #set(int, NullableLargeVarCharHolder)} except that it handles the case where - * index and length of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set. - * @param text Text object with data - */ - public void setSafe(int index, Text text) { - setSafe(index, text.getBytes(), 0, (int) text.getLength()); - } - - @Override - public void validateScalars() { - for (int i = 0; i < getValueCount(); ++i) { - byte[] value = get(i); - if (value != null) { - ValidateUtil.validateOrThrow( - Text.validateUTF8NoThrow(value), - "Non-UTF-8 data in VarCharVector at position " + i + "."); - } - } - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. 
- * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new LargeVarCharVector.TransferImpl(ref, allocator); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new LargeVarCharVector.TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. - * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new LargeVarCharVector.TransferImpl((LargeVarCharVector) to); - } - - private class TransferImpl implements TransferPair { - LargeVarCharVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new LargeVarCharVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new LargeVarCharVector(field, allocator); - } - - public TransferImpl(LargeVarCharVector to) { - this.to = to; - } - - @Override - public LargeVarCharVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, LargeVarCharVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullCheckingForGet.java b/java/vector/src/main/java/org/apache/arrow/vector/NullCheckingForGet.java deleted file mode 100644 index 92432c6c0e33f..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullCheckingForGet.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license 
agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -/** - * Configuration class to determine if null checking should be enabled or disabled for the "get" - * methods. For example, the get method of class org.apache.arrow.vector.Float8Vector first checks - * if the value at the given index is null, before retrieving the value. This configuration will - * turn on and off such checks. - * - *

    Null checking is on by default. You can disable it by setting either the system property or - * the environmental variable to "false". The system property is named - * "arrow.enable_null_check_for_get" and the environmental variable is named - * "ARROW_ENABLE_NULL_CHECK_FOR_GET". When both the system property and the environmental variable - * are set, the system property takes precedence. - * - *

    Disabling null-checking in the "get" methods may lead to performance improvements. For - * example, suppose we have the following micro-benchmark: - * - *

    - * - *

    {@code
    - * Float8Vector vector = ...
    - *
    - * public void test() {
    - *   sum = 0;
    - *   for (int i = 0; i < 1024; i++) {
    - *     vector.set(i, i + 10.0);
    - *     safeSum += vector.get(i);
    - *   }
    - * }
    - *
    - * }
    - * - *

    Performance evaluations of the micro-benchmark with the JMH framework reveal that, disabling - * null checking has the following effects: 1. The amounts of byte code and assembly code generated - * by JIT are both smaller. 2. The performance improves by about 30% (2.819 ± 0.005 us/op vs. 4.069 - * ± 0.004 us/op). - * - *

    Therefore, for scenarios where the user can be sure that the null-checking is unnecessary, it - * is beneficial to disable it with this configuration. - */ -public class NullCheckingForGet { - - /** The flag to indicate if null checking is enabled for "get" methods. */ - public static final boolean NULL_CHECKING_ENABLED; - - static { - String envProperty = System.getenv("ARROW_ENABLE_NULL_CHECK_FOR_GET"); - String sysProperty = System.getProperty("arrow.enable_null_check_for_get"); - - // The system property has a higher priority than the environmental variable. - String flagValue = sysProperty; - if (flagValue == null) { - flagValue = envProperty; - } - - // The flag is set to false only if the system property/environmental - // variable is explicitly set to "false". - NULL_CHECKING_ENABLED = !"false".equals(flagValue); - } - - private NullCheckingForGet() {} -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java deleted file mode 100644 index 6bfe540d232fc..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java +++ /dev/null @@ -1,375 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.DATA_VECTOR_NAME; - -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.impl.NullReader; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.TransferPair; - -/** A null type vector. */ -public class NullVector implements FieldVector, ValueIterableVector { - - private int valueCount; - - protected Field field; - - /** - * Instantiate a NullVector. - * - * @param name name of the vector - */ - public NullVector(String name) { - this(name, FieldType.nullable(Types.MinorType.NULL.getType())); - } - - /** - * Instantiate a NullVector with the given number of values. - * - * @param name name of the vector - * @param valueCount number of values (i.e., nulls) in this vector. - */ - public NullVector(String name, int valueCount) { - this(new Field(name, FieldType.nullable(Types.MinorType.NULL.getType()), null), valueCount); - } - - /** - * Instantiate a NullVector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector. - */ - public NullVector(String name, FieldType fieldType) { - this(new Field(name, fieldType, null)); - } - - /** - * Instantiate a NullVector. 
- * - * @param field field materialized by this vector. - */ - public NullVector(Field field) { - this(field, 0); - } - - /** - * Instantiate a NullVector with the given number of values. - * - * @param field field materialized by this vector. - * @param valueCount number of values (i.e., nulls) in this vector. - */ - public NullVector(Field field, int valueCount) { - this.field = field; - this.valueCount = valueCount; - } - - @Deprecated - public NullVector() { - this(new Field(DATA_VECTOR_NAME, FieldType.nullable(new ArrowType.Null()), null)); - } - - @Override - public void close() {} - - @Override - public void clear() {} - - @Override - public void reset() {} - - @Override - public Field getField() { - return field; - } - - @Override - public Types.MinorType getMinorType() { - return Types.MinorType.NULL; - } - - @Override - public TransferPair getTransferPair(BufferAllocator allocator) { - return getTransferPair(getName(), allocator); - } - - @Override - public Iterator iterator() { - return Collections.emptyIterator(); - } - - @Override - public int getBufferSize() { - return 0; - } - - @Override - public int getBufferSizeFor(final int valueCount) { - return 0; - } - - @Override - public ArrowBuf[] getBuffers(boolean clear) { - return new ArrowBuf[0]; - } - - @Override - public void allocateNew() throws OutOfMemoryException { - allocateNewSafe(); - } - - @Override - public boolean allocateNewSafe() { - return true; - } - - @Override - public void reAlloc() {} - - /* - * IMPORTANT NOTE - * It's essential that NullVector (and ZeroVector) do not require BufferAllocator for any data storage. - * However, some methods of the parent interface may require passing in a BufferAllocator, even if null. 
- * - * @return null - */ - @Override - public BufferAllocator getAllocator() { - return null; - } - - @Override - public void setInitialCapacity(int numRecords) {} - - @Override - public int getValueCapacity() { - return this.valueCount; - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field.getName()); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return getTransferPair(ref, allocator); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return getTransferPair(field, allocator); - } - - @Override - public TransferPair makeTransferPair(ValueVector target) { - return new TransferImpl((NullVector) target); - } - - @Override - public FieldReader getReader() { - return NullReader.INSTANCE; - } - - @Override - public void initializeChildrenFromFields(List children) { - if (!children.isEmpty()) { - throw new IllegalArgumentException("Null vector has no children"); - } - } - - @Override - public List getChildrenFromFields() { - return Collections.emptyList(); - } - - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - Preconditions.checkArgument(ownBuffers.isEmpty(), "Null vector has no buffers"); - valueCount = fieldNode.getLength(); - } - - @Override - public List getFieldBuffers() { - return Collections.emptyList(); - } - - /** - * Get the inner vectors. - * - * @return the inner vectors for this field as defined by the TypeLayout - * @deprecated This API will be removed as the current implementations no longer support inner - * vectors. 
- */ - @Deprecated - @Override - public List getFieldInnerVectors() { - return Collections.emptyList(); - } - - @Override - public long getValidityBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public long getDataBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public long getOffsetBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getValidityBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getDataBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getOffsetBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public int getValueCount() { - return this.valueCount; - } - - @Override - public void setValueCount(int valueCount) { - this.valueCount = valueCount; - } - - @Override - public Object getObject(int index) { - return null; - } - - @Override - public int getNullCount() { - return this.valueCount; - } - - /** - * Set the element at the given index to null. In a NullVector, this is a no-op. 
- * - * @param index position of element - */ - @Override - public void setNull(int index) {} - - @Override - public boolean isNull(int index) { - return true; - } - - @Override - public int hashCode(int index) { - return 31; - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - return 31; - } - - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } - - @Override - public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { - throw new UnsupportedOperationException(); - } - - @Override - public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { - throw new UnsupportedOperationException(); - } - - @Override - public String getName() { - return this.getField().getName(); - } - - private class TransferImpl implements TransferPair { - NullVector to; - - public TransferImpl(String ref) { - to = new NullVector(ref); - } - - @Deprecated - public TransferImpl() { - to = new NullVector(); - } - - public TransferImpl(NullVector to) { - this.to = to; - } - - @Override - public NullVector getTo() { - return to; - } - - @Override - public void transfer() { - to.valueCount = valueCount; - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - to.valueCount = length; - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - if (toIndex > to.valueCount) { - to.valueCount = toIndex; - } - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/PeriodDuration.java b/java/vector/src/main/java/org/apache/arrow/vector/PeriodDuration.java deleted file mode 100644 index e1c9324375f50..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/PeriodDuration.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static java.time.temporal.ChronoUnit.DAYS; -import static java.time.temporal.ChronoUnit.MONTHS; -import static java.time.temporal.ChronoUnit.NANOS; -import static java.time.temporal.ChronoUnit.SECONDS; -import static java.time.temporal.ChronoUnit.YEARS; - -import java.time.Duration; -import java.time.Period; -import java.time.temporal.ChronoUnit; -import java.time.temporal.Temporal; -import java.time.temporal.TemporalAmount; -import java.time.temporal.TemporalUnit; -import java.time.temporal.UnsupportedTemporalTypeException; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.util.Preconditions; - -/** Combination of Period and Duration for representing this interval type as a POJO. 
*/ -public class PeriodDuration implements TemporalAmount { - - private static final List SUPPORTED_UNITS = - Collections.unmodifiableList( - Arrays.asList(YEARS, MONTHS, DAYS, SECONDS, NANOS)); - private final Period period; - private final Duration duration; - - public PeriodDuration(Period period, Duration duration) { - this.period = Preconditions.checkNotNull(period); - this.duration = Preconditions.checkNotNull(duration); - } - - public Period getPeriod() { - return period; - } - - public Duration getDuration() { - return duration; - } - - @Override - public long get(TemporalUnit unit) { - if (unit instanceof ChronoUnit) { - switch ((ChronoUnit) unit) { - case YEARS: - return period.getYears(); - case MONTHS: - return period.getMonths(); - case DAYS: - return period.getDays(); - case SECONDS: - return duration.getSeconds(); - case NANOS: - return duration.getNano(); - default: - break; - } - } - throw new UnsupportedTemporalTypeException("Unsupported TemporalUnit: " + unit); - } - - @Override - public List getUnits() { - return SUPPORTED_UNITS; - } - - @Override - public Temporal addTo(Temporal temporal) { - return temporal.plus(period).plus(duration); - } - - @Override - public Temporal subtractFrom(Temporal temporal) { - return temporal.minus(period).minus(duration); - } - - /** - * Format this PeriodDuration as an ISO-8601 interval. - * - * @return An ISO-8601 formatted string representing the interval. 
- */ - public String toISO8601IntervalString() { - if (duration.isZero()) { - return period.toString(); - } - String durationString = duration.toString(); - if (period.isZero()) { - return durationString; - } - - // Remove 'P' from duration string and concatenate to produce an ISO-8601 representation - return period + durationString.substring(1); - } - - @Override - public String toString() { - return period.toString() + " " + duration.toString(); - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof PeriodDuration)) { - return false; - } - PeriodDuration other = (PeriodDuration) o; - return this.period.equals(other.period) && this.duration.equals(other.duration); - } - - @Override - public int hashCode() { - return this.period.hashCode() * 31 + this.duration.hashCode(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/SchemaChangeCallBack.java b/java/vector/src/main/java/org/apache/arrow/vector/SchemaChangeCallBack.java deleted file mode 100644 index bfb803af09b88..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/SchemaChangeCallBack.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import org.apache.arrow.vector.util.CallBack; - -/** - * Callback for when the Schema for the Vector changes (generally happens when a vector is promoted - * to a union type from a single value type). - */ -public class SchemaChangeCallBack implements CallBack { - private boolean schemaChanged = false; - - /** Constructs a schema-change callback with the schema-changed state set to {@code false}. */ - public SchemaChangeCallBack() {} - - /** Sets the schema-changed state to {@code true}. */ - @Override - public void doWork() { - schemaChanged = true; - } - - /** - * Returns the value of schema-changed state, resetting the schema-changed state - * to {@code false}. - * - * @return the previous schema-changed state - */ - public boolean getSchemaChangedAndReset() { - final boolean current = schemaChanged; - schemaChanged = false; - return current; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java deleted file mode 100644 index c476694797c4d..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.SmallIntReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableSmallIntHolder; -import org.apache.arrow.vector.holders.SmallIntHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * SmallIntVector implements a fixed width (2 bytes) vector of short values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class SmallIntVector extends BaseFixedWidthVector - implements BaseIntVector, ValueIterableVector { - public static final byte TYPE_WIDTH = 2; - - /** - * Instantiate a SmallIntVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public SmallIntVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.SMALLINT.getType()), allocator); - } - - /** - * Instantiate a SmallIntVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public SmallIntVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a SmallIntVector. This doesn't allocate any memory for the data in vector. 
- * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public SmallIntVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new SmallIntReaderImpl(SmallIntVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.SMALLINT; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public short get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getShort((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableSmallIntHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getShort((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. 
- * - * @param index position of element - * @return element at given index - */ - @Override - public Short getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getShort((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, int value) { - valueBuffer.setShort((long) index * TYPE_WIDTH, value); - } - - private void setValue(int index, short value) { - valueBuffer.setShort((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, int value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, short value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableSmallIntHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, SmallIntHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, int)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, int value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, short)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, short value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableSmallIntHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableSmallIntHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, SmallIntHolder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, SmallIntHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, short value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, short)} except that it handles the case when index is greater - * than or equal to current value capacity of the vector. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, short value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static short get(final ArrowBuf buffer, final int index) { - return buffer.getShort((long) index * TYPE_WIDTH); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((SmallIntVector) to); - } - - @Override - public void setWithPossibleTruncate(int index, long value) { - this.setSafe(index, (int) value); - } - - @Override - public void setUnsafeWithPossibleTruncate(int index, long value) { - this.set(index, (int) value); - } - - @Override - public long getValueAsLong(int index) { - return this.get(index); - } - - private class TransferImpl implements TransferPair { - SmallIntVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new SmallIntVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new SmallIntVector(field, allocator); - } - - public TransferImpl(SmallIntVector to) { - this.to = to; - } - - @Override - public SmallIntVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, SmallIntVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java deleted file mode 100644 index 175303bbafc96..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java +++ /dev/null @@ -1,340 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.TimeMicroReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableTimeMicroHolder; -import org.apache.arrow.vector.holders.TimeMicroHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TimeMicroVector implements a fixed width vector (8 bytes) of time (microsecond resolution) values - * which could be null. A validity buffer (bit vector) is maintained to track which elements in the - * vector are null. - */ -public final class TimeMicroVector extends BaseFixedWidthVector - implements ValueIterableVector { - public static final byte TYPE_WIDTH = 8; - - /** - * Instantiate a TimeMicroVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. 
- */ - public TimeMicroVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.TIMEMICRO.getType()), allocator); - } - - /** - * Instantiate a TimeMicroVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeMicroVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a TimeMicroVector. This doesn't allocate any memory for the data in vector. - * - * @param field Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeMicroVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new TimeMicroReaderImpl(TimeMicroVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.TIMEMICRO; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public long get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. 
- * - * @param index position of element - */ - public void get(int index, NullableTimeMicroHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Long getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, long value) { - valueBuffer.setLong((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, long value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableTimeMicroHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, TimeMicroHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, long)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, long value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableTimeMicroHolder)} except that it handles the case when index - * is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableTimeMicroHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, TimeMicroHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, TimeMicroHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, long value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, long)} except that it handles the case when index is greater than - * or equal to current value capacity of the vector. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, long value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static long get(final ArrowBuf buffer, int index) { - return buffer.getLong((long) index * TYPE_WIDTH); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((TimeMicroVector) to); - } - - private class TransferImpl implements TransferPair { - TimeMicroVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new TimeMicroVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new TimeMicroVector(field, allocator); - } - - public TransferImpl(TimeMicroVector to) { - this.to = to; - } - - @Override - public TimeMicroVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, TimeMicroVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java deleted file mode 100644 index c9c673cd0a5d5..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java +++ /dev/null @@ -1,343 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import java.time.LocalDateTime; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.TimeMilliReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableTimeMilliHolder; -import org.apache.arrow.vector.holders.TimeMilliHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.DateUtility; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TimeMilliVector implements a fixed width (4 bytes) vector of time (millisecond resolution) values - * which could be null. A validity buffer (bit vector) is maintained to track which elements in the - * vector are null. - */ -public final class TimeMilliVector extends BaseFixedWidthVector - implements ValueIterableVector { - public static final byte TYPE_WIDTH = 4; - - /** - * Instantiate a TimeMilliVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public TimeMilliVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.TIMEMILLI.getType()), allocator); - } - - /** - * Instantiate a TimeMilliVector. 
This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeMilliVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a TimeMilliVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeMilliVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new TimeMilliReaderImpl(TimeMilliVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.TIMEMILLI; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public int get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. 
- * - * @param index position of element - */ - public void get(int index, NullableTimeMilliHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public LocalDateTime getObject(int index) { - if (isSet(index) == 0) { - return null; - } - final int millis = valueBuffer.getInt((long) index * TYPE_WIDTH); - // TODO: this doesn't seem right, time not from epoch - return DateUtility.getLocalDateTimeFromEpochMilli(millis); - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, int value) { - valueBuffer.setInt((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, int value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableTimeMilliHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, TimeMilliHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, int)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, int value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableTimeMilliHolder)} except that it handles the case when index - * is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableTimeMilliHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, TimeMilliHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, TimeMilliHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, int value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, int)} except that it handles the case when index is greater than - * or equal to current value capacity of the vector. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, int value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static int get(final ArrowBuf buffer, final int index) { - return buffer.getInt((long) index * TYPE_WIDTH); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((TimeMilliVector) to); - } - - private class TransferImpl implements TransferPair { - TimeMilliVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new TimeMilliVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new TimeMilliVector(field, allocator); - } - - public TransferImpl(TimeMilliVector to) { - this.to = to; - } - - @Override - public TimeMilliVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, TimeMilliVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java deleted file mode 100644 index 105d53d25f051..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java +++ /dev/null @@ -1,340 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.TimeNanoReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableTimeNanoHolder; -import org.apache.arrow.vector.holders.TimeNanoHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TimeNanoVector implements a fixed width vector (8 bytes) of time (nanosecond resolution) values - * which could be null. A validity buffer (bit vector) is maintained to track which elements in the - * vector are null. - */ -public final class TimeNanoVector extends BaseFixedWidthVector - implements ValueIterableVector { - public static final byte TYPE_WIDTH = 8; - - /** - * Instantiate a TimeNanoVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public TimeNanoVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.TIMENANO.getType()), allocator); - } - - /** - * Instantiate a TimeNanoVector. This doesn't allocate any memory for the data in vector. 
- * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeNanoVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a TimeNanoVector. This doesn't allocate any memory for the data in vector. - * - * @param field Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeNanoVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new TimeNanoReaderImpl(TimeNanoVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.TIMENANO; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public long get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. 
- * - * @param index position of element - */ - public void get(int index, NullableTimeNanoHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Long getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, long value) { - valueBuffer.setLong((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, long value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableTimeNanoHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, TimeNanoHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, long)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, long value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableTimeNanoHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableTimeNanoHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, TimeNanoHolder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, TimeNanoHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, long value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, long)} except that it handles the case when index is greater than - * or equal to current value capacity of the vector. 
- * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, long value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static long get(final ArrowBuf buffer, final int index) { - return buffer.getLong((long) index * TYPE_WIDTH); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((TimeNanoVector) to); - } - - private class TransferImpl implements TransferPair { - TimeNanoVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new TimeNanoVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new TimeNanoVector(field, allocator); - } - - public TransferImpl(TimeNanoVector to) { - this.to = to; - } - - @Override - public TimeNanoVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, TimeNanoVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java deleted file mode 100644 index 1b9dbf9193196..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java +++ /dev/null @@ -1,340 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.TimeSecReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableTimeSecHolder; -import org.apache.arrow.vector.holders.TimeSecHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TimeSecVector implements a fixed width (4 bytes) vector of time (seconds resolution) values which - * could be null. A validity buffer (bit vector) is maintained to track which elements in the vector - * are null. - */ -public final class TimeSecVector extends BaseFixedWidthVector - implements ValueIterableVector { - public static final byte TYPE_WIDTH = 4; - - /** - * Instantiate a TimeSecVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public TimeSecVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.TIMESEC.getType()), allocator); - } - - /** - * Instantiate a TimeSecVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeSecVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a TimeSecVector. This doesn't allocate any memory for the data in vector. 
- * - * @param field Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeSecVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new TimeSecReaderImpl(TimeSecVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.TIMESEC; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public int get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableTimeSecHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. 
- * - * @param index position of element - * @return element at given index - */ - @Override - public Integer getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getInt((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, int value) { - valueBuffer.setInt((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, int value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableTimeSecHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, TimeSecHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, int)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, int value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableTimeSecHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableTimeSecHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, TimeSecHolder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, TimeSecHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, int value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, int)} except that it handles the case when index is greater than - * or equal to current value capacity of the vector. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, int value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static int get(final ArrowBuf buffer, final int index) { - return buffer.getInt((long) index * TYPE_WIDTH); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising of this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((TimeSecVector) to); - } - - private class TransferImpl implements TransferPair { - TimeSecVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new TimeSecVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new TimeSecVector(field, allocator); - } - - public TransferImpl(TimeSecVector to) { - this.to = to; - } - - @Override - public TimeSecVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, TimeSecVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java deleted file mode 100644 index abaefcfc1238b..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.TimeStampMicroTZReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableTimeStampMicroTZHolder; -import org.apache.arrow.vector.holders.TimeStampMicroTZHolder; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TimeStampMicroTZVector implements a fixed width vector (8 bytes) of timestamp (microsecond - * resolution) values which could be null. A validity buffer (bit vector) is maintained to track - * which elements in the vector are null. - */ -public final class TimeStampMicroTZVector extends TimeStampVector - implements ValueIterableVector { - private final String timeZone; - - /** - * Instantiate a TimeStampMicroTZVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. 
- */ - public TimeStampMicroTZVector(String name, BufferAllocator allocator, String timeZone) { - this( - name, - FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MICROSECOND, timeZone)), - allocator); - } - - /** - * Instantiate a TimeStampMicroTZVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampMicroTZVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, fieldType, allocator); - ArrowType.Timestamp arrowType = (ArrowType.Timestamp) fieldType.getType(); - timeZone = arrowType.getTimezone(); - } - - /** - * Instantiate a TimeStampMicroTZVector. This doesn't allocate any memory for the data in vector. - * - * @param field Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampMicroTZVector(Field field, BufferAllocator allocator) { - super(field, allocator); - ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType(); - timeZone = arrowType.getTimezone(); - } - - @Override - protected FieldReader getReaderImpl() { - return new TimeStampMicroTZReaderImpl(TimeStampMicroTZVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.TIMESTAMPMICROTZ; - } - - /** - * Get the time zone of the timestamps stored in this vector. - * - * @return the time zone of the timestamps stored in this vector. 
- */ - public String getTimeZone() { - return this.timeZone; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableTimeStampMicroTZHolder holder) { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH); - holder.timezone = timeZone; - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Long getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableTimeStampMicroTZHolder holder) - throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (!this.timeZone.equals(holder.timezone)) { - throw new IllegalArgumentException( - String.format( - "holder.timezone: %s not equal to vector timezone: %s", - holder.timezone, this.timeZone)); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, TimeStampMicroTZHolder holder) { - if (!this.timeZone.equals(holder.timezone)) { - throw new IllegalArgumentException( - String.format( - "holder.timezone: %s not equal to vector timezone: %s", - holder.timezone, this.timeZone)); - } - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, NullableTimeStampMicroTZHolder)} except that it handles the case when - * index is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableTimeStampMicroTZHolder holder) - throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, TimeStampMicroTZHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, TimeStampMicroTZHolder holder) { - handleSafe(index); - set(index, holder); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - TimeStampMicroTZVector to = new TimeStampMicroTZVector(ref, field.getFieldType(), allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - TimeStampMicroTZVector to = new TimeStampMicroTZVector(field, allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((TimeStampMicroTZVector) to); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java deleted file mode 100644 index 5f22d075cae28..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import java.time.LocalDateTime; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.TimeStampMicroReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableTimeStampMicroHolder; -import org.apache.arrow.vector.holders.TimeStampMicroHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.DateUtility; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TimeStampMicroVector implements a fixed width vector (8 bytes) of timestamp (microsecond - * resolution) values which could be null. A validity buffer (bit vector) is maintained to track - * which elements in the vector are null. - */ -public final class TimeStampMicroVector extends TimeStampVector - implements ValueIterableVector { - - /** - * Instantiate a TimeStampMicroVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public TimeStampMicroVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.TIMESTAMPMICRO.getType()), allocator); - } - - /** - * Instantiate a TimeStampMicroVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampMicroVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, fieldType, allocator); - } - - /** - * Instantiate a TimeStampMicroVector. This doesn't allocate any memory for the data in vector. 
- * - * @param field Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampMicroVector(Field field, BufferAllocator allocator) { - super(field, allocator); - } - - @Override - protected FieldReader getReaderImpl() { - return new TimeStampMicroReaderImpl(TimeStampMicroVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.TIMESTAMPMICRO; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableTimeStampMicroHolder holder) { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public LocalDateTime getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - final long micros = valueBuffer.getLong((long) index * TYPE_WIDTH); - return DateUtility.getLocalDateTimeFromEpochMicro(micros); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableTimeStampMicroHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, TimeStampMicroHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, NullableTimeStampMicroHolder)} except that it handles the case when - * index is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableTimeStampMicroHolder holder) - throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, TimeStampMicroHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, TimeStampMicroHolder holder) { - handleSafe(index); - set(index, holder); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. 
- * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - TimeStampMicroVector to = new TimeStampMicroVector(ref, field.getFieldType(), allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - TimeStampMicroVector to = new TimeStampMicroVector(field, allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. - * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((TimeStampMicroVector) to); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java deleted file mode 100644 index b5e5fb1be17b4..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.TimeStampMilliTZReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableTimeStampMilliTZHolder; -import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TimeStampMilliTZVector implements a fixed width vector (8 bytes) of timestamp (millisecond - * resolution) values which could be null. A validity buffer (bit vector) is maintained to track - * which elements in the vector are null. - */ -public final class TimeStampMilliTZVector extends TimeStampVector - implements ValueIterableVector { - private final String timeZone; - - /** - * Instantiate a TimeStampMilliTZVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. 
- */ - public TimeStampMilliTZVector(String name, BufferAllocator allocator, String timeZone) { - this( - name, - FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, timeZone)), - allocator); - } - - /** - * Instantiate a TimeStampMilliTZVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampMilliTZVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, fieldType, allocator); - ArrowType.Timestamp arrowType = (ArrowType.Timestamp) fieldType.getType(); - timeZone = arrowType.getTimezone(); - } - - /** - * Instantiate a TimeStampMilliTZVector. This doesn't allocate any memory for the data in vector. - * - * @param field Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampMilliTZVector(Field field, BufferAllocator allocator) { - super(field, allocator); - ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType(); - timeZone = arrowType.getTimezone(); - } - - @Override - protected FieldReader getReaderImpl() { - return new TimeStampMilliTZReaderImpl(TimeStampMilliTZVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.TIMESTAMPMILLITZ; - } - - /** - * Get the time zone of the timestamps stored in this vector. - * - * @return the time zone of the timestamps stored in this vector. 
- */ - public String getTimeZone() { - return this.timeZone; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableTimeStampMilliTZHolder holder) { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH); - holder.timezone = timeZone; - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Long getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableTimeStampMilliTZHolder holder) - throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (!this.timeZone.equals(holder.timezone)) { - throw new IllegalArgumentException( - String.format( - "holder.timezone: %s not equal to vector timezone: %s", - holder.timezone, this.timeZone)); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, TimeStampMilliTZHolder holder) { - if (!this.timeZone.equals(holder.timezone)) { - throw new IllegalArgumentException( - String.format( - "holder.timezone: %s not equal to vector timezone: %s", - holder.timezone, this.timeZone)); - } - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, NullableTimeStampMilliTZHolder)} except that it handles the case when - * index is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableTimeStampMilliTZHolder holder) - throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, TimeStampMilliTZHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, TimeStampMilliTZHolder holder) { - handleSafe(index); - set(index, holder); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - TimeStampMilliTZVector to = new TimeStampMilliTZVector(ref, field.getFieldType(), allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - TimeStampMilliTZVector to = new TimeStampMilliTZVector(field, allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((TimeStampMilliTZVector) to); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java deleted file mode 100644 index cf9a42770a6de..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import java.time.LocalDateTime; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.TimeStampMilliReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder; -import org.apache.arrow.vector.holders.TimeStampMilliHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.DateUtility; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TimeStampMilliVector implements a fixed width vector (8 bytes) of timestamp (millisecond - * resolution) values which could be null. A validity buffer (bit vector) is maintained to track - * which elements in the vector are null. - */ -public final class TimeStampMilliVector extends TimeStampVector - implements ValueIterableVector { - - /** - * Instantiate a TimeStampMilliVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public TimeStampMilliVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.TIMESTAMPMILLI.getType()), allocator); - } - - /** - * Instantiate a TimeStampMilliVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampMilliVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, fieldType, allocator); - } - - /** - * Instantiate a TimeStampMilliVector. This doesn't allocate any memory for the data in vector. 
- * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampMilliVector(Field field, BufferAllocator allocator) { - super(field, allocator); - } - - @Override - protected FieldReader getReaderImpl() { - return new TimeStampMilliReaderImpl(TimeStampMilliVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.TIMESTAMPMILLI; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableTimeStampMilliHolder holder) { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public LocalDateTime getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - final long millis = valueBuffer.getLong((long) index * TYPE_WIDTH); - return DateUtility.getLocalDateTimeFromEpochMilli(millis); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableTimeStampMilliHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, TimeStampMilliHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, NullableTimeStampMilliHolder)} except that it handles the case when - * index is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableTimeStampMilliHolder holder) - throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, TimeStampMilliHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, TimeStampMilliHolder holder) { - handleSafe(index); - set(index, holder); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. 
- * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - TimeStampMilliVector to = new TimeStampMilliVector(ref, field.getFieldType(), allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - TimeStampMilliVector to = new TimeStampMilliVector(field, allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. - * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((TimeStampMilliVector) to); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java deleted file mode 100644 index 2386b3a8598e3..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.TimeStampNanoTZReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableTimeStampNanoTZHolder; -import org.apache.arrow.vector.holders.TimeStampNanoTZHolder; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TimeStampNanoTZVector implements a fixed width vector (8 bytes) of timestamp (nanosecond - * resolution) values which could be null. A validity buffer (bit vector) is maintained to track - * which elements in the vector are null. - */ -public final class TimeStampNanoTZVector extends TimeStampVector - implements ValueIterableVector { - private final String timeZone; - - /** - * Instantiate a TimeStampNanoTZVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. 
- */ - public TimeStampNanoTZVector(String name, BufferAllocator allocator, String timeZone) { - this( - name, - FieldType.nullable(new ArrowType.Timestamp(TimeUnit.NANOSECOND, timeZone)), - allocator); - } - - /** - * Instantiate a TimeStampNanoTZVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampNanoTZVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, fieldType, allocator); - ArrowType.Timestamp arrowType = (ArrowType.Timestamp) fieldType.getType(); - timeZone = arrowType.getTimezone(); - } - - /** - * Instantiate a TimeStampNanoTZVector. This doesn't allocate any memory for the data in vector. - * - * @param field Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampNanoTZVector(Field field, BufferAllocator allocator) { - super(field, allocator); - ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType(); - timeZone = arrowType.getTimezone(); - } - - @Override - protected FieldReader getReaderImpl() { - return new TimeStampNanoTZReaderImpl(TimeStampNanoTZVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.TIMESTAMPNANOTZ; - } - - /** - * Get the time zone of the timestamps stored in this vector. - * - * @return the time zone of the timestamps stored in this vector. 
- */ - public String getTimeZone() { - return this.timeZone; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableTimeStampNanoTZHolder holder) { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH); - holder.timezone = timeZone; - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Long getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableTimeStampNanoTZHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (!this.timeZone.equals(holder.timezone)) { - throw new IllegalArgumentException( - String.format( - "holder.timezone: %s not equal to vector timezone: %s", - holder.timezone, this.timeZone)); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, TimeStampNanoTZHolder holder) { - if (!this.timeZone.equals(holder.timezone)) { - throw new IllegalArgumentException( - String.format( - "holder.timezone: %s not equal to vector timezone: %s", - holder.timezone, this.timeZone)); - } - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, NullableTimeStampNanoTZHolder)} except that it handles the case when - * index is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableTimeStampNanoTZHolder holder) - throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, TimeStampNanoTZHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, TimeStampNanoTZHolder holder) { - handleSafe(index); - set(index, holder); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - TimeStampNanoTZVector to = new TimeStampNanoTZVector(ref, field.getFieldType(), allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - TimeStampNanoTZVector to = new TimeStampNanoTZVector(field, allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((TimeStampNanoTZVector) to); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java deleted file mode 100644 index ee221990253ca..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import java.time.LocalDateTime; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.TimeStampNanoReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableTimeStampNanoHolder; -import org.apache.arrow.vector.holders.TimeStampNanoHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.DateUtility; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TimeStampNanoVector implements a fixed width vector (8 bytes) of timestamp (nanosecond - * resolution) values which could be null. A validity buffer (bit vector) is maintained to track - * which elements in the vector are null. - */ -public final class TimeStampNanoVector extends TimeStampVector - implements ValueIterableVector { - - /** - * Instantiate a TimeStampNanoVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public TimeStampNanoVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.TIMESTAMPNANO.getType()), allocator); - } - - /** - * Instantiate a TimeStampNanoVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampNanoVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, fieldType, allocator); - } - - /** - * Instantiate a TimeStampNanoVector. This doesn't allocate any memory for the data in vector. 
- * - * @param field Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampNanoVector(Field field, BufferAllocator allocator) { - super(field, allocator); - } - - @Override - protected FieldReader getReaderImpl() { - return new TimeStampNanoReaderImpl(TimeStampNanoVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.TIMESTAMPNANO; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableTimeStampNanoHolder holder) { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public LocalDateTime getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - final long nanos = valueBuffer.getLong((long) index * TYPE_WIDTH); - return DateUtility.getLocalDateTimeFromEpochNano(nanos); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableTimeStampNanoHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, TimeStampNanoHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, NullableTimeStampNanoHolder)} except that it handles the case when - * index is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableTimeStampNanoHolder holder) - throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, TimeStampNanoHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, TimeStampNanoHolder holder) { - handleSafe(index); - set(index, holder); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. 
- * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - TimeStampNanoVector to = new TimeStampNanoVector(ref, field.getFieldType(), allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - TimeStampNanoVector to = new TimeStampNanoVector(field, allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. - * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((TimeStampNanoVector) to); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java deleted file mode 100644 index f1774f27035cb..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.TimeStampSecTZReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableTimeStampSecTZHolder; -import org.apache.arrow.vector.holders.TimeStampSecTZHolder; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TimeStampSecTZVector implements a fixed width vector (8 bytes) of timestamp (seconds resolution) - * values which could be null. A validity buffer (bit vector) is maintained to track which elements - * in the vector are null. - */ -public final class TimeStampSecTZVector extends TimeStampVector - implements ValueIterableVector { - private final String timeZone; - - /** - * Instantiate a TimeStampSecTZVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public TimeStampSecTZVector(String name, BufferAllocator allocator, String timeZone) { - this(name, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.SECOND, timeZone)), allocator); - } - - /** - * Instantiate a TimeStampSecTZVector. 
This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampSecTZVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, fieldType, allocator); - ArrowType.Timestamp arrowType = (ArrowType.Timestamp) fieldType.getType(); - timeZone = arrowType.getTimezone(); - } - - /** - * Instantiate a TimeStampSecTZVector. This doesn't allocate any memory for the data in vector. - * - * @param field Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampSecTZVector(Field field, BufferAllocator allocator) { - super(field, allocator); - ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType(); - timeZone = arrowType.getTimezone(); - } - - @Override - protected FieldReader getReaderImpl() { - return new TimeStampSecTZReaderImpl(TimeStampSecTZVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.TIMESTAMPSECTZ; - } - - /** - * Get the time zone of the timestamps stored in this vector. - * - * @return the time zone of the timestamps stored in this vector. - */ - public String getTimeZone() { - return this.timeZone; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. 
- * - * @param index position of element - */ - public void get(int index, NullableTimeStampSecTZHolder holder) { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH); - holder.timezone = timeZone; - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - public Long getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableTimeStampSecTZHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (!this.timeZone.equals(holder.timezone)) { - throw new IllegalArgumentException( - String.format( - "holder.timezone: %s not equal to vector timezone: %s", - holder.timezone, this.timeZone)); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, TimeStampSecTZHolder holder) { - if (!this.timeZone.equals(holder.timezone)) { - throw new IllegalArgumentException( - String.format( - "holder.timezone: %s not equal to vector timezone: %s", - holder.timezone, this.timeZone)); - } - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, NullableTimeStampSecTZHolder)} except that it handles the case when - * index is greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableTimeStampSecTZHolder holder) - throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, TimeStampSecTZHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, TimeStampSecTZHolder holder) { - handleSafe(index); - set(index, holder); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising of this and a target vector of the same type. 
- * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - TimeStampSecTZVector to = new TimeStampSecTZVector(ref, field.getFieldType(), allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair comprising of this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - TimeStampSecTZVector to = new TimeStampSecTZVector(field, allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. - * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((TimeStampSecTZVector) to); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java deleted file mode 100644 index fb79194cc0e19..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import java.time.LocalDateTime; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.TimeStampSecReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableTimeStampSecHolder; -import org.apache.arrow.vector.holders.TimeStampSecHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.DateUtility; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TimeStampSecVector implements a fixed width vector (8 bytes) of timestamp (seconds resolution) - * values which could be null. A validity buffer (bit vector) is maintained to track which elements - * in the vector are null. - */ -public final class TimeStampSecVector extends TimeStampVector - implements ValueIterableVector { - - /** - * Instantiate a TimeStampSecVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public TimeStampSecVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.TIMESTAMPSEC.getType()), allocator); - } - - /** - * Instantiate a TimeStampSecVector. This doesn't allocate any memory for the data in vector. 
- * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampSecVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, fieldType, allocator); - } - - /** - * Instantiate a TimeStampSecVector. This doesn't allocate any memory for the data in vector. - * - * @param field Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampSecVector(Field field, BufferAllocator allocator) { - super(field, allocator); - } - - @Override - protected FieldReader getReaderImpl() { - return new TimeStampSecReaderImpl(TimeStampSecVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.TIMESTAMPSEC; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableTimeStampSecHolder holder) { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. 
- * - * @param index position of element - * @return element at given index - */ - @Override - public LocalDateTime getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - final long secs = valueBuffer.getLong((long) index * TYPE_WIDTH); - final long millis = java.util.concurrent.TimeUnit.SECONDS.toMillis(secs); - return DateUtility.getLocalDateTimeFromEpochMilli(millis); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableTimeStampSecHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, TimeStampSecHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, NullableTimeStampSecHolder)} except that it handles the case when - * index is greater than or equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableTimeStampSecHolder holder) - throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, TimeStampSecHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, TimeStampSecHolder holder) { - handleSafe(index); - set(index, holder); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - TimeStampSecVector to = new TimeStampSecVector(ref, field.getFieldType(), allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - TimeStampSecVector to = new TimeStampSecVector(field, allocator); - return new TransferImpl(to); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((TimeStampSecVector) to); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java deleted file mode 100644 index 59499581c321a..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TimeStampVector is an abstract interface for fixed width vector (8 bytes) of timestamp values - * which could be null. A validity buffer (bit vector) is maintained to track which elements in the - * vector are null. 
- */ -public abstract class TimeStampVector extends BaseFixedWidthVector { - public static final byte TYPE_WIDTH = 8; - - /** - * Instantiate a TimeStampVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a TimeStampVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public TimeStampVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public long get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - protected void setValue(int index, long value) { - valueBuffer.setLong((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. 
- * - * @param index position of element - * @param value value of element - */ - public void set(int index, long value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Same as {@link #set(int, long)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, long value) { - handleSafe(index); - set(index, value); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, long value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, long)} except that it handles the case when index is greater than - * or equal to current value capacity of the vector. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, long value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static long get(final ArrowBuf buffer, final int index) { - return buffer.getLong((long) index * TYPE_WIDTH); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** {@link TransferPair} for {@link TimeStampVector}. */ - public class TransferImpl implements TransferPair { - TimeStampVector to; - - public TransferImpl(TimeStampVector to) { - this.to = to; - } - - @Override - public TimeStampVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, TimeStampVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java deleted file mode 100644 index 022475e5e1b05..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.TinyIntReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableTinyIntHolder; -import org.apache.arrow.vector.holders.TinyIntHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * TinyIntVector implements a fixed width (1 bytes) vector of byte values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class TinyIntVector extends BaseFixedWidthVector - implements BaseIntVector, ValueIterableVector { - public static final byte TYPE_WIDTH = 1; - - /** - * Instantiate a TinyIntVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public TinyIntVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.TINYINT.getType()), allocator); - } - - /** - * Instantiate a TinyIntVector. This doesn't allocate any memory for the data in vector. 
- * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public TinyIntVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a TinyIntVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public TinyIntVector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new TinyIntReaderImpl(TinyIntVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.TINYINT; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public byte get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getByte(index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. 
- * - * @param index position of element - */ - public void get(int index, NullableTinyIntHolder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getByte(index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Byte getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getByte(index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, int value) { - valueBuffer.setByte(index * TYPE_WIDTH, value); - } - - private void setValue(int index, byte value) { - valueBuffer.setByte(index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, int value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, byte value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableTinyIntHolder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, TinyIntHolder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, int)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, int value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, byte)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, byte value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableTinyIntHolder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableTinyIntHolder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, TinyIntHolder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, TinyIntHolder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Store the given value at a particular position in the vector. isSet indicates whether the value - * is NULL or not. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void set(int index, int isSet, byte value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, byte)} except that it handles the case when index is greater than - * or equal to current value capacity of the vector. - * - * @param index position of the new value - * @param isSet 0 for NULL value, 1 otherwise - * @param value element value - */ - public void setSafe(int index, int isSet, byte value) { - handleSafe(index); - set(index, isSet, value); - } - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method should not be used externally. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static byte get(final ArrowBuf buffer, final int index) { - return buffer.getByte(index * TYPE_WIDTH); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((TinyIntVector) to); - } - - @Override - public void setWithPossibleTruncate(int index, long value) { - this.setSafe(index, (int) value); - } - - @Override - public void setUnsafeWithPossibleTruncate(int index, long value) { - this.set(index, (int) value); - } - - @Override - public long getValueAsLong(int index) { - return this.get(index); - } - - private class TransferImpl implements TransferPair { - TinyIntVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new TinyIntVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new TinyIntVector(field, allocator); - } - - public TransferImpl(TinyIntVector to) { - this.to = to; - } - - @Override - public TinyIntVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, TinyIntVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java deleted file mode 100644 index fa75ef04577a3..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java +++ /dev/null @@ -1,539 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static java.util.Arrays.asList; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BufferLayout.BufferType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor; -import org.apache.arrow.vector.types.pojo.ArrowType.Binary; -import org.apache.arrow.vector.types.pojo.ArrowType.BinaryView; -import org.apache.arrow.vector.types.pojo.ArrowType.Bool; -import org.apache.arrow.vector.types.pojo.ArrowType.Date; -import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; -import org.apache.arrow.vector.types.pojo.ArrowType.Duration; -import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary; -import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; -import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; -import org.apache.arrow.vector.types.pojo.ArrowType.Int; -import org.apache.arrow.vector.types.pojo.ArrowType.Interval; -import org.apache.arrow.vector.types.pojo.ArrowType.LargeBinary; -import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8; -import org.apache.arrow.vector.types.pojo.ArrowType.Map; -import org.apache.arrow.vector.types.pojo.ArrowType.Null; -import org.apache.arrow.vector.types.pojo.ArrowType.RunEndEncoded; -import org.apache.arrow.vector.types.pojo.ArrowType.Struct; -import org.apache.arrow.vector.types.pojo.ArrowType.Time; -import 
org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; -import org.apache.arrow.vector.types.pojo.ArrowType.Union; -import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; -import org.apache.arrow.vector.types.pojo.ArrowType.Utf8View; - -/** - * The buffer layout of vectors for a given type. It defines its own buffers followed by the buffers - * for the children if it is a nested type (Struct_, List, Union) - */ -public class TypeLayout { - - /** Constructs a new {@link TypeLayout} for the given arrowType. */ - public static TypeLayout getTypeLayout(final ArrowType arrowType) { - TypeLayout layout = - arrowType.accept( - new ArrowTypeVisitor() { - - @Override - public TypeLayout visit(Int type) { - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth())); - } - - @Override - public TypeLayout visit(Union type) { - List vectors; - switch (type.getMode()) { - case Dense: - vectors = - asList( - BufferLayout.typeBuffer(), - BufferLayout.offsetBuffer() // offset to find the vector - ); - break; - case Sparse: - vectors = - asList( - BufferLayout.typeBuffer() // type of the value at the index or 0 if null - ); - break; - default: - throw new UnsupportedOperationException( - "Unsupported Union Mode: " + type.getMode()); - } - return new TypeLayout(vectors); - } - - @Override - public TypeLayout visit(Struct type) { - List vectors = asList(BufferLayout.validityVector()); - return new TypeLayout(vectors); - } - - @Override - public TypeLayout visit(Timestamp type) { - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); - } - - @Override - public TypeLayout visit(ArrowType.List type) { - List vectors = - asList(BufferLayout.validityVector(), BufferLayout.offsetBuffer()); - return new TypeLayout(vectors); - } - - @Override - public TypeLayout visit(ArrowType.ListView type) { - List vectors = - asList( - BufferLayout.validityVector(), - BufferLayout.offsetBuffer(), - BufferLayout.sizeBuffer()); - return new TypeLayout(vectors); - } - - @Override 
- public TypeLayout visit(ArrowType.LargeListView type) { - List vectors = - asList( - BufferLayout.validityVector(), - BufferLayout.largeOffsetBuffer(), - BufferLayout.largeSizeBuffer()); - return new TypeLayout(vectors); - } - - @Override - public TypeLayout visit(ArrowType.LargeList type) { - List vectors = - asList(BufferLayout.validityVector(), BufferLayout.largeOffsetBuffer()); - return new TypeLayout(vectors); - } - - @Override - public TypeLayout visit(FixedSizeList type) { - List vectors = asList(BufferLayout.validityVector()); - return new TypeLayout(vectors); - } - - @Override - public TypeLayout visit(Map type) { - List vectors = - asList(BufferLayout.validityVector(), BufferLayout.offsetBuffer()); - return new TypeLayout(vectors); - } - - @Override - public TypeLayout visit(FloatingPoint type) { - int bitWidth; - switch (type.getPrecision()) { - case HALF: - bitWidth = 16; - break; - case SINGLE: - bitWidth = 32; - break; - case DOUBLE: - bitWidth = 64; - break; - default: - throw new UnsupportedOperationException( - "Unsupported Precision: " + type.getPrecision()); - } - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(bitWidth)); - } - - @Override - public TypeLayout visit(Decimal type) { - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth())); - } - - @Override - public TypeLayout visit(FixedSizeBinary type) { - return newFixedWidthTypeLayout( - new BufferLayout(BufferType.DATA, type.getByteWidth() * 8)); - } - - @Override - public TypeLayout visit(Bool type) { - return newFixedWidthTypeLayout(BufferLayout.booleanVector()); - } - - @Override - public TypeLayout visit(Binary type) { - return newVariableWidthTypeLayout(); - } - - @Override - public TypeLayout visit(ArrowType.BinaryView type) { - return newVariableWidthViewTypeLayout(); - } - - @Override - public TypeLayout visit(Utf8 type) { - return newVariableWidthTypeLayout(); - } - - @Override - public TypeLayout visit(Utf8View type) { - return 
newVariableWidthViewTypeLayout(); - } - - @Override - public TypeLayout visit(LargeUtf8 type) { - return newLargeVariableWidthTypeLayout(); - } - - @Override - public TypeLayout visit(LargeBinary type) { - return newLargeVariableWidthTypeLayout(); - } - - private TypeLayout newVariableWidthTypeLayout() { - return newPrimitiveTypeLayout( - BufferLayout.validityVector(), - BufferLayout.offsetBuffer(), - BufferLayout.byteVector()); - } - - private TypeLayout newVariableWidthViewTypeLayout() { - return new TypeLayout( - false, BufferLayout.validityVector(), BufferLayout.viewVector()); - } - - private TypeLayout newLargeVariableWidthTypeLayout() { - // NOTE: only considers the non variadic buffers - return newPrimitiveTypeLayout( - BufferLayout.validityVector(), - BufferLayout.largeOffsetBuffer(), - BufferLayout.byteVector()); - } - - private TypeLayout newPrimitiveTypeLayout(BufferLayout... vectors) { - return new TypeLayout(asList(vectors)); - } - - public TypeLayout newFixedWidthTypeLayout(BufferLayout dataVector) { - return newPrimitiveTypeLayout(BufferLayout.validityVector(), dataVector); - } - - @Override - public TypeLayout visit(Null type) { - return new TypeLayout(Collections.emptyList()); - } - - @Override - public TypeLayout visit(Date type) { - switch (type.getUnit()) { - case DAY: - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(32)); - case MILLISECOND: - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); - default: - throw new UnsupportedOperationException("Unknown unit " + type.getUnit()); - } - } - - @Override - public TypeLayout visit(Time type) { - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth())); - } - - @Override - public TypeLayout visit(Interval type) { - switch (type.getUnit()) { - case DAY_TIME: - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); - case YEAR_MONTH: - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(32)); - case MONTH_DAY_NANO: - return 
newFixedWidthTypeLayout(BufferLayout.dataBuffer(128)); - default: - throw new UnsupportedOperationException("Unknown unit " + type.getUnit()); - } - } - - @Override - public TypeLayout visit(Duration type) { - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); - } - - @Override - public TypeLayout visit(RunEndEncoded type) { - return new TypeLayout(Collections.emptyList()); - } - }); - return layout; - } - - /** Gets the number of {@link BufferLayout}s for the given arrowType. */ - public static int getTypeBufferCount(final ArrowType arrowType) { - return arrowType.accept( - new ArrowTypeVisitor() { - - /** - * All fixed width vectors have a common number of buffers 2: one validity buffer, plus a - * data buffer. - */ - static final int FIXED_WIDTH_BUFFER_COUNT = 2; - - /** - * All variable width vectors have a common number of buffers 3: a validity buffer, an - * offset buffer, and a data buffer. - */ - static final int VARIABLE_WIDTH_BUFFER_COUNT = 3; - - @Override - public Integer visit(Int type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Union type) { - switch (type.getMode()) { - case Dense: - // TODO: validate this - return 2; - case Sparse: - // type buffer - return 1; - default: - throw new UnsupportedOperationException( - "Unsupported Union Mode: " + type.getMode()); - } - } - - @Override - public Integer visit(Struct type) { - // validity buffer - return 1; - } - - @Override - public Integer visit(Timestamp type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(ArrowType.List type) { - // validity buffer + offset buffer - return 2; - } - - @Override - public Integer visit(ArrowType.ListView type) { - // validity buffer + offset buffer + size buffer - return 3; - } - - @Override - public Integer visit(ArrowType.LargeList type) { - // validity buffer + offset buffer - return 2; - } - - @Override - public Integer visit(ArrowType.LargeListView type) { - // validity buffer + offset 
buffer + size buffer - return 3; - } - - @Override - public Integer visit(FixedSizeList type) { - // validity buffer - return 1; - } - - @Override - public Integer visit(Map type) { - // validity buffer + offset buffer - return 2; - } - - @Override - public Integer visit(FloatingPoint type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Decimal type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(FixedSizeBinary type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Bool type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Binary type) { - return VARIABLE_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(BinaryView type) { - // NOTE: only consider the validity and view buffers - return 2; - } - - @Override - public Integer visit(Utf8 type) { - return VARIABLE_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Utf8View type) { - // NOTE: only consider the validity and view buffers - return 2; - } - - @Override - public Integer visit(LargeUtf8 type) { - return VARIABLE_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(LargeBinary type) { - return VARIABLE_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Null type) { - return 0; - } - - @Override - public Integer visit(Date type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Time type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Interval type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Duration type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(RunEndEncoded type) { - return 0; - } - }); - } - - private final List bufferLayouts; - - private final boolean isFixedBufferCount; - - /** - * Constructs a new {@link TypeLayout}. 
- * - * @param bufferLayouts the individual {@linkplain BufferLayout}s for the given type - * @param isFixedBufferCount whether the number of buffers is fixed - */ - public TypeLayout(List bufferLayouts, boolean isFixedBufferCount) { - super(); - this.bufferLayouts = Preconditions.checkNotNull(bufferLayouts); - this.isFixedBufferCount = isFixedBufferCount; - } - - public TypeLayout(List bufferLayouts) { - this(bufferLayouts, true); - } - - public TypeLayout(BufferLayout... bufferLayouts) { - this(asList(bufferLayouts), true); - } - - public TypeLayout(boolean isFixedBufferCount, BufferLayout... bufferLayouts) { - this(asList(bufferLayouts), isFixedBufferCount); - } - - /** Returns the individual {@linkplain BufferLayout}s for the given type. */ - public List getBufferLayouts() { - return bufferLayouts; - } - - /** - * Returns the types of each buffer for this layout. A layout can consist of multiple buffers for - * example a validity bitmap buffer, a value buffer or an offset buffer. - */ - public List getBufferTypes() { - List types = new ArrayList<>(bufferLayouts.size()); - for (BufferLayout vector : bufferLayouts) { - types.add(vector.getType()); - } - return types; - } - - /** - * Determines whether the buffer count is fixed for the given type. 
- * - * @return true if the buffer count is fixed, false otherwise - */ - public boolean isFixedBufferCount() { - return isFixedBufferCount; - } - - @Override - public String toString() { - return bufferLayouts.toString(); - } - - @Override - public int hashCode() { - return bufferLayouts.hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof TypeLayout)) { - return false; - } - TypeLayout other = (TypeLayout) obj; - return bufferLayouts.equals(other.bufferLayouts); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java deleted file mode 100644 index 83f8d9eeb87d1..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.UInt1ReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableUInt1Holder; -import org.apache.arrow.vector.holders.UInt1Holder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.ValueVectorUtility; - -/** - * UInt1Vector implements a fixed width (1 bytes) vector of integer values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class UInt1Vector extends BaseFixedWidthVector - implements BaseIntVector, ValueIterableVector { - /** The mask to use when promoting the unsigned byte value to an integer. */ - public static final int PROMOTION_MASK = 0xFF; - - /** The maximum 8-bit unsigned integer. */ - public static final byte MAX_UINT1 = (byte) 0XFF; - - public static final byte TYPE_WIDTH = 1; - - public UInt1Vector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.UINT1.getType()), allocator); - } - - public UInt1Vector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Constructor for UInt1Vector. 
- * - * @param field Field type - * @param allocator Allocator type - */ - public UInt1Vector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new UInt1ReaderImpl(UInt1Vector.this); - } - - @Override - public MinorType getMinorType() { - return MinorType.UINT1; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    To avoid overflow, the returned type is one step up from the signed type. - * - *

    This method is mainly meant for integration tests. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static short getNoOverflow(final ArrowBuf buffer, final int index) { - byte b = buffer.getByte(index * TYPE_WIDTH); - return (short) (PROMOTION_MASK & b); - } - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public byte get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getByte(index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableUInt1Holder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getByte(index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Byte getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getByte(index * TYPE_WIDTH); - } - } - - /** - * Returns the value stored at index without the potential for overflow. 
- * - * @param index position of element - * @return element at given index - */ - public Short getObjectNoOverflow(int index) { - if (isSet(index) == 0) { - return null; - } else { - return getNoOverflow(valueBuffer, index); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, int value) { - valueBuffer.setByte(index * TYPE_WIDTH, value); - } - - private void setValue(int index, byte value) { - valueBuffer.setByte(index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, int value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, byte value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableUInt1Holder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. 
- * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, UInt1Holder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, int)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, int value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, byte)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, byte value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableUInt1Holder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableUInt1Holder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, UInt1Holder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, UInt1Holder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Sets the value at index to value isSet > 0, otherwise sets the index position to invalid/null. 
- */ - public void set(int index, int isSet, byte value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, byte)} but will reallocate the buffer if index is larger than - * current capacity. - */ - public void setSafe(int index, int isSet, byte value) { - handleSafe(index); - set(index, isSet, value); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((UInt1Vector) to); - } - - @Override - public void setWithPossibleTruncate(int index, long value) { - this.setSafe(index, (int) value); - } - - @Override - public void setUnsafeWithPossibleTruncate(int index, long value) { - this.set(index, (int) value); - } - - @Override - public long getValueAsLong(int index) { - return this.get(index) & PROMOTION_MASK; - } - - @Override - public String toString() { - return ValueVectorUtility.getToString( - this, 0, getValueCount(), (v, i) -> v.getObjectNoOverflow(i)); - } - - private class TransferImpl implements TransferPair { - UInt1Vector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new UInt1Vector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to 
= new UInt1Vector(field, allocator); - } - - public TransferImpl(UInt1Vector to) { - this.to = to; - } - - @Override - public UInt1Vector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, UInt1Vector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java deleted file mode 100644 index 35330f4f91b6a..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java +++ /dev/null @@ -1,355 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.UInt2ReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableUInt2Holder; -import org.apache.arrow.vector.holders.UInt2Holder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.ValueVectorUtility; - -/** - * UInt2Vector implements a fixed width (2 bytes) vector of integer values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class UInt2Vector extends BaseFixedWidthVector - implements BaseIntVector, ValueIterableVector { - - /** The maximum 16-bit unsigned integer. */ - public static final char MAX_UINT2 = (char) 0XFFFF; - - public static final byte TYPE_WIDTH = 2; - - public UInt2Vector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.UINT2.getType()), allocator); - } - - public UInt2Vector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Constructor for UInt2Vector type. 
- * - * @param field Field type - * @param allocator Allocator type - */ - public UInt2Vector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new UInt2ReaderImpl(UInt2Vector.this); - } - - @Override - public MinorType getMinorType() { - return MinorType.UINT2; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    This method is mainly meant for integration tests. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static char get(final ArrowBuf buffer, final int index) { - return buffer.getChar((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public char get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getChar((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableUInt2Holder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getChar((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Character getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getChar((long) index * TYPE_WIDTH); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, int value) { - valueBuffer.setChar((long) index * TYPE_WIDTH, value); - } - - private void setValue(int index, char value) { - valueBuffer.setChar((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. 
- * - * @param index position of element - * @param value value of element - */ - public void set(int index, int value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, char value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableUInt2Holder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, UInt2Holder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, int)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, int value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, char)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, char value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableUInt2Holder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableUInt2Holder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, UInt2Holder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, UInt2Holder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Sets the given index to value is isSet is positive, otherwise sets the position as - * invalid/null. - */ - public void set(int index, int isSet, char value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, char)} but will reallocate the buffer if index is larger than - * current capacity. - */ - public void setSafe(int index, int isSet, char value) { - handleSafe(index); - set(index, isSet, value); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. 
- * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((UInt2Vector) to); - } - - @Override - public void setWithPossibleTruncate(int index, long value) { - this.setSafe(index, (int) value); - } - - @Override - public void setUnsafeWithPossibleTruncate(int index, long value) { - this.set(index, (int) value); - } - - @Override - public long getValueAsLong(int index) { - return this.get(index); - } - - @Override - public String toString() { - return ValueVectorUtility.getToString( - this, - 0, - getValueCount(), - (v, i) -> v.isNull(i) ? "null" : Integer.toString(v.get(i) & 0x0000ffff)); - } - - private class TransferImpl implements TransferPair { - UInt2Vector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new UInt2Vector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new UInt2Vector(field, allocator); - } - - public TransferImpl(UInt2Vector to) { - this.to = to; - } - - @Override - public UInt2Vector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, UInt2Vector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java deleted file mode 100644 index 22e023f6f22e6..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java +++ /dev/null @@ 
-1,344 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.UInt4ReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableUInt4Holder; -import org.apache.arrow.vector.holders.UInt4Holder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.ValueVectorUtility; - -/** - * UInt4Vector implements a fixed width (4 bytes) vector of integer values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class UInt4Vector extends BaseFixedWidthVector - implements BaseIntVector, ValueIterableVector { - - /** The mask to use when promoting the unsigned int value to a long int. 
*/ - public static final long PROMOTION_MASK = 0x00000000FFFFFFFFL; - - /** The maximum 32-bit unsigned integer. */ - public static final int MAX_UINT4 = 0XFFFFFFFF; - - public static final byte TYPE_WIDTH = 4; - - public UInt4Vector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.UINT4.getType()), allocator); - } - - public UInt4Vector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Constructor for UInt4Vector. - * - * @param field Field type - * @param allocator Allocator type - */ - public UInt4Vector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new UInt4ReaderImpl(UInt4Vector.this); - } - - @Override - public MinorType getMinorType() { - return MinorType.UINT4; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    To avoid overflow, the returned type is one step up from the signed type. - * - *

    This method is mainly meant for integration tests. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static long getNoOverflow(final ArrowBuf buffer, final int index) { - long l = buffer.getInt((long) index * TYPE_WIDTH); - return PROMOTION_MASK & l; - } - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public int get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableUInt4Holder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Integer getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getInt((long) index * TYPE_WIDTH); - } - } - - /** - * Same as {@link #get(int)}. 
- * - * @param index position of element - * @return element at given index - */ - public Long getObjectNoOverflow(int index) { - if (isSet(index) == 0) { - return null; - } else { - return getNoOverflow(valueBuffer, index); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, int value) { - valueBuffer.setInt((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, int value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableUInt4Holder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, UInt4Holder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, int)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, int value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableUInt4Holder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableUInt4Holder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, UInt4Holder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, UInt4Holder holder) { - handleSafe(index); - set(index, holder); - } - - /** - * Sets the value at index to value isSet > 0, otherwise sets the index position to invalid/null. - */ - public void set(int index, int isSet, int value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, int)} but will reallocate if the buffer if index is larger than - * the current capacity. - */ - public void setSafe(int index, int isSet, int value) { - handleSafe(index); - set(index, isSet, value); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. 
- * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((UInt4Vector) to); - } - - @Override - public void setWithPossibleTruncate(int index, long value) { - this.setSafe(index, (int) value); - } - - @Override - public void setUnsafeWithPossibleTruncate(int index, long value) { - this.set(index, (int) value); - } - - @Override - public long getValueAsLong(int index) { - return this.get(index) & PROMOTION_MASK; - } - - @Override - public String toString() { - return ValueVectorUtility.getToString( - this, 0, getValueCount(), (v, i) -> v.getObjectNoOverflow(i)); - } - - private class TransferImpl implements TransferPair { - UInt4Vector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new UInt4Vector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new UInt4Vector(field, allocator); - } - - public TransferImpl(UInt4Vector to) { - this.to = to; - } - - @Override - public UInt4Vector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, UInt4Vector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java deleted file mode 100644 index f026702bb46e4..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java +++ /dev/null @@ -1,342 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import java.math.BigInteger; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.impl.UInt8ReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableUInt8Holder; -import org.apache.arrow.vector.holders.UInt8Holder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.ValueVectorUtility; - -/** - * UInt8Vector implements a fixed width vector (8 bytes) of integer values which could be null. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class UInt8Vector extends BaseFixedWidthVector - implements BaseIntVector, ValueIterableVector { - - /** The maximum 64-bit unsigned long integer. 
*/ - public static final long MAX_UINT8 = 0XFFFFFFFFFFFFFFFFL; - - public static final byte TYPE_WIDTH = 8; - - public UInt8Vector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.UINT8.getType()), allocator); - } - - public UInt8Vector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Constructor for UInt8Vector. - * - * @param field Field type - * @param allocator Allocator type. - */ - public UInt8Vector(Field field, BufferAllocator allocator) { - super(field, allocator, TYPE_WIDTH); - } - - @Override - protected FieldReader getReaderImpl() { - return new UInt8ReaderImpl(UInt8Vector.this); - } - - @Override - public MinorType getMinorType() { - return MinorType.UINT8; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - private static final BigInteger SAFE_CONVERSION_MASK = new BigInteger("ffffffffffffffff", 16); - - /** - * Given a data buffer, get the value stored at a particular position in the vector. - * - *

    To avoid overflow, the returned type is one step up from the signed type. - * - *

    This method is mainly meant for integration tests. - * - * @param buffer data buffer - * @param index position of the element. - * @return value stored at the index. - */ - public static BigInteger getNoOverflow(final ArrowBuf buffer, final int index) { - BigInteger l = BigInteger.valueOf(buffer.getLong((long) index * TYPE_WIDTH)); - return SAFE_CONVERSION_MASK.and(l); - } - - /** - * Get the element at the given index from the vector. - * - * @param index position of element - * @return element at given index - */ - public long get(int index) throws IllegalStateException { - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); - } - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Get the element at the given index from the vector and sets the state in holder. If element at - * given index is null, holder.isSet will be zero. - * - * @param index position of element - */ - public void get(int index, NullableUInt8Holder holder) { - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH); - } - - /** - * Same as {@link #get(int)}. - * - * @param index position of element - * @return element at given index - */ - @Override - public Long getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return valueBuffer.getLong((long) index * TYPE_WIDTH); - } - } - - /** - * Returns the value stored at index without the potential for overflow. 
- * - * @param index position of element - * @return element at given index - */ - public BigInteger getObjectNoOverflow(int index) { - if (isSet(index) == 0) { - return null; - } else { - return getNoOverflow(valueBuffer, index); - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - private void setValue(int index, long value) { - valueBuffer.setLong((long) index * TYPE_WIDTH, value); - } - - /** - * Set the element at the given index to the given value. - * - * @param index position of element - * @param value value of element - */ - public void set(int index, long value) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, value); - } - - /** - * Set the element at the given index to the value set in data holder. If the value in holder is - * not indicated as set, element in the at the given index will be null. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void set(int index, NullableUInt8Holder holder) throws IllegalArgumentException { - if (holder.isSet < 0) { - throw new IllegalArgumentException(); - } else if (holder.isSet > 0) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Set the element at the given index to the value set in data holder. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void set(int index, UInt8Holder holder) { - BitVectorHelper.setBit(validityBuffer, index); - setValue(index, holder.value); - } - - /** - * Same as {@link #set(int, long)} except that it handles the case when index is greater than or - * equal to existing value capacity {@link #getValueCapacity()}. 
- * - * @param index position of element - * @param value value of element - */ - public void setSafe(int index, long value) { - handleSafe(index); - set(index, value); - } - - /** - * Same as {@link #set(int, NullableUInt8Holder)} except that it handles the case when index is - * greater than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder nullable data holder for value of element - */ - public void setSafe(int index, NullableUInt8Holder holder) throws IllegalArgumentException { - handleSafe(index); - set(index, holder); - } - - /** - * Same as {@link #set(int, UInt8Holder)} except that it handles the case when index is greater - * than or equal to existing value capacity {@link #getValueCapacity()}. - * - * @param index position of element - * @param holder data holder for value of element - */ - public void setSafe(int index, UInt8Holder holder) { - handleSafe(index); - set(index, holder); - } - - /** Sets value at index is isSet is positive otherwise sets the index to invalid/null. */ - public void set(int index, int isSet, long value) { - if (isSet > 0) { - set(index, value); - } else { - BitVectorHelper.unsetBit(validityBuffer, index); - } - } - - /** - * Same as {@link #set(int, int, long)} but will reallocate if index is greater than current - * capacity. - */ - public void setSafe(int index, int isSet, long value) { - handleSafe(index); - set(index, isSet, value); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair comprising this and a target vector of the same type. 
- * - * @param field Field object used by the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((UInt8Vector) to); - } - - @Override - public void setWithPossibleTruncate(int index, long value) { - this.setSafe(index, value); - } - - @Override - public void setUnsafeWithPossibleTruncate(int index, long value) { - this.set(index, value); - } - - @Override - public long getValueAsLong(int index) { - return this.get(index); - } - - @Override - public String toString() { - return ValueVectorUtility.getToString( - this, 0, getValueCount(), (v, i) -> v.getObjectNoOverflow(i)); - } - - private class TransferImpl implements TransferPair { - UInt8Vector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new UInt8Vector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new UInt8Vector(field, allocator); - } - - public TransferImpl(UInt8Vector to) { - this.to = to; - } - - @Override - public UInt8Vector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, UInt8Vector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueIterableVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ValueIterableVector.java deleted file mode 100644 index 8536af7eb839a..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ValueIterableVector.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed 
to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import java.util.Iterator; - -public interface ValueIterableVector extends ValueVector { - /** - * Get an Iterable that can be used to iterate over the values in the vector. - * - * @return an Iterable for the vector's values - */ - default Iterator getValueIterator() { - return new Iterator() { - private int index = 0; - - @Override - public boolean hasNext() { - return index < ValueIterableVector.this.getValueCount(); - } - - @Override - public T next() { - return (T) ValueIterableVector.this.getObject(index++); - } - }; - } - - /** - * Get an Iterator for the values in the vector. - * - * @return an Iterator for the values in the vector - */ - default Iterable getValueIterable() { - return this::getValueIterator; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java deleted file mode 100644 index 0a45409eb9860..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import java.io.Closeable; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.util.ValueVectorUtility; - -/** - * An abstraction that is used to store a sequence of values in an individual column. - * - *

    A {@link ValueVector value vector} stores underlying data in-memory in a columnar fashion that - * is compact and efficient. The column whose data is stored, is referred by {@link #getField()}. - * - *

    It is important that vector is allocated before attempting to read or write. - * - *

    There are a few "rules" around vectors: - * - *

      - *
    • values need to be written in order (e.g. index 0, 1, 2, 5) - *
    • null vectors start with all values as null before writing anything - *
    • for variable width types, the offset vector should be all zeros before writing - *
    • you must call setValueCount before a vector can be read - *
    • you should never write to a vector once it has been read. - *
    - * - *

    Please note that the current implementation doesn't enforce those rules, hence we may find few - * places that deviate from these rules (e.g. offset vectors in Variable Length and Repeated vector) - * - *

    This interface "should" strive to guarantee this order of operation: - * - *

    - * - * allocate > mutate > setvaluecount > access > clear (or allocate to start the process - * over). - * - *
    - */ -public interface ValueVector extends Closeable, Iterable { - /** - * Allocate new buffers. ValueVector implements logic to determine how much to allocate. - * - * @throws OutOfMemoryException Thrown if no memory can be allocated. - */ - void allocateNew() throws OutOfMemoryException; - - /** - * Allocates new buffers. ValueVector implements logic to determine how much to allocate. - * - * @return Returns true if allocation was successful. - */ - boolean allocateNewSafe(); - - /** - * Allocate new buffer with double capacity, and copy data into the new buffer. Replace vector's - * buffer with new buffer, and release old one - */ - void reAlloc(); - - /** - * Get the allocator associated with the vector. CAVEAT: Some ValueVector subclasses (e.g. - * NullVector) do not require an allocator for data storage and may return null. - * - * @return Returns nullable allocator. - */ - BufferAllocator getAllocator(); - - /** - * Set the initial record capacity. - * - * @param numRecords the initial record capacity. - */ - void setInitialCapacity(int numRecords); - - /** - * Returns the maximum number of values that can be stored in this vector instance. - * - * @return the maximum number of values that can be stored in this vector instance. - */ - int getValueCapacity(); - - /** Alternative to clear(). Allows use as an AutoCloseable in try-with-resources. */ - @Override - void close(); - - /** - * Release any owned ArrowBuf and reset the ValueVector to the initial state. If the vector has - * any child vectors, they will also be cleared. - */ - void clear(); - - /** - * Reset the ValueVector to the initial state without releasing any owned ArrowBuf. Buffer - * capacities will remain unchanged and any previous data will be zeroed out. This includes - * buffers for data, validity, offset, etc. If the vector has any child vectors, they will also be - * reset. - */ - void reset(); - - /** - * Get information about how this field is materialized. 
- * - * @return the field corresponding to this vector - */ - Field getField(); - - MinorType getMinorType(); - - /** - * To transfer quota responsibility. - * - * @param allocator the target allocator - * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new - * target vector of the same type. - */ - TransferPair getTransferPair(BufferAllocator allocator); - - /** - * To transfer quota responsibility. - * - * @param ref the name of the vector - * @param allocator the target allocator - * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new - * target vector of the same type. - */ - TransferPair getTransferPair(String ref, BufferAllocator allocator); - - /** - * To transfer quota responsibility. - * - * @param field the Field object used by the target vector - * @param allocator the target allocator - * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new - * target vector of the same type. - */ - TransferPair getTransferPair(Field field, BufferAllocator allocator); - - /** - * To transfer quota responsibility. - * - * @param ref the name of the vector - * @param allocator the target allocator - * @param callBack A schema change callback. - * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new - * target vector of the same type. - */ - TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack); - - /** - * To transfer quota responsibility. - * - * @param field the Field object used by the target vector - * @param allocator the target allocator - * @param callBack A schema change callback. - * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new - * target vector of the same type. - */ - TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack); - - /** - * Makes a new transfer pair used to transfer underlying buffers. 
- * - * @param target the target for the transfer - * @return a new {@link org.apache.arrow.vector.util.TransferPair transfer pair} that is used to - * transfer underlying buffers into the target vector. - */ - TransferPair makeTransferPair(ValueVector target); - - /** - * Get a reader for this vector. - * - * @return a {@link org.apache.arrow.vector.complex.reader.FieldReader field reader} that supports - * reading values from this vector. - */ - FieldReader getReader(); - - /** - * Get the number of bytes used by this vector. - * - * @return the number of bytes that is used by this vector instance. - */ - int getBufferSize(); - - /** - * Returns the number of bytes that is used by this vector if it holds the given number of values. - * The result will be the same as if setValueCount() were called, followed by calling - * getBufferSize(), but without any of the closing side-effects that setValueCount() implies wrt - * finishing off the population of a vector. Some operations might wish to use this to determine - * how much memory has been used by a vector so far, even though it is not finished being - * populated. - * - * @param valueCount the number of values to assume this vector contains - * @return the buffer size if this vector is holding valueCount values - */ - int getBufferSizeFor(int valueCount); - - /** - * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer so it only should be used for in-context access. Also note - * that this buffer changes regularly thus external classes shouldn't hold a reference to it - * (unless they change it). - * - * @param clear Whether to clear vector before returning; the buffers will still be refcounted; - * but the returned array will be the only reference to them - * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. 
- */ - ArrowBuf[] getBuffers(boolean clear); - - /** - * Gets the underlying buffer associated with validity vector. - * - * @return buffer - */ - ArrowBuf getValidityBuffer(); - - /** - * Gets the underlying buffer associated with data vector. - * - * @return buffer - */ - ArrowBuf getDataBuffer(); - - /** - * Gets the underlying buffer associated with offset vector. - * - * @return buffer - */ - ArrowBuf getOffsetBuffer(); - - /** - * Gets the number of values. - * - * @return number of values in the vector - */ - int getValueCount(); - - /** Set number of values in the vector. */ - void setValueCount(int valueCount); - - /** - * Get friendly type object from the vector. - * - * @param index index of object to get - * @return friendly type object - */ - Object getObject(int index); - - /** - * Returns number of null elements in the vector. - * - * @return number of null elements - */ - int getNullCount(); - - /** - * Check whether an element in the vector is null. - * - * @param index index to check for null - * @return true if element is null - */ - boolean isNull(int index); - - /** Returns hashCode of element in index with the default hasher. */ - int hashCode(int index); - - /** Returns hashCode of element in index with the given hasher. */ - int hashCode(int index, ArrowBufHasher hasher); - - /** - * Copy a cell value from a particular index in source vector to a particular position in this - * vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - void copyFrom(int fromIndex, int thisIndex, ValueVector from); - - /** - * Same as {@link #copyFrom(int, int, ValueVector)} except that it handles the case when the - * capacity of the vector needs to be expanded before copy. 
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - void copyFromSafe(int fromIndex, int thisIndex, ValueVector from); - - /** - * Accept a generic {@link VectorVisitor} and return the result. - * - * @param the output result type. - * @param the input data together with visitor. - */ - OUT accept(VectorVisitor visitor, IN value); - - /** - * Gets the name of the vector. - * - * @return the name of the vector. - */ - String getName(); - - default void validate() { - ValueVectorUtility.validate(this); - } - - default void validateFull() { - ValueVectorUtility.validateFull(this); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java deleted file mode 100644 index 7196e9c910c13..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java +++ /dev/null @@ -1,309 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReusableBuffer; -import org.apache.arrow.vector.complex.impl.VarBinaryReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableVarBinaryHolder; -import org.apache.arrow.vector.holders.VarBinaryHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * VarBinaryVector implements a variable width vector of binary values which could be NULL. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class VarBinaryVector extends BaseVariableWidthVector - implements ValueIterableVector { - - /** - * Instantiate a VarBinaryVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public VarBinaryVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.VARBINARY.getType()), allocator); - } - - /** - * Instantiate a VarBinaryVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public VarBinaryVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a VarBinaryVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. 
- */ - public VarBinaryVector(Field field, BufferAllocator allocator) { - super(field, allocator); - } - - @Override - protected FieldReader getReaderImpl() { - return new VarBinaryReaderImpl(VarBinaryVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.VARBINARY; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the variable length element at specified index as byte array. - * - * @param index position of element to get - * @return array of bytes for non-null element, null otherwise - */ - public byte[] get(int index) { - assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; - } - final int startOffset = getStartOffset(index); - final int dataLength = getEndOffset(index) - startOffset; - final byte[] result = new byte[dataLength]; - valueBuffer.getBytes(startOffset, result, 0, dataLength); - return result; - } - - /** - * Read the value at the given position to the given output buffer. The caller is responsible for - * checking for nullity first. - * - * @param index position of element. - * @param buffer the buffer to write into. - */ - @Override - public void read(int index, ReusableBuffer buffer) { - final int startOffset = getStartOffset(index); - final int dataLength = getEndOffset(index) - startOffset; - buffer.set(valueBuffer, startOffset, dataLength); - } - - /** - * Get the variable length element at specified index as Text. 
- * - * @param index position of element to get - * @return byte array for non-null element, null otherwise - */ - @Override - public byte[] getObject(int index) { - return get(index); - } - - /** - * Get the variable length element at specified index and sets the state in provided holder. - * - * @param index position of element to get - * @param holder data holder to be populated by this function - */ - public void get(int index, NullableVarBinaryHolder holder) { - assert index >= 0; - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.start = getStartOffset(index); - holder.end = getEndOffset(index); - holder.buffer = valueBuffer; - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void set(int index, VarBinaryHolder holder) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final int dataLength = holder.end - holder.start; - final int startOffset = getStartOffset(index); - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - lastSet = index; - } - - /** - * Same as {@link #set(int, VarBinaryHolder)} except that it handles the case where index and - * length of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. 
- */ - public void setSafe(int index, VarBinaryHolder holder) { - assert index >= 0; - final int dataLength = holder.end - holder.start; - handleSafe(index, dataLength); - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final int startOffset = getStartOffset(index); - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - lastSet = index; - } - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void set(int index, NullableVarBinaryHolder holder) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet); - final int startOffset = getStartOffset(index); - if (holder.isSet != 0) { - final int dataLength = holder.end - holder.start; - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - } else { - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset); - } - lastSet = index; - } - - /** - * Same as {@link #set(int, NullableVarBinaryHolder)} except that it handles the case where index - * and length of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. 
- */ - public void setSafe(int index, NullableVarBinaryHolder holder) { - assert index >= 0; - if (holder.isSet != 0) { - final int dataLength = holder.end - holder.start; - handleSafe(index, dataLength); - fillHoles(index); - final int startOffset = getStartOffset(index); - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - } else { - fillEmpties(index + 1); - } - BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet); - lastSet = index; - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising of this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((VarBinaryVector) to); - } - - private class TransferImpl implements TransferPair { - VarBinaryVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new VarBinaryVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new VarBinaryVector(field, allocator); - } - - public TransferImpl(VarBinaryVector to) { - this.to = to; - } - - @Override - public VarBinaryVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, VarBinaryVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java deleted file mode 100644 index c81e34558c6f6..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReusableBuffer; -import org.apache.arrow.vector.complex.impl.VarCharReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableVarCharHolder; -import org.apache.arrow.vector.holders.VarCharHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.Text; -import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.validate.ValidateUtil; - -/** - * VarCharVector implements a variable width vector of VARCHAR values which could be NULL. A - * validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class VarCharVector extends BaseVariableWidthVector - implements ValueIterableVector { - - /** - * Instantiate a VarCharVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public VarCharVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.VARCHAR.getType()), allocator); - } - - /** - * Instantiate a VarCharVector. This doesn't allocate any memory for the data in vector. 
- * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public VarCharVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a VarCharVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public VarCharVector(Field field, BufferAllocator allocator) { - super(field, allocator); - } - - @Override - protected FieldReader getReaderImpl() { - return new VarCharReaderImpl(VarCharVector.this); - } - - /** - * Get minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.VARCHAR; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the variable length element at specified index as byte array. - * - * @param index position of element to get - * @return array of bytes for non-null element, null otherwise - */ - public byte[] get(int index) { - assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; - } - final int startOffset = getStartOffset(index); - final int dataLength = getEndOffset(index) - startOffset; - final byte[] result = new byte[dataLength]; - valueBuffer.getBytes(startOffset, result, 0, dataLength); - return result; - } - - /** - * Get the variable length element at specified index as Text. 
- * - * @param index position of element to get - * @return Text object for non-null element, null otherwise - */ - @Override - public Text getObject(int index) { - assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; - } - - final Text result = new Text(); - read(index, result); - return result; - } - - /** - * Read the value at the given position to the given output buffer. The caller is responsible for - * checking for nullity first. - * - * @param index position of element. - * @param buffer the buffer to write into. - */ - @Override - public void read(int index, ReusableBuffer buffer) { - final int startOffset = getStartOffset(index); - final int dataLength = getEndOffset(index) - startOffset; - buffer.set(valueBuffer, startOffset, dataLength); - } - - /** - * Get the variable length element at specified index and sets the state in provided holder. - * - * @param index position of element to get - * @param holder data holder to be populated by this function - */ - public void get(int index, NullableVarCharHolder holder) { - assert index >= 0; - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - holder.start = getStartOffset(index); - holder.end = getEndOffset(index); - holder.buffer = valueBuffer; - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. 
- */ - public void set(int index, VarCharHolder holder) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setBit(validityBuffer, index); - final int dataLength = holder.end - holder.start; - final int startOffset = getStartOffset(index); - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - lastSet = index; - } - - /** - * Same as {@link #set(int, VarCharHolder)} except that it handles the case where index and length - * of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void setSafe(int index, VarCharHolder holder) { - assert index >= 0; - final int dataLength = holder.end - holder.start; - handleSafe(index, dataLength); - fillHoles(index); - - BitVectorHelper.setBit(validityBuffer, index); - final int startOffset = getStartOffset(index); - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - lastSet = index; - } - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. 
- */ - public void set(int index, NullableVarCharHolder holder) { - assert index >= 0; - fillHoles(index); - BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet); - final int startOffset = getStartOffset(index); - if (holder.isSet != 0) { - final int dataLength = holder.end - holder.start; - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - } else { - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset); - } - lastSet = index; - } - - /** - * Same as {@link #set(int, NullableVarCharHolder)} except that it handles the case where index - * and length of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void setSafe(int index, NullableVarCharHolder holder) { - assert index >= 0; - if (holder.isSet != 0) { - final int dataLength = holder.end - holder.start; - handleSafe(index, dataLength); - fillHoles(index); - final int startOffset = getStartOffset(index); - offsetBuffer.setInt((index + 1) * ((long) OFFSET_WIDTH), startOffset + dataLength); - valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength); - } else { - fillEmpties(index + 1); - } - BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet); - lastSet = index; - } - - /** - * Set the variable length element at the specified index to the content in supplied Text. - * - * @param index position of the element to set - * @param text Text object with data - */ - public void set(int index, Text text) { - set(index, text.getBytes(), 0, (int) text.getLength()); - } - - /** - * Same as {@link #set(int, NullableVarCharHolder)} except that it handles the case where index - * and length of new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set. 
- * @param text Text object with data - */ - public void setSafe(int index, Text text) { - setSafe(index, text.getBytes(), 0, (int) text.getLength()); - } - - @Override - public void validateScalars() { - for (int i = 0; i < getValueCount(); ++i) { - byte[] value = get(i); - if (value != null) { - ValidateUtil.validateOrThrow( - Text.validateUTF8NoThrow(value), - "Non-UTF-8 data in VarCharVector at position " + i + "."); - } - } - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising of this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((VarCharVector) to); - } - - private class TransferImpl implements TransferPair { - VarCharVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new VarCharVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new VarCharVector(field, allocator); - } - - public TransferImpl(VarCharVector to) { - this.to = to; - } - - @Override - public VarCharVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, VarCharVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthFieldVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthFieldVector.java deleted file mode 100644 index fc3c8384610a8..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthFieldVector.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import java.nio.ByteBuffer; -import org.apache.arrow.memory.ReusableBuffer; - -/** A base interface for common functionalities in variable width vectors. */ -public interface VariableWidthFieldVector - extends VariableWidthVector, FieldVector, VectorDefinitionSetter { - - /** - * Set the variable length element at the specified index to the supplied byte array. - * - * @param index position of the element to set - * @param value array of bytes with data - */ - void set(int index, byte[] value); - - /** - * Set the variable length element at the specified index to the supplied byte array. - * - * @param index position of the element to set - * @param value array of bytes with data - * @param start start position in the array - * @param length length of the data to write - */ - void set(int index, byte[] value, int start, int length); - - /** - * Set the variable length element at the specified index to the supplied ByteBuffer. - * - * @param index position of the element to set - * @param value ByteBuffer with data - * @param start start position in the ByteBuffer - * @param length length of the data to write - */ - void set(int index, ByteBuffer value, int start, int length); - - /** - * Set the variable length element at the specified index to the supplied byte array, and it - * handles the case where index and length of a new element are beyond the existing capacity of - * the vector. 
- * - * @param index position of the element to set - * @param value array of bytes to write - */ - void setSafe(int index, byte[] value); - - /** - * Set the variable length element at the specified index to the supplied byte array, and it - * handles the case where index and length of a new element are beyond the existing capacity. - * - * @param index position of the element to set - * @param value array of bytes with data - * @param start start position in the array - * @param length length of the data to write - */ - void setSafe(int index, byte[] value, int start, int length); - - /** - * Set the variable length element at the specified index to the supplied ByteBuffer, and it - * handles the case where index and length of a new element are beyond the existing capacity. - * - * @param index position of the element to set - * @param value ByteBuffer with data - * @param start start position in the ByteBuffer - * @param length length of the data to write - */ - void setSafe(int index, ByteBuffer value, int start, int length); - - /** - * Get the variable length element at the specified index. - * - * @param index position of the element to get - * @return byte array with the data - */ - byte[] get(int index); - - /** - * Get the variable length element at the specified index using a ReusableBuffer. - * - * @param index position of the element to get - * @param buffer ReusableBuffer to write the data to - */ - void read(int index, ReusableBuffer buffer); - - /** - * Get the index of the last non-null element in the vector. - * - * @return index of the last non-null element - */ - int getLastSet(); - - /** - * Set the index of the last non-null element in the vector. - * - * @param value desired index of last non-null element - */ - void setLastSet(int value); - - /** - * Get the variable length element at specified index as Text. 
- * - * @param index position of an element to get - * @return greater than length 0 for a non-null element, 0 otherwise - */ - int getValueLength(int index); - - /** - * Create holes in the vector upto the given index (exclusive). Holes will be created from the - * current last-set position in the vector. - * - * @param index target index - */ - void fillEmpties(int index); - - /** - * Sets the value length for an element. - * - * @param index position of the element to set - * @param length length of the element - */ - void setValueLengthSafe(int index, int length); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java deleted file mode 100644 index f95d6de24142d..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -/** Interface vectors that contain variable width members (e.g. Strings, Lists, etc). 
*/ -public interface VariableWidthVector extends ElementAddressableVector, DensityAwareVector { - - /** - * Allocate a new memory space for this vector. Must be called prior to using the ValueVector. - * - * @param totalBytes Desired size of the underlying data buffer. - * @param valueCount Number of values in the vector. - */ - void allocateNew(long totalBytes, int valueCount); - - /** - * Allocate a new memory space for this vector. Must be called prior to using the ValueVector. The - * initial size in bytes is either default (or) reused from previous allocation - * - * @param valueCount Number of values in the vector. - */ - void allocateNew(int valueCount); - - /** - * Provide the maximum amount of variable width bytes that can be stored in this vector. - * - * @return the byte capacity of this vector - */ - int getByteCapacity(); - - /** - * Provide the number of bytes contained in the valueBuffer. - * - * @return the number of bytes in valueBuffer. - */ - int sizeOfValueBuffer(); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorDefinitionSetter.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorDefinitionSetter.java deleted file mode 100644 index e8830327b2ada..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorDefinitionSetter.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -/** Interface for setting a specific index values as defined/valid on a vector. */ -public interface VectorDefinitionSetter { - - void setIndexDefined(int index); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java deleted file mode 100644 index ecd3fb91241b1..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.util.Preconditions.checkArgument; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.util.Collections2; -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.vector.compression.CompressionUtil; -import org.apache.arrow.vector.compression.NoCompressionCodec; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.Field; - -/** Loads buffers into vectors. */ -public class VectorLoader { - - private final VectorSchemaRoot root; - - private final CompressionCodec.Factory factory; - - /** - * A flag indicating if decompression is needed. This will affect the behavior of releasing - * buffers. - */ - private boolean decompressionNeeded; - - /** - * Construct with a root to load and will create children in root based on schema. - * - * @param root the root to add vectors to based on schema - */ - public VectorLoader(VectorSchemaRoot root) { - this(root, CompressionCodec.Factory.INSTANCE); - } - - /** - * Construct with a root to load and will create children in root based on schema. - * - * @param root the root to add vectors to based on schema. - * @param factory the factory to create codec. - */ - public VectorLoader(VectorSchemaRoot root, CompressionCodec.Factory factory) { - this.root = root; - this.factory = factory; - } - - /** - * Loads the record batch in the vectors. 
will not close the record batch - * - * @param recordBatch the batch to load - */ - public void load(ArrowRecordBatch recordBatch) { - Iterator buffers = recordBatch.getBuffers().iterator(); - Iterator nodes = recordBatch.getNodes().iterator(); - CompressionUtil.CodecType codecType = - CompressionUtil.CodecType.fromCompressionType(recordBatch.getBodyCompression().getCodec()); - decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; - CompressionCodec codec = - decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; - Iterator variadicBufferCounts = Collections.emptyIterator(); - ; - if (recordBatch.getVariadicBufferCounts() != null - && !recordBatch.getVariadicBufferCounts().isEmpty()) { - variadicBufferCounts = recordBatch.getVariadicBufferCounts().iterator(); - } - - for (FieldVector fieldVector : root.getFieldVectors()) { - loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec, variadicBufferCounts); - } - root.setRowCount(recordBatch.getLength()); - if (nodes.hasNext() || buffers.hasNext() || variadicBufferCounts.hasNext()) { - throw new IllegalArgumentException( - "not all nodes, buffers and variadicBufferCounts were consumed. 
nodes: " - + Collections2.toString(nodes) - + " buffers: " - + Collections2.toString(buffers) - + " variadicBufferCounts: " - + Collections2.toString(variadicBufferCounts)); - } - } - - private void loadBuffers( - FieldVector vector, - Field field, - Iterator buffers, - Iterator nodes, - CompressionCodec codec, - Iterator variadicBufferCounts) { - checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); - ArrowFieldNode fieldNode = nodes.next(); - // variadicBufferLayoutCount will be 0 for vectors of a type except BaseVariableWidthViewVector - long variadicBufferLayoutCount = 0; - if (vector instanceof BaseVariableWidthViewVector) { - if (variadicBufferCounts.hasNext()) { - variadicBufferLayoutCount = variadicBufferCounts.next(); - } else { - throw new IllegalStateException( - "No variadicBufferCounts available for BaseVariableWidthViewVector"); - } - } - int bufferLayoutCount = - (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType())); - List ownBuffers = new ArrayList<>(bufferLayoutCount); - for (int j = 0; j < bufferLayoutCount; j++) { - ArrowBuf nextBuf = buffers.next(); - // for vectors without nulls, the buffer is empty, so there is no need to decompress it. - ArrowBuf bufferToAdd = - nextBuf.writerIndex() > 0 ? codec.decompress(vector.getAllocator(), nextBuf) : nextBuf; - ownBuffers.add(bufferToAdd); - if (decompressionNeeded) { - // decompression performed - nextBuf.getReferenceManager().retain(); - } - } - try { - vector.loadFieldBuffers(fieldNode, ownBuffers); - if (decompressionNeeded) { - for (ArrowBuf buf : ownBuffers) { - buf.close(); - } - } - } catch (RuntimeException e) { - throw new IllegalArgumentException( - "Could not load buffers for field " + field + ". 
error message: " + e.getMessage(), e); - } - List children = field.getChildren(); - if (children.size() > 0) { - List childrenFromFields = vector.getChildrenFromFields(); - checkArgument( - children.size() == childrenFromFields.size(), - "should have as many children as in the schema: found %s expected %s", - childrenFromFields.size(), - children.size()); - for (int i = 0; i < childrenFromFields.size(); i++) { - Field child = children.get(i); - FieldVector fieldVector = childrenFromFields.get(i); - loadBuffers(fieldVector, child, buffers, nodes, codec, variadicBufferCounts); - } - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java deleted file mode 100644 index a7cb9ced72141..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.compare.ApproxEqualsVisitor; -import org.apache.arrow.vector.compare.Range; -import org.apache.arrow.vector.compare.VectorEqualsVisitor; -import org.apache.arrow.vector.compare.VectorValueEqualizer; -import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.TransferPair; - -/** - * Holder for a set of vectors to be loaded/unloaded. A VectorSchemaRoot is a container that can - * hold batches, batches flow through VectorSchemaRoot as part of a pipeline. Note this is different - * from other implementations (i.e. in C++ and Python, a RecordBatch is a collection of equal-length - * vector instances and was created each time for a new batch). - * - *

    The recommended usage for VectorSchemaRoot is creating a single VectorSchemaRoot based on the - * known schema and populated data over and over into the same VectorSchemaRoot in a stream of - * batches rather than create a new VectorSchemaRoot instance each time (see Flight or - * ArrowFileWriter for better understanding). Thus at any one point a VectorSchemaRoot may have data - * or may have no data (say it was transferred downstream or not yet populated). - */ -public class VectorSchemaRoot implements AutoCloseable { - - private Schema schema; - private int rowCount; - private final List fieldVectors; - private final Map fieldVectorsMap = new LinkedHashMap<>(); - - /** Constructs new instance containing each of the vectors. */ - public VectorSchemaRoot(Iterable vectors) { - this( - StreamSupport.stream(vectors.spliterator(), false) - .map(t -> t.getField()) - .collect(Collectors.toList()), - StreamSupport.stream(vectors.spliterator(), false).collect(Collectors.toList())); - } - - /** Constructs a new instance containing the children of parent but not the parent itself. */ - public VectorSchemaRoot(FieldVector parent) { - this(parent.getField().getChildren(), parent.getChildrenFromFields(), parent.getValueCount()); - } - - /** - * Constructs a new instance. - * - * @param fields The types of each vector. - * @param fieldVectors The data vectors (must be equal in size to fields. - */ - public VectorSchemaRoot(List fields, List fieldVectors) { - this( - new Schema(fields), - fieldVectors, - fieldVectors.size() == 0 ? 0 : fieldVectors.get(0).getValueCount()); - } - - /** - * Constructs a new instance. - * - * @param fields The types of each vector. - * @param fieldVectors The data vectors (must be equal in size to fields. - * @param rowCount The number of rows contained. - */ - public VectorSchemaRoot(List fields, List fieldVectors, int rowCount) { - this(new Schema(fields), fieldVectors, rowCount); - } - - /** - * Constructs a new instance. 
- * - * @param schema The schema for the vectors. - * @param fieldVectors The data vectors. - * @param rowCount The number of rows - */ - public VectorSchemaRoot(Schema schema, List fieldVectors, int rowCount) { - if (schema.getFields().size() != fieldVectors.size()) { - throw new IllegalArgumentException( - "Fields must match field vectors. Found " - + fieldVectors.size() - + " vectors and " - + schema.getFields().size() - + " fields"); - } - this.schema = schema; - this.rowCount = rowCount; - this.fieldVectors = fieldVectors; - for (int i = 0; i < schema.getFields().size(); ++i) { - Field field = schema.getFields().get(i); - FieldVector vector = fieldVectors.get(i); - fieldVectorsMap.put(field, vector); - } - } - - /** Creates a new set of empty vectors corresponding to the given schema. */ - public static VectorSchemaRoot create(Schema schema, BufferAllocator allocator) { - List fieldVectors = new ArrayList<>(schema.getFields().size()); - for (Field field : schema.getFields()) { - FieldVector vector = field.createVector(allocator); - fieldVectors.add(vector); - } - if (fieldVectors.size() != schema.getFields().size()) { - throw new IllegalArgumentException( - "The root vector did not create the right number of children. found " - + fieldVectors.size() - + " expected " - + schema.getFields().size()); - } - return new VectorSchemaRoot(schema, fieldVectors, 0); - } - - /** Constructs a new instance from vectors. */ - public static VectorSchemaRoot of(FieldVector... vectors) { - return new VectorSchemaRoot(Arrays.stream(vectors).collect(Collectors.toList())); - } - - /** - * Do an adaptive allocation of each vector for memory purposes. Sizes will be based on previously - * defined initial allocation for each vector (and subsequent size learned). - */ - public void allocateNew() { - for (FieldVector v : fieldVectors) { - v.allocateNew(); - } - rowCount = 0; - } - - /** - * Release all the memory for each vector held in this root. 
This DOES NOT remove vectors from the - * container. - */ - public void clear() { - for (FieldVector v : fieldVectors) { - v.clear(); - } - rowCount = 0; - } - - public List getFieldVectors() { - return Collections.unmodifiableList(fieldVectors); - } - - /** - * gets a vector by name. - * - *

    if name occurs multiple times this returns the first inserted entry for name - */ - public FieldVector getVector(String name) { - for (Map.Entry entry : fieldVectorsMap.entrySet()) { - if (entry.getKey().getName().equals(name)) { - return entry.getValue(); - } - } - return null; - } - - public FieldVector getVector(Field field) { - return fieldVectorsMap.get(field); - } - - public FieldVector getVector(int index) { - Preconditions.checkArgument(index >= 0 && index < fieldVectors.size()); - return fieldVectors.get(index); - } - - /** - * Add vector to the record batch, producing a new VectorSchemaRoot. - * - * @param index field index - * @param vector vector to be added. - * @return out VectorSchemaRoot with vector added - */ - public VectorSchemaRoot addVector(int index, FieldVector vector) { - Preconditions.checkNotNull(vector); - Preconditions.checkArgument(index >= 0 && index < fieldVectors.size()); - List newVectors = new ArrayList<>(); - for (int i = 0; i < fieldVectors.size(); i++) { - if (i == index) { - newVectors.add(vector); - } - newVectors.add(fieldVectors.get(i)); - } - return new VectorSchemaRoot(newVectors); - } - - /** - * Remove vector from the record batch, producing a new VectorSchemaRoot. - * - * @param index field index - * @return out VectorSchemaRoot with vector removed - */ - public VectorSchemaRoot removeVector(int index) { - Preconditions.checkArgument(index >= 0 && index < fieldVectors.size()); - List newVectors = new ArrayList<>(); - for (int i = 0; i < fieldVectors.size(); i++) { - if (i != index) { - newVectors.add(fieldVectors.get(i)); - } - } - return new VectorSchemaRoot(newVectors); - } - - public Schema getSchema() { - return schema; - } - - public int getRowCount() { - return rowCount; - } - - /** - * Set the row count of all the vectors in this container. Also sets the value count for each root - * level contained FieldVector. - * - * @param rowCount Number of records. 
- */ - public void setRowCount(int rowCount) { - this.rowCount = rowCount; - for (FieldVector v : fieldVectors) { - v.setValueCount(rowCount); - } - } - - @Override - public void close() { - try { - AutoCloseables.close(fieldVectors); - } catch (RuntimeException ex) { - throw ex; - } catch (Exception ex) { - // should never happen since FieldVector.close() doesn't throw IOException - throw new RuntimeException(ex); - } - } - - private void printRow(StringBuilder sb, List row) { - boolean first = true; - for (Object v : row) { - if (first) { - first = false; - } else { - sb.append("\t"); - } - sb.append(v); - } - sb.append("\n"); - } - - /** Returns a tab separated value of vectors (based on their java object representation). */ - public String contentToTSVString() { - StringBuilder sb = new StringBuilder(); - List row = new ArrayList<>(schema.getFields().size()); - for (Field field : schema.getFields()) { - row.add(field.getName()); - } - printRow(sb, row); - for (int i = 0; i < rowCount; i++) { - row.clear(); - for (FieldVector v : fieldVectors) { - row.add(v.getObject(i)); - } - printRow(sb, row); - } - return sb.toString(); - } - - /** - * Synchronizes the schema from the current vectors. In some cases, the schema and the actual - * vector structure may be different. This can be caused by a promoted writer (For details, please - * see {@link org.apache.arrow.vector.complex.impl.PromotableWriter}). For example, when writing - * different types of data to a {@link org.apache.arrow.vector.complex.ListVector} may lead to - * such a case. When this happens, this method should be called to bring the schema and vector - * structure in a synchronized state. - * - * @return true if the schema is updated, false otherwise. 
- */ - public boolean syncSchema() { - List oldFields = this.schema.getFields(); - List newFields = - this.fieldVectors.stream().map(ValueVector::getField).collect(Collectors.toList()); - if (!oldFields.equals(newFields)) { - this.schema = new Schema(newFields); - return true; - } - return false; - } - - /** - * Slice this root from desired index. - * - * @param index start position of the slice - * @return the sliced root - */ - public VectorSchemaRoot slice(int index) { - return slice(index, this.rowCount - index); - } - - /** - * Slice this root at desired index and length. - * - * @param index start position of the slice - * @param length length of the slice - * @return the sliced root - */ - public VectorSchemaRoot slice(int index, int length) { - Preconditions.checkArgument(index >= 0, "expecting non-negative index"); - Preconditions.checkArgument(length >= 0, "expecting non-negative length"); - Preconditions.checkArgument(index + length <= rowCount, "index + length should <= rowCount"); - - List sliceVectors = - fieldVectors.stream() - .map( - v -> { - TransferPair transferPair = v.getTransferPair(v.getAllocator()); - transferPair.splitAndTransfer(index, length); - return (FieldVector) transferPair.getTo(); - }) - .collect(Collectors.toList()); - - return new VectorSchemaRoot(sliceVectors); - } - - /** Determine if two VectorSchemaRoots are exactly equal. 
*/ - public boolean equals(VectorSchemaRoot other) { - if (other == null) { - return false; - } - - if (!this.schema.equals(other.schema)) { - return false; - } - - if (this.rowCount != other.rowCount) { - return false; - } - - for (int i = 0; i < fieldVectors.size(); i++) { - FieldVector vector = fieldVectors.get(i); - FieldVector otherVector = other.fieldVectors.get(i); - if (!VectorEqualsVisitor.vectorEquals(vector, otherVector)) { - return false; - } - } - - return true; - } - - /** - * Determine if two VectorSchemaRoots are approximately equal using the given functions to - * calculate difference between float/double values. Note that approx equals are in regards to - * floating point values, other values are comparing to exactly equals. - * - * @param floatDiffFunction function to calculate difference between float values. - * @param doubleDiffFunction function to calculate difference between double values. - */ - public boolean approxEquals( - VectorSchemaRoot other, - VectorValueEqualizer floatDiffFunction, - VectorValueEqualizer doubleDiffFunction) { - - Preconditions.checkNotNull(floatDiffFunction); - Preconditions.checkNotNull(doubleDiffFunction); - - if (other == null) { - return false; - } - - if (!this.schema.equals(other.schema)) { - return false; - } - - if (this.rowCount != other.rowCount) { - return false; - } - - Range range = new Range(0, 0, 0); - for (int i = 0; i < fieldVectors.size(); i++) { - FieldVector vector = fieldVectors.get(i); - FieldVector otherVector = other.fieldVectors.get(i); - if (vector.getValueCount() != otherVector.getValueCount()) { - return false; - } - ApproxEqualsVisitor visitor = - new ApproxEqualsVisitor(vector, otherVector, floatDiffFunction, doubleDiffFunction); - range.setLength(vector.getValueCount()); - if (!visitor.rangeEquals(range)) { - return false; - } - } - - return true; - } - - /** - * Determine if two VectorSchemaRoots are approximately equal using default functions to calculate - * difference between 
float/double values. - */ - public boolean approxEquals(VectorSchemaRoot other) { - VectorValueEqualizer floatDiffFunction = - new ValueEpsilonEqualizers.Float4EpsilonEqualizer( - ApproxEqualsVisitor.DEFAULT_FLOAT_EPSILON); - VectorValueEqualizer doubleDiffFunction = - new ValueEpsilonEqualizers.Float8EpsilonEqualizer( - ApproxEqualsVisitor.DEFAULT_DOUBLE_EPSILON); - return approxEquals(other, floatDiffFunction, doubleDiffFunction); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java deleted file mode 100644 index 342f210b827df..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.vector.compression.CompressionUtil; -import org.apache.arrow.vector.compression.NoCompressionCodec; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; - -/** - * Helper class that handles converting a {@link VectorSchemaRoot} to a {@link ArrowRecordBatch}. - */ -public class VectorUnloader { - - private final VectorSchemaRoot root; - private final boolean includeNullCount; - private final CompressionCodec codec; - private final boolean alignBuffers; - - /** Constructs a new instance of the given set of vectors. */ - public VectorUnloader(VectorSchemaRoot root) { - this(root, true, NoCompressionCodec.INSTANCE, true); - } - - /** - * Constructs a new instance. - * - * @param root The set of vectors to serialize to an {@link ArrowRecordBatch}. - * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch} - * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries. - */ - public VectorUnloader(VectorSchemaRoot root, boolean includeNullCount, boolean alignBuffers) { - this(root, includeNullCount, NoCompressionCodec.INSTANCE, alignBuffers); - } - - /** - * Constructs a new instance. - * - * @param root The set of vectors to serialize to an {@link ArrowRecordBatch}. - * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch} - * @param codec the codec for compressing data. If it is null, then no compression is needed. - * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries. 
- */ - public VectorUnloader( - VectorSchemaRoot root, - boolean includeNullCount, - CompressionCodec codec, - boolean alignBuffers) { - this.root = root; - this.includeNullCount = includeNullCount; - this.codec = codec == null ? NoCompressionCodec.INSTANCE : codec; - this.alignBuffers = alignBuffers; - } - - /** - * Performs the depth first traversal of the Vectors to create an {@link ArrowRecordBatch} - * suitable for serialization. - */ - public ArrowRecordBatch getRecordBatch() { - List nodes = new ArrayList<>(); - List buffers = new ArrayList<>(); - List variadicBufferCounts = new ArrayList<>(); - for (FieldVector vector : root.getFieldVectors()) { - appendNodes(vector, nodes, buffers, variadicBufferCounts); - } - // Do NOT retain buffers in ArrowRecordBatch constructor since we have already retained them. - return new ArrowRecordBatch( - root.getRowCount(), - nodes, - buffers, - CompressionUtil.createBodyCompression(codec), - variadicBufferCounts, - alignBuffers, /*retainBuffers*/ - false); - } - - private long getVariadicBufferCount(FieldVector vector) { - if (vector instanceof BaseVariableWidthViewVector) { - return ((BaseVariableWidthViewVector) vector).getDataBuffers().size(); - } - return 0L; - } - - private void appendNodes( - FieldVector vector, - List nodes, - List buffers, - List variadicBufferCounts) { - nodes.add( - new ArrowFieldNode(vector.getValueCount(), includeNullCount ? 
vector.getNullCount() : -1)); - List fieldBuffers = vector.getFieldBuffers(); - long variadicBufferCount = getVariadicBufferCount(vector); - int expectedBufferCount = - (int) (TypeLayout.getTypeBufferCount(vector.getField().getType()) + variadicBufferCount); - // only update variadicBufferCounts for vectors that have variadic buffers - if (vector instanceof BaseVariableWidthViewVector) { - variadicBufferCounts.add(variadicBufferCount); - } - if (fieldBuffers.size() != expectedBufferCount) { - throw new IllegalArgumentException( - String.format( - "wrong number of buffers for field %s in vector %s. found: %s", - vector.getField(), vector.getClass().getSimpleName(), fieldBuffers)); - } - for (ArrowBuf buf : fieldBuffers) { - // If the codec is NoCompressionCodec, then it will return the input buffer unchanged. In that - // case, - // we need to retain it for ArrowRecordBatch. Otherwise, it will return a new buffer, and also - // close - // the input buffer. In that case, we need to retain the input buffer still to avoid modifying - // the source VectorSchemaRoot. - buf.getReferenceManager().retain(); - buffers.add(codec.compress(vector.getAllocator(), buf)); - } - for (FieldVector child : vector.getChildrenFromFields()) { - appendNodes(child, nodes, buffers, variadicBufferCounts); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java deleted file mode 100644 index 80d6952e00674..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReusableBuffer; -import org.apache.arrow.vector.complex.impl.ViewVarBinaryReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableViewVarBinaryHolder; -import org.apache.arrow.vector.holders.ViewVarBinaryHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * ViewVarBinaryVector implements a variable width view vector of binary values which could be NULL. - * A validity buffer (bit vector) is maintained to track which elements in the vector are null. - */ -public final class ViewVarBinaryVector extends BaseVariableWidthViewVector - implements ValueIterableVector { - - /** - * Instantiate a ViewVarBinaryVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. 
- */ - public ViewVarBinaryVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.VIEWVARBINARY.getType()), allocator); - } - - /** - * Instantiate a ViewVarBinaryVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public ViewVarBinaryVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a ViewVarBinaryVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public ViewVarBinaryVector(Field field, BufferAllocator allocator) { - super(field, allocator); - } - - @Override - protected FieldReader getReaderImpl() { - return new ViewVarBinaryReaderImpl(ViewVarBinaryVector.this); - } - - /** - * Get a minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.VIEWVARBINARY; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the variable length element at specified index as a byte array. - * - * @param index position of an element to get - * @return array of bytes for a non-null element, null otherwise - */ - public byte[] get(int index) { - assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; - } - return getData(index); - } - - /** - * Read the value at the given position to the given output buffer. The caller is responsible for - * checking for nullity first. 
- * - * @param index position of an element. - * @param buffer the buffer to write into. - */ - @Override - public void read(int index, ReusableBuffer buffer) { - getData(index, buffer); - } - - /** - * Get the variable length element at a specified index as a byte array. - * - * @param index position of an element to get - * @return byte array for a non-null element, null otherwise - */ - @Override - public byte[] getObject(int index) { - return get(index); - } - - /** - * Get the variable length element at specified index and sets the state in provided holder. - * - * @param index position of an element to get - * @param holder data holder to be populated by this function - */ - public void get(int index, NullableViewVarBinaryHolder holder) { - final int dataLength = getValueLength(index); - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - if (dataLength > INLINE_SIZE) { - // data is in the data buffer - // get buffer index - final int bufferIndex = - viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH); - // get data offset - final int dataOffset = - viewBuffer.getInt( - ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); - holder.buffer = dataBuffers.get(bufferIndex); - holder.start = dataOffset; - holder.end = dataOffset + dataLength; - } else { - final long dataOffset = ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH; - // data is in the value buffer - holder.buffer = viewBuffer; - holder.start = (int) dataOffset; - holder.end = (int) dataOffset + dataLength; - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. 
- */ - public void set(int index, ViewVarBinaryHolder holder) { - int start = holder.start; - int length = holder.end - start; - setBytes(index, holder.buffer, start, length); - lastSet = index; - } - - /** - * Same as {@link #set(int, ViewVarBinaryHolder)} except that it handles the case where index and - * length of a new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void setSafe(int index, ViewVarBinaryHolder holder) { - int length = holder.end - holder.start; - handleSafe(index, length); - set(index, holder); - } - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void set(int index, NullableViewVarBinaryHolder holder) { - if (holder.isSet == 0) { - setNull(index); - } else { - BitVectorHelper.setBit(validityBuffer, index); - int start = holder.start; - int length = holder.end - start; - setBytes(index, holder.buffer, start, length); - } - lastSet = index; - } - - /** - * Same as {@link #set(int, NullableViewVarBinaryHolder)} except that it handles the case where - * index and length of a new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void setSafe(int index, NullableViewVarBinaryHolder holder) { - int length = holder.end - holder.start; - handleSafe(index, length); - set(index, holder); - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. 
- * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. - * - * @param to target vector - * @return {@link TransferPair} - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((ViewVarBinaryVector) to); - } - - private class TransferImpl implements TransferPair { - ViewVarBinaryVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new ViewVarBinaryVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new ViewVarBinaryVector(field, allocator); - } - - public TransferImpl(ViewVarBinaryVector to) { - this.to = to; - } - - @Override - public ViewVarBinaryVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, ViewVarBinaryVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java deleted file mode 100644 index dc474b68e36cd..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReusableBuffer; -import org.apache.arrow.vector.complex.impl.ViewVarCharReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.NullableViewVarCharHolder; -import org.apache.arrow.vector.holders.ViewVarCharHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.Text; -import org.apache.arrow.vector.util.TransferPair; -import org.apache.arrow.vector.validate.ValidateUtil; - -/** - * ViewVarCharVector implements a view of a variable width vector of VARCHAR values which could be - * NULL. A validity buffer (bit vector) is maintained to track which elements in the vector are - * null. A viewBuffer keeps track of all values in the vector, and an external data buffer is kept - * to keep longer strings (>12). - */ -public final class ViewVarCharVector extends BaseVariableWidthViewVector - implements ValueIterableVector { - - /** - * Instantiate a ViewVarCharVector. 
This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param allocator allocator for memory management. - */ - public ViewVarCharVector(String name, BufferAllocator allocator) { - this(name, FieldType.nullable(MinorType.VIEWVARCHAR.getType()), allocator); - } - - /** - * Instantiate a ViewVarCharVector. This doesn't allocate any memory for the data in vector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector - * @param allocator allocator for memory management. - */ - public ViewVarCharVector(String name, FieldType fieldType, BufferAllocator allocator) { - this(new Field(name, fieldType, null), allocator); - } - - /** - * Instantiate a ViewVarCharVector. This doesn't allocate any memory for the data in vector. - * - * @param field field materialized by this vector - * @param allocator allocator for memory management. - */ - public ViewVarCharVector(Field field, BufferAllocator allocator) { - super(field, allocator); - } - - @Override - protected FieldReader getReaderImpl() { - return new ViewVarCharReaderImpl(ViewVarCharVector.this); - } - - /** - * Get a minor type for this vector. The vector holds values belonging to a particular type. - * - * @return {@link org.apache.arrow.vector.types.Types.MinorType} - */ - @Override - public MinorType getMinorType() { - return MinorType.VIEWVARCHAR; - } - - /*----------------------------------------------------------------* - | | - | vector value retrieval methods | - | | - *----------------------------------------------------------------*/ - - /** - * Get the variable length element at specified index as a byte array. 
- * - * @param index position of an element to get - * @return array of bytes for a non-null element, null otherwise - */ - public byte[] get(int index) { - assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; - } - return getData(index); - } - - /** - * Get the variable length element at specified index as Text. - * - * @param index position of an element to get - * @return Text object for a non-null element, null otherwise - */ - @Override - public Text getObject(int index) { - assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - return null; - } - - final Text result = new Text(); - read(index, result); - return result; - } - - /** - * Read the value at the given position to the given output buffer. The caller is responsible for - * checking for nullity first. - * - * @param index position of an element. - * @param buffer the buffer to write into. - */ - @Override - public void read(int index, ReusableBuffer buffer) { - getData(index, buffer); - } - - /** - * Get the variable length element at specified index and sets the state in provided holder. 
- * - * @param index position of an element to get - * @param holder data holder to be populated by this function - */ - public void get(int index, NullableViewVarCharHolder holder) { - final int dataLength = getValueLength(index); - if (isSet(index) == 0) { - holder.isSet = 0; - return; - } - holder.isSet = 1; - if (dataLength > INLINE_SIZE) { - // data is in the data buffer - // get buffer index - final int bufferIndex = - viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH); - // get data offset - final int dataOffset = - viewBuffer.getInt( - ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); - holder.buffer = dataBuffers.get(bufferIndex); - holder.start = dataOffset; - holder.end = dataOffset + dataLength; - } else { - final long dataOffset = ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH; - // data is in the value buffer - holder.buffer = viewBuffer; - holder.start = (int) dataOffset; - holder.end = (int) dataOffset + dataLength; - } - } - - /*----------------------------------------------------------------* - | | - | vector value setter methods | - | | - *----------------------------------------------------------------*/ - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void set(int index, ViewVarCharHolder holder) { - int start = holder.start; - int length = holder.end - start; - setBytes(index, holder.buffer, start, length); - lastSet = index; - } - - /** - * Same as {@link #set(int, ViewVarCharHolder)} except that it handles the case where index and - * length of a new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. 
- */ - public void setSafe(int index, ViewVarCharHolder holder) { - int length = holder.end - holder.start; - handleSafe(index, length); - set(index, holder); - } - - /** - * Set the variable length element at the specified index to the data buffer supplied in the - * holder. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void set(int index, NullableViewVarCharHolder holder) { - if (holder.isSet == 0) { - setNull(index); - } else { - BitVectorHelper.setBit(validityBuffer, index); - int start = holder.start; - int length = holder.end - start; - setBytes(index, holder.buffer, start, length); - } - lastSet = index; - } - - /** - * Same as {@link #set(int, NullableViewVarCharHolder)} except that it handles the case where - * index and length of a new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set - * @param holder holder that carries data buffer. - */ - public void setSafe(int index, NullableViewVarCharHolder holder) { - int length = holder.end - holder.start; - handleSafe(index, length); - set(index, holder); - } - - /** - * Set the variable length element at the specified index to the content in supplied Text. - * - * @param index position of the element to set - * @param text Text object with data - */ - public void set(int index, Text text) { - set(index, text.getBytes(), 0, (int) text.getLength()); - } - - /** - * Same as {@link #set(int, NullableViewVarCharHolder)} except that it handles the case where - * index and length of a new element are beyond the existing capacity of the vector. - * - * @param index position of the element to set. 
- * @param text Text object with data - */ - public void setSafe(int index, Text text) { - setSafe(index, text.getBytes(), 0, (int) text.getLength()); - } - - @Override - public void validateScalars() { - for (int i = 0; i < getValueCount(); ++i) { - byte[] value = get(i); - if (value != null) { - ValidateUtil.validateOrThrow( - Text.validateUTF8NoThrow(value), - "Non-UTF-8 data in VarCharVector at position " + i + "."); - } - } - } - - /*----------------------------------------------------------------* - | | - | vector transfer | - | | - *----------------------------------------------------------------*/ - - /** - * Construct a TransferPair comprising this and a target vector of the same type. - * - * @param ref name of the target vector - * @param allocator allocator for the target vector - * @return {@link TransferPair} (UnsupportedOperationException) - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new TransferImpl(ref, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. - * - * @param field The field materialized by this vector. - * @param allocator allocator for the target vector - * @return {@link TransferPair} (UnsupportedOperationException) - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator); - } - - /** - * Construct a TransferPair with a desired target vector of the same type. 
- * - * @param to the target for the transfer - * @return {@link TransferPair} (UnsupportedOperationException) - */ - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new TransferImpl((ViewVarCharVector) to); - } - - private class TransferImpl implements TransferPair { - ViewVarCharVector to; - - public TransferImpl(String ref, BufferAllocator allocator) { - to = new ViewVarCharVector(ref, field.getFieldType(), allocator); - } - - public TransferImpl(Field field, BufferAllocator allocator) { - to = new ViewVarCharVector(field, allocator); - } - - public TransferImpl(ViewVarCharVector to) { - this.to = to; - } - - @Override - public ViewVarCharVector getTo() { - return to; - } - - @Override - public void transfer() { - transferTo(to); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - splitAndTransferTo(startIndex, length, to); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - to.copyFromSafe(fromIndex, toIndex, ViewVarCharVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java deleted file mode 100644 index c838de60d841a..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.TransferPair; - -/** A zero length vector of any type. */ -public final class ZeroVector extends NullVector { - public static final ZeroVector INSTANCE = new ZeroVector(); - - /** - * Instantiate a ZeroVector. - * - * @param name name of the vector - */ - public ZeroVector(String name) { - super(name); - } - - /** - * Instantiate a ZeroVector. - * - * @param name name of the vector - * @param fieldType type of Field materialized by this vector. - */ - public ZeroVector(String name, FieldType fieldType) { - super(name, fieldType); - } - - /** - * Instantiate a ZeroVector. - * - * @param field field materialized by this vector. 
- */ - public ZeroVector(Field field) { - super(field); - } - - @Deprecated - public ZeroVector() {} - - @Override - public int getValueCount() { - return 0; - } - - @Override - public void setValueCount(int valueCount) {} - - @Override - public int getNullCount() { - return 0; - } - - @Override - public boolean isNull(int index) { - throw new IndexOutOfBoundsException(); - } - - @Override - public int hashCode(int index) { - return 0; - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - return ArrowBufPointer.NULL_HASH_CODE; - } - - @Override - public int getValueCapacity() { - return 0; - } - - @Override - public TransferPair getTransferPair(BufferAllocator allocator) { - return defaultPair; - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return defaultPair; - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return defaultPair; - } - - @Override - public TransferPair makeTransferPair(ValueVector target) { - return defaultPair; - } - - private final TransferPair defaultPair = - new TransferPair() { - @Override - public void transfer() {} - - @Override - public void splitAndTransfer(int startIndex, int length) {} - - @Override - public ValueVector getTo() { - return ZeroVector.this; - } - - @Override - public void copyValueSafe(int from, int to) {} - }; -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java deleted file mode 100644 index a5adec66fd04e..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.compare; - -import java.util.function.BiFunction; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers; - -/** Visitor to compare floating point vectors approximately. */ -public class ApproxEqualsVisitor extends RangeEqualsVisitor { - - /** Functions to calculate difference between float/double values. */ - private final VectorValueEqualizer floatDiffFunction; - - private final VectorValueEqualizer doubleDiffFunction; - - /** Default epsilons for diff functions. */ - public static final float DEFAULT_FLOAT_EPSILON = 1.0E-6f; - - public static final double DEFAULT_DOUBLE_EPSILON = 1.0E-6; - - /** - * Constructs a new instance with default tolerances. - * - * @param left left vector - * @param right right vector - */ - public ApproxEqualsVisitor(ValueVector left, ValueVector right) { - this(left, right, DEFAULT_FLOAT_EPSILON, DEFAULT_DOUBLE_EPSILON); - } - - /** - * Constructs a new instance. 
- * - * @param left left vector - * @param right right vector - * @param floatEpsilon difference for float values - * @param doubleEpsilon difference for double values - */ - public ApproxEqualsVisitor( - ValueVector left, ValueVector right, float floatEpsilon, double doubleEpsilon) { - this( - left, - right, - new ValueEpsilonEqualizers.Float4EpsilonEqualizer(floatEpsilon), - new ValueEpsilonEqualizers.Float8EpsilonEqualizer(doubleEpsilon)); - } - - /** Constructs a new instance. */ - public ApproxEqualsVisitor( - ValueVector left, - ValueVector right, - VectorValueEqualizer floatDiffFunction, - VectorValueEqualizer doubleDiffFunction) { - this(left, right, floatDiffFunction, doubleDiffFunction, DEFAULT_TYPE_COMPARATOR); - } - - /** - * Constructs a new instance. - * - * @param left the left vector. - * @param right the right vector. - * @param floatDiffFunction the equalizer for float values. - * @param doubleDiffFunction the equalizer for double values. - * @param typeComparator type comparator to compare vector type. 
- */ - public ApproxEqualsVisitor( - ValueVector left, - ValueVector right, - VectorValueEqualizer floatDiffFunction, - VectorValueEqualizer doubleDiffFunction, - BiFunction typeComparator) { - super(left, right, typeComparator); - this.floatDiffFunction = floatDiffFunction; - this.doubleDiffFunction = doubleDiffFunction; - } - - @Override - public Boolean visit(BaseFixedWidthVector left, Range range) { - if (left instanceof Float4Vector) { - if (!validate(left)) { - return false; - } - return float4ApproxEquals(range); - } else if (left instanceof Float8Vector) { - if (!validate(left)) { - return false; - } - return float8ApproxEquals(range); - } else { - return super.visit(left, range); - } - } - - @Override - protected ApproxEqualsVisitor createInnerVisitor( - ValueVector left, - ValueVector right, - BiFunction typeComparator) { - return new ApproxEqualsVisitor( - left, right, floatDiffFunction.clone(), doubleDiffFunction.clone(), typeComparator); - } - - private boolean float4ApproxEquals(Range range) { - Float4Vector leftVector = (Float4Vector) getLeft(); - Float4Vector rightVector = (Float4Vector) getRight(); - - for (int i = 0; i < range.getLength(); i++) { - int leftIndex = range.getLeftStart() + i; - int rightIndex = range.getRightStart() + i; - - if (!floatDiffFunction.valuesEqual(leftVector, leftIndex, rightVector, rightIndex)) { - return false; - } - } - return true; - } - - private boolean float8ApproxEquals(Range range) { - Float8Vector leftVector = (Float8Vector) getLeft(); - Float8Vector rightVector = (Float8Vector) getRight(); - - for (int i = 0; i < range.getLength(); i++) { - int leftIndex = range.getLeftStart() + i; - int rightIndex = range.getRightStart() + i; - - if (!doubleDiffFunction.valuesEqual(leftVector, leftIndex, rightVector, rightIndex)) { - return false; - } - } - return true; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java 
deleted file mode 100644 index 27bb04c8a5ea4..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.compare; - -/** Wrapper for the parameters of comparing a range of values in two vectors. */ -public class Range { - - /** Start position in the left vector. */ - private int leftStart = -1; - - /** Start position in the right vector. */ - private int rightStart = -1; - - /** Length of the range. */ - private int length = -1; - - /** Constructs a new instance. */ - public Range() {} - - /** - * Constructs a new instance. 
- * - * @param leftStart start index in left vector - * @param rightStart start index in right vector - * @param length length of range - */ - public Range(int leftStart, int rightStart, int length) { - this.leftStart = leftStart; - this.rightStart = rightStart; - this.length = length; - } - - public int getLeftStart() { - return leftStart; - } - - public int getRightStart() { - return rightStart; - } - - public int getLength() { - return length; - } - - public Range setLeftStart(int leftStart) { - this.leftStart = leftStart; - return this; - } - - public Range setRightStart(int rightStart) { - this.rightStart = rightStart; - return this; - } - - public Range setLength(int length) { - this.length = length; - return this; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java deleted file mode 100644 index abcf312c5ecfc..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java +++ /dev/null @@ -1,921 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.compare; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; - -import java.util.List; -import java.util.function.BiFunction; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.util.ByteFunctionHelpers; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseLargeVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthViewVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector; -import org.apache.arrow.vector.complex.BaseRepeatedValueVector; -import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.LargeListViewVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.RunEndEncodedVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; - -/** Visitor to compare a range of values for vectors. */ -public class RangeEqualsVisitor implements VectorVisitor { - - private ValueVector left; - private ValueVector right; - - private BiFunction typeComparator; - private boolean typeCompareResult; - - /** Default type comparator. 
*/ - public static final BiFunction DEFAULT_TYPE_COMPARATOR = - (v1, v2) -> new TypeEqualsVisitor(v2).equals(v1); - - /** - * Constructs a new instance with default type comparator. - * - * @param left left vector - * @param right right vector - */ - public RangeEqualsVisitor(ValueVector left, ValueVector right) { - this(left, right, DEFAULT_TYPE_COMPARATOR); - } - - /** - * Constructs a new instance. - * - * @param left left vector - * @param right right vector - * @param typeComparator type comparator to compare vector type. - */ - public RangeEqualsVisitor( - ValueVector left, - ValueVector right, - BiFunction typeComparator) { - this.left = left; - this.right = right; - this.typeComparator = typeComparator; - - Preconditions.checkArgument(left != null, "left vector cannot be null"); - Preconditions.checkArgument(right != null, "right vector cannot be null"); - - // type usually checks only once unless the left vector is changed. - checkType(); - } - - private void checkType() { - if (typeComparator == null || left == right) { - typeCompareResult = true; - } else { - typeCompareResult = typeComparator.apply(left, right); - } - } - - /** Validate the passed left vector, if it is changed, reset and check type. */ - protected boolean validate(ValueVector left) { - if (left != this.left) { - this.left = left; - checkType(); - } - return typeCompareResult; - } - - /** Check range equals. 
*/ - public boolean rangeEquals(Range range) { - if (!typeCompareResult) { - return false; - } - - Preconditions.checkArgument( - range.getLeftStart() >= 0, "leftStart %s must be non negative.", range.getLeftStart()); - Preconditions.checkArgument( - range.getRightStart() >= 0, "rightStart %s must be non negative.", range.getRightStart()); - - Preconditions.checkArgument( - range.getRightStart() + range.getLength() <= right.getValueCount(), - "(rightStart + length) %s out of range[0, %s].", - range.getRightStart() + range.getLength(), - right.getValueCount()); - Preconditions.checkArgument( - range.getLeftStart() + range.getLength() <= left.getValueCount(), - "(leftStart + length) %s out of range[0, %s].", - range.getLeftStart() + range.getLength(), - left.getValueCount()); - - return left.accept(this, range); - } - - public ValueVector getLeft() { - return left; - } - - public ValueVector getRight() { - return right; - } - - @Override - public Boolean visit(BaseFixedWidthVector left, Range range) { - if (!validate(left)) { - return false; - } - return compareBaseFixedWidthVectors(range); - } - - @Override - public Boolean visit(BaseVariableWidthVector left, Range range) { - if (!validate(left)) { - return false; - } - return compareBaseVariableWidthVectors(range); - } - - @Override - public Boolean visit(BaseLargeVariableWidthVector left, Range range) { - if (!validate(left)) { - return false; - } - return compareBaseLargeVariableWidthVectors(range); - } - - @Override - public Boolean visit(BaseVariableWidthViewVector left, Range range) { - if (!validate(left)) { - return false; - } - return compareBaseVariableWidthViewVectors(range); - } - - @Override - public Boolean visit(ListVector left, Range range) { - if (!validate(left)) { - return false; - } - return compareListVectors(range); - } - - @Override - public Boolean visit(FixedSizeListVector left, Range range) { - if (!validate(left)) { - return false; - } - return compareFixedSizeListVectors(range); - } - - 
@Override - public Boolean visit(LargeListVector left, Range range) { - if (!validate(left)) { - return false; - } - return compareLargeListVectors(range); - } - - @Override - public Boolean visit(NonNullableStructVector left, Range range) { - if (!validate(left)) { - return false; - } - return compareStructVectors(range); - } - - @Override - public Boolean visit(UnionVector left, Range range) { - if (!validate(left)) { - return false; - } - return compareUnionVectors(range); - } - - @Override - public Boolean visit(DenseUnionVector left, Range range) { - if (!validate(left)) { - return false; - } - return compareDenseUnionVectors(range); - } - - @Override - public Boolean visit(NullVector left, Range range) { - if (!validate(left)) { - return false; - } - return true; - } - - @Override - public Boolean visit(RunEndEncodedVector left, Range range) { - if (!validate(left)) { - return false; - } - return compareRunEndEncodedVectors(range); - } - - @Override - public Boolean visit(ExtensionTypeVector left, Range range) { - if (!(right instanceof ExtensionTypeVector) || !validate(left)) { - return false; - } - ValueVector rightUnderlying = ((ExtensionTypeVector) right).getUnderlyingVector(); - TypeEqualsVisitor typeVisitor = new TypeEqualsVisitor(rightUnderlying); - RangeEqualsVisitor underlyingVisitor = - createInnerVisitor( - left.getUnderlyingVector(), rightUnderlying, (l, r) -> typeVisitor.equals(l)); - return underlyingVisitor.rangeEquals(range); - } - - @Override - public Boolean visit(ListViewVector left, Range range) { - if (!validate(left)) { - return false; - } - return compareListViewVectors(range); - } - - @Override - public Boolean visit(LargeListViewVector left, Range range) { - if (!validate(left)) { - return false; - } - return compareLargeListViewVectors(range); - } - - protected boolean compareRunEndEncodedVectors(Range range) { - RunEndEncodedVector leftVector = (RunEndEncodedVector) left; - RunEndEncodedVector rightVector = (RunEndEncodedVector) 
right; - - final int leftRangeEnd = range.getLeftStart() + range.getLength(); - final int rightRangeEnd = range.getRightStart() + range.getLength(); - - FieldVector leftValuesVector = leftVector.getValuesVector(); - FieldVector rightValuesVector = rightVector.getValuesVector(); - - RangeEqualsVisitor innerVisitor = createInnerVisitor(leftValuesVector, rightValuesVector, null); - - int leftLogicalIndex = range.getLeftStart(); - int rightLogicalIndex = range.getRightStart(); - - while (leftLogicalIndex < leftRangeEnd) { - // TODO: implement it more efficient - // https://github.com/apache/arrow/issues/44157 - int leftPhysicalIndex = leftVector.getPhysicalIndex(leftLogicalIndex); - int rightPhysicalIndex = rightVector.getPhysicalIndex(rightLogicalIndex); - if (leftValuesVector.accept( - innerVisitor, new Range(leftPhysicalIndex, rightPhysicalIndex, 1))) { - int leftRunEnd = leftVector.getRunEnd(leftLogicalIndex); - int rightRunEnd = rightVector.getRunEnd(rightLogicalIndex); - - int leftRunLength = Math.min(leftRunEnd, leftRangeEnd) - leftLogicalIndex; - int rightRunLength = Math.min(rightRunEnd, rightRangeEnd) - rightLogicalIndex; - - if (leftRunLength != rightRunLength) { - return false; - } else { - leftLogicalIndex = leftRunEnd; - rightLogicalIndex = rightRunEnd; - } - } else { - return false; - } - } - - return true; - } - - protected RangeEqualsVisitor createInnerVisitor( - ValueVector leftInner, - ValueVector rightInner, - BiFunction typeComparator) { - return new RangeEqualsVisitor(leftInner, rightInner, typeComparator); - } - - protected boolean compareUnionVectors(Range range) { - UnionVector leftVector = (UnionVector) left; - UnionVector rightVector = (UnionVector) right; - - Range subRange = new Range(0, 0, 1); - for (int i = 0; i < range.getLength(); i++) { - subRange.setLeftStart(range.getLeftStart() + i).setRightStart(range.getRightStart() + i); - ValueVector leftSubVector = leftVector.getVector(range.getLeftStart() + i); - ValueVector rightSubVector = 
rightVector.getVector(range.getRightStart() + i); - - if (leftSubVector == null || rightSubVector == null) { - if (leftSubVector == rightSubVector) { - continue; - } else { - return false; - } - } - TypeEqualsVisitor typeVisitor = new TypeEqualsVisitor(rightSubVector); - RangeEqualsVisitor visitor = - createInnerVisitor( - leftSubVector, rightSubVector, (left, right) -> typeVisitor.equals(left)); - if (!visitor.rangeEquals(subRange)) { - return false; - } - } - return true; - } - - protected boolean compareDenseUnionVectors(Range range) { - DenseUnionVector leftVector = (DenseUnionVector) left; - DenseUnionVector rightVector = (DenseUnionVector) right; - - Range subRange = new Range(0, 0, 1); - for (int i = 0; i < range.getLength(); i++) { - boolean isLeftNull = leftVector.isNull(range.getLeftStart() + i); - boolean isRightNull = rightVector.isNull(range.getRightStart() + i); - - // compare nullabilities - if (isLeftNull || isRightNull) { - if (isLeftNull != isRightNull) { - // exactly one slot is null, unequal - return false; - } else { - // both slots are null, pass this iteration - continue; - } - } - - // compare type ids - byte leftTypeId = leftVector.getTypeId(range.getLeftStart() + i); - byte rightTypeId = rightVector.getTypeId(range.getRightStart() + i); - - if (leftTypeId != rightTypeId) { - return false; - } - - ValueVector leftSubVector = leftVector.getVectorByType(leftTypeId); - ValueVector rightSubVector = rightVector.getVectorByType(rightTypeId); - - if (leftSubVector == null || rightSubVector == null) { - if (leftSubVector != rightSubVector) { - // exactly one of the sub-vectors is null, unequal - return false; - } else { - // both sub-vectors are null, pass this iteration - continue; - } - } - - // compare values - int leftOffset = leftVector.getOffset(range.getLeftStart() + i); - int rightOffset = rightVector.getOffset(range.getRightStart() + i); - subRange.setLeftStart(leftOffset).setRightStart(rightOffset); - TypeEqualsVisitor typeVisitor = new 
TypeEqualsVisitor(rightSubVector); - RangeEqualsVisitor visitor = - createInnerVisitor( - leftSubVector, rightSubVector, (left, right) -> typeVisitor.equals(left)); - if (!visitor.rangeEquals(subRange)) { - return false; - } - } - return true; - } - - private boolean compareStructVectorsInternal( - NonNullableStructVector leftVector, NonNullableStructVector rightVector, Range range) { - List leftChildNames = leftVector.getChildFieldNames(); - for (String name : leftChildNames) { - RangeEqualsVisitor visitor = - createInnerVisitor( - leftVector.getChild(name), rightVector.getChild(name), /*type comparator*/ null); - if (!visitor.rangeEquals(range)) { - return false; - } - } - return true; - } - - protected boolean compareStructVectors(Range range) { - NonNullableStructVector leftVector = (NonNullableStructVector) left; - NonNullableStructVector rightVector = (NonNullableStructVector) right; - - List leftChildNames = leftVector.getChildFieldNames(); - if (!leftChildNames.equals(rightVector.getChildFieldNames())) { - return false; - } - - if (!(leftVector instanceof StructVector || rightVector instanceof StructVector)) { - // neither struct vector is nullable - return compareStructVectorsInternal(leftVector, rightVector, range); - } - - Range subRange = new Range(0, 0, 0); - boolean lastIsNull = true; - int lastNullIndex = -1; - for (int i = 0; i < range.getLength(); i++) { - int leftIndex = range.getLeftStart() + i; - int rightIndex = range.getRightStart() + i; - boolean isLeftNull = leftVector.isNull(leftIndex); - boolean isRightNull = rightVector.isNull(rightIndex); - - if (isLeftNull != isRightNull) { - // exactly one slot is null, unequal - return false; - } - if (isLeftNull) { - // slots are null - if (!lastIsNull) { - subRange - .setLeftStart(range.getLeftStart() + lastNullIndex + 1) - .setRightStart(range.getRightStart() + lastNullIndex + 1) - .setLength(i - (lastNullIndex + 1)); - if (!compareStructVectorsInternal(leftVector, rightVector, subRange)) { - 
return false; - } - } - lastIsNull = true; - lastNullIndex = i; - } else { - // slots are not null - lastIsNull = false; - } - } - if (!lastIsNull) { - subRange - .setLeftStart(range.getLeftStart() + lastNullIndex + 1) - .setRightStart(range.getRightStart() + lastNullIndex + 1) - .setLength(range.getLength() - (lastNullIndex + 1)); - return compareStructVectorsInternal(leftVector, rightVector, subRange); - } - return true; - } - - protected boolean compareBaseFixedWidthVectors(Range range) { - BaseFixedWidthVector leftVector = (BaseFixedWidthVector) left; - BaseFixedWidthVector rightVector = (BaseFixedWidthVector) right; - - for (int i = 0; i < range.getLength(); i++) { - int leftIndex = range.getLeftStart() + i; - int rightIndex = range.getRightStart() + i; - - boolean isNull = leftVector.isNull(leftIndex); - - if (isNull != rightVector.isNull(rightIndex)) { - return false; - } - - int typeWidth = leftVector.getTypeWidth(); - if (!isNull) { - if (!(leftVector instanceof BitVector)) { - int startIndexLeft = typeWidth * leftIndex; - int endIndexLeft = typeWidth * (leftIndex + 1); - - int startIndexRight = typeWidth * rightIndex; - int endIndexRight = typeWidth * (rightIndex + 1); - - int ret = - ByteFunctionHelpers.equal( - leftVector.getDataBuffer(), - startIndexLeft, - endIndexLeft, - rightVector.getDataBuffer(), - startIndexRight, - endIndexRight); - - if (ret == 0) { - return false; - } - } else { - boolean ret = - ((BitVector) leftVector).get(leftIndex) == ((BitVector) rightVector).get(rightIndex); - if (!ret) { - return false; - } - } - } - } - return true; - } - - protected boolean compareBaseVariableWidthVectors(Range range) { - BaseVariableWidthVector leftVector = (BaseVariableWidthVector) left; - BaseVariableWidthVector rightVector = (BaseVariableWidthVector) right; - - for (int i = 0; i < range.getLength(); i++) { - int leftIndex = range.getLeftStart() + i; - int rightIndex = range.getRightStart() + i; - - boolean isNull = leftVector.isNull(leftIndex); - 
if (isNull != rightVector.isNull(rightIndex)) { - return false; - } - - int offsetWidth = BaseVariableWidthVector.OFFSET_WIDTH; - - if (!isNull) { - final int startIndexLeft = leftVector.getOffsetBuffer().getInt(leftIndex * offsetWidth); - final int endIndexLeft = leftVector.getOffsetBuffer().getInt((leftIndex + 1) * offsetWidth); - - final int startIndexRight = rightVector.getOffsetBuffer().getInt(rightIndex * offsetWidth); - final int endIndexRight = - rightVector.getOffsetBuffer().getInt((rightIndex + 1) * offsetWidth); - - int ret = - ByteFunctionHelpers.equal( - leftVector.getDataBuffer(), - startIndexLeft, - endIndexLeft, - rightVector.getDataBuffer(), - startIndexRight, - endIndexRight); - - if (ret == 0) { - return false; - } - } - } - return true; - } - - protected boolean compareBaseLargeVariableWidthVectors(Range range) { - BaseLargeVariableWidthVector leftVector = (BaseLargeVariableWidthVector) left; - BaseLargeVariableWidthVector rightVector = (BaseLargeVariableWidthVector) right; - - for (int i = 0; i < range.getLength(); i++) { - int leftIndex = range.getLeftStart() + i; - int rightIndex = range.getRightStart() + i; - - boolean isNull = leftVector.isNull(leftIndex); - if (isNull != rightVector.isNull(rightIndex)) { - return false; - } - - int offsetWidth = BaseLargeVariableWidthVector.OFFSET_WIDTH; - - if (!isNull) { - final long startIndexLeft = - leftVector.getOffsetBuffer().getLong((long) leftIndex * offsetWidth); - final long endIndexLeft = - leftVector.getOffsetBuffer().getLong((long) (leftIndex + 1) * offsetWidth); - - final long startIndexRight = - rightVector.getOffsetBuffer().getLong((long) rightIndex * offsetWidth); - final long endIndexRight = - rightVector.getOffsetBuffer().getLong((long) (rightIndex + 1) * offsetWidth); - - int ret = - ByteFunctionHelpers.equal( - leftVector.getDataBuffer(), - startIndexLeft, - endIndexLeft, - rightVector.getDataBuffer(), - startIndexRight, - endIndexRight); - - if (ret == 0) { - return false; - } - } - 
} - return true; - } - - protected boolean compareBaseVariableWidthViewVectors(Range range) { - BaseVariableWidthViewVector leftVector = (BaseVariableWidthViewVector) left; - BaseVariableWidthViewVector rightVector = (BaseVariableWidthViewVector) right; - - final ArrowBuf leftViewBuffer = leftVector.getDataBuffer(); - final ArrowBuf rightViewBuffer = rightVector.getDataBuffer(); - - final int elementSize = BaseVariableWidthViewVector.ELEMENT_SIZE; - final int lengthWidth = BaseVariableWidthViewVector.LENGTH_WIDTH; - final int prefixWidth = BaseVariableWidthViewVector.PREFIX_WIDTH; - final int bufIndexWidth = BaseVariableWidthViewVector.BUF_INDEX_WIDTH; - - List leftDataBuffers = leftVector.getDataBuffers(); - List rightDataBuffers = rightVector.getDataBuffers(); - - for (int i = 0; i < range.getLength(); i++) { - int leftIndex = range.getLeftStart() + i; - int rightIndex = range.getRightStart() + i; - - boolean isNull = leftVector.isNull(leftIndex); - if (isNull != rightVector.isNull(rightIndex)) { - return false; - } - - if (isNull) { - continue; - } - - int startLeftByteOffset = leftIndex * elementSize; - - int startRightByteOffset = rightIndex * elementSize; - - int leftDataBufferValueLength = leftVector.getValueLength(leftIndex); - int rightDataBufferValueLength = rightVector.getValueLength(rightIndex); - - if (leftDataBufferValueLength != rightDataBufferValueLength) { - return false; - } - - if (leftDataBufferValueLength > BaseVariableWidthViewVector.INLINE_SIZE) { - // if the value is stored in the dataBuffers - int leftDataBufferIndex = - leftViewBuffer.getInt(startLeftByteOffset + lengthWidth + prefixWidth); - int rightDataBufferIndex = - rightViewBuffer.getInt(startRightByteOffset + lengthWidth + prefixWidth); - - final int leftDataOffset = - leftViewBuffer.getInt(startLeftByteOffset + lengthWidth + prefixWidth + bufIndexWidth); - final int rightDataOffset = - rightViewBuffer.getInt( - startRightByteOffset + lengthWidth + prefixWidth + bufIndexWidth); - - 
ArrowBuf leftDataBuffer = leftDataBuffers.get(leftDataBufferIndex); - ArrowBuf rightDataBuffer = rightDataBuffers.get(rightDataBufferIndex); - - // check equality in the considered string stored in the dataBuffers - int retDataBuf = - ByteFunctionHelpers.equal( - leftDataBuffer, - leftDataOffset, - leftDataOffset + leftDataBufferValueLength, - rightDataBuffer, - rightDataOffset, - rightDataOffset + rightDataBufferValueLength); - - if (retDataBuf == 0) { - return false; - } - } else { - // if the value is stored in the view - final int leftDataOffset = startLeftByteOffset + lengthWidth; - final int rightDataOffset = startRightByteOffset + lengthWidth; - - // check equality in the considered string stored in the view - int retDataBuf = - ByteFunctionHelpers.equal( - leftViewBuffer, - leftDataOffset, - leftDataOffset + leftDataBufferValueLength, - rightViewBuffer, - rightDataOffset, - rightDataOffset + rightDataBufferValueLength); - - if (retDataBuf == 0) { - return false; - } - } - } - return true; - } - - protected boolean compareListVectors(Range range) { - ListVector leftVector = (ListVector) left; - ListVector rightVector = (ListVector) right; - - RangeEqualsVisitor innerVisitor = - createInnerVisitor( - leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null); - Range innerRange = new Range(); - - for (int i = 0; i < range.getLength(); i++) { - int leftIndex = range.getLeftStart() + i; - int rightIndex = range.getRightStart() + i; - - boolean isNull = leftVector.isNull(leftIndex); - if (isNull != rightVector.isNull(rightIndex)) { - return false; - } - - int offsetWidth = BaseRepeatedValueVector.OFFSET_WIDTH; - - if (!isNull) { - final int startIndexLeft = leftVector.getOffsetBuffer().getInt(leftIndex * offsetWidth); - final int endIndexLeft = leftVector.getOffsetBuffer().getInt((leftIndex + 1) * offsetWidth); - - final int startIndexRight = rightVector.getOffsetBuffer().getInt(rightIndex * offsetWidth); - final int endIndexRight = - 
rightVector.getOffsetBuffer().getInt((rightIndex + 1) * offsetWidth); - - if ((endIndexLeft - startIndexLeft) != (endIndexRight - startIndexRight)) { - return false; - } - - innerRange = - innerRange - .setRightStart(startIndexRight) - .setLeftStart(startIndexLeft) - .setLength(endIndexLeft - startIndexLeft); - if (!innerVisitor.rangeEquals(innerRange)) { - return false; - } - } - } - return true; - } - - protected boolean compareFixedSizeListVectors(Range range) { - FixedSizeListVector leftVector = (FixedSizeListVector) left; - FixedSizeListVector rightVector = (FixedSizeListVector) right; - - if (leftVector.getListSize() != rightVector.getListSize()) { - return false; - } - - int listSize = leftVector.getListSize(); - RangeEqualsVisitor innerVisitor = - createInnerVisitor( - leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null); - Range innerRange = new Range(0, 0, listSize); - - for (int i = 0; i < range.getLength(); i++) { - int leftIndex = range.getLeftStart() + i; - int rightIndex = range.getRightStart() + i; - - boolean isNull = leftVector.isNull(leftIndex); - if (isNull != rightVector.isNull(rightIndex)) { - return false; - } - - if (!isNull) { - final int startIndexLeft = leftIndex * listSize; - final int endIndexLeft = (leftIndex + 1) * listSize; - - final int startIndexRight = rightIndex * listSize; - final int endIndexRight = (rightIndex + 1) * listSize; - - if ((endIndexLeft - startIndexLeft) != (endIndexRight - startIndexRight)) { - return false; - } - - innerRange = innerRange.setLeftStart(startIndexLeft).setRightStart(startIndexRight); - if (!innerVisitor.rangeEquals(innerRange)) { - return false; - } - } - } - return true; - } - - protected boolean compareLargeListVectors(Range range) { - LargeListVector leftVector = (LargeListVector) left; - LargeListVector rightVector = (LargeListVector) right; - - RangeEqualsVisitor innerVisitor = - createInnerVisitor( - leftVector.getDataVector(), rightVector.getDataVector(), /*type 
comparator*/ null); - Range innerRange = new Range(); - - for (int i = 0; i < range.getLength(); i++) { - int leftIndex = range.getLeftStart() + i; - int rightIndex = range.getRightStart() + i; - - boolean isNull = leftVector.isNull(leftIndex); - if (isNull != rightVector.isNull(rightIndex)) { - return false; - } - - long offsetWidth = LargeListVector.OFFSET_WIDTH; - - if (!isNull) { - final long startIndexLeft = - leftVector.getOffsetBuffer().getLong((long) leftIndex * offsetWidth); - final long endIndexLeft = - leftVector.getOffsetBuffer().getLong((long) (leftIndex + 1) * offsetWidth); - - final long startIndexRight = - rightVector.getOffsetBuffer().getLong((long) rightIndex * offsetWidth); - final long endIndexRight = - rightVector.getOffsetBuffer().getLong((long) (rightIndex + 1) * offsetWidth); - - if ((endIndexLeft - startIndexLeft) != (endIndexRight - startIndexRight)) { - return false; - } - - innerRange = - innerRange // TODO revisit these casts when long indexing is finished - .setRightStart(checkedCastToInt(startIndexRight)) - .setLeftStart(checkedCastToInt(startIndexLeft)) - .setLength(checkedCastToInt(endIndexLeft - startIndexLeft)); - if (!innerVisitor.rangeEquals(innerRange)) { - return false; - } - } - } - return true; - } - - protected boolean compareListViewVectors(Range range) { - ListViewVector leftVector = (ListViewVector) left; - ListViewVector rightVector = (ListViewVector) right; - - RangeEqualsVisitor innerVisitor = - createInnerVisitor( - leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null); - Range innerRange = new Range(); - - for (int i = 0; i < range.getLength(); i++) { - int leftIndex = range.getLeftStart() + i; - int rightIndex = range.getRightStart() + i; - - boolean isNull = leftVector.isNull(leftIndex); - if (isNull != rightVector.isNull(rightIndex)) { - return false; - } - - int offsetWidth = BaseRepeatedValueViewVector.OFFSET_WIDTH; - int sizeWidth = BaseRepeatedValueViewVector.SIZE_WIDTH; - - if 
(!isNull) { - final int startIndexLeft = - leftVector.getOffsetBuffer().getInt((long) leftIndex * offsetWidth); - final int leftSize = leftVector.getSizeBuffer().getInt((long) leftIndex * sizeWidth); - - final int startIndexRight = - rightVector.getOffsetBuffer().getInt((long) rightIndex * offsetWidth); - final int rightSize = rightVector.getSizeBuffer().getInt((long) rightIndex * sizeWidth); - - if (leftSize != rightSize) { - return false; - } - - innerRange = - innerRange - .setRightStart(startIndexRight) - .setLeftStart(startIndexLeft) - .setLength(leftSize); - if (!innerVisitor.rangeEquals(innerRange)) { - return false; - } - } - } - return true; - } - - protected boolean compareLargeListViewVectors(Range range) { - LargeListViewVector leftVector = (LargeListViewVector) left; - LargeListViewVector rightVector = (LargeListViewVector) right; - - RangeEqualsVisitor innerVisitor = - createInnerVisitor( - leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null); - Range innerRange = new Range(); - - for (int i = 0; i < range.getLength(); i++) { - int leftIndex = range.getLeftStart() + i; - int rightIndex = range.getRightStart() + i; - - boolean isNull = leftVector.isNull(leftIndex); - if (isNull != rightVector.isNull(rightIndex)) { - return false; - } - - int offsetWidth = BaseLargeRepeatedValueViewVector.OFFSET_WIDTH; - int sizeWidth = BaseLargeRepeatedValueViewVector.SIZE_WIDTH; - - if (!isNull) { - final int startIndexLeft = - leftVector.getOffsetBuffer().getInt((long) leftIndex * offsetWidth); - final int leftSize = leftVector.getSizeBuffer().getInt((long) leftIndex * sizeWidth); - - final int startIndexRight = - rightVector.getOffsetBuffer().getInt((long) rightIndex * offsetWidth); - final int rightSize = rightVector.getSizeBuffer().getInt((long) rightIndex * sizeWidth); - - if (leftSize != rightSize) { - return false; - } - - innerRange = - innerRange - .setRightStart(startIndexRight) - .setLeftStart(startIndexLeft) - 
.setLength(leftSize); - if (!innerVisitor.rangeEquals(innerRange)) { - return false; - } - } - } - return true; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java deleted file mode 100644 index 30b2f511a0445..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.compare; - -import java.util.List; -import java.util.Objects; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseLargeVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthViewVector; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.LargeListViewVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.RunEndEncodedVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.types.pojo.Field; - -/** Visitor to compare type equals for vectors. */ -public class TypeEqualsVisitor implements VectorVisitor { - - private final ValueVector right; - - private final boolean checkName; - private final boolean checkMetadata; - - /** Construct an instance. */ - public TypeEqualsVisitor(ValueVector right) { - this(right, true, true); - } - - /** - * Construct an instance. - * - * @param right right vector - * @param checkName whether checks names - * @param checkMetadata whether checks metadata - */ - public TypeEqualsVisitor(ValueVector right, boolean checkName, boolean checkMetadata) { - this.right = right; - this.checkName = checkName; - this.checkMetadata = checkMetadata; - } - - /** Check type equals without passing IN param in VectorVisitor. 
*/ - public boolean equals(ValueVector left) { - return left.accept(this, null); - } - - @Override - public Boolean visit(BaseFixedWidthVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - @Override - public Boolean visit(BaseVariableWidthVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - @Override - public Boolean visit(BaseLargeVariableWidthVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - @Override - public Boolean visit(BaseVariableWidthViewVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - @Override - public Boolean visit(ListVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - @Override - public Boolean visit(FixedSizeListVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - @Override - public Boolean visit(LargeListVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - @Override - public Boolean visit(NonNullableStructVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - @Override - public Boolean visit(UnionVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - @Override - public Boolean visit(DenseUnionVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - @Override - public Boolean visit(NullVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - @Override - public Boolean visit(ExtensionTypeVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - @Override - public Boolean visit(ListViewVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - @Override - public Boolean visit(LargeListViewVector left, Void value) { - return compareField(left.getField(), 
right.getField()); - } - - @Override - public Boolean visit(RunEndEncodedVector left, Void value) { - return compareField(left.getField(), right.getField()); - } - - private boolean compareField(Field leftField, Field rightField) { - - if (leftField == rightField) { - return true; - } - - return (!checkName || Objects.equals(leftField.getName(), rightField.getName())) - && Objects.equals(leftField.isNullable(), rightField.isNullable()) - && Objects.equals(leftField.getType(), rightField.getType()) - && Objects.equals(leftField.getDictionary(), rightField.getDictionary()) - && (!checkMetadata || Objects.equals(leftField.getMetadata(), rightField.getMetadata())) - && compareChildren(leftField.getChildren(), rightField.getChildren()); - } - - private boolean compareChildren(List leftChildren, List rightChildren) { - if (leftChildren.size() != rightChildren.size()) { - return false; - } - - for (int i = 0; i < leftChildren.size(); i++) { - if (!compareField(leftChildren.get(i), rightChildren.get(i))) { - return false; - } - } - return true; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorEqualsVisitor.java deleted file mode 100644 index 966ba4c4fbcf0..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorEqualsVisitor.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.compare; - -import static org.apache.arrow.vector.compare.RangeEqualsVisitor.DEFAULT_TYPE_COMPARATOR; - -import java.util.function.BiFunction; -import org.apache.arrow.vector.ValueVector; - -/** Visitor to compare vectors equal. */ -public class VectorEqualsVisitor { - - /** - * Checks if two vectors are equals with default type comparator. - * - * @param left the left vector to compare. - * @param right the right vector to compare. - * @return true if the vectors are equal, and false otherwise. - */ - public static boolean vectorEquals(ValueVector left, ValueVector right) { - return vectorEquals(left, right, DEFAULT_TYPE_COMPARATOR); - } - - /** - * Checks if two vectors are equals. - * - * @param left the left vector to compare. - * @param right the right vector to compare. - * @param typeComparator type comparator to compare vector type. - * @return true if the vectors are equal, and false otherwise. 
- */ - public static boolean vectorEquals( - ValueVector left, - ValueVector right, - BiFunction typeComparator) { - - if (left.getValueCount() != right.getValueCount()) { - return false; - } - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(left, right, typeComparator); - return visitor.rangeEquals(new Range(0, 0, left.getValueCount())); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorValueEqualizer.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorValueEqualizer.java deleted file mode 100644 index ad90c29a5326f..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorValueEqualizer.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.compare; - -import org.apache.arrow.vector.ValueVector; - -/** - * A function to determine if two vectors are equal at specified positions. - * - * @param the vector type. - */ -public interface VectorValueEqualizer extends Cloneable { - - /** - * Checks if the vectors are equal at the given positions, given that the values at both positions - * are non-null. - * - * @param vector1 the first vector. 
- * @param index1 index in the first vector. - * @param vector2 the second vector. - * @param index2 index in the second vector. - * @return true if the two values are considered to be equal, and false otherwise. - */ - boolean valuesEqual(V vector1, int index1, V vector2, int index2); - - /** - * Creates a equalizer of the same type. - * - * @return the newly created equalizer. - */ - VectorValueEqualizer clone(); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java deleted file mode 100644 index 989c57a0c93d0..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.compare; - -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseLargeVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthViewVector; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.LargeListViewVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.RunEndEncodedVector; -import org.apache.arrow.vector.complex.UnionVector; - -/** - * Generic visitor to visit a {@link org.apache.arrow.vector.ValueVector}. - * - * @param the output result type. - * @param the input data together with visitor. 
- */ -public interface VectorVisitor { - - OUT visit(BaseFixedWidthVector left, IN value); - - OUT visit(BaseVariableWidthVector left, IN value); - - OUT visit(BaseLargeVariableWidthVector left, IN value); - - OUT visit(BaseVariableWidthViewVector left, IN value); - - OUT visit(ListVector left, IN value); - - OUT visit(FixedSizeListVector left, IN value); - - OUT visit(LargeListVector left, IN value); - - OUT visit(NonNullableStructVector left, IN value); - - OUT visit(UnionVector left, IN value); - - OUT visit(DenseUnionVector left, IN value); - - OUT visit(NullVector left, IN value); - - OUT visit(ExtensionTypeVector left, IN value); - - default OUT visit(ListViewVector left, IN value) { - throw new UnsupportedOperationException("VectorVisitor for ListViewVector is not supported."); - } - - default OUT visit(LargeListViewVector left, IN value) { - throw new UnsupportedOperationException( - "VectorVisitor for LargeListViewVector is not supported."); - } - - default OUT visit(RunEndEncodedVector left, IN value) { - throw new UnsupportedOperationException( - "VectorVisitor for LargeListViewVector is not supported."); - }; -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/util/ValueEpsilonEqualizers.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/util/ValueEpsilonEqualizers.java deleted file mode 100644 index c962d9ae5f0be..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/util/ValueEpsilonEqualizers.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.compare.util; - -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.FloatingPointVector; -import org.apache.arrow.vector.compare.VectorValueEqualizer; - -/** - * Vector value equalizers that regard values as equal if their difference is within a small - * threshold (epsilon). - */ -public class ValueEpsilonEqualizers { - - private ValueEpsilonEqualizers() {} - - /** Difference function for floating point values. */ - public static class FloatingPointEpsilonEqualizer - implements VectorValueEqualizer { - private final double epsilon; - - public FloatingPointEpsilonEqualizer(double epsilon) { - this.epsilon = epsilon; - } - - @Override - public final boolean valuesEqual( - FloatingPointVector vector1, int index1, FloatingPointVector vector2, int index2) { - boolean isNull1 = vector1.isNull(index1); - boolean isNull2 = vector2.isNull(index2); - - if (isNull1 || isNull2) { - return isNull1 == isNull2; - } - - double d1 = vector1.getValueAsDouble(index1); - double d2 = vector2.getValueAsDouble(index2); - - if (Double.isNaN(d1)) { - return Double.isNaN(d2); - } - if (Double.isInfinite(d1)) { - return Double.isInfinite(d2) && Math.signum(d1) == Math.signum(d2); - } - - return Math.abs(d1 - d2) <= epsilon; - } - - @Override - public VectorValueEqualizer clone() { - return new FloatingPointEpsilonEqualizer(epsilon); - } - } - - /** Difference function for float values. 
*/ - public static class Float4EpsilonEqualizer implements VectorValueEqualizer { - private final float epsilon; - - public Float4EpsilonEqualizer(float epsilon) { - this.epsilon = epsilon; - } - - @Override - public final boolean valuesEqual( - Float4Vector vector1, int index1, Float4Vector vector2, int index2) { - boolean isNull1 = vector1.isNull(index1); - boolean isNull2 = vector2.isNull(index2); - - if (isNull1 || isNull2) { - return isNull1 == isNull2; - } - - float f1 = vector1.get(index1); - float f2 = vector2.get(index2); - - if (Float.isNaN(f1)) { - return Float.isNaN(f2); - } - if (Float.isInfinite(f1)) { - return Float.isInfinite(f2) && Math.signum(f1) == Math.signum(f2); - } - - return Math.abs(f1 - f2) <= epsilon; - } - - @Override - public VectorValueEqualizer clone() { - return new Float4EpsilonEqualizer(epsilon); - } - } - - /** Difference function for double values. */ - public static class Float8EpsilonEqualizer implements VectorValueEqualizer { - private final double epsilon; - - public Float8EpsilonEqualizer(double epsilon) { - this.epsilon = epsilon; - } - - @Override - public final boolean valuesEqual( - Float8Vector vector1, int index1, Float8Vector vector2, int index2) { - boolean isNull1 = vector1.isNull(index1); - boolean isNull2 = vector2.isNull(index2); - - if (isNull1 || isNull2) { - return isNull1 == isNull2; - } - - double d1 = vector1.get(index1); - double d2 = vector2.get(index2); - - if (Double.isNaN(d1)) { - return Double.isNaN(d2); - } - if (Double.isInfinite(d1)) { - return Double.isInfinite(d2) && Math.signum(d1) == Math.signum(d2); - } - - return Math.abs(d1 - d2) <= epsilon; - } - - @Override - public VectorValueEqualizer clone() { - return new Float8EpsilonEqualizer(epsilon); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java deleted file mode 100644 index 
a6a71cf1a4190..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.vector.DensityAwareVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; -import org.apache.arrow.vector.types.pojo.ArrowType.List; -import org.apache.arrow.vector.types.pojo.ArrowType.ListView; -import org.apache.arrow.vector.types.pojo.ArrowType.Struct; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.ValueVectorUtility; - -/** - * Base class for composite vectors. - * - *

    This class implements common functionality of composite vectors. - */ -public abstract class AbstractContainerVector implements ValueVector, DensityAwareVector { - static final org.slf4j.Logger logger = - org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class); - - protected final String name; - protected final BufferAllocator allocator; - protected final CallBack callBack; - - protected AbstractContainerVector(String name, BufferAllocator allocator, CallBack callBack) { - this.name = name; - this.allocator = allocator; - this.callBack = callBack; - } - - /** Representation of vector suitable for debugging. */ - @Override - public String toString() { - return ValueVectorUtility.getToString(this, 0, getValueCount()); - } - - @Override - public void allocateNew() throws OutOfMemoryException { - if (!allocateNewSafe()) { - throw new OutOfMemoryException(); - } - } - - @Override - public BufferAllocator getAllocator() { - return allocator; - } - - /** - * Returns a {@link org.apache.arrow.vector.ValueVector} corresponding to the given field name if - * exists or null. - * - * @param name the name of the child to return - * @return the corresponding FieldVector - */ - public FieldVector getChild(String name) { - return getChild(name, FieldVector.class); - } - - /** Clears out all underlying child vectors. */ - @Override - public void close() { - for (ValueVector vector : (Iterable) this) { - vector.close(); - } - } - - protected T typeify(ValueVector v, Class clazz) { - if (clazz.isAssignableFrom(v.getClass())) { - return clazz.cast(v); - } - throw new IllegalStateException( - String.format( - "Vector requested [%s] was different than type stored [%s]. 
Arrow " - + "doesn't yet support heterogeneous types.", - clazz.getSimpleName(), v.getClass().getSimpleName())); - } - - protected boolean supportsDirectRead() { - return false; - } - - // return the number of child vectors - public abstract int size(); - - // add a new vector with the input FieldType or return the existing vector if we already added one - // with the same name - public abstract T addOrGet( - String name, FieldType fieldType, Class clazz); - - // return the child vector with the input name - public abstract T getChild(String name, Class clazz); - - // return the child vector's ordinal in the composite container - public abstract VectorWithOrdinal getChildVectorWithOrdinal(String name); - - public StructVector addOrGetStruct(String name) { - return addOrGet(name, FieldType.nullable(new Struct()), StructVector.class); - } - - public ListVector addOrGetList(String name) { - return addOrGet(name, FieldType.nullable(new List()), ListVector.class); - } - - public ListViewVector addOrGetListView(String name) { - return addOrGet(name, FieldType.nullable(new ListView()), ListViewVector.class); - } - - public UnionVector addOrGetUnion(String name) { - return addOrGet(name, FieldType.nullable(MinorType.UNION.getType()), UnionVector.class); - } - - public FixedSizeListVector addOrGetFixedSizeList(String name, int listSize) { - return addOrGet( - name, FieldType.nullable(new FixedSizeList(listSize)), FixedSizeListVector.class); - } - - public MapVector addOrGetMap(String name, boolean keysSorted) { - return addOrGet(name, FieldType.nullable(new ArrowType.Map(keysSorted)), MapVector.class); - } - - @Override - public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { - throw new UnsupportedOperationException(); - } - - @Override - public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { - throw new UnsupportedOperationException(); - } - - @Override - public String getName() { - return name; - } -} diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java deleted file mode 100644 index 2921e43cb6410..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java +++ /dev/null @@ -1,431 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Locale; -import java.util.stream.Collectors; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.PromotableMultiMapWithOrdinal; -import org.apache.arrow.vector.util.ValueVectorUtility; - -/** Base class for StructVectors. 
Currently used by NonNullableStructVector */ -public abstract class AbstractStructVector extends AbstractContainerVector { - private static final org.slf4j.Logger logger = - org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class); - private static final String STRUCT_CONFLICT_POLICY_ENV = "ARROW_STRUCT_CONFLICT_POLICY"; - private static final String STRUCT_CONFLICT_POLICY_JVM = "arrow.struct.conflict.policy"; - private static final ConflictPolicy DEFAULT_CONFLICT_POLICY; - // Maintains a map with key as field name and value is the vector itself - private final PromotableMultiMapWithOrdinal vectors; - protected final boolean allowConflictPolicyChanges; - private ConflictPolicy conflictPolicy; - - static { - String conflictPolicyStr = - System.getProperty(STRUCT_CONFLICT_POLICY_JVM, ConflictPolicy.CONFLICT_REPLACE.toString()); - if (conflictPolicyStr == null) { - conflictPolicyStr = System.getenv(STRUCT_CONFLICT_POLICY_ENV); - } - ConflictPolicy conflictPolicy; - try { - conflictPolicy = ConflictPolicy.valueOf(conflictPolicyStr.toUpperCase(Locale.ROOT)); - } catch (Exception e) { - conflictPolicy = ConflictPolicy.CONFLICT_REPLACE; - } - DEFAULT_CONFLICT_POLICY = conflictPolicy; - } - - /** Policy to determine how to react when duplicate columns are encountered. */ - public enum ConflictPolicy { - // Ignore the conflict and append the field. This is the default behaviour - CONFLICT_APPEND, - // Keep the existing field and ignore the newer one. - CONFLICT_IGNORE, - // Replace the existing field with the newer one. - CONFLICT_REPLACE, - // Refuse the new field and error out. - CONFLICT_ERROR - } - - /** Base constructor that sets default conflict policy to APPEND. */ - protected AbstractStructVector( - String name, - BufferAllocator allocator, - CallBack callBack, - ConflictPolicy conflictPolicy, - boolean allowConflictPolicyChanges) { - super(name, allocator, callBack); - this.conflictPolicy = conflictPolicy == null ? 
DEFAULT_CONFLICT_POLICY : conflictPolicy; - this.vectors = - new PromotableMultiMapWithOrdinal<>(allowConflictPolicyChanges, this.conflictPolicy); - this.allowConflictPolicyChanges = allowConflictPolicyChanges; - } - - /** Set conflict policy and return last conflict policy state. */ - public ConflictPolicy setConflictPolicy(ConflictPolicy conflictPolicy) { - ConflictPolicy tmp = this.conflictPolicy; - this.conflictPolicy = conflictPolicy; - this.vectors.setConflictPolicy(conflictPolicy); - return tmp; - } - - public ConflictPolicy getConflictPolicy() { - return conflictPolicy; - } - - @Override - public void close() { - for (final ValueVector valueVector : vectors.values()) { - valueVector.close(); - } - vectors.clear(); - - super.close(); - } - - @Override - public boolean allocateNewSafe() { - /* boolean to keep track if all the memory allocation were successful - * Used in the case of composite vectors when we need to allocate multiple - * buffers for multiple vectors. If one of the allocations failed we need to - * clear all the memory that we allocated - */ - boolean success = false; - try { - for (final ValueVector v : vectors.values()) { - if (!v.allocateNewSafe()) { - return false; - } - } - success = true; - } finally { - if (!success) { - clear(); - } - } - return true; - } - - @Override - public void reAlloc() { - for (final ValueVector v : vectors.values()) { - v.reAlloc(); - } - } - - /** - * Adds a new field with the given parameters or replaces the existing one and consequently - * returns the resultant {@link org.apache.arrow.vector.ValueVector}. - * - *

    Execution takes place in the following order: - * - *

      - *
    • if field is new, create and insert a new vector of desired type. - *
    • if field exists and existing vector is of desired vector type, return the vector. - *
    • if field exists and null filled, clear the existing vector; create and insert a new - * vector of desired type. - *
    • otherwise, throw an {@link java.lang.IllegalStateException} - *
    - * - * @param childName the name of the field - * @param fieldType the type for the vector - * @param clazz class of expected vector type - * @param class type of expected vector type - * @return resultant {@link org.apache.arrow.vector.ValueVector} - * @throws java.lang.IllegalStateException raised if there is a hard schema change - */ - @Override - public T addOrGet(String childName, FieldType fieldType, Class clazz) { - final ValueVector existing = getChild(childName); - boolean create = false; - if (existing == null) { - create = true; - } else if (clazz.isAssignableFrom(existing.getClass())) { - return clazz.cast(existing); - } else if (nullFilled(existing)) { - existing.clear(); - create = true; - } - if (create) { - final T vector = clazz.cast(fieldType.createNewSingleVector(childName, allocator, callBack)); - putChild(childName, vector); - if (callBack != null) { - callBack.doWork(); - } - return vector; - } - final String message = - "Arrow does not support schema change yet. Existing[%s] and desired[%s] vector types " - + "mismatch"; - throw new IllegalStateException( - String.format(message, existing.getClass().getSimpleName(), clazz.getSimpleName())); - } - - private boolean nullFilled(ValueVector vector) { - return BitVectorHelper.checkAllBitsEqualTo( - vector.getValidityBuffer(), vector.getValueCount(), false); - } - - /** - * Returns a {@link org.apache.arrow.vector.ValueVector} corresponding to the given ordinal - * identifier. - * - * @param id the ordinal of the child to return - * @return the corresponding child - */ - public ValueVector getChildByOrdinal(int id) { - return vectors.getByOrdinal(id); - } - - /** - * Returns a {@link org.apache.arrow.vector.ValueVector} instance of subtype of T corresponding to - * the given field name if exists or null. - * - *

    If there is more than one element for name this will return the first inserted. - * - * @param name the name of the child to return - * @param clazz the expected type of the child - * @return the child corresponding to this name - */ - @Override - public T getChild(String name, Class clazz) { - final FieldVector f = vectors.get(name); - if (f == null) { - return null; - } - return typeify(f, clazz); - } - - protected ValueVector add(String childName, FieldType fieldType) { - FieldVector vector = fieldType.createNewSingleVector(childName, allocator, callBack); - putChild(childName, vector); - if (callBack != null) { - callBack.doWork(); - } - return vector; - } - - /** - * Inserts the vector with the given name if it does not exist else replaces it with the new - * value. - * - *

    Note that this method does not enforce any vector type check nor throws a schema change - * exception. - * - * @param name the name of the child to add - * @param vector the vector to add as a child - */ - protected void putChild(String name, FieldVector vector) { - putVector(name, vector); - } - - private void put(String name, FieldVector vector, boolean overwrite) { - final boolean old = - vectors.put( - Preconditions.checkNotNull(name, "field name cannot be null"), - Preconditions.checkNotNull(vector, "vector cannot be null"), - overwrite); - if (old) { - logger.debug("Field [{}] mutated to [{}] ", name, vector.getClass().getSimpleName()); - } - } - - /** - * Inserts the input vector into the map if it does not exist. - * - *

    If the field name already exists the conflict is handled according to the currently set - * ConflictPolicy - * - * @param name field name - * @param vector vector to be inserted - */ - protected void putVector(String name, FieldVector vector) { - switch (conflictPolicy) { - case CONFLICT_APPEND: - put(name, vector, false); - break; - case CONFLICT_IGNORE: - if (!vectors.containsKey(name)) { - put(name, vector, false); - } - break; - case CONFLICT_REPLACE: - if (vectors.containsKey(name)) { - vectors.removeAll(name); - } - put(name, vector, true); - break; - case CONFLICT_ERROR: - if (vectors.containsKey(name)) { - throw new IllegalStateException( - String.format( - "Vector already exists: Existing[%s], Requested[%s] ", - vector.getClass().getSimpleName(), vector.getField().getFieldType())); - } - put(name, vector, false); - break; - default: - throw new IllegalStateException( - String.format("%s type not a valid conflict state", conflictPolicy)); - } - } - - /** - * Get child vectors. - * - * @return a sequence of underlying child vectors. - */ - protected List getChildren() { - int size = vectors.size(); - List children = new ArrayList<>(); - for (int i = 0; i < size; i++) { - children.add(vectors.getByOrdinal(i)); - } - return children; - } - - /** Get child field names. */ - public List getChildFieldNames() { - return getChildren().stream() - .map(child -> child.getField().getName()) - .collect(Collectors.toList()); - } - - /** - * Get the number of child vectors. - * - * @return the number of underlying child vectors. - */ - @Override - public int size() { - return vectors.size(); - } - - @Override - public Iterator iterator() { - return Collections.unmodifiableCollection(vectors.values()).iterator(); - } - - /** - * Get primitive child vectors. - * - * @return a list of scalar child vectors recursing the entire vector hierarchy. 
- */ - public List getPrimitiveVectors() { - final List primitiveVectors = new ArrayList<>(); - for (final FieldVector v : vectors.values()) { - primitiveVectors.addAll(getPrimitiveVectors(v)); - } - return primitiveVectors; - } - - private List getPrimitiveVectors(FieldVector v) { - final List primitives = new ArrayList<>(); - if (v instanceof AbstractStructVector) { - AbstractStructVector structVector = (AbstractStructVector) v; - primitives.addAll(structVector.getPrimitiveVectors()); - } else if (v instanceof ListVector) { - ListVector listVector = (ListVector) v; - primitives.addAll(getPrimitiveVectors(listVector.getDataVector())); - } else if (v instanceof FixedSizeListVector) { - FixedSizeListVector listVector = (FixedSizeListVector) v; - primitives.addAll(getPrimitiveVectors(listVector.getDataVector())); - } else if (v instanceof UnionVector) { - UnionVector unionVector = (UnionVector) v; - for (final FieldVector vector : unionVector.getChildrenFromFields()) { - primitives.addAll(getPrimitiveVectors(vector)); - } - } else { - primitives.add(v); - } - return primitives; - } - - /** - * Get a child vector by name. If duplicate names this returns the first inserted. - * - * @param name the name of the child to return - * @return a vector with its corresponding ordinal mapping if field exists or null. - */ - @Override - public VectorWithOrdinal getChildVectorWithOrdinal(String name) { - final int ordinal = vectors.getOrdinal(name); - if (ordinal < 0) { - return null; - } - final ValueVector vector = vectors.getByOrdinal(ordinal); - return new VectorWithOrdinal(vector, ordinal); - } - - /** - * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer, so it only should be used for in-context access. Also note - * that this buffer changes regularly, thus external classes shouldn't hold a reference to it - * (unless they change it). 
- * - * @param clear Whether to clear vector before returning, the buffers will still be refcounted but - * the returned array will be the only reference to them. Also, this won't clear the child - * buffers. - * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. - */ - @Override - public ArrowBuf[] getBuffers(boolean clear) { - final List buffers = new ArrayList<>(); - - for (final ValueVector vector : vectors.values()) { - for (final ArrowBuf buf : vector.getBuffers(false)) { - buffers.add(buf); - if (clear) { - buf.getReferenceManager().retain(1); - } - } - if (clear) { - vector.clear(); - } - } - - return buffers.toArray(new ArrowBuf[buffers.size()]); - } - - @Override - public int getBufferSize() { - int actualBufSize = 0; - - for (final ValueVector v : vectors.values()) { - for (final ArrowBuf buf : v.getBuffers(false)) { - actualBufSize += (int) buf.writerIndex(); - } - } - return actualBufSize; - } - - @Override - public String toString() { - return ValueVectorUtility.getToString(this, 0, getValueCount()); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java deleted file mode 100644 index 12edd6557bd9c..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; - -import java.util.Collections; -import java.util.Iterator; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.DensityAwareVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.SchemaChangeRuntimeException; - -public abstract class BaseLargeRepeatedValueViewVector extends BaseValueVector - implements RepeatedValueVector, FieldVector { - public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE; - public static final String DATA_VECTOR_NAME = "$data$"; - - public static final byte OFFSET_WIDTH = 8; - public static final byte SIZE_WIDTH = 8; - 
protected ArrowBuf offsetBuffer; - protected ArrowBuf sizeBuffer; - protected FieldVector vector; - protected final CallBack repeatedCallBack; - protected int valueCount; - protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; - protected long sizeAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH; - private final String name; - - protected String defaultDataVectorName = DATA_VECTOR_NAME; - - protected BaseLargeRepeatedValueViewVector( - String name, BufferAllocator allocator, CallBack callBack) { - this(name, allocator, DEFAULT_DATA_VECTOR, callBack); - } - - protected BaseLargeRepeatedValueViewVector( - String name, BufferAllocator allocator, FieldVector vector, CallBack callBack) { - super(allocator); - this.name = name; - this.offsetBuffer = allocator.getEmpty(); - this.sizeBuffer = allocator.getEmpty(); - this.vector = Preconditions.checkNotNull(vector, "data vector cannot be null"); - this.repeatedCallBack = callBack; - this.valueCount = 0; - } - - @Override - public String getName() { - return name; - } - - @Override - public boolean allocateNewSafe() { - boolean dataAlloc = false; - try { - allocateBuffers(); - dataAlloc = vector.allocateNewSafe(); - } catch (Exception e) { - clear(); - return false; - } finally { - if (!dataAlloc) { - clear(); - } - } - return dataAlloc; - } - - private void allocateBuffers() { - offsetBuffer = allocateBuffers(offsetAllocationSizeInBytes); - sizeBuffer = allocateBuffers(sizeAllocationSizeInBytes); - } - - protected ArrowBuf allocateBuffers(final long size) { - final int curSize = (int) size; - ArrowBuf buffer = allocator.buffer(curSize); - buffer.readerIndex(0); - buffer.setZero(0, buffer.capacity()); - return buffer; - } - - @Override - public void reAlloc() { - reallocateBuffers(); - vector.reAlloc(); - } - - protected void reallocateBuffers() { - reallocOffsetBuffer(); - reallocSizeBuffer(); - } - - private void reallocOffsetBuffer() { - final long currentBufferCapacity = 
offsetBuffer.capacity(); - long newAllocationSize = currentBufferCapacity * 2; - if (newAllocationSize == 0) { - if (offsetAllocationSizeInBytes > 0) { - newAllocationSize = offsetAllocationSizeInBytes; - } else { - newAllocationSize = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2; - } - } - - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - newAllocationSize = Math.min(newAllocationSize, (long) OFFSET_WIDTH * Integer.MAX_VALUE); - assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= offsetBuffer.capacity()) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - offsetBuffer.getReferenceManager().release(1); - offsetBuffer = newBuf; - offsetAllocationSizeInBytes = newAllocationSize; - } - - private void reallocSizeBuffer() { - final long currentBufferCapacity = sizeBuffer.capacity(); - long newAllocationSize = currentBufferCapacity * 2; - if (newAllocationSize == 0) { - if (sizeAllocationSizeInBytes > 0) { - newAllocationSize = sizeAllocationSizeInBytes; - } else { - newAllocationSize = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH * 2; - } - } - - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - newAllocationSize = Math.min(newAllocationSize, (long) SIZE_WIDTH * Integer.MAX_VALUE); - assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= sizeBuffer.capacity()) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, sizeBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - sizeBuffer.getReferenceManager().release(1); - sizeBuffer = 
newBuf; - sizeAllocationSizeInBytes = newAllocationSize; - } - - @Override - public FieldVector getDataVector() { - return vector; - } - - @Override - public void setInitialCapacity(int numRecords) { - offsetAllocationSizeInBytes = (long) (numRecords) * OFFSET_WIDTH; - sizeAllocationSizeInBytes = (long) (numRecords) * SIZE_WIDTH; - if (vector instanceof BaseFixedWidthVector || vector instanceof BaseVariableWidthVector) { - vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); - } else { - vector.setInitialCapacity(numRecords); - } - } - - @Override - public void setInitialCapacity(int numRecords, double density) { - if ((numRecords * density) >= Integer.MAX_VALUE) { - throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); - } - - offsetAllocationSizeInBytes = (long) numRecords * OFFSET_WIDTH; - sizeAllocationSizeInBytes = (long) numRecords * SIZE_WIDTH; - - int innerValueCapacity = Math.max((int) (numRecords * density), 1); - - if (vector instanceof DensityAwareVector) { - ((DensityAwareVector) vector).setInitialCapacity(innerValueCapacity, density); - } else { - vector.setInitialCapacity(innerValueCapacity); - } - } - - /** - * Specialized version of setInitialTotalCapacity() for LargeListViewVector. This is used by some - * callers when they want to explicitly control and be conservative about memory allocated for - * inner data vector. This is very useful when we are working with memory constraints for a query - * and have a fixed amount of memory reserved for the record batch. In such cases, we are likely - * to face OOM or related problems when we reserve memory for a record batch with value count x - * and do setInitialCapacity(x) such that each vector allocates only what is necessary and not the - * default amount, but the multiplier forces the memory requirement to go beyond what was needed. 
- * - * @param numRecords value count - * @param totalNumberOfElements the total number of elements to allow for in this vector across - * all records. - */ - public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { - offsetAllocationSizeInBytes = (long) numRecords * OFFSET_WIDTH; - sizeAllocationSizeInBytes = (long) numRecords * SIZE_WIDTH; - vector.setInitialCapacity(totalNumberOfElements); - } - - @Override - public int getValueCapacity() { - throw new UnsupportedOperationException( - "Get value capacity is not supported in RepeatedValueVector"); - } - - protected int getOffsetBufferValueCapacity() { - return checkedCastToInt(offsetBuffer.capacity() / OFFSET_WIDTH); - } - - protected int getSizeBufferValueCapacity() { - return capAtMaxInt(sizeBuffer.capacity() / SIZE_WIDTH); - } - - @Override - public int getBufferSize() { - if (valueCount == 0) { - return 0; - } - return (valueCount * OFFSET_WIDTH) + (valueCount * SIZE_WIDTH) + vector.getBufferSize(); - } - - @Override - public int getBufferSizeFor(int valueCount) { - if (valueCount == 0) { - return 0; - } - - int innerVectorValueCount = 0; - - for (int i = 0; i < valueCount; i++) { - innerVectorValueCount += sizeBuffer.getInt(i * SIZE_WIDTH); - } - - return (valueCount * OFFSET_WIDTH) - + (valueCount * SIZE_WIDTH) - + vector.getBufferSizeFor(checkedCastToInt(innerVectorValueCount)); - } - - @Override - public Iterator iterator() { - return Collections.singleton(getDataVector()).iterator(); - } - - @Override - public void clear() { - offsetBuffer = releaseBuffer(offsetBuffer); - sizeBuffer = releaseBuffer(sizeBuffer); - vector.clear(); - valueCount = 0; - super.clear(); - } - - @Override - public void reset() { - offsetBuffer.setZero(0, offsetBuffer.capacity()); - sizeBuffer.setZero(0, sizeBuffer.capacity()); - vector.reset(); - valueCount = 0; - } - - @Override - public ArrowBuf[] getBuffers(boolean clear) { - return new ArrowBuf[0]; - } - - @Override - public int getValueCount() { - 
return valueCount; - } - - @Override - public void setValueCount(int valueCount) { - this.valueCount = valueCount; - while (valueCount > getOffsetBufferValueCapacity()) { - reallocateBuffers(); - } - final int childValueCount = valueCount == 0 ? 0 : getMaxViewEndChildVector(); - vector.setValueCount(childValueCount); - } - - /** - * Get the end of the child vector via the maximum view length. This method deduces the length by - * considering the condition i.e., argmax_i(offsets[i] + size[i]). - * - * @return the end of the child vector. - */ - protected int getMaxViewEndChildVector() { - int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0); - for (int i = 0; i < valueCount; i++) { - int currentOffset = offsetBuffer.getInt((long) i * OFFSET_WIDTH); - int currentSize = sizeBuffer.getInt((long) i * SIZE_WIDTH); - int currentSum = currentOffset + currentSize; - maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum); - } - - return maxOffsetSizeSum; - } - - /** - * Get the end of the child vector via the maximum view length of the child vector by index. - * - * @return the end of the child vector by index - */ - protected int getMaxViewEndChildVectorByIndex(int index) { - int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0); - for (int i = 0; i < index; i++) { - int currentOffset = offsetBuffer.getInt((long) i * OFFSET_WIDTH); - int currentSize = sizeBuffer.getInt((long) i * SIZE_WIDTH); - int currentSum = currentOffset + currentSize; - maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum); - } - - return maxOffsetSizeSum; - } - - /** - * Initialize the data vector (and execute callback) if it hasn't already been done, returns the - * data vector. 
- */ - public AddOrGetResult addOrGetVector(FieldType fieldType) { - boolean created = false; - if (vector instanceof NullVector) { - vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, repeatedCallBack); - // returned vector must have the same field - created = true; - if (repeatedCallBack != null - && - // not a schema change if changing from ZeroVector to ZeroVector - (fieldType.getType().getTypeID() != ArrowType.ArrowTypeID.Null)) { - repeatedCallBack.doWork(); - } - } - - if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) { - final String msg = - String.format( - "Inner vector type mismatch. Requested type: [%s], actual type: [%s]", - fieldType.getType().getTypeID(), vector.getField().getType().getTypeID()); - throw new SchemaChangeRuntimeException(msg); - } - - return new AddOrGetResult<>((T) vector, created); - } - - protected void replaceDataVector(FieldVector v) { - vector.clear(); - vector = v; - } - - public abstract boolean isEmpty(int index); - - /** - * Start a new value at the given index. 
- * - * @param index the index to start the new value at - * @return the offset in the data vector where the new value starts - */ - public int startNewValue(int index) { - while (index >= getOffsetBufferValueCapacity()) { - reallocOffsetBuffer(); - } - while (index >= getSizeBufferValueCapacity()) { - reallocSizeBuffer(); - } - - if (index > 0) { - final int prevOffset = getMaxViewEndChildVectorByIndex(index); - offsetBuffer.setInt((long) index * OFFSET_WIDTH, prevOffset); - } - - setValueCount(index + 1); - return offsetBuffer.getInt((long) index * OFFSET_WIDTH); - } - - @Override - @Deprecated - public UInt4Vector getOffsetVector() { - throw new UnsupportedOperationException("There is no inner offset vector"); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseListVector.java deleted file mode 100644 index 389670b878a76..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseListVector.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex; - -import org.apache.arrow.vector.FieldVector; - -/** Abstraction for all list type vectors. */ -public interface BaseListVector extends FieldVector { - - /** Get data vector start index with the given list index. */ - int getElementStartIndex(int index); - - /** Get data vector end index with the given list index. */ - int getElementEndIndex(int index); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java deleted file mode 100644 index fbe83bad52cf1..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.DensityAwareVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.SchemaChangeRuntimeException; - -/** Base class for Vectors that contain repeated values. 
*/ -public abstract class BaseRepeatedValueVector extends BaseValueVector - implements RepeatedValueVector, BaseListVector { - - public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE; - public static final String DATA_VECTOR_NAME = "$data$"; - - public static final byte OFFSET_WIDTH = 4; - protected ArrowBuf offsetBuffer; - protected FieldVector vector; - protected final CallBack repeatedCallBack; - protected int valueCount; - protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; - private final String name; - - protected String defaultDataVectorName = DATA_VECTOR_NAME; - - protected BaseRepeatedValueVector(String name, BufferAllocator allocator, CallBack callBack) { - this(name, allocator, DEFAULT_DATA_VECTOR, callBack); - } - - protected BaseRepeatedValueVector( - String name, BufferAllocator allocator, FieldVector vector, CallBack callBack) { - super(allocator); - this.name = name; - this.offsetBuffer = allocator.getEmpty(); - this.vector = Preconditions.checkNotNull(vector, "data vector cannot be null"); - this.repeatedCallBack = callBack; - this.valueCount = 0; - } - - @Override - public String getName() { - return name; - } - - @Override - public boolean allocateNewSafe() { - boolean dataAlloc = false; - try { - offsetBuffer = allocateOffsetBuffer(offsetAllocationSizeInBytes); - dataAlloc = vector.allocateNewSafe(); - } catch (Exception e) { - e.printStackTrace(); - clear(); - return false; - } finally { - if (!dataAlloc) { - clear(); - } - } - return dataAlloc; - } - - protected ArrowBuf allocateOffsetBuffer(final long size) { - final int curSize = (int) size; - ArrowBuf offsetBuffer = allocator.buffer(curSize); - offsetBuffer.readerIndex(0); - offsetAllocationSizeInBytes = curSize; - offsetBuffer.setZero(0, offsetBuffer.capacity()); - return offsetBuffer; - } - - @Override - public void reAlloc() { - reallocOffsetBuffer(); - vector.reAlloc(); - } - - protected void reallocOffsetBuffer() { - final long 
currentBufferCapacity = offsetBuffer.capacity(); - long newAllocationSize = currentBufferCapacity * 2; - if (newAllocationSize == 0) { - if (offsetAllocationSizeInBytes > 0) { - newAllocationSize = offsetAllocationSizeInBytes; - } else { - newAllocationSize = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2; - } - } - - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - newAllocationSize = Math.min(newAllocationSize, (long) OFFSET_WIDTH * Integer.MAX_VALUE); - assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= offsetBuffer.capacity()) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - offsetBuffer.getReferenceManager().release(1); - offsetBuffer = newBuf; - offsetAllocationSizeInBytes = newAllocationSize; - } - - /** - * Get the offset vector. - * - * @return the underlying offset vector or null if none exists. - * @deprecated This API will be removed, as the current implementations no longer hold inner - * offset vectors. - */ - @Override - @Deprecated - public UInt4Vector getOffsetVector() { - throw new UnsupportedOperationException("There is no inner offset vector"); - } - - @Override - public FieldVector getDataVector() { - return vector; - } - - @Override - public void setInitialCapacity(int numRecords) { - offsetAllocationSizeInBytes = (numRecords + 1L) * OFFSET_WIDTH; - if (vector instanceof BaseFixedWidthVector || vector instanceof BaseVariableWidthVector) { - vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); - } else { - vector.setInitialCapacity(numRecords); - } - } - - /** - * Specialized version of setInitialCapacity() for ListVector. 
This is used by some callers when - * they want to explicitly control and be conservative about memory allocated for inner data - * vector. This is very useful when we are working with memory constraints for a query and have a - * fixed amount of memory reserved for the record batch. In such cases, we are likely to face OOM - * or related problems when we reserve memory for a record batch with value count x and do - * setInitialCapacity(x) such that each vector allocates only what is necessary and not the - * default amount but the multiplier forces the memory requirement to go beyond what was needed. - * - * @param numRecords value count - * @param density density of ListVector. Density is the average size of list per position in the - * List vector. For example, a density value of 10 implies each position in the list vector - * has a list of 10 values. A density value of 0.1 implies out of 10 positions in the list - * vector, 1 position has a list of size 1 and remaining positions are null (no lists) or - * empty lists. This helps in tightly controlling the memory we provision for inner data - * vector. - */ - @Override - public void setInitialCapacity(int numRecords, double density) { - if ((numRecords * density) >= Integer.MAX_VALUE) { - throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); - } - - offsetAllocationSizeInBytes = (numRecords + 1L) * OFFSET_WIDTH; - - int innerValueCapacity = Math.max((int) (numRecords * density), 1); - - if (vector instanceof DensityAwareVector) { - ((DensityAwareVector) vector).setInitialCapacity(innerValueCapacity, density); - } else { - vector.setInitialCapacity(innerValueCapacity); - } - } - - /** - * Specialized version of setInitialTotalCapacity() for ListVector. This is used by some callers - * when they want to explicitly control and be conservative about memory allocated for inner data - * vector. 
This is very useful when we are working with memory constraints for a query and have a - * fixed amount of memory reserved for the record batch. In such cases, we are likely to face OOM - * or related problems when we reserve memory for a record batch with value count x and do - * setInitialCapacity(x) such that each vector allocates only what is necessary and not the - * default amount but the multiplier forces the memory requirement to go beyond what was needed. - * - * @param numRecords value count - * @param totalNumberOfElements the total number of elements to to allow for in this vector across - * all records. - */ - public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { - offsetAllocationSizeInBytes = (numRecords + 1L) * OFFSET_WIDTH; - vector.setInitialCapacity(totalNumberOfElements); - } - - @Override - public int getValueCapacity() { - final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0); - if (vector == DEFAULT_DATA_VECTOR) { - return offsetValueCapacity; - } - return Math.min(vector.getValueCapacity(), offsetValueCapacity); - } - - protected int getOffsetBufferValueCapacity() { - return capAtMaxInt(offsetBuffer.capacity() / OFFSET_WIDTH); - } - - @Override - public int getBufferSize() { - if (valueCount == 0) { - return 0; - } - return ((valueCount + 1) * OFFSET_WIDTH) + vector.getBufferSize(); - } - - @Override - public int getBufferSizeFor(int valueCount) { - if (valueCount == 0) { - return 0; - } - - int innerVectorValueCount = offsetBuffer.getInt(valueCount * OFFSET_WIDTH); - - return ((valueCount + 1) * OFFSET_WIDTH) + vector.getBufferSizeFor(innerVectorValueCount); - } - - @Override - public Iterator iterator() { - return Collections.singleton(getDataVector()).iterator(); - } - - @Override - public void clear() { - offsetBuffer = releaseBuffer(offsetBuffer); - vector.clear(); - valueCount = 0; - super.clear(); - } - - @Override - public void reset() { - offsetBuffer.setZero(0, 
offsetBuffer.capacity()); - vector.reset(); - valueCount = 0; - } - - /** - * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer, so it only should be used for in-context access. Also note - * that this buffer changes regularly, thus external classes shouldn't hold a reference to it - * (unless they change it). - * - * @param clear Whether to clear vector before returning, the buffers will still be refcounted but - * the returned array will be the only reference to them. Also, this won't clear the child - * buffers. - * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. - */ - @Override - public ArrowBuf[] getBuffers(boolean clear) { - final ArrowBuf[] buffers; - if (getBufferSize() == 0) { - buffers = new ArrowBuf[0]; - } else { - List list = new ArrayList<>(); - list.add(offsetBuffer); - list.addAll(Arrays.asList(vector.getBuffers(false))); - buffers = list.toArray(new ArrowBuf[list.size()]); - } - if (clear) { - for (ArrowBuf buffer : buffers) { - buffer.getReferenceManager().retain(); - } - clear(); - } - return buffers; - } - - /** - * Get value indicating if inner vector is set. - * - * @return 1 if inner vector is explicitly set via #addOrGetVector else 0 - */ - public int size() { - return vector == DEFAULT_DATA_VECTOR ? 0 : 1; - } - - /** - * Initialize the data vector (and execute callback) if it hasn't already been done, returns the - * data vector. 
- */ - public AddOrGetResult addOrGetVector(FieldType fieldType) { - boolean created = false; - if (vector instanceof NullVector) { - vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, repeatedCallBack); - // returned vector must have the same field - created = true; - if (repeatedCallBack != null - && - // not a schema change if changing from ZeroVector to ZeroVector - (fieldType.getType().getTypeID() != ArrowTypeID.Null)) { - repeatedCallBack.doWork(); - } - } - - if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) { - final String msg = - String.format( - "Inner vector type mismatch. Requested type: [%s], actual type: [%s]", - fieldType.getType().getTypeID(), vector.getField().getType().getTypeID()); - throw new SchemaChangeRuntimeException(msg); - } - - return new AddOrGetResult<>((T) vector, created); - } - - protected void replaceDataVector(FieldVector v) { - vector.clear(); - vector = v; - } - - @Override - public int getValueCount() { - return valueCount; - } - - /* returns the value count for inner data vector for this list vector */ - public int getInnerValueCount() { - return vector.getValueCount(); - } - - /** Returns the value count for inner data vector at a particular index. */ - public int getInnerValueCountAt(int index) { - return offsetBuffer.getInt((index + 1) * OFFSET_WIDTH) - - offsetBuffer.getInt(index * OFFSET_WIDTH); - } - - /** Return if value at index is empty. */ - public abstract boolean isEmpty(int index); - - /** Starts a new repeated value. */ - public int startNewValue(int index) { - while (index >= getOffsetBufferValueCapacity()) { - reallocOffsetBuffer(); - } - int offset = offsetBuffer.getInt(index * OFFSET_WIDTH); - offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, offset); - setValueCount(index + 1); - return offset; - } - - /** Preallocates the number of repeated values. 
*/ - @Override - public void setValueCount(int valueCount) { - this.valueCount = valueCount; - while (valueCount > getOffsetBufferValueCapacity()) { - reallocOffsetBuffer(); - } - final int childValueCount = - valueCount == 0 ? 0 : offsetBuffer.getInt(valueCount * OFFSET_WIDTH); - vector.setValueCount(childValueCount); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java deleted file mode 100644 index e6213316b55a3..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; - -import java.util.Collections; -import java.util.Iterator; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.DensityAwareVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.SchemaChangeRuntimeException; - -public abstract class BaseRepeatedValueViewVector extends BaseValueVector - implements RepeatedValueVector, BaseListVector { - - public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE; - public static final String DATA_VECTOR_NAME = "$data$"; - - public static final byte OFFSET_WIDTH = 4; - public static final byte SIZE_WIDTH = 4; - protected ArrowBuf offsetBuffer; - protected ArrowBuf sizeBuffer; - protected FieldVector vector; - protected final CallBack repeatedCallBack; - protected int valueCount; - protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; - protected long sizeAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH; - private final String name; - - protected String defaultDataVectorName = DATA_VECTOR_NAME; - - protected BaseRepeatedValueViewVector(String name, 
BufferAllocator allocator, CallBack callBack) { - this(name, allocator, DEFAULT_DATA_VECTOR, callBack); - } - - protected BaseRepeatedValueViewVector( - String name, BufferAllocator allocator, FieldVector vector, CallBack callBack) { - super(allocator); - this.name = name; - this.offsetBuffer = allocator.getEmpty(); - this.sizeBuffer = allocator.getEmpty(); - this.vector = Preconditions.checkNotNull(vector, "data vector cannot be null"); - this.repeatedCallBack = callBack; - this.valueCount = 0; - } - - @Override - public String getName() { - return name; - } - - @Override - public boolean allocateNewSafe() { - boolean dataAlloc = false; - try { - allocateBuffers(); - dataAlloc = vector.allocateNewSafe(); - } catch (Exception e) { - clear(); - return false; - } finally { - if (!dataAlloc) { - clear(); - } - } - return dataAlloc; - } - - private void allocateBuffers() { - offsetBuffer = allocateBuffers(offsetAllocationSizeInBytes); - sizeBuffer = allocateBuffers(sizeAllocationSizeInBytes); - } - - protected ArrowBuf allocateBuffers(final long size) { - final int curSize = (int) size; - ArrowBuf buffer = allocator.buffer(curSize); - buffer.readerIndex(0); - buffer.setZero(0, buffer.capacity()); - return buffer; - } - - @Override - public void reAlloc() { - reallocateBuffers(); - vector.reAlloc(); - } - - protected void reallocateBuffers() { - reallocOffsetBuffer(); - reallocSizeBuffer(); - } - - private void reallocOffsetBuffer() { - final long currentBufferCapacity = offsetBuffer.capacity(); - long newAllocationSize = currentBufferCapacity * 2; - if (newAllocationSize == 0) { - if (offsetAllocationSizeInBytes > 0) { - newAllocationSize = offsetAllocationSizeInBytes; - } else { - newAllocationSize = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2; - } - } - - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - newAllocationSize = Math.min(newAllocationSize, (long) OFFSET_WIDTH * Integer.MAX_VALUE); - assert newAllocationSize >= 1; - - if 
(newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= offsetBuffer.capacity()) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - offsetBuffer.getReferenceManager().release(1); - offsetBuffer = newBuf; - offsetAllocationSizeInBytes = newAllocationSize; - } - - private void reallocSizeBuffer() { - final long currentBufferCapacity = sizeBuffer.capacity(); - long newAllocationSize = currentBufferCapacity * 2; - if (newAllocationSize == 0) { - if (sizeAllocationSizeInBytes > 0) { - newAllocationSize = sizeAllocationSizeInBytes; - } else { - newAllocationSize = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH * 2; - } - } - - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - newAllocationSize = Math.min(newAllocationSize, (long) SIZE_WIDTH * Integer.MAX_VALUE); - assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= sizeBuffer.capacity()) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, sizeBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - sizeBuffer.getReferenceManager().release(1); - sizeBuffer = newBuf; - sizeAllocationSizeInBytes = newAllocationSize; - } - - @Override - public FieldVector getDataVector() { - return vector; - } - - @Override - public void setInitialCapacity(int numRecords) { - offsetAllocationSizeInBytes = (numRecords) * OFFSET_WIDTH; - sizeAllocationSizeInBytes = (numRecords) * SIZE_WIDTH; - if (vector instanceof BaseFixedWidthVector || vector instanceof BaseVariableWidthVector) { - vector.setInitialCapacity(numRecords * 
RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); - } else { - vector.setInitialCapacity(numRecords); - } - } - - @Override - public void setInitialCapacity(int numRecords, double density) { - if ((numRecords * density) >= Integer.MAX_VALUE) { - throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); - } - - offsetAllocationSizeInBytes = numRecords * OFFSET_WIDTH; - sizeAllocationSizeInBytes = numRecords * SIZE_WIDTH; - - int innerValueCapacity = Math.max((int) (numRecords * density), 1); - - if (vector instanceof DensityAwareVector) { - ((DensityAwareVector) vector).setInitialCapacity(innerValueCapacity, density); - } else { - vector.setInitialCapacity(innerValueCapacity); - } - } - - /** - * Specialized version of setInitialTotalCapacity() for ListViewVector. This is used by some - * callers when they want to explicitly control and be conservative about memory allocated for - * inner data vector. This is very useful when we are working with memory constraints for a query - * and have a fixed amount of memory reserved for the record batch. In such cases, we are likely - * to face OOM or related problems when we reserve memory for a record batch with value count x - * and do setInitialCapacity(x) such that each vector allocates only what is necessary and not the - * default amount, but the multiplier forces the memory requirement to go beyond what was needed. - * - * @param numRecords value count - * @param totalNumberOfElements the total number of elements to allow for in this vector across - * all records. 
- */ - public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { - offsetAllocationSizeInBytes = numRecords * OFFSET_WIDTH; - sizeAllocationSizeInBytes = numRecords * SIZE_WIDTH; - vector.setInitialCapacity(totalNumberOfElements); - } - - @Override - public int getValueCapacity() { - throw new UnsupportedOperationException( - "Get value capacity is not supported in RepeatedValueVector"); - } - - protected int getOffsetBufferValueCapacity() { - return capAtMaxInt(offsetBuffer.capacity() / OFFSET_WIDTH); - } - - protected int getSizeBufferValueCapacity() { - return capAtMaxInt(sizeBuffer.capacity() / SIZE_WIDTH); - } - - @Override - public int getBufferSize() { - if (valueCount == 0) { - return 0; - } - return (valueCount * OFFSET_WIDTH) + (valueCount * SIZE_WIDTH) + vector.getBufferSize(); - } - - @Override - public int getBufferSizeFor(int valueCount) { - if (valueCount == 0) { - return 0; - } - - int innerVectorValueCount = 0; - - for (int i = 0; i < valueCount; i++) { - innerVectorValueCount += sizeBuffer.getInt(i * SIZE_WIDTH); - } - - return (valueCount * OFFSET_WIDTH) - + (valueCount * SIZE_WIDTH) - + vector.getBufferSizeFor(innerVectorValueCount); - } - - @Override - public Iterator iterator() { - return Collections.singleton(getDataVector()).iterator(); - } - - @Override - public void clear() { - offsetBuffer = releaseBuffer(offsetBuffer); - sizeBuffer = releaseBuffer(sizeBuffer); - vector.clear(); - valueCount = 0; - super.clear(); - } - - @Override - public void reset() { - offsetBuffer.setZero(0, offsetBuffer.capacity()); - sizeBuffer.setZero(0, sizeBuffer.capacity()); - vector.reset(); - valueCount = 0; - } - - @Override - public ArrowBuf[] getBuffers(boolean clear) { - return new ArrowBuf[0]; - } - - @Override - public int getValueCount() { - return valueCount; - } - - @Override - public void setValueCount(int valueCount) { - this.valueCount = valueCount; - while (valueCount > getOffsetBufferValueCapacity()) { - 
reallocateBuffers(); - } - final int childValueCount = valueCount == 0 ? 0 : getMaxViewEndChildVector(); - vector.setValueCount(childValueCount); - } - - /** - * Get the end of the child vector via the maximum view length. This method deduces the length by - * considering the condition i.e., argmax_i(offsets[i] + size[i]). - * - * @return the end of the child vector. - */ - protected int getMaxViewEndChildVector() { - int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0); - for (int i = 0; i < valueCount; i++) { - int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); - int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH); - int currentSum = currentOffset + currentSize; - maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum); - } - - return maxOffsetSizeSum; - } - - /** - * Get the end of the child vector via the maximum view length of the child vector by index. - * - * @return the end of the child vector by index - */ - protected int getMaxViewEndChildVectorByIndex(int index) { - int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0); - // int minOffset = offsetBuffer.getInt(0); - for (int i = 0; i < index; i++) { - int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); - int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH); - int currentSum = currentOffset + currentSize; - maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum); - } - - return maxOffsetSizeSum; - } - - /** - * Initialize the data vector (and execute callback) if it hasn't already been done, returns the - * data vector. 
- */ - public AddOrGetResult addOrGetVector(FieldType fieldType) { - boolean created = false; - if (vector instanceof NullVector) { - vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, repeatedCallBack); - // returned vector must have the same field - created = true; - if (repeatedCallBack != null - && - // not a schema change if changing from ZeroVector to ZeroVector - (fieldType.getType().getTypeID() != ArrowType.ArrowTypeID.Null)) { - repeatedCallBack.doWork(); - } - } - - if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) { - final String msg = - String.format( - "Inner vector type mismatch. Requested type: [%s], actual type: [%s]", - fieldType.getType().getTypeID(), vector.getField().getType().getTypeID()); - throw new SchemaChangeRuntimeException(msg); - } - - return new AddOrGetResult<>((T) vector, created); - } - - protected void replaceDataVector(FieldVector v) { - vector.clear(); - vector = v; - } - - public abstract boolean isEmpty(int index); - - /** - * Start a new value at the given index. 
- * - * @param index the index to start the new value at - * @return the offset in the data vector where the new value starts - */ - public int startNewValue(int index) { - while (index >= getOffsetBufferValueCapacity()) { - reallocOffsetBuffer(); - } - while (index >= getSizeBufferValueCapacity()) { - reallocSizeBuffer(); - } - - if (index > 0) { - final int prevOffset = getMaxViewEndChildVectorByIndex(index); - offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset); - } - - setValueCount(index + 1); - return offsetBuffer.getInt(index * OFFSET_WIDTH); - } - - @Override - @Deprecated - public UInt4Vector getOffsetVector() { - throw new UnsupportedOperationException("There is no inner offset vector"); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/EmptyValuePopulator.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/EmptyValuePopulator.java deleted file mode 100644 index 4477ebc94df2d..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/EmptyValuePopulator.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex; - -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.UInt4Vector; - -/** A helper class that is used to track and populate empty values in repeated value vectors. */ -public class EmptyValuePopulator { - private final UInt4Vector offsets; - - public EmptyValuePopulator(UInt4Vector offsets) { - this.offsets = Preconditions.checkNotNull(offsets, "offsets cannot be null"); - } - - /** - * Marks all values since the last set as empty. The last set value is obtained from underlying - * offsets vector. - * - * @param lastIndex the last index (inclusive) in the offsets vector until which empty population - * takes place - * @throws java.lang.IndexOutOfBoundsException if lastIndex is negative or greater than offsets - * capacity. - */ - public void populate(int lastIndex) { - if (lastIndex < 0) { - throw new IndexOutOfBoundsException("index cannot be negative"); - } - final int lastSet = Math.max(offsets.getValueCount() - 1, 0); - final int previousEnd = offsets.get(lastSet); // 0 ? 0 : accessor.get(lastSet); - for (int i = lastSet; i < lastIndex; i++) { - offsets.setSafe(i + 1, previousEnd); - } - offsets.setValueCount(lastIndex + 1); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java deleted file mode 100644 index c762eb51725ca..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ /dev/null @@ -1,733 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -import static java.util.Collections.singletonList; -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.apache.arrow.util.Preconditions.checkArgument; -import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.DATA_VECTOR_NAME; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Objects; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.ByteFunctionHelpers; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.BufferBacked; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.impl.UnionFixedSizeListReader; -import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter; -import 
org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.SchemaChangeRuntimeException; -import org.apache.arrow.vector.util.TransferPair; - -/** A ListVector where every list value is of the same size. */ -public class FixedSizeListVector extends BaseValueVector - implements BaseListVector, PromotableVector, ValueIterableVector> { - - public static FixedSizeListVector empty(String name, int size, BufferAllocator allocator) { - FieldType fieldType = FieldType.nullable(new ArrowType.FixedSizeList(size)); - return new FixedSizeListVector(name, allocator, fieldType, null); - } - - private FieldVector vector; - private ArrowBuf validityBuffer; - private final int listSize; - private Field field; - - private UnionFixedSizeListReader reader; - private int valueCount; - private int validityAllocationSizeInBytes; - - /** - * Creates a new instance. - * - * @param name The name for the vector. - * @param allocator The allocator to use for creating/reallocating buffers for the vector. - * @param fieldType The underlying data type of the vector. - * @param unusedSchemaChangeCallback Currently unused. - */ - public FixedSizeListVector( - String name, - BufferAllocator allocator, - FieldType fieldType, - CallBack unusedSchemaChangeCallback) { - this(new Field(name, fieldType, null), allocator, unusedSchemaChangeCallback); - } - - /** - * Creates a new instance. - * - * @param field The field materialized by this vector. - * @param allocator The allocator to use for creating/reallocating buffers for the vector. 
- * @param unusedSchemaChangeCallback Currently unused. - */ - public FixedSizeListVector( - Field field, BufferAllocator allocator, CallBack unusedSchemaChangeCallback) { - super(allocator); - - this.field = field; - this.validityBuffer = allocator.getEmpty(); - this.vector = ZeroVector.INSTANCE; - this.listSize = ((ArrowType.FixedSizeList) field.getFieldType().getType()).getListSize(); - Preconditions.checkArgument(listSize >= 0, "list size must be non-negative"); - this.valueCount = 0; - this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - } - - @Override - public Field getField() { - if (field.getChildren().contains(getDataVector().getField())) { - return field; - } - field = - new Field( - field.getName(), - field.getFieldType(), - Collections.singletonList(getDataVector().getField())); - return field; - } - - @Override - public MinorType getMinorType() { - return MinorType.FIXED_SIZE_LIST; - } - - @Override - public String getName() { - return field.getName(); - } - - /** Get the fixed size for each list. */ - public int getListSize() { - return listSize; - } - - @Override - public void initializeChildrenFromFields(List children) { - checkArgument( - children.size() == 1, - "Lists have one child Field. Found: %s", - children.isEmpty() ? 
"none" : children); - - Field field = children.get(0); - AddOrGetResult addOrGetVector = addOrGetVector(field.getFieldType()); - checkArgument( - addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); - - addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); - this.field = new Field(this.field.getName(), this.field.getFieldType(), children); - } - - @Override - public List getChildrenFromFields() { - return singletonList(vector); - } - - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - if (ownBuffers.size() != 1) { - throw new IllegalArgumentException( - "Illegal buffer count, expected " + 1 + ", got: " + ownBuffers.size()); - } - - ArrowBuf bitBuffer = ownBuffers.get(0); - - validityBuffer.getReferenceManager().release(); - validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); - valueCount = fieldNode.getLength(); - - validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); - } - - @Override - public List getFieldBuffers() { - List result = new ArrayList<>(1); - setReaderAndWriterIndex(); - result.add(validityBuffer); - - return result; - } - - private void setReaderAndWriterIndex() { - validityBuffer.readerIndex(0); - validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - } - - /** - * Get the inner vectors. - * - * @return the inner vectors for this field as defined by the TypeLayout - */ - @Deprecated - @Override - public List getFieldInnerVectors() { - throw new UnsupportedOperationException("There are no inner vectors. 
Use getFieldBuffers"); - } - - @Override - protected FieldReader getReaderImpl() { - return new UnionFixedSizeListReader(this); - } - - @Override - public UnionFixedSizeListReader getReader() { - reader = (UnionFixedSizeListReader) super.getReader(); - return reader; - } - - private void invalidateReader() { - reader = null; - } - - @Override - public void allocateNew() throws OutOfMemoryException { - if (!allocateNewSafe()) { - throw new OutOfMemoryException("Failure while allocating memory"); - } - } - - @Override - public boolean allocateNewSafe() { - /* boolean to keep track if all the memory allocation were successful - * Used in the case of composite vectors when we need to allocate multiple - * buffers for multiple vectors. If one of the allocations failed we need to - * clear all the memory that we allocated - */ - boolean success = false; - try { - /* we are doing a new allocation -- release the current buffers */ - clear(); - /* allocate validity buffer */ - allocateValidityBuffer(validityAllocationSizeInBytes); - success = vector.allocateNewSafe(); - } finally { - if (!success) { - clear(); - } - } - return success; - } - - private void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = curSize; - validityBuffer.setZero(0, validityBuffer.capacity()); - } - - @Override - public void reAlloc() { - reallocValidityBuffer(); - vector.reAlloc(); - } - - private void reallocValidityBuffer() { - final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity()); - long newAllocationSize = currentBufferCapacity * 2L; - if (newAllocationSize == 0) { - if (validityAllocationSizeInBytes > 0) { - newAllocationSize = validityAllocationSizeInBytes; - } else { - newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2L; - } - } - - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - 
assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - validityBuffer.getReferenceManager().release(1); - validityBuffer = newBuf; - validityAllocationSizeInBytes = (int) newAllocationSize; - } - - public FieldVector getDataVector() { - return vector; - } - - /** - * Start a new value in the list vector. - * - * @param index index of the value to start - */ - public int startNewValue(int index) { - while (index >= getValidityBufferValueCapacity()) { - reallocValidityBuffer(); - } - - BitVectorHelper.setBit(validityBuffer, index); - return index * listSize; - } - - public UnionFixedSizeListWriter getWriter() { - return new UnionFixedSizeListWriter(this); - } - - @Override - public void setInitialCapacity(int numRecords) { - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); - vector.setInitialCapacity(numRecords * listSize); - } - - @Override - public int getValueCapacity() { - if (vector == ZeroVector.INSTANCE || listSize == 0) { - return 0; - } - return Math.min(vector.getValueCapacity() / listSize, getValidityBufferValueCapacity()); - } - - @Override - public int getBufferSize() { - if (getValueCount() == 0) { - return 0; - } - return getValidityBufferSizeFromCount(valueCount) + vector.getBufferSize(); - } - - @Override - public int getBufferSizeFor(int valueCount) { - if (valueCount == 0) { - return 0; - } - return getValidityBufferSizeFromCount(valueCount) - + vector.getBufferSizeFor(valueCount * listSize); - } - - @Override - public Iterator iterator() { - return Collections.singleton(vector).iterator(); - } - - @Override - public void clear() { - validityBuffer = releaseBuffer(validityBuffer); - vector.clear(); - 
valueCount = 0; - super.clear(); - } - - @Override - public void reset() { - validityBuffer.setZero(0, validityBuffer.capacity()); - vector.reset(); - valueCount = 0; - } - - /** - * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer, so it only should be used for in-context access. Also note - * that this buffer changes regularly, thus external classes shouldn't hold a reference to it - * (unless they change it). - * - * @param clear Whether to clear vector before returning, the buffers will still be refcounted but - * the returned array will be the only reference to them. Also, this won't clear the child - * buffers. - * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. - */ - @Override - public ArrowBuf[] getBuffers(boolean clear) { - setReaderAndWriterIndex(); - final ArrowBuf[] buffers; - if (getBufferSize() == 0) { - buffers = new ArrowBuf[0]; - } else { - List list = new ArrayList<>(); - list.add(validityBuffer); - list.addAll(Arrays.asList(vector.getBuffers(false))); - buffers = list.toArray(new ArrowBuf[list.size()]); - } - if (clear) { - for (ArrowBuf buffer : buffers) { - buffer.getReferenceManager().retain(); - } - clear(); - } - return buffers; - } - - /** - * Get value indicating if inner vector is set. - * - * @return 1 if inner vector is explicitly set via #addOrGetVector else 0 - */ - public int size() { - return vector == ZeroVector.INSTANCE ? 0 : 1; - } - - @Override - @SuppressWarnings("unchecked") - public AddOrGetResult addOrGetVector(FieldType type) { - boolean created = false; - if (vector == ZeroVector.INSTANCE) { - vector = type.createNewSingleVector(DATA_VECTOR_NAME, allocator, null); - invalidateReader(); - created = true; - } - // returned vector must have the same field - if (!Objects.equals(vector.getField().getType(), type.getType())) { - final String msg = - String.format( - "Inner vector type mismatch. 
Requested type: [%s], actual type: [%s]", - type.getType(), vector.getField().getType()); - throw new SchemaChangeRuntimeException(msg); - } - - return new AddOrGetResult<>((T) vector, created); - } - - @Override - public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { - copyFrom(inIndex, outIndex, from); - } - - @Override - public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - TransferPair pair = from.makeTransferPair(this); - pair.copyValueSafe(fromIndex, thisIndex); - } - - @Override - public UnionVector promoteToUnion() { - UnionVector vector = - new UnionVector(getName(), allocator, /* field type */ null, /* call-back */ null); - this.vector.clear(); - this.vector = vector; - invalidateReader(); - return vector; - } - - @Override - public long getValidityBufferAddress() { - return validityBuffer.memoryAddress(); - } - - @Override - public long getDataBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public long getOffsetBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getValidityBuffer() { - return validityBuffer; - } - - @Override - public ArrowBuf getDataBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getOffsetBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public List getObject(int index) { - if (isSet(index) == 0) { - return null; - } - final List vals = new JsonStringArrayList<>(listSize); - for (int i = 0; i < listSize; i++) { - vals.add(vector.getObject(index * listSize + i)); - } - return vals; - } - - /** Returns whether the value at index null. */ - @Override - public boolean isNull(int index) { - return (isSet(index) == 0); - } - - /** Returns non-zero when the value at index is non-null. 
*/ - public int isSet(int index) { - final int byteIndex = index >> 3; - final byte b = validityBuffer.getByte(byteIndex); - final int bitIndex = index & 7; - return (b >> bitIndex) & 0x01; - } - - @Override - public int getNullCount() { - return BitVectorHelper.getNullCount(validityBuffer, valueCount); - } - - @Override - public int getValueCount() { - return valueCount; - } - - /** Returns the number of elements the validity buffer can represent with its current capacity. */ - private int getValidityBufferValueCapacity() { - return capAtMaxInt(validityBuffer.capacity() * 8); - } - - /** Sets the value at index to null. Reallocates if index is larger than capacity. */ - @Override - public void setNull(int index) { - while (index >= getValidityBufferValueCapacity()) { - reallocValidityBuffer(); - } - BitVectorHelper.unsetBit(validityBuffer, index); - } - - /** Sets the value at index to not-null. Reallocates if index is larger than capacity. */ - public void setNotNull(int index) { - while (index >= getValidityBufferValueCapacity()) { - reallocValidityBuffer(); - } - BitVectorHelper.setBit(validityBuffer, index); - } - - @Override - public void setValueCount(int valueCount) { - this.valueCount = valueCount; - while (valueCount > getValidityBufferValueCapacity()) { - reallocValidityBuffer(); - } - vector.setValueCount(valueCount * listSize); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return getTransferPair(ref, allocator, null); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return getTransferPair(field, allocator, null); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(ref, allocator, callBack); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(field, allocator, callBack); - 
} - - @Override - public TransferPair makeTransferPair(ValueVector target) { - return new TransferImpl((FixedSizeListVector) target); - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - if (isSet(index) == 0) { - return ArrowBufPointer.NULL_HASH_CODE; - } - int hash = 0; - for (int i = 0; i < listSize; i++) { - hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(index * listSize + i, hasher)); - } - return hash; - } - - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } - - @Override - public int getElementStartIndex(int index) { - return listSize * index; - } - - @Override - public int getElementEndIndex(int index) { - return listSize * (index + 1); - } - - private class TransferImpl implements TransferPair { - - FixedSizeListVector to; - TransferPair dataPair; - - public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { - this(new FixedSizeListVector(name, allocator, field.getFieldType(), callBack)); - } - - public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { - this(new FixedSizeListVector(field, allocator, callBack)); - } - - public TransferImpl(FixedSizeListVector to) { - this.to = to; - if (!(vector instanceof ZeroVector)) { - to.addOrGetVector(vector.getField().getFieldType()); - } - dataPair = vector.makeTransferPair(to.vector); - } - - @Override - public void transfer() { - to.clear(); - dataPair.transfer(); - to.validityBuffer = BaseValueVector.transferBuffer(validityBuffer, to.allocator); - to.setValueCount(valueCount); - clear(); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - Preconditions.checkArgument( - startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, - "Invalid parameters startIndex: %s, length: %s for valueCount: %s", - startIndex, - length, - valueCount); - final int 
startPoint = listSize * startIndex; - final int sliceLength = listSize * length; - to.clear(); - - /* splitAndTransfer validity buffer */ - splitAndTransferValidityBuffer(startIndex, length, to); - /* splitAndTransfer data buffer */ - dataPair.splitAndTransfer(startPoint, sliceLength); - to.setValueCount(length); - } - - /* - * transfer the validity. - */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, FixedSizeListVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. - */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. 
- */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - - @Override - public ValueVector getTo() { - return to; - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - while (toIndex >= to.getValueCapacity()) { - to.reAlloc(); - } - BitVectorHelper.setValidityBit(to.validityBuffer, toIndex, isSet(fromIndex)); - int fromOffset = fromIndex * listSize; - int toOffset = toIndex * listSize; - for (int i = 0; i < listSize; i++) { - dataPair.copyValueSafe(fromOffset + i, toOffset + i); - } - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java deleted file mode 100644 index ed075352c931c..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ /dev/null @@ -1,1127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -import static java.util.Collections.singletonList; -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.apache.arrow.util.Preconditions.checkArgument; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.ByteFunctionHelpers; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.BufferBacked; -import org.apache.arrow.vector.DensityAwareVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.compare.VectorVisitor; -import 
org.apache.arrow.vector.complex.impl.ComplexCopier; -import org.apache.arrow.vector.complex.impl.UnionLargeListReader; -import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.SchemaChangeRuntimeException; -import org.apache.arrow.vector.util.TransferPair; - -/** - * A list vector contains lists of a specific type of elements. Its structure contains 3 elements. - * - *
      - *
    1. A validity buffer. - *
    2. An offset buffer, that denotes lists boundaries. - *
    3. A child data vector that contains the elements of lists. - *
    - * - * This is the LargeList variant of list, it has a 64-bit wide offset - * - *

    WARNING: Currently Arrow in Java doesn't support 64-bit vectors. This class follows the - * expected behaviour of a LargeList but doesn't actually support allocating a 64-bit vector. It has - * little use until 64-bit vectors are supported and should be used with caution. todo review - * checkedCastToInt usage in this class. Once int64 indexed vectors are supported these checks - * aren't needed. - */ -public class LargeListVector extends BaseValueVector - implements RepeatedValueVector, FieldVector, PromotableVector, ValueIterableVector> { - - public static LargeListVector empty(String name, BufferAllocator allocator) { - return new LargeListVector( - name, allocator, FieldType.nullable(ArrowType.LargeList.INSTANCE), null); - } - - public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE; - public static final String DATA_VECTOR_NAME = "$data$"; - - public static final byte OFFSET_WIDTH = 8; - protected ArrowBuf offsetBuffer; - protected FieldVector vector; - protected final CallBack callBack; - protected int valueCount; - protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; - - protected String defaultDataVectorName = DATA_VECTOR_NAME; - protected ArrowBuf validityBuffer; - protected UnionLargeListReader reader; - private Field field; - private int validityAllocationSizeInBytes; - - /** The maximum index that is actually set. */ - private int lastSet; - - /** - * Constructs a new instance. - * - * @param name The name of the instance. - * @param allocator The allocator to use for allocating/reallocating buffers. - * @param fieldType The type of this list. - * @param callBack A schema change callback. - */ - public LargeListVector( - String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - this(new Field(name, fieldType, null), allocator, callBack); - } - - /** - * Creates a new instance. - * - * @param field The field materialized by this vector. 
- * @param allocator The allocator to use for creating/reallocating buffers for the vector. - * @param callBack A schema change callback. - */ - public LargeListVector(Field field, BufferAllocator allocator, CallBack callBack) { - super(allocator); - this.field = field; - this.validityBuffer = allocator.getEmpty(); - this.callBack = callBack; - this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - this.lastSet = -1; - this.offsetBuffer = allocator.getEmpty(); - this.vector = vector == null ? DEFAULT_DATA_VECTOR : vector; - this.valueCount = 0; - } - - @Override - public void initializeChildrenFromFields(List children) { - checkArgument( - children.size() == 1, - "Lists have one child Field. Found: %s", - children.isEmpty() ? "none" : children); - - Field field = children.get(0); - AddOrGetResult addOrGetVector = addOrGetVector(field.getFieldType()); - checkArgument( - addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); - - addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); - this.field = new Field(this.field.getName(), this.field.getFieldType(), children); - } - - @Override - public void setInitialCapacity(int numRecords) { - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); - offsetAllocationSizeInBytes = (long) (numRecords + 1) * OFFSET_WIDTH; - if (vector instanceof BaseFixedWidthVector || vector instanceof BaseVariableWidthVector) { - vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); - } else { - vector.setInitialCapacity(numRecords); - } - } - - /** - * Specialized version of setInitialCapacity() for ListVector. This is used by some callers when - * they want to explicitly control and be conservative about memory allocated for inner data - * vector. 
This is very useful when we are working with memory constraints for a query and have a - * fixed amount of memory reserved for the record batch. In such cases, we are likely to face OOM - * or related problems when we reserve memory for a record batch with value count x and do - * setInitialCapacity(x) such that each vector allocates only what is necessary and not the - * default amount but the multiplier forces the memory requirement to go beyond what was needed. - * - * @param numRecords value count - * @param density density of ListVector. Density is the average size of list per position in the - * List vector. For example, a density value of 10 implies each position in the list vector - * has a list of 10 values. A density value of 0.1 implies out of 10 positions in the list - * vector, 1 position has a list of size 1 and remaining positions are null (no lists) or - * empty lists. This helps in tightly controlling the memory we provision for inner data - * vector. - */ - @Override - public void setInitialCapacity(int numRecords, double density) { - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); - if ((numRecords * density) >= Integer.MAX_VALUE) { - throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); - } - - offsetAllocationSizeInBytes = (numRecords + 1L) * OFFSET_WIDTH; - - int innerValueCapacity = Math.max((int) (numRecords * density), 1); - - if (vector instanceof DensityAwareVector) { - ((DensityAwareVector) vector).setInitialCapacity(innerValueCapacity, density); - } else { - vector.setInitialCapacity(innerValueCapacity); - } - } - - /** - * Specialized version of setInitialTotalCapacity() for ListVector. This is used by some callers - * when they want to explicitly control and be conservative about memory allocated for inner data - * vector. This is very useful when we are working with memory constraints for a query and have a - * fixed amount of memory reserved for the record batch. 
In such cases, we are likely to face OOM - * or related problems when we reserve memory for a record batch with value count x and do - * setInitialCapacity(x) such that each vector allocates only what is necessary and not the - * default amount but the multiplier forces the memory requirement to go beyond what was needed. - * - * @param numRecords value count - * @param totalNumberOfElements the total number of elements to to allow for in this vector across - * all records. - */ - public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { - offsetAllocationSizeInBytes = (numRecords + 1L) * OFFSET_WIDTH; - vector.setInitialCapacity(totalNumberOfElements); - } - - /** - * Get the density of this ListVector. - * - * @return density - */ - public double getDensity() { - if (valueCount == 0) { - return 0.0D; - } - final long startOffset = offsetBuffer.getLong(0L); - final long endOffset = offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH); - final double totalListSize = endOffset - startOffset; - return totalListSize / valueCount; - } - - @Override - public List getChildrenFromFields() { - return singletonList(getDataVector()); - } - - /** - * Load the buffers of this vector with provided source buffers. The caller manages the source - * buffers and populates them before invoking this method. 
- * - * @param fieldNode the fieldNode indicating the value count - * @param ownBuffers the buffers for this Field (own buffers only, children not included) - */ - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - if (ownBuffers.size() != 2) { - throw new IllegalArgumentException( - "Illegal buffer count, expected " + 2 + ", got: " + ownBuffers.size()); - } - - ArrowBuf bitBuffer = ownBuffers.get(0); - ArrowBuf offBuffer = ownBuffers.get(1); - - validityBuffer.getReferenceManager().release(); - validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); - offsetBuffer.getReferenceManager().release(); - offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); - - validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); - offsetAllocationSizeInBytes = offsetBuffer.capacity(); - - lastSet = fieldNode.getLength() - 1; - valueCount = fieldNode.getLength(); - } - - /** - * Get the buffers belonging to this vector. - * - * @return the inner buffers. - */ - @Override - public List getFieldBuffers() { - List result = new ArrayList<>(2); - setReaderAndWriterIndex(); - result.add(validityBuffer); - result.add(offsetBuffer); - - return result; - } - - /** - * Export the buffers of the fields for C Data Interface. This method traverse the buffers and - * export buffer and buffer's memory address into a list of buffers and a pointer to the list of - * buffers. - */ - @Override - public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long nullValue) { - exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true); - - if (offsetBuffer.capacity() == 0) { - // Empty offset buffer is allowed for historical reason. - // To export it through C Data interface, we need to allocate a buffer with one offset. - // We set `retain = false` to explicitly not increase the ref count for the exported buffer. 
- // The ref count of the newly created buffer (i.e., 1) already represents the usage - // at imported side. - exportBuffer(allocateOffsetBuffer(OFFSET_WIDTH), buffers, buffersPtr, nullValue, false); - } else { - exportBuffer(offsetBuffer, buffers, buffersPtr, nullValue, true); - } - } - - /** Set the reader and writer indexes for the inner buffers. */ - private void setReaderAndWriterIndex() { - validityBuffer.readerIndex(0); - offsetBuffer.readerIndex(0); - if (valueCount == 0) { - validityBuffer.writerIndex(0); - offsetBuffer.writerIndex(0); - } else { - validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); - } - } - - /** - * Get the inner vectors. - * - * @return the inner vectors for this field as defined by the TypeLayout - * @deprecated This API will be removed as the current implementations no longer support inner - * vectors. - */ - @Deprecated - @Override - public List getFieldInnerVectors() { - throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); - } - - /** Same as {@link #allocateNewSafe()}. */ - @Override - public void allocateNew() throws OutOfMemoryException { - if (!allocateNewSafe()) { - throw new OutOfMemoryException("Failure while allocating memory"); - } - } - - /** - * Allocate memory for the vector. We internally use a default value count of 4096 to allocate - * memory for at least these many elements in the vector. - * - * @return false if memory allocation fails, true otherwise. 
- */ - @Override - public boolean allocateNewSafe() { - boolean success = false; - try { - /* we are doing a new allocation -- release the current buffers */ - clear(); - /* allocate validity buffer */ - allocateValidityBuffer(validityAllocationSizeInBytes); - /* allocate offset and data buffer */ - boolean dataAlloc = false; - try { - offsetBuffer = allocateOffsetBuffer(offsetAllocationSizeInBytes); - dataAlloc = vector.allocateNewSafe(); - } catch (Exception e) { - e.printStackTrace(); - clear(); - success = false; - } finally { - if (!dataAlloc) { - clear(); - } - } - success = dataAlloc; - } finally { - if (!success) { - clear(); - } - } - return success; - } - - private void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = curSize; - validityBuffer.setZero(0, validityBuffer.capacity()); - } - - protected ArrowBuf allocateOffsetBuffer(final long size) { - ArrowBuf offsetBuffer = allocator.buffer(size); - offsetBuffer.readerIndex(0); - offsetAllocationSizeInBytes = size; - offsetBuffer.setZero(0, offsetBuffer.capacity()); - return offsetBuffer; - } - - /** - * Resize the vector to increase the capacity. The internal behavior is to double the current - * value capacity. 
- */ - @Override - public void reAlloc() { - /* reallocate the validity buffer */ - reallocValidityBuffer(); - /* reallocate the offset and data */ - reallocOffsetBuffer(); - vector.reAlloc(); - } - - private void reallocValidityAndOffsetBuffers() { - reallocOffsetBuffer(); - reallocValidityBuffer(); - } - - protected void reallocOffsetBuffer() { - final long currentBufferCapacity = offsetBuffer.capacity(); - long newAllocationSize = currentBufferCapacity * 2; - if (newAllocationSize == 0) { - if (offsetAllocationSizeInBytes > 0) { - newAllocationSize = offsetAllocationSizeInBytes; - } else { - newAllocationSize = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2; - } - } - - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - newAllocationSize = Math.min(newAllocationSize, (long) OFFSET_WIDTH * Integer.MAX_VALUE); - assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= offsetBuffer.capacity()) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - offsetBuffer.getReferenceManager().release(1); - offsetBuffer = newBuf; - offsetAllocationSizeInBytes = newAllocationSize; - } - - private void reallocValidityBuffer() { - final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity()); - long newAllocationSize = currentBufferCapacity * 2L; - if (newAllocationSize == 0) { - if (validityAllocationSizeInBytes > 0) { - newAllocationSize = validityAllocationSizeInBytes; - } else { - newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2L; - } - } - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable 
to expand the buffer"); - } - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - validityBuffer.getReferenceManager().release(1); - validityBuffer = newBuf; - validityAllocationSizeInBytes = (int) newAllocationSize; - } - - /** - * Same as {@link #copyFrom(int, int, ValueVector)} except that it handles the case when the - * capacity of the vector needs to be expanded before copy. - * - * @param inIndex position to copy from in source vector - * @param outIndex position to copy to in this vector - * @param from source vector - */ - @Override - public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { - copyFrom(inIndex, outIndex, from); - } - - /** - * Copy a cell value from a particular index in source vector to a particular position in this - * vector. - * - * @param inIndex position to copy from in source vector - * @param outIndex position to copy to in this vector - * @param from source vector - */ - @Override - public void copyFrom(int inIndex, int outIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - FieldReader in = from.getReader(); - in.setPosition(inIndex); - UnionLargeListWriter out = getWriter(); - out.setPosition(outIndex); - ComplexCopier.copy(in, out); - } - - /** - * Get the offset vector. - * - * @return the underlying offset vector or null if none exists. - * @deprecated This API will be removed, as the current implementations no longer hold inner - * offset vectors. - */ - @Override - @Deprecated - public UInt4Vector getOffsetVector() { - throw new UnsupportedOperationException("There is no inner offset vector"); - } - - /** - * Get the inner data vector for this list vector. 
- * - * @return data vector - */ - @Override - public FieldVector getDataVector() { - return vector; - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return getTransferPair(ref, allocator, null); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return getTransferPair(field, allocator, null); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(ref, allocator, callBack); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(field, allocator, callBack); - } - - @Override - public TransferPair makeTransferPair(ValueVector target) { - return new TransferImpl((LargeListVector) target); - } - - @Override - public long getValidityBufferAddress() { - return validityBuffer.memoryAddress(); - } - - @Override - public long getDataBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public long getOffsetBufferAddress() { - return offsetBuffer.memoryAddress(); - } - - @Override - public ArrowBuf getValidityBuffer() { - return validityBuffer; - } - - @Override - public ArrowBuf getDataBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getOffsetBuffer() { - return offsetBuffer; - } - - @Override - public int getValueCount() { - return valueCount; - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - if (isSet(index) == 0) { - return ArrowBufPointer.NULL_HASH_CODE; - } - int hash = 0; - final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH); - final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH); - for (long i = start; i < end; i++) { - hash = ByteFunctionHelpers.combineHash(hash, 
vector.hashCode(checkedCastToInt(i), hasher)); - } - return hash; - } - - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } - - public UnionLargeListWriter getWriter() { - return new UnionLargeListWriter(this); - } - - protected void replaceDataVector(FieldVector v) { - vector.clear(); - vector = v; - } - - @Override - public UnionVector promoteToUnion() { - UnionVector vector = new UnionVector("$data$", allocator, /* field type */ null, callBack); - replaceDataVector(vector); - invalidateReader(); - if (callBack != null) { - callBack.doWork(); - } - return vector; - } - - private class TransferImpl implements TransferPair { - - LargeListVector to; - TransferPair dataTransferPair; - - public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { - this(new LargeListVector(name, allocator, field.getFieldType(), callBack)); - } - - public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { - this(new LargeListVector(field, allocator, callBack)); - } - - public TransferImpl(LargeListVector to) { - this.to = to; - to.addOrGetVector(vector.getField().getFieldType()); - if (to.getDataVector() instanceof ZeroVector) { - to.addOrGetVector(vector.getField().getFieldType()); - } - dataTransferPair = getDataVector().makeTransferPair(to.getDataVector()); - } - - /** - * Transfer this vector'data to another vector. The memory associated with this vector is - * transferred to the allocator of target vector for accounting and management purposes. - */ - @Override - public void transfer() { - to.clear(); - dataTransferPair.transfer(); - to.validityBuffer = transferBuffer(validityBuffer, to.allocator); - to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator); - to.lastSet = lastSet; - if (valueCount > 0) { - to.setValueCount(valueCount); - } - clear(); - } - - /** - * Slice this vector at desired index and length and transfer the corresponding data to the - * target vector. 
- * - * @param startIndex start position of the split in source vector. - * @param length length of the split. - */ - @Override - public void splitAndTransfer(int startIndex, int length) { - Preconditions.checkArgument( - startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, - "Invalid parameters startIndex: %s, length: %s for valueCount: %s", - startIndex, - length, - valueCount); - final long startPoint = offsetBuffer.getLong((long) startIndex * OFFSET_WIDTH); - final long sliceLength = - offsetBuffer.getLong((long) (startIndex + length) * OFFSET_WIDTH) - startPoint; - to.clear(); - to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); - /* splitAndTransfer offset buffer */ - for (int i = 0; i < length + 1; i++) { - final long relativeOffset = - offsetBuffer.getLong((long) (startIndex + i) * OFFSET_WIDTH) - startPoint; - to.offsetBuffer.setLong((long) i * OFFSET_WIDTH, relativeOffset); - } - /* splitAndTransfer validity buffer */ - splitAndTransferValidityBuffer(startIndex, length, to); - /* splitAndTransfer data buffer */ - dataTransferPair.splitAndTransfer( - checkedCastToInt(startPoint), checkedCastToInt(sliceLength)); - to.lastSet = length - 1; - to.setValueCount(length); - } - - /* - * transfer the validity. 
- */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, LargeListVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. - */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. 
- */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - - @Override - public ValueVector getTo() { - return to; - } - - @Override - public void copyValueSafe(int from, int to) { - this.to.copyFrom(from, to, LargeListVector.this); - } - } - - @Override - protected FieldReader getReaderImpl() { - return new UnionLargeListReader(this); - } - - @Override - public UnionLargeListReader getReader() { - reader = (UnionLargeListReader) super.getReader(); - return reader; - } - - /** - * Initialize the data vector (and execute callback) if it hasn't already been done, returns the - * data vector. - */ - @Override - public AddOrGetResult addOrGetVector(FieldType fieldType) { - boolean created = false; - if (vector instanceof NullVector) { - vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, callBack); - // returned vector must have the same field - created = true; - if (callBack != null - && - // not a schema change if changing from ZeroVector to ZeroVector - (fieldType.getType().getTypeID() != ArrowType.ArrowTypeID.Null)) { - callBack.doWork(); - } - } - - if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) { - final String msg = - String.format( - "Inner vector type mismatch. 
Requested type: [%s], actual type: [%s]", - fieldType.getType().getTypeID(), vector.getField().getType().getTypeID()); - throw new SchemaChangeRuntimeException(msg); - } - - invalidateReader(); - return new AddOrGetResult<>((T) vector, created); - } - - /** - * Get the size (number of bytes) of underlying buffers used by this vector. - * - * @return size of underlying buffers. - */ - @Override - public int getBufferSize() { - if (valueCount == 0) { - return 0; - } - final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH; - final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); - return offsetBufferSize + validityBufferSize + vector.getBufferSize(); - } - - @Override - public int getBufferSizeFor(int valueCount) { - if (valueCount == 0) { - return 0; - } - final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); - long innerVectorValueCount = offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH); - - return ((valueCount + 1) * OFFSET_WIDTH) - + vector.getBufferSizeFor(checkedCastToInt(innerVectorValueCount)) - + validityBufferSize; - } - - @Override - public Field getField() { - if (field.getChildren().contains(getDataVector().getField())) { - return field; - } - field = - new Field( - field.getName(), - field.getFieldType(), - Collections.singletonList(getDataVector().getField())); - return field; - } - - @Override - public MinorType getMinorType() { - return MinorType.LARGELIST; - } - - @Override - public String getName() { - return field.getName(); - } - - @Override - public void clear() { - offsetBuffer = releaseBuffer(offsetBuffer); - vector.clear(); - valueCount = 0; - super.clear(); - validityBuffer = releaseBuffer(validityBuffer); - lastSet = -1; - } - - @Override - public void reset() { - offsetBuffer.setZero(0, offsetBuffer.capacity()); - vector.reset(); - valueCount = 0; - validityBuffer.setZero(0, validityBuffer.capacity()); - lastSet = -1; - } - - /** - * Return the underlying buffers associated with this 
vector. Note that this doesn't impact the - * reference counts for this buffer, so it only should be used for in-context access. Also note - * that this buffer changes regularly, thus external classes shouldn't hold a reference to it - * (unless they change it). - * - * @param clear Whether to clear vector before returning, the buffers will still be refcounted but - * the returned array will be the only reference to them. Also, this won't clear the child - * buffers. - * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. - */ - @Override - public ArrowBuf[] getBuffers(boolean clear) { - setReaderAndWriterIndex(); - final ArrowBuf[] buffers; - if (getBufferSize() == 0) { - buffers = new ArrowBuf[0]; - } else { - List list = new ArrayList<>(); - list.add(offsetBuffer); - list.add(validityBuffer); - list.addAll(Arrays.asList(vector.getBuffers(false))); - buffers = list.toArray(new ArrowBuf[list.size()]); - } - if (clear) { - for (ArrowBuf buffer : buffers) { - buffer.getReferenceManager().retain(); - } - clear(); - } - return buffers; - } - - protected void invalidateReader() { - reader = null; - } - - /** - * Get the element in the list vector at a particular index. - * - * @param index position of the element - * @return Object at given position - */ - @Override - public List getObject(int index) { - if (isSet(index) == 0) { - return null; - } - final List vals = new JsonStringArrayList<>(); - final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH); - final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH); - final ValueVector vv = getDataVector(); - for (long i = start; i < end; i++) { - vals.add(vv.getObject(checkedCastToInt(i))); - } - - return vals; - } - - /** - * Check if element at given index is null. 
- * - * @param index position of element - * @return true if element at given index is null, false otherwise - */ - @Override - public boolean isNull(int index) { - return (isSet(index) == 0); - } - - /** - * Check if element at given index is empty list. - * - * @param index position of element - * @return true if element at given index is empty list or NULL, false otherwise - */ - public boolean isEmpty(int index) { - if (isNull(index)) { - return true; - } else { - final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH); - final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH); - return start == end; - } - } - - /** - * Same as {@link #isNull(int)}. - * - * @param index position of element - * @return 1 if element at given index is not null, 0 otherwise - */ - public int isSet(int index) { - final int byteIndex = index >> 3; - final byte b = validityBuffer.getByte(byteIndex); - final int bitIndex = index & 7; - return (b >> bitIndex) & 0x01; - } - - /** - * Get the number of elements that are null in the vector. - * - * @return the number of null elements. - */ - @Override - public int getNullCount() { - return BitVectorHelper.getNullCount(validityBuffer, valueCount); - } - - /** - * Get the current value capacity for the vector. - * - * @return number of elements that vector can hold. - */ - @Override - public int getValueCapacity() { - return getValidityAndOffsetValueCapacity(); - } - - protected int getOffsetBufferValueCapacity() { - return checkedCastToInt(offsetBuffer.capacity() / OFFSET_WIDTH); - } - - private int getValidityAndOffsetValueCapacity() { - final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0); - return Math.min(offsetValueCapacity, getValidityBufferValueCapacity()); - } - - private int getValidityBufferValueCapacity() { - return capAtMaxInt(validityBuffer.capacity() * 8); - } - - /** - * Sets the list at index to be not-null. 
Reallocates validity buffer if index is larger than - * current capacity. - */ - public void setNotNull(int index) { - while (index >= getValidityAndOffsetValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - BitVectorHelper.setBit(validityBuffer, index); - lastSet = index; - } - - /** - * Sets list at index to be null. - * - * @param index position in vector - */ - @Override - public void setNull(int index) { - while (index >= getValidityAndOffsetValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - if (lastSet >= index) { - lastSet = index - 1; - } - for (int i = lastSet + 1; i <= index; i++) { - final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); - offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset); - } - BitVectorHelper.unsetBit(validityBuffer, index); - } - - /** - * Start a new value in the list vector. - * - * @param index index of the value to start - */ - public long startNewValue(long index) { - while (index >= getValidityAndOffsetValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - for (int i = lastSet + 1; i <= index; i++) { - final long currentOffset = offsetBuffer.getLong((long) i * OFFSET_WIDTH); - offsetBuffer.setLong(((long) i + 1L) * OFFSET_WIDTH, currentOffset); - } - BitVectorHelper.setBit(validityBuffer, index); - lastSet = checkedCastToInt(index); - return offsetBuffer.getLong(((long) lastSet + 1L) * OFFSET_WIDTH); - } - - /** - * End the current value. - * - * @param index index of the value to end - * @param size number of elements in the list that was written - */ - public void endValue(int index, long size) { - final long currentOffset = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH); - offsetBuffer.setLong(((long) index + 1L) * OFFSET_WIDTH, currentOffset + size); - } - - /** - * Sets the value count for the vector. - * - *

    Important note: The underlying vector does not support 64-bit allocations yet. This may - * throw if attempting to hold larger than what a 32-bit vector can store. - * - * @param valueCount value count - */ - @Override - public void setValueCount(int valueCount) { - this.valueCount = valueCount; - if (valueCount > 0) { - while (valueCount > getValidityAndOffsetValueCapacity()) { - /* check if validity and offset buffers need to be re-allocated */ - reallocValidityAndOffsetBuffers(); - } - for (int i = lastSet + 1; i < valueCount; i++) { - /* fill the holes with offsets */ - final long currentOffset = offsetBuffer.getLong((long) i * OFFSET_WIDTH); - offsetBuffer.setLong(((long) i + 1L) * OFFSET_WIDTH, currentOffset); - } - } - /* valueCount for the data vector is the current end offset */ - final long childValueCount = - (valueCount == 0) ? 0 : offsetBuffer.getLong(((long) lastSet + 1L) * OFFSET_WIDTH); - /* set the value count of data vector and this will take care of - * checking whether data buffer needs to be reallocated. 
- * TODO: revisit when 64-bit vectors are supported - */ - Preconditions.checkArgument( - childValueCount <= Integer.MAX_VALUE || childValueCount >= Integer.MIN_VALUE, - "LargeListVector doesn't yet support 64-bit allocations: %s", - childValueCount); - vector.setValueCount((int) childValueCount); - } - - public void setLastSet(int value) { - lastSet = value; - } - - public int getLastSet() { - return lastSet; - } - - public long getElementStartIndex(int index) { - return offsetBuffer.getLong((long) index * OFFSET_WIDTH); - } - - public long getElementEndIndex(int index) { - return offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java deleted file mode 100644 index 84c6f03edb25d..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java +++ /dev/null @@ -1,1029 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex; - -import static java.util.Collections.singletonList; -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.apache.arrow.util.Preconditions.checkArgument; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.ByteFunctionHelpers; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.BufferBacked; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.impl.UnionLargeListViewReader; -import org.apache.arrow.vector.complex.impl.UnionLargeListViewWriter; -import org.apache.arrow.vector.complex.impl.UnionListReader; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.TransferPair; - -/** - * A 
large list view vector contains lists of a specific type of elements. Its structure contains 3 - * elements. - * - *

      - *
    1. A validity buffer. - *
    2. An offset buffer, that denotes lists starting positions. - *
    3. A size buffer, that denotes sizes of the lists. - *
    4. A child data vector that contains the elements of lists. - *
    - * - * This is the LargeListView variant of listview, it has a 64-bit wide offset - * - *

    WARNING: Currently Arrow in Java doesn't support 64-bit vectors. This class follows the - * expected behaviour of a LargeList but doesn't actually support allocating a 64-bit vector. It has - * little use until 64-bit vectors are supported and should be used with caution. todo review - * checkedCastToInt usage in this class. Once int64 indexed vectors are supported these checks - * aren't needed. - */ -public class LargeListViewVector extends BaseLargeRepeatedValueViewVector - implements PromotableVector, ValueIterableVector> { - - protected ArrowBuf validityBuffer; - protected UnionLargeListViewReader reader; - private CallBack callBack; - protected Field field; - protected int validityAllocationSizeInBytes; - - public static LargeListViewVector empty(String name, BufferAllocator allocator) { - return new LargeListViewVector( - name, allocator, FieldType.nullable(ArrowType.LargeListView.INSTANCE), null); - } - - /** - * Constructs a new instance. - * - * @param name The name of the instance. - * @param allocator The allocator to use for allocating/reallocating buffers. - * @param fieldType The type of this list. - * @param callBack A schema change callback. - */ - public LargeListViewVector( - String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - this(new Field(name, fieldType, null), allocator, callBack); - } - - /** - * Constructs a new instance. - * - * @param field The field materialized by this vector. - * @param allocator The allocator to use for allocating/reallocating buffers. - * @param callBack A schema change callback. 
- */ - public LargeListViewVector(Field field, BufferAllocator allocator, CallBack callBack) { - super(field.getName(), allocator, callBack); - this.validityBuffer = allocator.getEmpty(); - this.field = field; - this.callBack = callBack; - this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - } - - @Override - public void initializeChildrenFromFields(List children) { - checkArgument( - children.size() == 1, - "ListViews have one child Field. Found: %s", - children.isEmpty() ? "none" : children); - - Field field = children.get(0); - AddOrGetResult addOrGetVector = addOrGetVector(field.getFieldType()); - checkArgument( - addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); - - addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); - this.field = new Field(this.field.getName(), this.field.getFieldType(), children); - } - - @Override - public void setInitialCapacity(int numRecords) { - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); - super.setInitialCapacity(numRecords); - } - - /** - * Specialized version of setInitialCapacity() for LargeListViewVector. This is used by some - * callers when they want to explicitly control and be conservative about memory allocated for - * inner data vector. This is very useful when we are working with memory constraints for a query - * and have a fixed amount of memory reserved for the record batch. In such cases, we are likely - * to face OOM or related problems when we reserve memory for a record batch with value count x - * and do setInitialCapacity(x) such that each vector allocates only what is necessary and not the - * default amount, but the multiplier forces the memory requirement to go beyond what was needed. - * - * @param numRecords value count - * @param density density of LargeListViewVector. Density is the average size of a list per - * position in the LargeListViewVector. 
For example, a density value of 10 implies each - * position in the list vector has a list of 10 values. A density value of 0.1 implies out of - * 10 positions in the list vector, 1 position has a list of size 1, and the remaining - * positions are null (no lists) or empty lists. This helps in tightly controlling the memory - * we provision for inner data vector. - */ - @Override - public void setInitialCapacity(int numRecords, double density) { - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); - super.setInitialCapacity(numRecords, density); - } - - /** - * Specialized version of setInitialTotalCapacity() for LargeListViewVector. This is used by some - * callers when they want to explicitly control and be conservative about memory allocated for - * inner data vector. This is very useful when we are working with memory constraints for a query - * and have a fixed amount of memory reserved for the record batch. In such cases, we are likely - * to face OOM or related problems when we reserve memory for a record batch with value count x - * and do setInitialCapacity(x) such that each vector allocates only what is necessary and not the - * default amount, but the multiplier forces the memory requirement to go beyond what was needed. - * - * @param numRecords value count - * @param totalNumberOfElements the total number of elements to allow for in this vector across - * all records. - */ - @Override - public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); - super.setInitialTotalCapacity(numRecords, totalNumberOfElements); - } - - @Override - public List getChildrenFromFields() { - return singletonList(getDataVector()); - } - - /** - * Load the buffers associated with this Field. 
- * - * @param fieldNode the fieldNode - * @param ownBuffers the buffers for this Field (own buffers only, children not included) - */ - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - if (ownBuffers.size() != 3) { - throw new IllegalArgumentException( - "Illegal buffer count, expected " + 3 + ", got: " + ownBuffers.size()); - } - - ArrowBuf bitBuffer = ownBuffers.get(0); - ArrowBuf offBuffer = ownBuffers.get(1); - ArrowBuf szBuffer = ownBuffers.get(2); - - validityBuffer.getReferenceManager().release(); - validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); - offsetBuffer.getReferenceManager().release(); - offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); - sizeBuffer.getReferenceManager().release(); - sizeBuffer = szBuffer.getReferenceManager().retain(szBuffer, allocator); - - validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); - offsetAllocationSizeInBytes = offsetBuffer.capacity(); - sizeAllocationSizeInBytes = sizeBuffer.capacity(); - - valueCount = fieldNode.getLength(); - } - - /** Set the reader and writer indexes for the inner buffers. */ - private void setReaderAndWriterIndex() { - validityBuffer.readerIndex(0); - offsetBuffer.readerIndex(0); - sizeBuffer.readerIndex(0); - if (valueCount == 0) { - validityBuffer.writerIndex(0); - offsetBuffer.writerIndex(0); - sizeBuffer.writerIndex(0); - } else { - validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - offsetBuffer.writerIndex((long) valueCount * OFFSET_WIDTH); - sizeBuffer.writerIndex((long) valueCount * SIZE_WIDTH); - } - } - - @Override - public List getFieldBuffers() { - List result = new ArrayList<>(2); - setReaderAndWriterIndex(); - result.add(validityBuffer); - result.add(offsetBuffer); - result.add(sizeBuffer); - - return result; - } - - /** - * Export the buffers of the fields for C Data Interface. 
This method traverses the buffers and - * export buffer and buffer's memory address into a list of buffers and a pointer to the list of - * buffers. - */ - @Override - public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long nullValue) { - exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true); - exportBuffer(offsetBuffer, buffers, buffersPtr, nullValue, true); - exportBuffer(sizeBuffer, buffers, buffersPtr, nullValue, true); - } - - @Override - public void allocateNew() throws OutOfMemoryException { - if (!allocateNewSafe()) { - throw new OutOfMemoryException("Failure while allocating memory"); - } - } - - @Override - public boolean allocateNewSafe() { - boolean success = false; - try { - /* release the current buffers, hence this is a new allocation - * Note that, the `clear` method call below is releasing validityBuffer - * calling the superclass clear method which is releasing the associated buffers - * (sizeBuffer and offsetBuffer). - */ - clear(); - /* allocate validity buffer */ - allocateValidityBuffer(validityAllocationSizeInBytes); - /* allocate offset, data and sizes buffer */ - success = super.allocateNewSafe(); - } finally { - if (!success) { - clear(); - } - } - return success; - } - - protected void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = curSize; - validityBuffer.setZero(0, validityBuffer.capacity()); - } - - @Override - public void reAlloc() { - /* reallocate the validity buffer */ - reallocValidityBuffer(); - /* reallocate the offset, size, and data */ - super.reAlloc(); - } - - protected void reallocValidityAndSizeAndOffsetBuffers() { - reallocateBuffers(); - reallocValidityBuffer(); - } - - private void reallocValidityBuffer() { - final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity()); - long newAllocationSize = 
getNewAllocationSize(currentBufferCapacity); - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - validityBuffer.getReferenceManager().release(1); - validityBuffer = newBuf; - validityAllocationSizeInBytes = (int) newAllocationSize; - } - - private long getNewAllocationSize(int currentBufferCapacity) { - long newAllocationSize = currentBufferCapacity * 2L; - if (newAllocationSize == 0) { - if (validityAllocationSizeInBytes > 0) { - newAllocationSize = validityAllocationSizeInBytes; - } else { - newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2L; - } - } - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - return newAllocationSize; - } - - @Override - public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { - throw new UnsupportedOperationException( - "LargeListViewVector does not support copyFromSafe operation yet."); - } - - @Override - public void copyFrom(int inIndex, int outIndex, ValueVector from) { - throw new UnsupportedOperationException( - "LargeListViewVector does not support copyFrom operation yet."); - } - - @Override - public FieldVector getDataVector() { - return vector; - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return getTransferPair(ref, allocator, null); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return getTransferPair(field, allocator, null); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(ref, allocator, callBack); - } - - @Override - public TransferPair 
getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(field, allocator, callBack); - } - - @Override - public TransferPair makeTransferPair(ValueVector target) { - return new TransferImpl((LargeListViewVector) target); - } - - @Override - public long getValidityBufferAddress() { - return validityBuffer.memoryAddress(); - } - - @Override - public long getDataBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public long getOffsetBufferAddress() { - return offsetBuffer.memoryAddress(); - } - - @Override - public ArrowBuf getValidityBuffer() { - return validityBuffer; - } - - @Override - public ArrowBuf getDataBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getOffsetBuffer() { - return offsetBuffer; - } - - public ArrowBuf getSizeBuffer() { - return sizeBuffer; - } - - public long getSizeBufferAddress() { - return sizeBuffer.memoryAddress(); - } - - /** - * Get the hash code for the element at the given index. - * - * @param index position of the element - * @return hash code for the element at the given index - */ - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - /** - * Get the hash code for the element at the given index. 
- * - * @param index position of the element - * @param hasher hasher to use - * @return hash code for the element at the given index - */ - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - if (isSet(index) == 0) { - return ArrowBufPointer.NULL_HASH_CODE; - } - int hash = 0; - final int start = offsetBuffer.getInt((long) index * OFFSET_WIDTH); - final int end = sizeBuffer.getInt((long) index * OFFSET_WIDTH); - for (int i = start; i < end; i++) { - hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(checkedCastToInt(i), hasher)); - } - return hash; - } - - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } - - private class TransferImpl implements TransferPair { - - LargeListViewVector to; - TransferPair dataTransferPair; - - public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { - this(new LargeListViewVector(name, allocator, field.getFieldType(), callBack)); - } - - public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { - this(new LargeListViewVector(field, allocator, callBack)); - } - - public TransferImpl(LargeListViewVector to) { - this.to = to; - to.addOrGetVector(vector.getField().getFieldType()); - if (to.getDataVector() instanceof ZeroVector) { - to.addOrGetVector(vector.getField().getFieldType()); - } - dataTransferPair = getDataVector().makeTransferPair(to.getDataVector()); - } - - @Override - public void transfer() { - to.clear(); - dataTransferPair.transfer(); - to.validityBuffer = transferBuffer(validityBuffer, to.allocator); - to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator); - to.sizeBuffer = transferBuffer(sizeBuffer, to.allocator); - if (valueCount > 0) { - to.setValueCount(valueCount); - } - clear(); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - Preconditions.checkArgument( - startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, - "Invalid parameters 
startIndex: %s, length: %s for valueCount: %s", - startIndex, - length, - valueCount); - to.clear(); - if (length > 0) { - // we have to scan by index since there are out-of-order offsets - to.offsetBuffer = to.allocateBuffers((long) length * OFFSET_WIDTH); - to.sizeBuffer = to.allocateBuffers((long) length * SIZE_WIDTH); - - /* splitAndTransfer the size buffer */ - int maxOffsetAndSizeSum = Integer.MIN_VALUE; - int minOffsetValue = Integer.MAX_VALUE; - for (int i = 0; i < length; i++) { - final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH); - final int sizeValue = sizeBuffer.getInt((long) (startIndex + i) * SIZE_WIDTH); - to.sizeBuffer.setInt((long) i * SIZE_WIDTH, sizeValue); - maxOffsetAndSizeSum = Math.max(maxOffsetAndSizeSum, offsetValue + sizeValue); - minOffsetValue = Math.min(minOffsetValue, offsetValue); - } - - /* splitAndTransfer the offset buffer */ - for (int i = 0; i < length; i++) { - final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH); - final int relativeOffset = offsetValue - minOffsetValue; - to.offsetBuffer.setInt((long) i * OFFSET_WIDTH, relativeOffset); - } - - /* splitAndTransfer the validity buffer */ - splitAndTransferValidityBuffer(startIndex, length, to); - - /* splitAndTransfer the data buffer */ - final int childSliceLength = maxOffsetAndSizeSum - minOffsetValue; - dataTransferPair.splitAndTransfer(minOffsetValue, childSliceLength); - to.setValueCount(length); - } - } - - /* - * transfer the validity. 
- */ - private void splitAndTransferValidityBuffer( - int startIndex, int length, LargeListViewVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. - */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. 
- */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - - @Override - public ValueVector getTo() { - return to; - } - - @Override - public void copyValueSafe(int from, int to) { - this.to.copyFrom(from, to, LargeListViewVector.this); - } - } - - @Override - protected FieldReader getReaderImpl() { - throw new UnsupportedOperationException( - "LargeListViewVector does not support getReaderImpl operation yet."); - } - - @Override - public UnionListReader getReader() { - throw new UnsupportedOperationException( - "LargeListViewVector does not support getReader operation yet."); - } - - /** - * Get the size (number of bytes) of underlying buffers used by this vector. - * - * @return size of underlying buffers. - */ - @Override - public int getBufferSize() { - if (valueCount == 0) { - return 0; - } - final int offsetBufferSize = valueCount * OFFSET_WIDTH; - final int sizeBufferSize = valueCount * SIZE_WIDTH; - final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); - return offsetBufferSize + sizeBufferSize + validityBufferSize + vector.getBufferSize(); - } - - /** - * Get the size (number of bytes) of underlying buffers used by this. - * - * @param valueCount the number of values to assume this vector contains - * @return size of underlying buffers. 
- */ - @Override - public int getBufferSizeFor(int valueCount) { - if (valueCount == 0) { - return 0; - } - final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); - - return super.getBufferSizeFor(valueCount) + validityBufferSize; - } - - /** - * Get the field associated with the list view vector. - * - * @return the field - */ - @Override - public Field getField() { - if (field.getChildren().contains(getDataVector().getField())) { - return field; - } - field = - new Field( - field.getName(), - field.getFieldType(), - Collections.singletonList(getDataVector().getField())); - return field; - } - - /** - * Get the minor type for the vector. - * - * @return the minor type - */ - @Override - public MinorType getMinorType() { - return MinorType.LARGELISTVIEW; - } - - /** Clear the vector data. */ - @Override - public void clear() { - // calling superclass clear method which is releasing the sizeBufer and offsetBuffer - super.clear(); - validityBuffer = releaseBuffer(validityBuffer); - } - - /** Release the buffers associated with this vector. */ - @Override - public void reset() { - super.reset(); - validityBuffer.setZero(0, validityBuffer.capacity()); - } - - /** - * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer, so it only should be used for in-context access. Also note - * that this buffer changes regularly, thus external classes shouldn't hold a reference to it - * (unless they change it). - * - * @param clear Whether to clear vector before returning, the buffers will still be refcounted but - * the returned array will be the only reference to them. Also, this won't clear the child - * buffers. - * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. 
- */ - @Override - public ArrowBuf[] getBuffers(boolean clear) { - setReaderAndWriterIndex(); - final ArrowBuf[] buffers; - if (getBufferSize() == 0) { - buffers = new ArrowBuf[0]; - } else { - List list = new ArrayList<>(); - // the order must be validity, offset and size buffers - list.add(validityBuffer); - list.add(offsetBuffer); - list.add(sizeBuffer); - list.addAll(Arrays.asList(vector.getBuffers(false))); - buffers = list.toArray(new ArrowBuf[list.size()]); - } - if (clear) { - for (ArrowBuf buffer : buffers) { - buffer.getReferenceManager().retain(); - } - clear(); - } - return buffers; - } - - /** - * Get the element in the list view vector at a particular index. - * - * @param index position of the element - * @return Object at given position - */ - @Override - public List getObject(int index) { - if (isSet(index) == 0) { - return null; - } - final List vals = new JsonStringArrayList<>(); - final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); - final int end = start + sizeBuffer.getInt((index) * SIZE_WIDTH); - final ValueVector vv = getDataVector(); - for (int i = start; i < end; i++) { - vals.add(vv.getObject(checkedCastToInt(i))); - } - - return vals; - } - - /** - * Check if an element at given index is null. - * - * @param index position of an element - * @return true if an element at given index is null, false otherwise - */ - @Override - public boolean isNull(int index) { - return (isSet(index) == 0); - } - - /** - * Check if an element at given index is an empty list. - * - * @param index position of an element - * @return true if an element at given index is an empty list or NULL, false otherwise - */ - @Override - public boolean isEmpty(int index) { - if (isNull(index)) { - return true; - } else { - return sizeBuffer.getInt(index * SIZE_WIDTH) == 0; - } - } - - /** - * Same as {@link #isNull(int)}. 
- * - * @param index position of the element - * @return 1 if element at given index is not null, 0 otherwise - */ - public int isSet(int index) { - final int byteIndex = index >> 3; - final byte b = validityBuffer.getByte(byteIndex); - final int bitIndex = index & 7; - return (b >> bitIndex) & 0x01; - } - - /** - * Get the number of elements that are null in the vector. - * - * @return the number of null elements. - */ - @Override - public int getNullCount() { - return BitVectorHelper.getNullCount(validityBuffer, valueCount); - } - - /** - * Get the value capacity by considering validity and offset capacity. Note that the size buffer - * capacity is not considered here since it has the same capacity as the offset buffer. - * - * @return the value capacity - */ - @Override - public int getValueCapacity() { - return getValidityAndOffsetValueCapacity(); - } - - private int getValidityAndSizeValueCapacity() { - final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity(), 0); - final int sizeValueCapacity = Math.max(getSizeBufferValueCapacity(), 0); - return Math.min(offsetValueCapacity, sizeValueCapacity); - } - - private int getValidityAndOffsetValueCapacity() { - final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity(), 0); - return Math.min(offsetValueCapacity, getValidityBufferValueCapacity()); - } - - private int getValidityBufferValueCapacity() { - return capAtMaxInt(validityBuffer.capacity() * 8); - } - - /** - * Set the element at the given index to null. - * - * @param index the value to change - */ - @Override - public void setNull(int index) { - while (index >= getValidityAndSizeValueCapacity()) { - reallocValidityAndSizeAndOffsetBuffers(); - } - - offsetBuffer.setInt(index * OFFSET_WIDTH, 0); - sizeBuffer.setInt(index * SIZE_WIDTH, 0); - BitVectorHelper.unsetBit(validityBuffer, index); - } - - /** - * Start new value in the ListView vector. 
- * - * @param index index of the value to start - * @return offset of the new value - */ - @Override - public int startNewValue(int index) { - while (index >= getValidityAndSizeValueCapacity()) { - reallocValidityAndSizeAndOffsetBuffers(); - } - - if (index > 0) { - final int prevOffset = getMaxViewEndChildVectorByIndex(index); - offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset); - } - - BitVectorHelper.setBit(validityBuffer, index); - return offsetBuffer.getInt(index * OFFSET_WIDTH); - } - - /** - * Validate the invariants of the offset and size buffers. 0 <= offsets[i] <= length of the child - * array 0 <= offsets[i] + size[i] <= length of the child array - * - * @param offset the offset at a given index - * @param size the size at a given index - */ - private void validateInvariants(int offset, int size) { - if (offset < 0) { - throw new IllegalArgumentException("Offset cannot be negative"); - } - - if (size < 0) { - throw new IllegalArgumentException("Size cannot be negative"); - } - - // 0 <= offsets[i] <= length of the child array - if (offset > this.vector.getValueCount()) { - throw new IllegalArgumentException("Offset is out of bounds."); - } - - // 0 <= offsets[i] + size[i] <= length of the child array - if (offset + size > this.vector.getValueCount()) { - throw new IllegalArgumentException("Offset + size <= length of the child array."); - } - } - - /** - * Set the offset at the given index. Make sure to use this function after updating `field` vector - * and using `setValidity` - * - * @param index index of the value to set - * @param value value to set - */ - public void setOffset(int index, int value) { - validateInvariants(value, sizeBuffer.getInt(index * SIZE_WIDTH)); - - offsetBuffer.setInt(index * OFFSET_WIDTH, value); - } - - /** - * Set the size at the given index. Make sure to use this function after using `setOffset`. 
- * - * @param index index of the value to set - * @param value value to set - */ - public void setSize(int index, int value) { - validateInvariants(offsetBuffer.getInt(index * SIZE_WIDTH), value); - - sizeBuffer.setInt(index * SIZE_WIDTH, value); - } - - /** - * Set the validity at the given index. - * - * @param index index of the value to set - * @param value value to set (0 for unset and 1 for a set) - */ - public void setValidity(int index, int value) { - if (value == 0) { - BitVectorHelper.unsetBit(validityBuffer, index); - } else { - BitVectorHelper.setBit(validityBuffer, index); - } - } - - /** - * Sets the value count for the vector. - * - *

    Important note: The underlying vector does not support 64-bit allocations yet. This may - * throw if attempting to hold larger than what a 32-bit vector can store. - * - * @param valueCount value count - */ - @Override - public void setValueCount(int valueCount) { - this.valueCount = valueCount; - if (valueCount > 0) { - while (valueCount > getValidityAndSizeValueCapacity()) { - /* check if validity and offset buffers need to be re-allocated */ - reallocValidityAndSizeAndOffsetBuffers(); - } - } - /* valueCount for the data vector is the current end offset */ - final long childValueCount = (valueCount == 0) ? 0 : getMaxViewEndChildVector(); - /* set the value count of data vector and this will take care of - * checking whether data buffer needs to be reallocated. - * TODO: revisit when 64-bit vectors are supported - */ - Preconditions.checkArgument( - childValueCount <= Integer.MAX_VALUE && childValueCount >= 0, - "LargeListViewVector doesn't yet support 64-bit allocations: %s", - childValueCount); - vector.setValueCount((int) childValueCount); - } - - @Override - public AddOrGetResult addOrGetVector(FieldType fieldType) { - AddOrGetResult result = super.addOrGetVector(fieldType); - invalidateReader(); - return result; - } - - @Override - public UnionVector promoteToUnion() { - UnionVector vector = new UnionVector("$data$", allocator, /* field type*/ null, callBack); - replaceDataVector(vector); - invalidateReader(); - if (callBack != null) { - callBack.doWork(); - } - return vector; - } - - private void invalidateReader() { - reader = null; - } - - @Deprecated - @Override - public List getFieldInnerVectors() { - throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); - } - - public UnionLargeListViewWriter getWriter() { - return new UnionLargeListViewWriter(this); - } - - @Override - public int getValueCount() { - return valueCount; - } - - /** - * Get the density of this LargeListViewVector. 
- * - * @return density - */ - public double getDensity() { - if (valueCount == 0) { - return 0.0D; - } - final double totalListSize = getMaxViewEndChildVector(); - return totalListSize / valueCount; - } - - /** Validating LargeListViewVector creation based on the specification guideline. */ - @Override - public void validate() { - for (int i = 0; i < valueCount; i++) { - final int offset = offsetBuffer.getInt((long) i * OFFSET_WIDTH); - final int size = sizeBuffer.getInt((long) i * SIZE_WIDTH); - validateInvariants(offset, size); - } - } - - /** - * End the current value. - * - * @param index index of the value to end - * @param size number of elements in the list that was written - */ - public void endValue(int index, int size) { - sizeBuffer.setInt((long) index * SIZE_WIDTH, size); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java deleted file mode 100644 index 76682c28fe65d..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ /dev/null @@ -1,978 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex; - -import static java.util.Collections.singletonList; -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.apache.arrow.util.Preconditions.checkArgument; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.ByteFunctionHelpers; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.BufferBacked; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.impl.ComplexCopier; -import org.apache.arrow.vector.complex.impl.UnionListReader; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.apache.arrow.vector.util.OversizedAllocationException; -import 
org.apache.arrow.vector.util.TransferPair; - -/** - * A list vector contains lists of a specific type of elements. Its structure contains 3 elements. - * - *

      - *
    1. A validity buffer. - *
    2. An offset buffer, that denotes lists boundaries. - *
    3. A child data vector that contains the elements of lists. - *
    - * - * The latter two are managed by its superclass. - */ -public class ListVector extends BaseRepeatedValueVector - implements PromotableVector, ValueIterableVector> { - - public static ListVector empty(String name, BufferAllocator allocator) { - return new ListVector(name, allocator, FieldType.nullable(ArrowType.List.INSTANCE), null); - } - - protected ArrowBuf validityBuffer; - protected UnionListReader reader; - private CallBack callBack; - protected Field field; - protected int validityAllocationSizeInBytes; - - /** The maximum index that is actually set. */ - protected int lastSet; - - /** - * Constructs a new instance. - * - * @param name The name of the instance. - * @param allocator The allocator to use for allocating/reallocating buffers. - * @param fieldType The type of this list. - * @param callBack A schema change callback. - */ - public ListVector( - String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - this(new Field(name, fieldType, null), allocator, callBack); - } - - /** - * Constructs a new instance. - * - * @param field The field materialized by this vector. - * @param allocator The allocator to use for allocating/reallocating buffers. - * @param callBack A schema change callback. - */ - public ListVector(Field field, BufferAllocator allocator, CallBack callBack) { - super(field.getName(), allocator, callBack); - this.validityBuffer = allocator.getEmpty(); - this.field = field; - this.callBack = callBack; - this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - this.lastSet = -1; - } - - @Override - public void initializeChildrenFromFields(List children) { - checkArgument( - children.size() == 1, - "Lists have one child Field. Found: %s", - children.isEmpty() ? 
"none" : children); - - Field field = children.get(0); - AddOrGetResult addOrGetVector = addOrGetVector(field.getFieldType()); - checkArgument( - addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); - - addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); - this.field = new Field(this.field.getName(), this.field.getFieldType(), children); - } - - @Override - public void setInitialCapacity(int numRecords) { - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); - super.setInitialCapacity(numRecords); - } - - /** - * Specialized version of setInitialCapacity() for ListVector. This is used by some callers when - * they want to explicitly control and be conservative about memory allocated for inner data - * vector. This is very useful when we are working with memory constraints for a query and have a - * fixed amount of memory reserved for the record batch. In such cases, we are likely to face OOM - * or related problems when we reserve memory for a record batch with value count x and do - * setInitialCapacity(x) such that each vector allocates only what is necessary and not the - * default amount but the multiplier forces the memory requirement to go beyond what was needed. - * - * @param numRecords value count - * @param density density of ListVector. Density is the average size of list per position in the - * List vector. For example, a density value of 10 implies each position in the list vector - * has a list of 10 values. A density value of 0.1 implies out of 10 positions in the list - * vector, 1 position has a list of size 1 and remaining positions are null (no lists) or - * empty lists. This helps in tightly controlling the memory we provision for inner data - * vector. 
- */ - @Override - public void setInitialCapacity(int numRecords, double density) { - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); - super.setInitialCapacity(numRecords, density); - } - - /** - * Specialized version of setInitialTotalCapacity() for ListVector. This is used by some callers - * when they want to explicitly control and be conservative about memory allocated for inner data - * vector. This is very useful when we are working with memory constraints for a query and have a - * fixed amount of memory reserved for the record batch. In such cases, we are likely to face OOM - * or related problems when we reserve memory for a record batch with value count x and do - * setInitialCapacity(x) such that each vector allocates only what is necessary and not the - * default amount but the multiplier forces the memory requirement to go beyond what was needed. - * - * @param numRecords value count - * @param totalNumberOfElements the total number of elements to to allow for in this vector across - * all records. - */ - @Override - public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); - super.setInitialTotalCapacity(numRecords, totalNumberOfElements); - } - - /** - * Get the density of this ListVector. - * - * @return density - */ - public double getDensity() { - if (valueCount == 0) { - return 0.0D; - } - final int startOffset = offsetBuffer.getInt(0); - final int endOffset = offsetBuffer.getInt(valueCount * OFFSET_WIDTH); - final double totalListSize = endOffset - startOffset; - return totalListSize / valueCount; - } - - @Override - public List getChildrenFromFields() { - return singletonList(getDataVector()); - } - - /** - * Load the buffers of this vector with provided source buffers. The caller manages the source - * buffers and populates them before invoking this method. 
- * - * @param fieldNode the fieldNode indicating the value count - * @param ownBuffers the buffers for this Field (own buffers only, children not included) - */ - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - if (ownBuffers.size() != 2) { - throw new IllegalArgumentException( - "Illegal buffer count, expected " + 2 + ", got: " + ownBuffers.size()); - } - - ArrowBuf bitBuffer = ownBuffers.get(0); - ArrowBuf offBuffer = ownBuffers.get(1); - - validityBuffer.getReferenceManager().release(); - validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); - offsetBuffer.getReferenceManager().release(); - offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); - - validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); - offsetAllocationSizeInBytes = offsetBuffer.capacity(); - - lastSet = fieldNode.getLength() - 1; - valueCount = fieldNode.getLength(); - } - - /** - * Get the buffers belonging to this vector. - * - * @return the inner buffers. - */ - @Override - public List getFieldBuffers() { - List result = new ArrayList<>(2); - setReaderAndWriterIndex(); - result.add(validityBuffer); - result.add(offsetBuffer); - - return result; - } - - /** - * Export the buffers of the fields for C Data Interface. This method traverse the buffers and - * export buffer and buffer's memory address into a list of buffers and a pointer to the list of - * buffers. - */ - @Override - public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long nullValue) { - exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true); - - if (offsetBuffer.capacity() == 0) { - // Empty offset buffer is allowed for historical reason. - // To export it through C Data interface, we need to allocate a buffer with one offset. - // We set `retain = false` to explicitly not increase the ref count for the exported buffer. 
- // The ref count of the newly created buffer (i.e., 1) already represents the usage - // at imported side. - exportBuffer(allocateOffsetBuffer(OFFSET_WIDTH), buffers, buffersPtr, nullValue, false); - } else { - exportBuffer(offsetBuffer, buffers, buffersPtr, nullValue, true); - } - } - - /** Set the reader and writer indexes for the inner buffers. */ - private void setReaderAndWriterIndex() { - validityBuffer.readerIndex(0); - offsetBuffer.readerIndex(0); - if (valueCount == 0) { - validityBuffer.writerIndex(0); - offsetBuffer.writerIndex(0); - } else { - validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); - } - } - - /** - * Get the inner vectors. - * - * @return the inner vectors for this field as defined by the TypeLayout - * @deprecated This API will be removed as the current implementations no longer support inner - * vectors. - */ - @Deprecated - @Override - public List getFieldInnerVectors() { - throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); - } - - public UnionListWriter getWriter() { - return new UnionListWriter(this); - } - - /** Same as {@link #allocateNewSafe()}. */ - @Override - public void allocateNew() throws OutOfMemoryException { - if (!allocateNewSafe()) { - throw new OutOfMemoryException("Failure while allocating memory"); - } - } - - /** - * Allocate memory for the vector. We internally use a default value count of 4096 to allocate - * memory for at least these many elements in the vector. - * - * @return false if memory allocation fails, true otherwise. 
- */ - @Override - public boolean allocateNewSafe() { - boolean success = false; - try { - /* we are doing a new allocation -- release the current buffers */ - clear(); - /* allocate validity buffer */ - allocateValidityBuffer(validityAllocationSizeInBytes); - /* allocate offset and data buffer */ - success = super.allocateNewSafe(); - } finally { - if (!success) { - clear(); - } - } - return success; - } - - protected void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = curSize; - validityBuffer.setZero(0, validityBuffer.capacity()); - } - - /** - * Resize the vector to increase the capacity. The internal behavior is to double the current - * value capacity. - */ - @Override - public void reAlloc() { - /* reallocate the validity buffer */ - reallocValidityBuffer(); - /* reallocate the offset and data */ - super.reAlloc(); - } - - protected void reallocValidityAndOffsetBuffers() { - reallocOffsetBuffer(); - reallocValidityBuffer(); - } - - private void reallocValidityBuffer() { - final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity()); - long newAllocationSize = getNewAllocationSize(currentBufferCapacity); - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - validityBuffer.getReferenceManager().release(1); - validityBuffer = newBuf; - validityAllocationSizeInBytes = (int) newAllocationSize; - } - - private long getNewAllocationSize(int currentBufferCapacity) { - long newAllocationSize = currentBufferCapacity * 2L; - if (newAllocationSize == 0) { - if (validityAllocationSizeInBytes > 0) { - newAllocationSize = validityAllocationSizeInBytes; - } else { - newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2L; - } - } - 
newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - return newAllocationSize; - } - - /** - * Same as {@link #copyFrom(int, int, ValueVector)} except that it handles the case when the - * capacity of the vector needs to be expanded before copy. - * - * @param inIndex position to copy from in source vector - * @param outIndex position to copy to in this vector - * @param from source vector - */ - @Override - public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { - copyFrom(inIndex, outIndex, from); - } - - /** - * Copy a cell value from a particular index in source vector to a particular position in this - * vector. - * - * @param inIndex position to copy from in source vector - * @param outIndex position to copy to in this vector - * @param from source vector - */ - @Override - public void copyFrom(int inIndex, int outIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - FieldReader in = from.getReader(); - in.setPosition(inIndex); - FieldWriter out = getWriter(); - out.setPosition(outIndex); - ComplexCopier.copy(in, out); - } - - /** - * Get the inner data vector for this list vector. 
- * - * @return data vector - */ - @Override - public FieldVector getDataVector() { - return vector; - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return getTransferPair(ref, allocator, null); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return getTransferPair(field, allocator, null); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(ref, allocator, callBack); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(field, allocator, callBack); - } - - @Override - public TransferPair makeTransferPair(ValueVector target) { - return new TransferImpl((ListVector) target); - } - - @Override - public long getValidityBufferAddress() { - return validityBuffer.memoryAddress(); - } - - @Override - public long getDataBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public long getOffsetBufferAddress() { - return offsetBuffer.memoryAddress(); - } - - @Override - public ArrowBuf getValidityBuffer() { - return validityBuffer; - } - - @Override - public ArrowBuf getDataBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getOffsetBuffer() { - return offsetBuffer; - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - if (isSet(index) == 0) { - return ArrowBufPointer.NULL_HASH_CODE; - } - int hash = 0; - final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); - final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH); - for (int i = start; i < end; i++) { - hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(i, hasher)); - } - return hash; - } - - @Override - public OUT accept(VectorVisitor visitor, 
IN value) { - return visitor.visit(this, value); - } - - private class TransferImpl implements TransferPair { - - ListVector to; - TransferPair dataTransferPair; - - public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { - this(new ListVector(name, allocator, field.getFieldType(), callBack)); - } - - public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { - this(new ListVector(field, allocator, callBack)); - } - - public TransferImpl(ListVector to) { - this.to = to; - to.addOrGetVector(vector.getField().getFieldType()); - if (to.getDataVector() instanceof ZeroVector) { - to.addOrGetVector(vector.getField().getFieldType()); - } - dataTransferPair = getDataVector().makeTransferPair(to.getDataVector()); - } - - /** - * Transfer this vector'data to another vector. The memory associated with this vector is - * transferred to the allocator of target vector for accounting and management purposes. - */ - @Override - public void transfer() { - to.clear(); - dataTransferPair.transfer(); - to.validityBuffer = transferBuffer(validityBuffer, to.allocator); - to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator); - to.lastSet = lastSet; - if (valueCount > 0) { - to.setValueCount(valueCount); - } - clear(); - } - - /** - * Slice this vector at desired index and length and transfer the corresponding data to the - * target vector. - * - * @param startIndex start position of the split in source vector. - * @param length length of the split. 
- */ - @Override - public void splitAndTransfer(int startIndex, int length) { - Preconditions.checkArgument( - startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, - "Invalid parameters startIndex: %s, length: %s for valueCount: %s", - startIndex, - length, - valueCount); - to.clear(); - if (length > 0) { - final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH); - final int sliceLength = - offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint; - to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); - /* splitAndTransfer offset buffer */ - for (int i = 0; i < length + 1; i++) { - final int relativeOffset = - offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint; - to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset); - } - /* splitAndTransfer validity buffer */ - splitAndTransferValidityBuffer(startIndex, length, to); - /* splitAndTransfer data buffer */ - dataTransferPair.splitAndTransfer(startPoint, sliceLength); - to.lastSet = length - 1; - to.setValueCount(length); - } - } - - /* - * transfer the validity. - */ - private void splitAndTransferValidityBuffer(int startIndex, int length, ListVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. 
- */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. - */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - - @Override - public ValueVector getTo() { - return to; - } - - @Override - public void copyValueSafe(int from, int to) { - this.to.copyFrom(from, to, ListVector.this); - } - } - - @Override - protected FieldReader getReaderImpl() { - return new UnionListReader(this); - } - - @Override - public UnionListReader getReader() { - reader = (UnionListReader) super.getReader(); - return reader; - } - - /** Initialize the child data vector to field type. 
*/ - @Override - public AddOrGetResult addOrGetVector(FieldType fieldType) { - AddOrGetResult result = super.addOrGetVector(fieldType); - invalidateReader(); - return result; - } - - /** - * Get the size (number of bytes) of underlying buffers used by this vector. - * - * @return size of underlying buffers. - */ - @Override - public int getBufferSize() { - if (valueCount == 0) { - return 0; - } - final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH; - final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); - return offsetBufferSize + validityBufferSize + vector.getBufferSize(); - } - - @Override - public int getBufferSizeFor(int valueCount) { - if (valueCount == 0) { - return 0; - } - final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); - - return super.getBufferSizeFor(valueCount) + validityBufferSize; - } - - @Override - public Field getField() { - if (field.getChildren().contains(getDataVector().getField())) { - return field; - } - field = - new Field( - field.getName(), - field.getFieldType(), - Collections.singletonList(getDataVector().getField())); - return field; - } - - @Override - public MinorType getMinorType() { - return MinorType.LIST; - } - - @Override - public void clear() { - super.clear(); - validityBuffer = releaseBuffer(validityBuffer); - lastSet = -1; - } - - @Override - public void reset() { - super.reset(); - validityBuffer.setZero(0, validityBuffer.capacity()); - lastSet = -1; - } - - /** - * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer, so it only should be used for in-context access. Also note - * that this buffer changes regularly, thus external classes shouldn't hold a reference to it - * (unless they change it). - * - * @param clear Whether to clear vector before returning, the buffers will still be refcounted but - * the returned array will be the only reference to them. 
Also, this won't clear the child - * buffers. - * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. - */ - @Override - public ArrowBuf[] getBuffers(boolean clear) { - setReaderAndWriterIndex(); - final ArrowBuf[] buffers; - if (getBufferSize() == 0) { - buffers = new ArrowBuf[0]; - } else { - List list = new ArrayList<>(); - list.add(offsetBuffer); - list.add(validityBuffer); - list.addAll(Arrays.asList(vector.getBuffers(false))); - buffers = list.toArray(new ArrowBuf[list.size()]); - } - if (clear) { - for (ArrowBuf buffer : buffers) { - buffer.getReferenceManager().retain(); - } - clear(); - } - return buffers; - } - - @Override - public UnionVector promoteToUnion() { - UnionVector vector = new UnionVector("$data$", allocator, /* field type*/ null, callBack); - replaceDataVector(vector); - invalidateReader(); - if (callBack != null) { - callBack.doWork(); - } - return vector; - } - - protected void invalidateReader() { - reader = null; - } - - /** - * Get the element in the list vector at a particular index. - * - * @param index position of the element - * @return Object at given position - */ - @Override - public List getObject(int index) { - if (isSet(index) == 0) { - return null; - } - final List vals = new JsonStringArrayList<>(); - final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); - final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH); - final ValueVector vv = getDataVector(); - for (int i = start; i < end; i++) { - vals.add(vv.getObject(i)); - } - - return vals; - } - - /** - * Check if element at given index is null. - * - * @param index position of element - * @return true if element at given index is null, false otherwise - */ - @Override - public boolean isNull(int index) { - return (isSet(index) == 0); - } - - /** - * Check if element at given index is empty list. 
- * - * @param index position of element - * @return true if element at given index is empty list or NULL, false otherwise - */ - @Override - public boolean isEmpty(int index) { - if (isNull(index)) { - return true; - } else { - final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); - final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH); - return start == end; - } - } - - /** - * Same as {@link #isNull(int)}. - * - * @param index position of element - * @return 1 if element at given index is not null, 0 otherwise - */ - public int isSet(int index) { - final int byteIndex = index >> 3; - final byte b = validityBuffer.getByte(byteIndex); - final int bitIndex = index & 7; - return (b >> bitIndex) & 0x01; - } - - /** - * Get the number of elements that are null in the vector. - * - * @return the number of null elements. - */ - @Override - public int getNullCount() { - return BitVectorHelper.getNullCount(validityBuffer, valueCount); - } - - /** - * Get the current value capacity for the vector. - * - * @return number of elements that vector can hold. - */ - @Override - public int getValueCapacity() { - return getValidityAndOffsetValueCapacity(); - } - - private int getValidityAndOffsetValueCapacity() { - final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0); - return Math.min(offsetValueCapacity, getValidityBufferValueCapacity()); - } - - private int getValidityBufferValueCapacity() { - return capAtMaxInt(validityBuffer.capacity() * 8); - } - - /** - * Sets the list at index to be not-null. Reallocates validity buffer if index is larger than - * current capacity. - */ - public void setNotNull(int index) { - while (index >= getValidityAndOffsetValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - BitVectorHelper.setBit(validityBuffer, index); - lastSet = index; - } - - /** - * Sets list at index to be null. 
- * - * @param index position in vector - */ - @Override - public void setNull(int index) { - while (index >= getValidityAndOffsetValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - if (lastSet >= index) { - lastSet = index - 1; - } - for (int i = lastSet + 1; i <= index; i++) { - final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); - offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset); - } - BitVectorHelper.unsetBit(validityBuffer, index); - lastSet = index; - } - - /** - * Start a new value in the list vector. - * - * @param index index of the value to start - */ - @Override - public int startNewValue(int index) { - while (index >= getValidityAndOffsetValueCapacity()) { - reallocValidityAndOffsetBuffers(); - } - if (lastSet >= index) { - lastSet = index - 1; - } - for (int i = lastSet + 1; i <= index; i++) { - final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); - offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset); - } - BitVectorHelper.setBit(validityBuffer, index); - lastSet = index; - return offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH); - } - - /** - * End the current value. - * - * @param index index of the value to end - * @param size number of elements in the list that was written - */ - public void endValue(int index, int size) { - final int currentOffset = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH); - offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, currentOffset + size); - } - - /** - * Sets the value count for the vector. 
- * - * @param valueCount value count - */ - @Override - public void setValueCount(int valueCount) { - this.valueCount = valueCount; - if (valueCount > 0) { - while (valueCount > getValidityAndOffsetValueCapacity()) { - /* check if validity and offset buffers need to be re-allocated */ - reallocValidityAndOffsetBuffers(); - } - for (int i = lastSet + 1; i < valueCount; i++) { - /* fill the holes with offsets */ - final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); - offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset); - } - } - /* valueCount for the data vector is the current end offset */ - final int childValueCount = - (valueCount == 0) ? 0 : offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH); - /* set the value count of data vector and this will take care of - * checking whether data buffer needs to be reallocated. - */ - vector.setValueCount(childValueCount); - } - - public void setLastSet(int value) { - lastSet = value; - } - - public int getLastSet() { - return lastSet; - } - - @Override - public int getElementStartIndex(int index) { - return offsetBuffer.getInt(index * OFFSET_WIDTH); - } - - @Override - public int getElementEndIndex(int index) { - return offsetBuffer.getInt((index + 1) * OFFSET_WIDTH); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java deleted file mode 100644 index 9b4e6b4c0cd4a..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ /dev/null @@ -1,1031 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -import static java.util.Collections.singletonList; -import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.apache.arrow.util.Preconditions.checkArgument; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.ByteFunctionHelpers; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.BufferBacked; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.impl.ComplexCopier; -import org.apache.arrow.vector.complex.impl.UnionListViewReader; -import org.apache.arrow.vector.complex.impl.UnionListViewWriter; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.FieldWriter; -import 
org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.TransferPair; - -/** - * A list view vector contains lists of a specific type of elements. Its structure contains four - * elements. - * - *
      - *
    1. A validity buffer. - *
    2. An offset buffer, that denotes lists starts. - *
    3. A size buffer, that denotes lists ends. - *
    4. A child data vector that contains the elements of lists. - *
    - * - * The latter three are managed by its superclass. - */ - -/* - * TODO: consider merging the functionality in `BaseRepeatedValueVector` into this class. - */ -public class ListViewVector extends BaseRepeatedValueViewVector - implements PromotableVector, ValueIterableVector> { - - protected ArrowBuf validityBuffer; - protected UnionListViewReader reader; - private CallBack callBack; - protected Field field; - protected int validityAllocationSizeInBytes; - - public static ListViewVector empty(String name, BufferAllocator allocator) { - return new ListViewVector( - name, allocator, FieldType.nullable(ArrowType.ListView.INSTANCE), null); - } - - /** - * Constructs a new instance. - * - * @param name The name of the instance. - * @param allocator The allocator to use for allocating/reallocating buffers. - * @param fieldType The type of this list. - * @param callBack A schema change callback. - */ - public ListViewVector( - String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - this(new Field(name, fieldType, null), allocator, callBack); - } - - /** - * Constructs a new instance. - * - * @param field The field materialized by this vector. - * @param allocator The allocator to use for allocating/reallocating buffers. - * @param callBack A schema change callback. - */ - public ListViewVector(Field field, BufferAllocator allocator, CallBack callBack) { - super(field.getName(), allocator, callBack); - this.validityBuffer = allocator.getEmpty(); - this.field = field; - this.callBack = callBack; - this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - } - - @Override - public void initializeChildrenFromFields(List children) { - checkArgument( - children.size() == 1, - "ListViews have one child Field. Found: %s", - children.isEmpty() ? 
"none" : children); - - Field field = children.get(0); - AddOrGetResult addOrGetVector = addOrGetVector(field.getFieldType()); - checkArgument( - addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); - - addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); - this.field = new Field(this.field.getName(), this.field.getFieldType(), children); - } - - @Override - public void setInitialCapacity(int numRecords) { - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); - super.setInitialCapacity(numRecords); - } - - /** - * Specialized version of setInitialCapacity() for ListViewVector. This is used by some callers - * when they want to explicitly control and be conservative about memory allocated for inner data - * vector. This is very useful when we are working with memory constraints for a query and have a - * fixed amount of memory reserved for the record batch. In such cases, we are likely to face OOM - * or related problems when we reserve memory for a record batch with value count x and do - * setInitialCapacity(x) such that each vector allocates only what is necessary and not the - * default amount, but the multiplier forces the memory requirement to go beyond what was needed. - * - * @param numRecords value count - * @param density density of ListViewVector. Density is the average size of a list per position in - * the ListViewVector. For example, a density value of 10 implies each position in the list - * vector has a list of 10 values. A density value of 0.1 implies out of 10 positions in the - * list vector, 1 position has a list of size 1, and the remaining positions are null (no - * lists) or empty lists. This helps in tightly controlling the memory we provision for inner - * data vector. 
- */ - @Override - public void setInitialCapacity(int numRecords, double density) { - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); - super.setInitialCapacity(numRecords, density); - } - - /** - * Specialized version of setInitialTotalCapacity() for ListViewVector. This is used by some - * callers when they want to explicitly control and be conservative about memory allocated for - * inner data vector. This is very useful when we are working with memory constraints for a query - * and have a fixed amount of memory reserved for the record batch. In such cases, we are likely - * to face OOM or related problems when we reserve memory for a record batch with value count x - * and do setInitialCapacity(x) such that each vector allocates only what is necessary and not the - * default amount, but the multiplier forces the memory requirement to go beyond what was needed. - * - * @param numRecords value count - * @param totalNumberOfElements the total number of elements to allow for in this vector across - * all records. - */ - @Override - public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { - validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); - super.setInitialTotalCapacity(numRecords, totalNumberOfElements); - } - - @Override - public List getChildrenFromFields() { - return singletonList(getDataVector()); - } - - /** - * Load the buffers associated with this Field. 
- * - * @param fieldNode the fieldNode - * @param ownBuffers the buffers for this Field (own buffers only, children not included) - */ - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - if (ownBuffers.size() != 3) { - throw new IllegalArgumentException( - "Illegal buffer count, expected " + 3 + ", got: " + ownBuffers.size()); - } - - ArrowBuf bitBuffer = ownBuffers.get(0); - ArrowBuf offBuffer = ownBuffers.get(1); - ArrowBuf szBuffer = ownBuffers.get(2); - - validityBuffer.getReferenceManager().release(); - validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); - offsetBuffer.getReferenceManager().release(); - offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); - sizeBuffer.getReferenceManager().release(); - sizeBuffer = szBuffer.getReferenceManager().retain(szBuffer, allocator); - - validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); - offsetAllocationSizeInBytes = offsetBuffer.capacity(); - sizeAllocationSizeInBytes = sizeBuffer.capacity(); - - valueCount = fieldNode.getLength(); - } - - /** Set the reader and writer indexes for the inner buffers. */ - private void setReaderAndWriterIndex() { - validityBuffer.readerIndex(0); - offsetBuffer.readerIndex(0); - sizeBuffer.readerIndex(0); - if (valueCount == 0) { - validityBuffer.writerIndex(0); - offsetBuffer.writerIndex(0); - sizeBuffer.writerIndex(0); - } else { - validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); - offsetBuffer.writerIndex(valueCount * OFFSET_WIDTH); - sizeBuffer.writerIndex(valueCount * SIZE_WIDTH); - } - } - - @Override - public List getFieldBuffers() { - List result = new ArrayList<>(2); - setReaderAndWriterIndex(); - result.add(validityBuffer); - result.add(offsetBuffer); - result.add(sizeBuffer); - - return result; - } - - /** - * Export the buffers of the fields for C Data Interface. 
This method traverses the buffers and - * export buffer and buffer's memory address into a list of buffers and a pointer to the list of - * buffers. - */ - @Override - public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long nullValue) { - exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true); - exportBuffer(offsetBuffer, buffers, buffersPtr, nullValue, true); - exportBuffer(sizeBuffer, buffers, buffersPtr, nullValue, true); - } - - @Override - public void allocateNew() throws OutOfMemoryException { - if (!allocateNewSafe()) { - throw new OutOfMemoryException("Failure while allocating memory"); - } - } - - @Override - public boolean allocateNewSafe() { - boolean success = false; - try { - /* release the current buffers, hence this is a new allocation - * Note that, the `clear` method call below is releasing validityBuffer - * calling the superclass clear method which is releasing the associated buffers - * (sizeBuffer and offsetBuffer). - */ - clear(); - /* allocate validity buffer */ - allocateValidityBuffer(validityAllocationSizeInBytes); - /* allocate offset, data and sizes buffer */ - success = super.allocateNewSafe(); - } finally { - if (!success) { - clear(); - } - } - return success; - } - - protected void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = curSize; - validityBuffer.setZero(0, validityBuffer.capacity()); - } - - @Override - public void reAlloc() { - /* reallocate the validity buffer */ - reallocValidityBuffer(); - /* reallocate the offset, size, and data */ - super.reAlloc(); - } - - protected void reallocValidityAndSizeAndOffsetBuffers() { - reallocateBuffers(); - reallocValidityBuffer(); - } - - private void reallocValidityBuffer() { - final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity()); - long newAllocationSize = 
getNewAllocationSize(currentBufferCapacity); - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - validityBuffer.getReferenceManager().release(1); - validityBuffer = newBuf; - validityAllocationSizeInBytes = (int) newAllocationSize; - } - - private long getNewAllocationSize(int currentBufferCapacity) { - long newAllocationSize = currentBufferCapacity * 2L; - if (newAllocationSize == 0) { - if (validityAllocationSizeInBytes > 0) { - newAllocationSize = validityAllocationSizeInBytes; - } else { - newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2L; - } - } - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - assert newAllocationSize >= 1; - - if (newAllocationSize > MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - return newAllocationSize; - } - - @Override - public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { - copyFrom(inIndex, outIndex, from); - } - - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } - - @Override - public void copyFrom(int inIndex, int outIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - FieldReader in = from.getReader(); - in.setPosition(inIndex); - FieldWriter out = getWriter(); - out.setPosition(outIndex); - ComplexCopier.copy(in, out); - } - - @Override - public FieldVector getDataVector() { - return vector; - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return getTransferPair(ref, allocator, null); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return getTransferPair(field, allocator, null); - } - - @Override - public TransferPair getTransferPair(String 
ref, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(ref, allocator, callBack); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(field, allocator, callBack); - } - - @Override - public TransferPair makeTransferPair(ValueVector target) { - return new TransferImpl((ListViewVector) target); - } - - @Override - public long getValidityBufferAddress() { - return validityBuffer.memoryAddress(); - } - - @Override - public long getDataBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public long getOffsetBufferAddress() { - return offsetBuffer.memoryAddress(); - } - - @Override - public ArrowBuf getValidityBuffer() { - return validityBuffer; - } - - @Override - public ArrowBuf getDataBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getOffsetBuffer() { - return offsetBuffer; - } - - public ArrowBuf getSizeBuffer() { - return sizeBuffer; - } - - public long getSizeBufferAddress() { - return sizeBuffer.memoryAddress(); - } - - /** - * Get the hash code for the element at the given index. - * - * @param index position of the element - * @return hash code for the element at the given index - */ - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - /** - * Get the hash code for the element at the given index. 
- * - * @param index position of the element - * @param hasher hasher to use - * @return hash code for the element at the given index - */ - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - if (isSet(index) == 0) { - return ArrowBufPointer.NULL_HASH_CODE; - } - int hash = 0; - final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); - final int end = sizeBuffer.getInt(index * OFFSET_WIDTH); - for (int i = start; i < end; i++) { - hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(i, hasher)); - } - return hash; - } - - private class TransferImpl implements TransferPair { - - ListViewVector to; - TransferPair dataTransferPair; - - public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { - this(new ListViewVector(name, allocator, field.getFieldType(), callBack)); - } - - public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { - this(new ListViewVector(field, allocator, callBack)); - } - - public TransferImpl(ListViewVector to) { - this.to = to; - to.addOrGetVector(vector.getField().getFieldType()); - if (to.getDataVector() instanceof ZeroVector) { - to.addOrGetVector(vector.getField().getFieldType()); - } - dataTransferPair = getDataVector().makeTransferPair(to.getDataVector()); - } - - @Override - public void transfer() { - to.clear(); - dataTransferPair.transfer(); - to.validityBuffer = transferBuffer(validityBuffer, to.allocator); - to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator); - to.sizeBuffer = transferBuffer(sizeBuffer, to.allocator); - if (valueCount > 0) { - to.setValueCount(valueCount); - } - clear(); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - Preconditions.checkArgument( - startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, - "Invalid parameters startIndex: %s, length: %s for valueCount: %s", - startIndex, - length, - valueCount); - to.clear(); - if (length > 0) { - final int startPoint = 
offsetBuffer.getInt((long) startIndex * OFFSET_WIDTH); - // we have to scan by index since there are out-of-order offsets - to.offsetBuffer = to.allocateBuffers((long) length * OFFSET_WIDTH); - to.sizeBuffer = to.allocateBuffers((long) length * SIZE_WIDTH); - - /* splitAndTransfer the size buffer */ - int maxOffsetAndSizeSum = -1; - int minOffsetValue = -1; - for (int i = 0; i < length; i++) { - final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH); - final int sizeValue = sizeBuffer.getInt((long) (startIndex + i) * SIZE_WIDTH); - to.sizeBuffer.setInt((long) i * SIZE_WIDTH, sizeValue); - if (maxOffsetAndSizeSum < offsetValue + sizeValue) { - maxOffsetAndSizeSum = offsetValue + sizeValue; - } - if (minOffsetValue == -1 || minOffsetValue > offsetValue) { - minOffsetValue = offsetValue; - } - } - - /* splitAndTransfer the offset buffer */ - for (int i = 0; i < length; i++) { - final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH); - final int relativeOffset = offsetValue - minOffsetValue; - to.offsetBuffer.setInt((long) i * OFFSET_WIDTH, relativeOffset); - } - - /* splitAndTransfer the validity buffer */ - splitAndTransferValidityBuffer(startIndex, length, to); - - /* splitAndTransfer the data buffer */ - final int childSliceLength = maxOffsetAndSizeSum - minOffsetValue; - dataTransferPair.splitAndTransfer(minOffsetValue, childSliceLength); - to.setValueCount(length); - } - } - - /* - * transfer the validity. 
- */ - private void splitAndTransferValidityBuffer(int startIndex, int length, ListViewVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. - */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. 
- */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - - @Override - public ValueVector getTo() { - return to; - } - - @Override - public void copyValueSafe(int from, int to) { - this.to.copyFrom(from, to, ListViewVector.this); - } - } - - @Override - protected FieldReader getReaderImpl() { - return new UnionListViewReader(this); - } - - @Override - public UnionListViewReader getReader() { - reader = (UnionListViewReader) super.getReader(); - return reader; - } - - /** - * Get the size (number of bytes) of underlying buffers used by this vector. - * - * @return size of underlying buffers. - */ - @Override - public int getBufferSize() { - if (valueCount == 0) { - return 0; - } - final int offsetBufferSize = valueCount * OFFSET_WIDTH; - final int sizeBufferSize = valueCount * SIZE_WIDTH; - final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); - return offsetBufferSize + sizeBufferSize + validityBufferSize + vector.getBufferSize(); - } - - /** - * Get the size (number of bytes) of underlying buffers used by this. - * - * @param valueCount the number of values to assume this vector contains - * @return size of underlying buffers. - */ - @Override - public int getBufferSizeFor(int valueCount) { - if (valueCount == 0) { - return 0; - } - final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); - - return super.getBufferSizeFor(valueCount) + validityBufferSize; - } - - /** - * Get the field associated with the list view vector. 
- * - * @return the field - */ - @Override - public Field getField() { - if (field.getChildren().contains(getDataVector().getField())) { - return field; - } - field = - new Field( - field.getName(), - field.getFieldType(), - Collections.singletonList(getDataVector().getField())); - return field; - } - - /** - * Get the minor type for the vector. - * - * @return the minor type - */ - @Override - public MinorType getMinorType() { - return MinorType.LISTVIEW; - } - - /** Clear the vector data. */ - @Override - public void clear() { - // calling superclass clear method which is releasing the sizeBufer and offsetBuffer - super.clear(); - validityBuffer = releaseBuffer(validityBuffer); - } - - /** Release the buffers associated with this vector. */ - @Override - public void reset() { - super.reset(); - validityBuffer.setZero(0, validityBuffer.capacity()); - } - - /** - * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer, so it only should be used for in-context access. Also note - * that this buffer changes regularly, thus external classes shouldn't hold a reference to it - * (unless they change it). - * - * @param clear Whether to clear vector before returning, the buffers will still be refcounted but - * the returned array will be the only reference to them. Also, this won't clear the child - * buffers. - * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. 
- */ - @Override - public ArrowBuf[] getBuffers(boolean clear) { - setReaderAndWriterIndex(); - final ArrowBuf[] buffers; - if (getBufferSize() == 0) { - buffers = new ArrowBuf[0]; - } else { - List list = new ArrayList<>(); - // the order must be validity, offset and size buffers - list.add(validityBuffer); - list.add(offsetBuffer); - list.add(sizeBuffer); - list.addAll(Arrays.asList(vector.getBuffers(false))); - buffers = list.toArray(new ArrowBuf[list.size()]); - } - if (clear) { - for (ArrowBuf buffer : buffers) { - buffer.getReferenceManager().retain(); - } - clear(); - } - return buffers; - } - - /** - * Get the element in the list view vector at a particular index. - * - * @param index position of the element - * @return Object at given position - */ - @Override - public List getObject(int index) { - if (isSet(index) == 0) { - return null; - } - final List vals = new JsonStringArrayList<>(); - final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); - final int end = start + sizeBuffer.getInt((index) * SIZE_WIDTH); - final ValueVector vv = getDataVector(); - for (int i = start; i < end; i++) { - vals.add(vv.getObject(i)); - } - - return vals; - } - - /** - * Check if an element at given index is null. - * - * @param index position of an element - * @return true if an element at given index is null, false otherwise - */ - @Override - public boolean isNull(int index) { - return (isSet(index) == 0); - } - - /** - * Check if an element at given index is an empty list. - * - * @param index position of an element - * @return true if an element at given index is an empty list or NULL, false otherwise - */ - @Override - public boolean isEmpty(int index) { - if (isNull(index)) { - return true; - } else { - return sizeBuffer.getInt(index * SIZE_WIDTH) == 0; - } - } - - /** - * Same as {@link #isNull(int)}. 
- * - * @param index position of the element - * @return 1 if element at given index is not null, 0 otherwise - */ - public int isSet(int index) { - final int byteIndex = index >> 3; - final byte b = validityBuffer.getByte(byteIndex); - final int bitIndex = index & 7; - return (b >> bitIndex) & 0x01; - } - - /** - * Get the number of elements that are null in the vector. - * - * @return the number of null elements. - */ - @Override - public int getNullCount() { - return BitVectorHelper.getNullCount(validityBuffer, valueCount); - } - - /** - * Get the value capacity by considering validity and offset capacity. Note that the size buffer - * capacity is not considered here since it has the same capacity as the offset buffer. - * - * @return the value capacity - */ - @Override - public int getValueCapacity() { - return getValidityAndOffsetValueCapacity(); - } - - private int getValidityAndSizeValueCapacity() { - final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity(), 0); - final int sizeValueCapacity = Math.max(getSizeBufferValueCapacity(), 0); - return Math.min(offsetValueCapacity, sizeValueCapacity); - } - - private int getValidityAndOffsetValueCapacity() { - final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity(), 0); - return Math.min(offsetValueCapacity, getValidityBufferValueCapacity()); - } - - private int getValidityBufferValueCapacity() { - return capAtMaxInt(validityBuffer.capacity() * 8); - } - - /** - * Set the element at the given index to null. - * - * @param index the value to change - */ - @Override - public void setNull(int index) { - while (index >= getValidityAndSizeValueCapacity()) { - reallocValidityAndSizeAndOffsetBuffers(); - } - - offsetBuffer.setInt(index * OFFSET_WIDTH, 0); - sizeBuffer.setInt(index * SIZE_WIDTH, 0); - BitVectorHelper.unsetBit(validityBuffer, index); - } - - /** - * Start new value in the ListView vector. 
- * - * @param index index of the value to start - * @return offset of the new value - */ - @Override - public int startNewValue(int index) { - while (index >= getValidityAndSizeValueCapacity()) { - reallocValidityAndSizeAndOffsetBuffers(); - } - - if (index > 0) { - final int prevOffset = getMaxViewEndChildVectorByIndex(index); - offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset); - } - - BitVectorHelper.setBit(validityBuffer, index); - return offsetBuffer.getInt(index * OFFSET_WIDTH); - } - - /** - * Validate the invariants of the offset and size buffers. 0 <= offsets[i] <= length of the child - * array 0 <= offsets[i] + size[i] <= length of the child array - * - * @param offset the offset at a given index - * @param size the size at a given index - */ - private void validateInvariants(int offset, int size) { - if (offset < 0) { - throw new IllegalArgumentException("Offset cannot be negative"); - } - - if (size < 0) { - throw new IllegalArgumentException("Size cannot be negative"); - } - - // 0 <= offsets[i] <= length of the child array - if (offset > this.vector.getValueCount()) { - throw new IllegalArgumentException("Offset is out of bounds."); - } - - // 0 <= offsets[i] + size[i] <= length of the child array - if (offset + size > this.vector.getValueCount()) { - throw new IllegalArgumentException("Offset + size <= length of the child array."); - } - } - - /** - * Set the offset at the given index. Make sure to use this function after updating `field` vector - * and using `setValidity` - * - * @param index index of the value to set - * @param value value to set - */ - public void setOffset(int index, int value) { - validateInvariants(value, sizeBuffer.getInt(index * SIZE_WIDTH)); - - offsetBuffer.setInt(index * OFFSET_WIDTH, value); - } - - /** - * Set the size at the given index. Make sure to use this function after using `setOffset`. 
- * - * @param index index of the value to set - * @param value value to set - */ - public void setSize(int index, int value) { - validateInvariants(offsetBuffer.getInt(index * SIZE_WIDTH), value); - - sizeBuffer.setInt(index * SIZE_WIDTH, value); - } - - /** - * Set the validity at the given index. - * - * @param index index of the value to set - * @param value value to set (0 for unset and 1 for a set) - */ - public void setValidity(int index, int value) { - if (value == 0) { - BitVectorHelper.unsetBit(validityBuffer, index); - } else { - BitVectorHelper.setBit(validityBuffer, index); - } - } - - @Override - public void setValueCount(int valueCount) { - this.valueCount = valueCount; - if (valueCount > 0) { - while (valueCount > getValidityAndSizeValueCapacity()) { - /* check if validity and offset buffers need to be re-allocated */ - reallocValidityAndSizeAndOffsetBuffers(); - } - } - /* valueCount for the data vector is the current end offset */ - final int childValueCount = (valueCount == 0) ? 0 : getMaxViewEndChildVector(); - /* set the value count of data vector and this will take care of - * checking whether data buffer needs to be reallocated. 
- */ - vector.setValueCount(childValueCount); - } - - @Override - public int getElementStartIndex(int index) { - return offsetBuffer.getInt(index * OFFSET_WIDTH); - } - - @Override - public int getElementEndIndex(int index) { - return sizeBuffer.getInt(index * OFFSET_WIDTH); - } - - @Override - public AddOrGetResult addOrGetVector(FieldType fieldType) { - AddOrGetResult result = super.addOrGetVector(fieldType); - invalidateReader(); - return result; - } - - @Override - public UnionVector promoteToUnion() { - UnionVector vector = new UnionVector("$data$", allocator, /* field type*/ null, callBack); - replaceDataVector(vector); - invalidateReader(); - if (callBack != null) { - callBack.doWork(); - } - return vector; - } - - private void invalidateReader() { - reader = null; - } - - @Deprecated - @Override - public List getFieldInnerVectors() { - throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); - } - - public UnionListViewWriter getWriter() { - return new UnionListViewWriter(this); - } - - @Override - public int getValueCount() { - return valueCount; - } - - /** - * Get the density of this ListVector. - * - * @return density - */ - public double getDensity() { - if (valueCount == 0) { - return 0.0D; - } - final double totalListSize = getMaxViewEndChildVector(); - return totalListSize / valueCount; - } - - /** Validating ListViewVector creation based on the specification guideline. */ - @Override - public void validate() { - for (int i = 0; i < valueCount; i++) { - final int offset = offsetBuffer.getInt(i * OFFSET_WIDTH); - final int size = sizeBuffer.getInt(i * SIZE_WIDTH); - validateInvariants(offset, size); - } - } - - /** - * End the current value. 
- * - * @param index index of the value to end - * @param size number of elements in the list that was written - */ - public void endValue(int index, int size) { - sizeBuffer.setInt(index * SIZE_WIDTH, size); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java deleted file mode 100644 index 23cda8401b0bf..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ /dev/null @@ -1,309 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex; - -import static org.apache.arrow.util.Preconditions.checkArgument; - -import java.util.List; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.complex.impl.UnionMapReader; -import org.apache.arrow.vector.complex.impl.UnionMapWriter; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType.Map; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.TransferPair; - -/** - * A MapVector is used to store entries of key/value pairs. It is a container vector that is - * composed of a list of struct values with "key" and "value" fields. The MapVector is nullable, but - * if a map is set at a given index, there must be an entry. In other words, the StructVector data - * is non-nullable. Also for a given entry, the "key" is non-nullable, however the "value" can be - * null. - */ -public class MapVector extends ListVector { - - public static final String KEY_NAME = "key"; - public static final String VALUE_NAME = "value"; - public static final String DATA_VECTOR_NAME = "entries"; - - /** - * Construct an empty MapVector with no data. Child vectors must be added subsequently. - * - * @param name The name of the vector. - * @param allocator The allocator used for allocating/reallocating buffers. - * @param keysSorted True if the map keys have been pre-sorted. - * @return a new instance of MapVector. 
- */ - public static MapVector empty(String name, BufferAllocator allocator, boolean keysSorted) { - return new MapVector(name, allocator, FieldType.nullable(new Map(keysSorted)), null); - } - - /** - * Construct a MapVector instance. - * - * @param name The name of the vector. - * @param allocator The allocator used for allocating/reallocating buffers. - * @param fieldType The type definition of the MapVector. - * @param callBack A schema change callback. - */ - public MapVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - super(name, allocator, fieldType, callBack); - defaultDataVectorName = DATA_VECTOR_NAME; - } - - public MapVector(Field field, BufferAllocator allocator, CallBack callBack) { - super(field, allocator, callBack); - defaultDataVectorName = DATA_VECTOR_NAME; - } - - /** - * Initialize child vectors of the map from the given list of fields. - * - * @param children List of fields that will be children of this MapVector. - */ - @Override - public void initializeChildrenFromFields(List children) { - checkArgument( - children.size() == 1, - "Maps have one List child. Found: %s", - children.isEmpty() ? "none" : children); - - Field structField = children.get(0); - MinorType minorType = Types.getMinorTypeForArrowType(structField.getType()); - checkArgument( - minorType == MinorType.STRUCT && !structField.isNullable(), - "Map data should be a non-nullable struct type"); - checkArgument( - structField.getChildren().size() == 2, - "Map data should be a struct with 2 children. 
Found: %s", - children); - - Field keyField = structField.getChildren().get(0); - checkArgument(!keyField.isNullable(), "Map data key type should be a non-nullable"); - - AddOrGetResult addOrGetVector = addOrGetVector(structField.getFieldType()); - checkArgument( - addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); - - addOrGetVector.getVector().initializeChildrenFromFields(structField.getChildren()); - this.field = new Field(this.field.getName(), this.field.getFieldType(), children); - } - - /** Get the writer for this MapVector instance. */ - @Override - public UnionMapWriter getWriter() { - return new UnionMapWriter(this); - } - - /** Get the reader for this MapVector instance. */ - @Override - public UnionMapReader getReader() { - if (reader == null) { - reader = new UnionMapReader(this); - } - return (UnionMapReader) reader; - } - - @Override - public MinorType getMinorType() { - return MinorType.MAP; - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return getTransferPair(ref, allocator, null); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new TransferImpl(field, allocator, null); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(ref, allocator, callBack); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(field, allocator, callBack); - } - - @Override - public TransferPair makeTransferPair(ValueVector target) { - return new MapVector.TransferImpl((MapVector) target); - } - - private class TransferImpl implements TransferPair { - - MapVector to; - TransferPair dataTransferPair; - - public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { - this(new MapVector(name, allocator, field.getFieldType(), 
callBack)); - } - - public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { - this(new MapVector(field, allocator, callBack)); - } - - public TransferImpl(MapVector to) { - this.to = to; - to.addOrGetVector(vector.getField().getFieldType()); - if (to.getDataVector() instanceof ZeroVector) { - to.addOrGetVector(vector.getField().getFieldType()); - } - dataTransferPair = getDataVector().makeTransferPair(to.getDataVector()); - } - - /** - * Transfer this vector'data to another vector. The memory associated with this vector is - * transferred to the allocator of target vector for accounting and management purposes. - */ - @Override - public void transfer() { - to.clear(); - dataTransferPair.transfer(); - to.validityBuffer = transferBuffer(validityBuffer, to.allocator); - to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator); - to.lastSet = lastSet; - if (valueCount > 0) { - to.setValueCount(valueCount); - } - clear(); - } - - /** - * Slice this vector at desired index and length and transfer the corresponding data to the - * target vector. - * - * @param startIndex start position of the split in source vector. - * @param length length of the split. 
- */ - @Override - public void splitAndTransfer(int startIndex, int length) { - Preconditions.checkArgument( - startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, - "Invalid parameters startIndex: %s, length: %s for valueCount: %s", - startIndex, - length, - valueCount); - to.clear(); - if (length > 0) { - final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH); - final int sliceLength = - offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint; - to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); - /* splitAndTransfer offset buffer */ - for (int i = 0; i < length + 1; i++) { - final int relativeOffset = - offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint; - to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset); - } - /* splitAndTransfer validity buffer */ - splitAndTransferValidityBuffer(startIndex, length, to); - /* splitAndTransfer data buffer */ - dataTransferPair.splitAndTransfer(startPoint, sliceLength); - to.lastSet = length - 1; - to.setValueCount(length); - } - } - - /* - * transfer the validity. - */ - private void splitAndTransferValidityBuffer(int startIndex, int length, MapVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = getValidityBufferSizeFromCount(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. - * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. 
- */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. - */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - - @Override - public ValueVector getTo() { - return to; - } - - @Override - public void copyValueSafe(int from, int to) { - this.to.copyFrom(from, to, MapVector.this); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java deleted file mode 100644 index 5a215608ef64b..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java +++ /dev/null @@ -1,520 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.ByteFunctionHelpers; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.DensityAwareVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.impl.SingleStructReaderImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.ComplexHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.JsonStringHashMap; -import org.apache.arrow.vector.util.TransferPair; - -/** - * A struct vector that has no null values (and no validity 
buffer). Child Vectors are handled in - * {@link AbstractStructVector}. - */ -public class NonNullableStructVector extends AbstractStructVector - implements ValueIterableVector> { - - /** - * Construct a new empty instance which replaces an existing field with the new one in case of - * name conflict. - */ - public static NonNullableStructVector empty(String name, BufferAllocator allocator) { - FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - return new NonNullableStructVector( - name, allocator, fieldType, null, ConflictPolicy.CONFLICT_REPLACE, false); - } - - /** Construct a new empty instance which preserve fields with identical names. */ - public static NonNullableStructVector emptyWithDuplicates( - String name, BufferAllocator allocator) { - FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - return new NonNullableStructVector( - name, allocator, fieldType, null, ConflictPolicy.CONFLICT_APPEND, true); - } - - private final SingleStructReaderImpl reader = new SingleStructReaderImpl(this); - protected Field field; - public int valueCount; - - /** - * Constructs a new instance. - * - * @param name The name of the instance. - * @param allocator The allocator to use to allocating/reallocating buffers. - * @param fieldType The type of this list. - */ - public NonNullableStructVector( - String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - this(new Field(name, fieldType, null), allocator, callBack); - } - - /** - * Constructs a new instance. - * - * @param field The field materialized by this vector. - * @param allocator The allocator to use to allocating/reallocating buffers. - * @param callBack A schema change callback. - */ - public NonNullableStructVector(Field field, BufferAllocator allocator, CallBack callBack) { - this(field, allocator, callBack, null, true); - } - - /** - * Constructs a new instance. - * - * @param name The name of the instance. 
- * @param allocator The allocator to use to allocating/reallocating buffers. - * @param fieldType The type of this list. - * @param callBack A schema change callback. - * @param conflictPolicy How to handle duplicate field names in the struct. - */ - public NonNullableStructVector( - String name, - BufferAllocator allocator, - FieldType fieldType, - CallBack callBack, - ConflictPolicy conflictPolicy, - boolean allowConflictPolicyChanges) { - this( - new Field(name, fieldType, null), - allocator, - callBack, - conflictPolicy, - allowConflictPolicyChanges); - } - - /** - * Constructs a new instance. - * - * @param field The field materialized by this vector. - * @param allocator The allocator to use to allocating/reallocating buffers. - * @param callBack A schema change callback. - * @param conflictPolicy How to handle duplicate field names in the struct. - */ - public NonNullableStructVector( - Field field, - BufferAllocator allocator, - CallBack callBack, - ConflictPolicy conflictPolicy, - boolean allowConflictPolicyChanges) { - super(field.getName(), allocator, callBack, conflictPolicy, allowConflictPolicyChanges); - this.field = field; - this.valueCount = 0; - } - - @Override - public FieldReader getReader() { - return reader; - } - - private transient StructTransferPair ephPair; - - /** - * Copies the element at fromIndex in the provided vector to thisIndex. Reallocates buffers if - * thisIndex is larger then current capacity. 
- */ - @Override - public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { - Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); - if (ephPair == null || ephPair.from != from) { - ephPair = (StructTransferPair) from.makeTransferPair(this); - } - ephPair.copyValueSafe(fromIndex, thisIndex); - } - - @Override - public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { - copyFrom(fromIndex, thisIndex, from); - } - - @Override - protected boolean supportsDirectRead() { - return true; - } - - public Iterator fieldNameIterator() { - return getChildFieldNames().iterator(); - } - - @Override - public void setInitialCapacity(int numRecords) { - for (final ValueVector v : this) { - v.setInitialCapacity(numRecords); - } - } - - @Override - public void setInitialCapacity(int valueCount, double density) { - for (final ValueVector vector : this) { - if (vector instanceof DensityAwareVector) { - ((DensityAwareVector) vector).setInitialCapacity(valueCount, density); - } else { - vector.setInitialCapacity(valueCount); - } - } - } - - @Override - public int getBufferSize() { - if (valueCount == 0 || size() == 0) { - return 0; - } - long buffer = 0; - for (final ValueVector v : this) { - buffer += v.getBufferSize(); - } - - return (int) buffer; - } - - @Override - public int getBufferSizeFor(final int valueCount) { - if (valueCount == 0) { - return 0; - } - - long bufferSize = 0; - for (final ValueVector v : this) { - bufferSize += v.getBufferSizeFor(valueCount); - } - - return (int) bufferSize; - } - - @Override - public ArrowBuf getValidityBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getDataBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getOffsetBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public TransferPair getTransferPair(BufferAllocator allocator) { - return getTransferPair(name, allocator, null); - } - - 
@Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return new StructTransferPair( - this, - new NonNullableStructVector( - name, - allocator, - field.getFieldType(), - callBack, - getConflictPolicy(), - allowConflictPolicyChanges), - false); - } - - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new StructTransferPair(this, (NonNullableStructVector) to); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new StructTransferPair( - this, - new NonNullableStructVector( - ref, - allocator, - field.getFieldType(), - callBack, - getConflictPolicy(), - allowConflictPolicyChanges), - false); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new StructTransferPair( - this, - new NonNullableStructVector( - field, allocator, callBack, getConflictPolicy(), allowConflictPolicyChanges), - false); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return new StructTransferPair( - this, - new NonNullableStructVector( - field, allocator, callBack, getConflictPolicy(), allowConflictPolicyChanges), - false); - } - - /** {@link TransferPair} for this this class. 
*/ - protected static class StructTransferPair implements TransferPair { - private final TransferPair[] pairs; - private final NonNullableStructVector from; - private final NonNullableStructVector to; - - public StructTransferPair(NonNullableStructVector from, NonNullableStructVector to) { - this(from, to, true); - } - - protected StructTransferPair( - NonNullableStructVector from, NonNullableStructVector to, boolean allocate) { - this.from = from; - this.to = to; - this.pairs = new TransferPair[from.size()]; - this.to.ephPair = null; - - int i = 0; - FieldVector vector; - for (String child : from.getChildFieldNames()) { - int preSize = to.size(); - vector = from.getChild(child); - if (vector == null) { - continue; - } - // DRILL-1872: we add the child fields for the vector, looking up the field by name. For a - // map vector, - // the child fields may be nested fields of the top level child. For example if the - // structure - // of a child field is oa.oab.oabc then we add oa, then add oab to oa then oabc to oab. - // But the children member of a Materialized field is a HashSet. If the fields are added in - // the - // children HashSet, and the hashCode of the Materialized field includes the hash code of - // the - // children, the hashCode value of oa changes *after* the field has been added to the - // HashSet. - // (This is similar to what happens in ScanBatch where the children cannot be added till - // they are - // read). To take care of this, we ensure that the hashCode of the MaterializedField does - // not - // include the hashCode of the children but is based only on MaterializedField$key. 
- final FieldVector newVector = - to.addOrGet(child, vector.getField().getFieldType(), vector.getClass()); - if (allocate && to.size() != preSize) { - newVector.allocateNew(); - } - pairs[i++] = vector.makeTransferPair(newVector); - } - } - - @Override - public void transfer() { - for (final TransferPair p : pairs) { - p.transfer(); - } - to.valueCount = from.valueCount; - from.clear(); - } - - @Override - public ValueVector getTo() { - return to; - } - - @Override - public void copyValueSafe(int from, int to) { - for (TransferPair p : pairs) { - p.copyValueSafe(from, to); - } - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - for (TransferPair p : pairs) { - p.splitAndTransfer(startIndex, length); - } - to.setValueCount(length); - } - } - - @Override - public int getValueCapacity() { - if (size() == 0) { - return 0; - } - - return getChildren().stream().mapToInt(child -> child.getValueCapacity()).min().getAsInt(); - } - - @Override - public Map getObject(int index) { - Map vv = new JsonStringHashMap<>(); - for (String child : getChildFieldNames()) { - ValueVector v = getChild(child); - if (v != null && index < v.getValueCount()) { - Object value = v.getObject(index); - if (value != null) { - vv.put(child, value); - } - } - } - return vv; - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - int hash = 0; - for (FieldVector v : getChildren()) { - if (index < v.getValueCount()) { - hash = ByteFunctionHelpers.combineHash(hash, v.hashCode(index, hasher)); - } - } - return hash; - } - - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } - - @Override - public boolean isNull(int index) { - return false; - } - - @Override - public int getNullCount() { - return 0; - } - - public void get(int index, ComplexHolder holder) { - reader.setPosition(index); - holder.reader = reader; - } - 
- @Override - public int getValueCount() { - return valueCount; - } - - public ValueVector getVectorById(int id) { - return getChildByOrdinal(id); - } - - /** Gets a child vector by ordinal position and casts to the specified class. */ - public V getVectorById(int id, Class clazz) { - ValueVector untyped = getVectorById(id); - if (clazz.isInstance(untyped)) { - return clazz.cast(untyped); - } - throw new ClassCastException( - "Id " - + id - + " had the wrong type. Expected " - + clazz.getCanonicalName() - + " but was " - + untyped.getClass().getCanonicalName()); - } - - @Override - public void setValueCount(int valueCount) { - for (final ValueVector v : getChildren()) { - v.setValueCount(valueCount); - } - NonNullableStructVector.this.valueCount = valueCount; - } - - @Override - public void clear() { - for (final ValueVector v : getChildren()) { - v.clear(); - } - valueCount = 0; - } - - @Override - public void reset() { - for (final ValueVector v : getChildren()) { - v.reset(); - } - valueCount = 0; - } - - @Override - public Field getField() { - List children = new ArrayList<>(); - for (ValueVector child : getChildren()) { - children.add(child.getField()); - } - if (children.isEmpty() || field.getChildren().equals(children)) { - return field; - } - field = new Field(field.getName(), field.getFieldType(), children); - return field; - } - - @Override - public MinorType getMinorType() { - return MinorType.STRUCT; - } - - @Override - public void close() { - final Collection vectors = getChildren(); - for (final FieldVector v : vectors) { - v.close(); - } - vectors.clear(); - - valueCount = 0; - - super.close(); - } - - /** Initializes the struct's members from the given Fields. 
*/ - public void initializeChildrenFromFields(List children) { - for (Field field : children) { - FieldVector vector = (FieldVector) this.add(field.getName(), field.getFieldType()); - vector.initializeChildrenFromFields(field.getChildren()); - } - } - - public List getChildrenFromFields() { - return getChildren(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java deleted file mode 100644 index 03c9e3a591197..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -/** Get and set position in a particular data structure. */ -@SuppressWarnings("unused") // Used in when instantiating freemarker templates. 
-public interface Positionable { - int getPosition(); - - void setPosition(int index); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java deleted file mode 100644 index d5c8670d267bd..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.types.pojo.FieldType; - -/** Vector that can store multiple {@linkplain FieldType} vectors as children. 
*/ -public interface PromotableVector { - - AddOrGetResult addOrGetVector(FieldType type); - - UnionVector promoteToUnion(); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedFixedWidthVectorLike.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedFixedWidthVectorLike.java deleted file mode 100644 index 16c7541ef7d27..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedFixedWidthVectorLike.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -/** - * A {@link org.apache.arrow.vector.ValueVector} mix-in that can be used in conjunction with {@link - * RepeatedValueVector} subtypes. - */ -public interface RepeatedFixedWidthVectorLike { - /** - * Allocate a new memory space for this vector. Must be called prior to using the ValueVector. - * - * @param valueCount Number of separate repeating groupings. - * @param innerValueCount Number of supported values in the vector. 
- */ - void allocateNew(int valueCount, int innerValueCount); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java deleted file mode 100644 index de7966a0aee2e..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -import org.apache.arrow.vector.DensityAwareVector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.ValueVector; - -/** - * An abstraction representing repeated value vectors. - * - *

    A repeated vector contains values that may either be flat or nested. A value consists of zero - * or more cells(inner values). Current design maintains data and offsets vectors. Each cell is - * stored in the data vector. Repeated vector uses the offset vector to determine the sequence of - * cells pertaining to an individual value. - */ -public interface RepeatedValueVector extends ValueVector, DensityAwareVector { - - int DEFAULT_REPEAT_PER_RECORD = 5; - - /** - * Get the offset vector. - * - * @return the underlying offset vector or null if none exists. - * @deprecated This API will be removed, as the current implementations no longer hold inner - * offset vectors. - */ - @Deprecated - UInt4Vector getOffsetVector(); - - /** - * Get the data vector. - * - * @return the underlying data vector or null if none exists. - */ - ValueVector getDataVector(); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedVariableWidthVectorLike.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedVariableWidthVectorLike.java deleted file mode 100644 index 98473591e37ec..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedVariableWidthVectorLike.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -/** - * A {@link org.apache.arrow.vector.ValueVector} mix-in that can be used in conjunction with - * variable {@link RepeatedValueVector} subtypes (e.g. Strings, Lists, etc). - */ -public interface RepeatedVariableWidthVectorLike { - /** - * Allocate a new memory space for this vector. Must be called prior to using the ValueVector. - * - * @param totalBytes Desired size of the underlying data buffer. - * @param parentValueCount Number of separate repeating groupings. - * @param childValueCount Number of supported values in the vector. - */ - void allocateNew(int totalBytes, int parentValueCount, int childValueCount); - - /** - * Provide the maximum amount of variable width bytes that can be stored int his vector. - * - * @return the byte capacity - */ - int getByteCapacity(); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/RunEndEncodedVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/RunEndEncodedVector.java deleted file mode 100644 index 1bb9a3d6c05f3..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/RunEndEncodedVector.java +++ /dev/null @@ -1,823 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -import static org.apache.arrow.util.Preconditions.checkArgument; - -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.util.ByteFunctionHelpers; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BufferBacked; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.TransferPair; - -/** - * A run-end encoded vector contains only two child vectors: a run_end vector of 
type int and a - * values vector of any type. There are no buffers associated with the parent vector. - */ -public class RunEndEncodedVector extends BaseValueVector implements FieldVector { - - public static final FieldVector DEFAULT_VALUE_VECTOR = ZeroVector.INSTANCE; - public static final FieldVector DEFAULT_RUN_END_VECTOR = ZeroVector.INSTANCE; - - public static RunEndEncodedVector empty(String name, BufferAllocator allocator) { - return new RunEndEncodedVector( - name, allocator, FieldType.notNullable(ArrowType.RunEndEncoded.INSTANCE), null); - } - - protected final CallBack callBack; - protected Field field; - protected FieldVector runEndsVector; - protected FieldVector valuesVector; - protected int valueCount; - - /** - * Constructs a new instance. - * - * @param name The name of the instance. - * @param allocator The allocator to use for allocating/reallocating buffers. - * @param fieldType The type of the array that is run-end encoded. - * @param callBack A schema change callback. - */ - public RunEndEncodedVector( - String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - this(new Field(name, fieldType, null), allocator, callBack); - } - - /** - * Constructs a new instance. - * - * @param field The field materialized by this vector. - * @param allocator The allocator to use for allocating/reallocating buffers. - * @param callBack A schema change callback. - */ - public RunEndEncodedVector(Field field, BufferAllocator allocator, CallBack callBack) { - this(field, allocator, DEFAULT_RUN_END_VECTOR, DEFAULT_VALUE_VECTOR, callBack); - } - - /** - * Constructs a new instance. - * - * @param field The field materialized by this vector. - * @param allocator The allocator to use for allocating/reallocating buffers. - * @param runEndsVector The vector represents run ends. Only Zero vector or type int vector with - * size 16, 32 is allowed - * @param valuesVector The vector represents values - * @param callBack A schema change callback. 
- */ - public RunEndEncodedVector( - Field field, - BufferAllocator allocator, - FieldVector runEndsVector, - FieldVector valuesVector, - CallBack callBack) { - super(allocator); - this.field = field; - this.callBack = callBack; - this.valueCount = 0; - this.runEndsVector = runEndsVector; - this.valuesVector = valuesVector; - } - - /** ValueVector interface */ - - /** - * Allocate new buffers. ValueVector implements logic to determine how much to allocate. - * - * @throws OutOfMemoryException Thrown if no memory can be allocated. - */ - @Override - public void allocateNew() throws OutOfMemoryException { - if (!allocateNewSafe()) { - throw new OutOfMemoryException("Failure while allocating memory"); - } - } - - /** - * Allocates new buffers. ValueVector implements logic to determine how much to allocate. - * - * @return Returns true if allocation was successful. - */ - @Override - public boolean allocateNewSafe() { - initializeChildrenFromFields(field.getChildren()); - for (FieldVector v : getChildrenFromFields()) { - boolean isAllocated = v.allocateNewSafe(); - if (!isAllocated) { - v.clear(); - return false; - } - } - return true; - } - - /** - * Allocate new buffer with double capacity, and copy data into the new buffer. Replace vector's - * buffer with new buffer, and release old one - */ - @Override - public void reAlloc() { - for (FieldVector v : getChildrenFromFields()) { - v.reAlloc(); - } - } - - @Override - public BufferAllocator getAllocator() { - return allocator; - } - - @Override - protected FieldReader getReaderImpl() { - throw new UnsupportedOperationException("Not yet implemented."); - } - - /** - * Set the initial record capacity. - * - * @param numRecords the initial record capacity. - */ - @Override - public void setInitialCapacity(int numRecords) {} - - /** - * Returns the maximum number of values that can be stored in this vector instance. - * - * @return the maximum number of values that can be stored in this vector instance. 
- */ - @Override - public int getValueCapacity() { - return getChildrenFromFields().stream() - .mapToInt(item -> item != null ? item.getValueCapacity() : 0) - .min() - .orElseThrow(NoSuchElementException::new); - } - - /** Alternative to clear(). Allows use as an AutoCloseable in try-with-resources. */ - @Override - public void close() { - for (FieldVector v : getChildrenFromFields()) { - v.close(); - } - } - - /** - * Release any owned ArrowBuf and reset the ValueVector to the initial state. If the vector has - * any child vectors, they will also be cleared. - */ - @Override - public void clear() { - for (FieldVector v : getChildrenFromFields()) { - v.clear(); - } - this.valueCount = 0; - } - - /** - * Reset the ValueVector to the initial state without releasing any owned ArrowBuf. Buffer - * capacities will remain unchanged and any previous data will be zeroed out. This includes - * buffers for data, validity, offset, etc. If the vector has any child vectors, they will also be - * reset. - */ - @Override - public void reset() { - for (FieldVector v : getChildrenFromFields()) { - v.reset(); - } - valueCount = 0; - } - - /** - * Get information about how this field is materialized. - * - * @return the field corresponding to this vector - */ - @Override - public Field getField() { - return field; - } - - @Override - public MinorType getMinorType() { - return MinorType.RUNENDENCODED; - } - - /** - * To transfer quota responsibility. - * - * @param ref the name of the vector - * @param allocator the target allocator - * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new - * target vector of the same type. - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return getTransferPair(ref, allocator, null); - } - - /** - * To transfer quota responsibility. 
- * - * @param field the Field object used by the target vector - * @param allocator the target allocator - * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new - * target vector of the same type. - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return getTransferPair(field, allocator, null); - } - - /** - * To transfer quota responsibility. - * - * @param ref the name of the vector - * @param allocator the target allocator - * @param callBack A schema change callback. - * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new - * target vector of the same type. - */ - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(ref, allocator, callBack); - } - - /** - * To transfer quota responsibility. - * - * @param field the Field object used by the target vector - * @param allocator the target allocator - * @param callBack A schema change callback. - * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new - * target vector of the same type. - */ - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return new TransferImpl(field, allocator, callBack); - } - - /** - * Makes a new transfer pair used to transfer underlying buffers. - * - * @param target the target for the transfer - * @return a new {@link org.apache.arrow.vector.util.TransferPair transfer pair} that is used to - * transfer underlying buffers into the target vector. 
- */ - @Override - public TransferPair makeTransferPair(ValueVector target) { - return new TransferImpl((RunEndEncodedVector) target); - } - - private class TransferImpl implements TransferPair { - - RunEndEncodedVector to; - TransferPair dataTransferPair; - TransferPair reeTransferPair; - - public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { - this(new RunEndEncodedVector(name, allocator, field.getFieldType(), callBack)); - } - - public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { - this(new RunEndEncodedVector(field, allocator, callBack)); - } - - public TransferImpl(RunEndEncodedVector to) { - this.to = to; - if (to.getRunEndsVector() instanceof ZeroVector) { - to.initializeChildrenFromFields(field.getChildren()); - } - reeTransferPair = getRunEndsVector().makeTransferPair(to.getRunEndsVector()); - dataTransferPair = getValuesVector().makeTransferPair(to.getValuesVector()); - } - - /** - * Transfer the vector data to another vector. The memory associated with this vector is - * transferred to the allocator of target vector for accounting and management purposes. - */ - @Override - public void transfer() { - to.clear(); - dataTransferPair.transfer(); - reeTransferPair.transfer(); - if (valueCount > 0) { - to.setValueCount(valueCount); - } - clear(); - } - - /** - * Slice this vector at the desired index and length, then transfer the corresponding data to - * the target vector. - * - * @param startIndex start position of the split in source vector. - * @param length length of the split. 
- */ - @Override - public void splitAndTransfer(int startIndex, int length) { - to.clear(); - if (length <= 0) { - return; - } - - int physicalStartIndex = getPhysicalIndex(startIndex); - int physicalEndIndex = getPhysicalIndex(startIndex + length - 1); - int physicalLength = physicalEndIndex - physicalStartIndex + 1; - dataTransferPair.splitAndTransfer(physicalStartIndex, physicalLength); - FieldVector toRunEndsVector = to.runEndsVector; - if (startIndex == 0) { - if (((BaseIntVector) runEndsVector).getValueAsLong(physicalEndIndex) == length) { - reeTransferPair.splitAndTransfer(physicalStartIndex, physicalLength); - } else { - reeTransferPair.splitAndTransfer(physicalStartIndex, physicalLength - 1); - toRunEndsVector.setValueCount(physicalLength); - if (toRunEndsVector instanceof SmallIntVector) { - ((SmallIntVector) toRunEndsVector).set(physicalEndIndex, length); - } else if (toRunEndsVector instanceof IntVector) { - ((IntVector) toRunEndsVector).set(physicalEndIndex, length); - } else if (toRunEndsVector instanceof BigIntVector) { - ((BigIntVector) toRunEndsVector).set(physicalEndIndex, length); - } else { - throw new IllegalArgumentException( - "Run-end vector and must be of type int with size 16, 32, or 64 bits."); - } - } - } else { - shiftRunEndsVector( - toRunEndsVector, - startIndex, - length, - physicalStartIndex, - physicalEndIndex, - physicalLength); - } - getTo().setValueCount(length); - } - - private void shiftRunEndsVector( - ValueVector toRunEndVector, - int startIndex, - int length, - int physicalStartIndex, - int physicalEndIndex, - int physicalLength) { - toRunEndVector.setValueCount(physicalLength); - toRunEndVector.getValidityBuffer().setOne(0, toRunEndVector.getValidityBuffer().capacity()); - ArrowBuf fromRunEndBuffer = runEndsVector.getDataBuffer(); - ArrowBuf toRunEndBuffer = toRunEndVector.getDataBuffer(); - int physicalLastIndex = physicalLength - 1; - if (toRunEndVector instanceof SmallIntVector) { - byte typeWidth = 
SmallIntVector.TYPE_WIDTH; - for (int i = 0; i < physicalLastIndex; i++) { - toRunEndBuffer.setShort( - (long) i * typeWidth, - fromRunEndBuffer.getShort((long) (i + physicalStartIndex) * typeWidth) - startIndex); - } - int lastEnd = - Math.min( - fromRunEndBuffer.getShort((long) physicalEndIndex * typeWidth) - startIndex, - length); - toRunEndBuffer.setShort((long) physicalLastIndex * typeWidth, lastEnd); - } else if (toRunEndVector instanceof IntVector) { - byte typeWidth = IntVector.TYPE_WIDTH; - for (int i = 0; i < physicalLastIndex; i++) { - toRunEndBuffer.setInt( - (long) i * typeWidth, - fromRunEndBuffer.getInt((long) (i + physicalStartIndex) * typeWidth) - startIndex); - } - int lastEnd = - Math.min( - fromRunEndBuffer.getInt((long) physicalEndIndex * typeWidth) - startIndex, length); - toRunEndBuffer.setInt((long) physicalLastIndex * typeWidth, lastEnd); - } else if (toRunEndVector instanceof BigIntVector) { - byte typeWidth = BigIntVector.TYPE_WIDTH; - for (int i = 0; i < physicalLastIndex; i++) { - toRunEndBuffer.setLong( - (long) i * typeWidth, - fromRunEndBuffer.getLong((long) (i + physicalStartIndex) * typeWidth) - startIndex); - } - long lastEnd = - Math.min( - fromRunEndBuffer.getLong((long) physicalEndIndex * typeWidth) - startIndex, length); - toRunEndBuffer.setLong((long) physicalLastIndex * typeWidth, lastEnd); - } else { - throw new IllegalArgumentException( - "Run-end vector and must be of type int with size 16, 32, or 64 bits."); - } - } - - @Override - public ValueVector getTo() { - return to; - } - - @Override - public void copyValueSafe(int from, int to) { - this.to.copyFrom(from, to, RunEndEncodedVector.this); - } - } - - /** - * Get a reader for this vector. - * - * @return a {@link org.apache.arrow.vector.complex.reader.FieldReader field reader} that supports - * reading values from this vector. 
- */ - @Override - public FieldReader getReader() { - throw new UnsupportedOperationException("Not yet implemented."); - } - - /** - * Get a writer for this vector. - * - * @return a {@link org.apache.arrow.vector.complex.writer.FieldWriter field writer} that supports - * writing values to this vector. - */ - public FieldWriter getWriter() { - throw new UnsupportedOperationException("Not yet implemented."); - } - - /** - * Get the number of bytes used by this vector. - * - * @return the number of bytes that is used by this vector instance. - */ - @Override - public int getBufferSize() { - int bufferSize = 0; - for (FieldVector v : getChildrenFromFields()) { - bufferSize += v.getBufferSize(); - } - return bufferSize; - } - - /** - * Returns the number of bytes that is used by this vector if it holds the given number of values. - * The result will be the same as if setValueCount() were called, followed by calling - * getBufferSize(), but without any of the closing side-effects that setValueCount() implies wrt - * finishing off the population of a vector. Some operations might wish to use this to determine - * how much memory has been used by a vector so far, even though it is not finished being - * populated. - * - * @param valueCount the number of values to assume this vector contains - * @return the buffer size if this vector is holding valueCount values - */ - @Override - public int getBufferSizeFor(int valueCount) { - return 0; - } - - /** - * Return the underlying buffers associated with this vector. Note that this doesn't impact the - * reference counts for this buffer so it only should be used for in-context access. Also note - * that this buffer changes regularly thus external classes shouldn't hold a reference to it - * (unless they change it). 
- * - * @param clear Whether to clear vector before returning; the buffers will still be refcounted; - * but the returned array will be the only reference to them - * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. - */ - @Override - public ArrowBuf[] getBuffers(boolean clear) { - return new ArrowBuf[0]; - } - - /** - * Gets the underlying buffer associated with validity vector. - * - * @return buffer - */ - @Override - public ArrowBuf getValidityBuffer() { - throw new UnsupportedOperationException( - "Run-end encoded vectors do not have a validity buffer."); - } - - /** - * Gets the underlying buffer associated with data vector. - * - * @return buffer - */ - @Override - public ArrowBuf getDataBuffer() { - throw new UnsupportedOperationException("Run-end encoded vectors do not have a data buffer."); - } - - /** - * Gets the underlying buffer associated with offset vector. - * - * @return buffer - */ - @Override - public ArrowBuf getOffsetBuffer() { - throw new UnsupportedOperationException("Run-end encoded vectors do not have a offset buffer."); - } - - /** - * Gets the number of values. - * - * @return number of values in the vector - */ - @Override - public int getValueCount() { - return valueCount; - } - - /** Set number of values in the vector. */ - @Override - public void setValueCount(int valueCount) { - this.valueCount = valueCount; - } - - /** - * Get friendly type object from the vector. - * - * @param index index of object to get - * @return friendly type object - */ - @Override - public Object getObject(int index) { - checkIndex(index); - int physicalIndex = getPhysicalIndex(index); - return valuesVector.getObject(physicalIndex); - } - - /** - * Get the run end of giving index. 
- * - * @param index index of the run end to get - * @return the run end of giving index - */ - public int getRunEnd(int index) { - checkIndex(index); - int physicalIndex = getPhysicalIndex(index); - return (int) ((BaseIntVector) runEndsVector).getValueAsLong(physicalIndex); - } - - /** - * Returns number of null elements in the vector. - * - * @return number of null elements - */ - @Override - public int getNullCount() { - // Null count is always 0 for run-end encoded array - return 0; - } - - /** - * Check whether an element in the vector is null. - * - * @param index index to check for null - * @return true if element is null - */ - @Override - public boolean isNull(int index) { - int physicalIndex = getPhysicalIndex(runEndsVector, index); - return valuesVector.isNull(physicalIndex); - } - - /** Returns hashCode of element in index with the default hasher. */ - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - /** Returns hashCode of element in index with the given hasher. */ - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - int hash = 0; - for (FieldVector v : getChildrenFromFields()) { - if (index < v.getValueCount()) { - hash = ByteFunctionHelpers.combineHash(hash, v.hashCode(index, hasher)); - } - } - return hash; - } - - /** - * Accept a generic {@link VectorVisitor} and return the result. - * - * @param the output result type. - * @param the input data together with visitor. - */ - @Override - public OUT accept(VectorVisitor visitor, IN value) { - return visitor.visit(this, value); - } - - /** - * Gets the name of the vector. - * - * @return the name of the vector. - */ - @Override - public String getName() { - return this.field.getName(); - } - - @Override - public Iterator iterator() { - return Collections.unmodifiableCollection(getChildrenFromFields()).iterator(); - } - - /** FieldVector interface */ - - /** - * Initializes the child vectors to be later loaded with loadBuffers. 
- * - * @param children the schema containing the run_ends column first and the values column second - */ - @Override - public void initializeChildrenFromFields(List children) { - checkArgument( - children.size() == 2, - "Run-end encoded vectors must have two child Fields. Found: %s", - children.isEmpty() ? "none" : children); - checkArgument( - Arrays.asList( - MinorType.SMALLINT.getType(), MinorType.INT.getType(), MinorType.BIGINT.getType()) - .contains(children.get(0).getType()), - "The first field represents the run-end vector and must be of type int " - + "with size 16, 32, or 64 bits. Found: %s", - children.get(0).getType()); - runEndsVector = (BaseIntVector) children.get(0).createVector(allocator); - valuesVector = children.get(1).createVector(allocator); - field = new Field(field.getName(), field.getFieldType(), children); - } - - /** - * The returned list is the same size as the list passed to initializeChildrenFromFields. - * - * @return the children according to schema (empty for primitive types) - */ - @Override - public List getChildrenFromFields() { - return Arrays.asList(runEndsVector, valuesVector); - } - - /** - * Loads data in the vectors. (ownBuffers must be the same size as getFieldVectors()) - * - * @param fieldNode the fieldNode - * @param ownBuffers the buffers for this Field (own buffers only, children not included) - */ - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - if (!ownBuffers.isEmpty()) { - throw new UnsupportedOperationException( - "Run-end encoded vectors do not have any associated buffers."); - } - this.valueCount = fieldNode.getLength(); - } - - /** - * Get the buffers of the fields, (same size as getFieldVectors() since it is their content). - * - * @return the buffers containing the data for this vector (ready for reading) - */ - @Override - public List getFieldBuffers() { - return List.of(); - } - - /** - * Get the inner vectors. 
- * - * @return the inner vectors for this field as defined by the TypeLayout - * @deprecated This API will be removed as the current implementations no longer support inner - * vectors. - */ - @Deprecated - @Override - public List getFieldInnerVectors() { - throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers()."); - } - - /** - * Gets the starting address of the underlying buffer associated with validity vector. - * - * @return buffer address - */ - @Override - public long getValidityBufferAddress() { - throw new UnsupportedOperationException( - "Run-end encoded vectors do not have a validity buffer."); - } - - /** - * Gets the starting address of the underlying buffer associated with data vector. - * - * @return buffer address - */ - @Override - public long getDataBufferAddress() { - throw new UnsupportedOperationException("Run-end encoded vectors do not have a data buffer."); - } - - /** - * Gets the starting address of the underlying buffer associated with offset vector. - * - * @return buffer address - */ - @Override - public long getOffsetBufferAddress() { - throw new UnsupportedOperationException( - "Run-end encoded vectors do not have an offset buffer."); - } - - /** - * Set the element at the given index to null. - * - * @param index the value to change - */ - @Override - public void setNull(int index) { - throw new UnsupportedOperationException( - "Run-end encoded vectors do not have a validity buffer."); - } - - public FieldVector getRunEndsVector() { - return runEndsVector; - } - - public FieldVector getValuesVector() { - return valuesVector; - } - - private void checkIndex(int logicalIndex) { - if (logicalIndex < 0 || logicalIndex >= valueCount) { - throw new IndexOutOfBoundsException( - String.format("index: %s, expected range (0, %s)", logicalIndex, valueCount)); - } - } - - /** - * The physical index is the index of the first value that is larger than logical index. e.g. 
if - * run_ends is [1,2,3], the physical index of logical index from 0 to 5 is [0, 1, 1, 2, 2, 2] - */ - public int getPhysicalIndex(int logicalIndex) { - return getPhysicalIndex(runEndsVector, logicalIndex); - } - - static int getPhysicalIndex(FieldVector runEndVector, int logicalIndex) { - if (runEndVector == null || runEndVector.getValueCount() == 0) { - return -1; - } - - int low = 0; - int high = runEndVector.getValueCount() - 1; - int result = -1; - - while (low <= high) { - int mid = low + (high - low) / 2; - long valueAsLong = ((BaseIntVector) runEndVector).getValueAsLong(mid); - if (valueAsLong > logicalIndex) { - result = mid; - high = mid - 1; - } else { - low = mid + 1; - } - } - - return result; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java deleted file mode 100644 index 14eb8186c4cad..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex; - -import java.util.Arrays; - -/** Utility methods for state machines based on enums. 
*/ -public class StateTool { - private StateTool() {} - - static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StateTool.class); - - /** - * Verifies currentState is in one of expectedStates, throws an - * IllegalArgumentException if it isn't. - */ - public static > void check(T currentState, T... expectedStates) { - for (T s : expectedStates) { - if (s == currentState) { - return; - } - } - throw new IllegalArgumentException( - String.format( - "Expected to be in one of these states %s but was actually in " + "state %s", - Arrays.toString(expectedStates), currentState)); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java deleted file mode 100644 index ca5f572034cee..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java +++ /dev/null @@ -1,677 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.apache.arrow.util.Preconditions.checkNotNull; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.BufferBacked; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.impl.NullableStructReaderImpl; -import org.apache.arrow.vector.complex.impl.NullableStructWriter; -import org.apache.arrow.vector.holders.ComplexHolder; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.Struct; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.TransferPair; - -/** - * A Struct vector consists of nullability/validity buffer and children vectors that make up the - * struct's fields. The children vectors are handled by the parent class. - */ -public class StructVector extends NonNullableStructVector - implements FieldVector, ValueIterableVector> { - - /** - * Construct a new empty instance which replaces an existing field with the new one in case of - * name conflict. 
- */ - public static StructVector empty(String name, BufferAllocator allocator) { - FieldType fieldType = FieldType.nullable(Struct.INSTANCE); - return new StructVector( - name, allocator, fieldType, null, ConflictPolicy.CONFLICT_REPLACE, false); - } - - /** Construct a new empty instance which preserve fields with identical names. */ - public static StructVector emptyWithDuplicates(String name, BufferAllocator allocator) { - FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - return new StructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_APPEND, true); - } - - private final NullableStructReaderImpl reader = new NullableStructReaderImpl(this); - private final NullableStructWriter writer = new NullableStructWriter(this); - - protected ArrowBuf validityBuffer; - private int validityAllocationSizeInBytes; - - /** - * Constructs a new instance. - * - * @param name The name of the instance. - * @param allocator The allocator to use to allocating/reallocating buffers. - * @param fieldType The type of this list. - * @param callBack A schema change callback. - */ - public StructVector( - String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - super(name, checkNotNull(allocator), fieldType, callBack); - this.validityBuffer = allocator.getEmpty(); - this.validityAllocationSizeInBytes = - BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION); - } - - /** - * Constructs a new instance. - * - * @param name The name of the instance. - * @param allocator The allocator to use to allocating/reallocating buffers. - * @param fieldType The type of this list. - * @param callBack A schema change callback. - * @param conflictPolicy policy to determine how duplicate names are handled. - * @param allowConflictPolicyChanges whether duplicate names are allowed at all. 
- */ - public StructVector( - String name, - BufferAllocator allocator, - FieldType fieldType, - CallBack callBack, - ConflictPolicy conflictPolicy, - boolean allowConflictPolicyChanges) { - super( - name, - checkNotNull(allocator), - fieldType, - callBack, - conflictPolicy, - allowConflictPolicyChanges); - this.validityBuffer = allocator.getEmpty(); - this.validityAllocationSizeInBytes = - BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION); - } - - /** - * Constructs a new instance. - * - * @param field The field materialized by this vector. - * @param allocator The allocator to use to allocating/reallocating buffers. - * @param callBack A schema change callback. - */ - public StructVector(Field field, BufferAllocator allocator, CallBack callBack) { - super(field, checkNotNull(allocator), callBack); - this.validityBuffer = allocator.getEmpty(); - this.validityAllocationSizeInBytes = - BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION); - } - - /** - * Constructs a new instance. - * - * @param field The field materialized by this vector. - * @param allocator The allocator to use to allocating/reallocating buffers. - * @param callBack A schema change callback. - * @param conflictPolicy policy to determine how duplicate names are handled. - * @param allowConflictPolicyChanges whether duplicate names are allowed at all. 
- */ - public StructVector( - Field field, - BufferAllocator allocator, - CallBack callBack, - ConflictPolicy conflictPolicy, - boolean allowConflictPolicyChanges) { - super(field, checkNotNull(allocator), callBack, conflictPolicy, allowConflictPolicyChanges); - this.validityBuffer = allocator.getEmpty(); - this.validityAllocationSizeInBytes = - BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION); - } - - @Override - public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - if (ownBuffers.size() != 1) { - throw new IllegalArgumentException( - "Illegal buffer count, expected " + 1 + ", got: " + ownBuffers.size()); - } - - ArrowBuf bitBuffer = ownBuffers.get(0); - - validityBuffer.getReferenceManager().release(); - validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); - valueCount = fieldNode.getLength(); - validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); - } - - @Override - public List getFieldBuffers() { - List result = new ArrayList<>(1); - setReaderAndWriterIndex(); - result.add(validityBuffer); - - return result; - } - - private void setReaderAndWriterIndex() { - validityBuffer.readerIndex(0); - validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSize(valueCount)); - } - - /** - * Get the inner vectors. - * - * @return the inner vectors for this field as defined by the TypeLayout - * @deprecated This API will be removed as the current implementations no longer support inner - * vectors. - */ - @Deprecated - @Override - public List getFieldInnerVectors() { - throw new UnsupportedOperationException("There are no inner vectors. 
Use getFieldBuffers"); - } - - @Override - public NullableStructReaderImpl getReader() { - return reader; - } - - public NullableStructWriter getWriter() { - return writer; - } - - @Override - public TransferPair getTransferPair(BufferAllocator allocator) { - return new NullableStructTransferPair( - this, - new StructVector( - name, - allocator, - field.getFieldType(), - null, - getConflictPolicy(), - allowConflictPolicyChanges), - false); - } - - @Override - public TransferPair makeTransferPair(ValueVector to) { - return new NullableStructTransferPair(this, (StructVector) to, false); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new NullableStructTransferPair( - this, - new StructVector( - ref, - allocator, - field.getFieldType(), - null, - getConflictPolicy(), - allowConflictPolicyChanges), - false); - } - - @Override - public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return new NullableStructTransferPair( - this, - new StructVector( - ref, - allocator, - field.getFieldType(), - callBack, - getConflictPolicy(), - allowConflictPolicyChanges), - false); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - return new NullableStructTransferPair( - this, - new StructVector(field, allocator, null, getConflictPolicy(), allowConflictPolicyChanges), - false); - } - - @Override - public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - return new NullableStructTransferPair( - this, - new StructVector( - field, allocator, callBack, getConflictPolicy(), allowConflictPolicyChanges), - false); - } - - /** {@link TransferPair} for this (nullable) {@link StructVector}. 
*/ - protected class NullableStructTransferPair extends StructTransferPair { - - private StructVector target; - - protected NullableStructTransferPair(StructVector from, StructVector to, boolean allocate) { - super(from, to, allocate); - this.target = to; - } - - @Override - public void transfer() { - target.clear(); - target.validityBuffer = BaseValueVector.transferBuffer(validityBuffer, target.allocator); - super.transfer(); - clear(); - } - - @Override - public void copyValueSafe(int fromIndex, int toIndex) { - while (toIndex >= target.getValidityBufferValueCapacity()) { - target.reallocValidityBuffer(); - } - BitVectorHelper.setValidityBit(target.validityBuffer, toIndex, isSet(fromIndex)); - super.copyValueSafe(fromIndex, toIndex); - } - - @Override - public void splitAndTransfer(int startIndex, int length) { - Preconditions.checkArgument( - startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, - "Invalid parameters startIndex: %s, length: %s for valueCount: %s", - startIndex, - length, - valueCount); - target.clear(); - splitAndTransferValidityBuffer(startIndex, length, target); - super.splitAndTransfer(startIndex, length); - } - } - - /* - * transfer the validity. - */ - private void splitAndTransferValidityBuffer(int startIndex, int length, StructVector target) { - int firstByteSource = BitVectorHelper.byteIndex(startIndex); - int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); - int byteSizeTarget = BitVectorHelper.getValidityBufferSize(length); - int offset = startIndex % 8; - - if (length > 0) { - if (offset == 0) { - // slice - if (target.validityBuffer != null) { - target.validityBuffer.getReferenceManager().release(); - } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); - } else { - /* Copy data - * When the first bit starts from the middle of a byte (offset != 0), - * copy data from src BitVector. 
- * Each byte in the target is composed by a part in i-th byte, - * another part in (i+1)-th byte. - */ - target.allocateValidityBuffer(byteSizeTarget); - - for (int i = 0; i < byteSizeTarget - 1; i++) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset); - - target.validityBuffer.setByte(i, (b1 + b2)); - } - - /* Copying the last piece is done in the following manner: - * if the source vector has 1 or more bytes remaining, we copy - * the last piece as a byte formed by shifting data - * from the current byte and the next byte. - * - * if the source vector has no more bytes remaining - * (we are at the last byte), we copy the last piece as a byte - * by shifting data from the current byte. - */ - if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - byte b2 = - BitVectorHelper.getBitsFromNextByte( - validityBuffer, firstByteSource + byteSizeTarget, offset); - - target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); - } else { - byte b1 = - BitVectorHelper.getBitsFromCurrentByte( - validityBuffer, firstByteSource + byteSizeTarget - 1, offset); - target.validityBuffer.setByte(byteSizeTarget - 1, b1); - } - } - } - } - - /** - * Get the value capacity of the internal validity buffer. - * - * @return number of elements that validity buffer can hold - */ - private int getValidityBufferValueCapacity() { - return checkedCastToInt(validityBuffer.capacity() * 8); - } - - /** - * Get the current value capacity for the vector. - * - * @return number of elements that vector can hold. - */ - @Override - public int getValueCapacity() { - return Math.min(getValidityBufferValueCapacity(), super.getValueCapacity()); - } - - /** - * Return the underlying buffers associated with this vector. 
Note that this doesn't impact the - * reference counts for this buffer, so it only should be used for in-context access. Also note - * that this buffer changes regularly, thus external classes shouldn't hold a reference to it - * (unless they change it). - * - * @param clear Whether to clear vector before returning, the buffers will still be refcounted but - * the returned array will be the only reference to them. Also, this won't clear the child - * buffers. - * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. - */ - @Override - public ArrowBuf[] getBuffers(boolean clear) { - setReaderAndWriterIndex(); - final ArrowBuf[] buffers; - if (getBufferSize() == 0) { - buffers = new ArrowBuf[0]; - } else { - List list = new ArrayList<>(); - list.add(validityBuffer); - list.addAll(Arrays.asList(super.getBuffers(false))); - buffers = list.toArray(new ArrowBuf[list.size()]); - } - if (clear) { - for (ArrowBuf buffer : buffers) { - buffer.getReferenceManager().retain(); - } - clear(); - } - - return buffers; - } - - /** Close the vector and release the associated buffers. */ - @Override - public void close() { - clearValidityBuffer(); - super.close(); - } - - /** Same as {@link #close()}. */ - @Override - public void clear() { - clearValidityBuffer(); - super.clear(); - } - - /** Reset this vector to empty, does not release buffers. */ - @Override - public void reset() { - super.reset(); - validityBuffer.setZero(0, validityBuffer.capacity()); - } - - /** Release the validity buffer. */ - private void clearValidityBuffer() { - validityBuffer.getReferenceManager().release(); - validityBuffer = allocator.getEmpty(); - } - - /** - * Get the size (number of bytes) of underlying buffers used by this vector. - * - * @return size of underlying buffers. 
- */ - @Override - public int getBufferSize() { - if (valueCount == 0) { - return 0; - } - return super.getBufferSize() + BitVectorHelper.getValidityBufferSize(valueCount); - } - - /** - * Get the potential buffer size for a particular number of records. - * - * @param valueCount desired number of elements in the vector - * @return estimated size of underlying buffers if the vector holds a given number of elements - */ - @Override - public int getBufferSizeFor(final int valueCount) { - if (valueCount == 0) { - return 0; - } - return super.getBufferSizeFor(valueCount) + BitVectorHelper.getValidityBufferSize(valueCount); - } - - @Override - public void setInitialCapacity(int numRecords) { - validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(numRecords); - super.setInitialCapacity(numRecords); - } - - @Override - public void setInitialCapacity(int numRecords, double density) { - validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(numRecords); - super.setInitialCapacity(numRecords, density); - } - - @Override - public boolean allocateNewSafe() { - /* Boolean to keep track if all the memory allocations were successful - * Used in the case of composite vectors when we need to allocate multiple - * buffers for multiple vectors. 
If one of the allocations failed we need to - * clear all the memory that we allocated - */ - boolean success = false; - try { - clear(); - allocateValidityBuffer(validityAllocationSizeInBytes); - success = super.allocateNewSafe(); - } finally { - if (!success) { - clear(); - } - } - return success; - } - - private void allocateValidityBuffer(final long size) { - final int curSize = (int) size; - validityBuffer = allocator.buffer(curSize); - validityBuffer.readerIndex(0); - validityAllocationSizeInBytes = curSize; - validityBuffer.setZero(0, validityBuffer.capacity()); - } - - @Override - public void reAlloc() { - /* reallocate the validity buffer */ - reallocValidityBuffer(); - super.reAlloc(); - } - - private void reallocValidityBuffer() { - final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity()); - long newAllocationSize = getNewAllocationSize(currentBufferCapacity); - - final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity); - newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); - validityBuffer.getReferenceManager().release(1); - validityBuffer = newBuf; - validityAllocationSizeInBytes = (int) newAllocationSize; - } - - private long getNewAllocationSize(int currentBufferCapacity) { - long newAllocationSize = currentBufferCapacity * 2L; - if (newAllocationSize == 0) { - if (validityAllocationSizeInBytes > 0) { - newAllocationSize = validityAllocationSizeInBytes; - } else { - newAllocationSize = - BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION) * 2L; - } - } - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); - assert newAllocationSize >= 1; - - if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) { - throw new OversizedAllocationException("Unable to expand the buffer"); - } - return newAllocationSize; - } - - @Override - public long getValidityBufferAddress() { - return 
validityBuffer.memoryAddress(); - } - - @Override - public long getDataBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public long getOffsetBufferAddress() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getValidityBuffer() { - return validityBuffer; - } - - @Override - public ArrowBuf getDataBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public ArrowBuf getOffsetBuffer() { - throw new UnsupportedOperationException(); - } - - @Override - public Map getObject(int index) { - if (isSet(index) == 0) { - return null; - } else { - return super.getObject(index); - } - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - if (isSet(index) == 0) { - return ArrowBufPointer.NULL_HASH_CODE; - } else { - return super.hashCode(index, hasher); - } - } - - @Override - public void get(int index, ComplexHolder holder) { - holder.isSet = isSet(index); - if (holder.isSet == 0) { - holder.reader = null; - return; - } - super.get(index, holder); - } - - /** Return the number of null values in the vector. */ - @Override - public int getNullCount() { - return BitVectorHelper.getNullCount(validityBuffer, valueCount); - } - - /** Returns true if the value at the provided index is null. */ - @Override - public boolean isNull(int index) { - return isSet(index) == 0; - } - - /** Returns true the value at the given index is set (i.e. not null). */ - public int isSet(int index) { - final int byteIndex = index >> 3; - final byte b = validityBuffer.getByte(byteIndex); - final int bitIndex = index & 7; - return (b >> bitIndex) & 0x01; - } - - /** - * Marks the value at index as being set. Reallocates the validity buffer if index is larger than - * current capacity. 
- */ - public void setIndexDefined(int index) { - while (index >= getValidityBufferValueCapacity()) { - /* realloc the inner buffers if needed */ - reallocValidityBuffer(); - } - BitVectorHelper.setBit(validityBuffer, index); - } - - /** Marks the value at index as null/not set. */ - @Override - public void setNull(int index) { - while (index >= getValidityBufferValueCapacity()) { - /* realloc the inner buffers if needed */ - reallocValidityBuffer(); - } - BitVectorHelper.unsetBit(validityBuffer, index); - } - - @Override - public void setValueCount(int valueCount) { - Preconditions.checkArgument(valueCount >= 0); - while (valueCount > getValidityBufferValueCapacity()) { - /* realloc the inner buffers if needed */ - reallocValidityBuffer(); - } - super.setValueCount(valueCount); - this.valueCount = valueCount; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/VectorWithOrdinal.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/VectorWithOrdinal.java deleted file mode 100644 index eb81e6739ffc3..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/VectorWithOrdinal.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex; - -import org.apache.arrow.vector.ValueVector; - -/** - * Tuple of a {@link ValueVector} and an index into a data structure containing the {@link - * ValueVector}. Useful for composite types to determine the index of a child. - */ -public class VectorWithOrdinal { - public final ValueVector vector; - public final int ordinal; - - public VectorWithOrdinal(ValueVector v, int ordinal) { - this.vector = v; - this.ordinal = ordinal; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java deleted file mode 100644 index b2e95663f7357..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex.impl; - -import java.util.Iterator; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; -import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.holders.DenseUnionHolder; -import org.apache.arrow.vector.holders.UnionHolder; - -/** - * Base class providing common functionality for {@link FieldReader} implementations. - * - *

    This includes tracking the current index and throwing implementations of optional methods. - */ -abstract class AbstractBaseReader implements FieldReader { - - static final org.slf4j.Logger logger = - org.slf4j.LoggerFactory.getLogger(AbstractBaseReader.class); - - private int index; - - public AbstractBaseReader() { - super(); - } - - @Override - public int getPosition() { - return index; - } - - @Override - public void setPosition(int index) { - this.index = index; - } - - protected int idx() { - return index; - } - - @Override - public void reset() { - index = 0; - } - - @Override - public Iterator iterator() { - throw new IllegalStateException("The current reader doesn't support reading as a map."); - } - - @Override - public boolean next() { - throw new IllegalStateException("The current reader doesn't support getting next information."); - } - - @Override - public int size() { - throw new IllegalStateException("The current reader doesn't support getting size information."); - } - - @Override - public void read(UnionHolder holder) { - holder.reader = this; - holder.isSet = this.isSet() ? 1 : 0; - } - - @Override - public void read(int index, UnionHolder holder) { - throw new IllegalStateException("The current reader doesn't support reading union type"); - } - - @Override - public void copyAsValue(UnionWriter writer) { - throw new IllegalStateException("The current reader doesn't support reading union type"); - } - - @Override - public void read(DenseUnionHolder holder) { - holder.reader = this; - holder.isSet = this.isSet() ? 
1 : 0; - } - - @Override - public void read(int index, DenseUnionHolder holder) { - throw new IllegalStateException("The current reader doesn't support reading dense union type"); - } - - @Override - public void copyAsValue(DenseUnionWriter writer) { - throw new IllegalStateException("The current reader doesn't support reading dense union type"); - } - - @Override - public void copyAsValue(ListWriter writer) { - ComplexCopier.copy(this, (FieldWriter) writer); - } - - @Override - public void copyAsValue(MapWriter writer) { - ComplexCopier.copy(this, (FieldWriter) writer); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java deleted file mode 100644 index 67f53d55bad3a..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex.impl; - -import org.apache.arrow.vector.complex.writer.FieldWriter; - -/** - * Base class providing common functionality for {@link FieldWriter} implementations. - * - *

    Currently this only includes index tracking. - */ -abstract class AbstractBaseWriter implements FieldWriter { - // private static final org.slf4j.Logger logger = - // org.slf4j.LoggerFactory.getLogger(AbstractBaseWriter.class); - - private int index; - - @Override - public String toString() { - return super.toString() + "[index = " + index + "]"; - } - - int idx() { - return index; - } - - @Override - public int getPosition() { - return index; - } - - @Override - public void setPosition(int index) { - this.index = index; - } - - @Override - public void end() {} -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java deleted file mode 100644 index f3e48aa050e30..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex.impl; - -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.StateTool; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; -import org.apache.arrow.vector.types.pojo.Field; - -/** Concrete implementation of {@link ComplexWriter}. */ -public class ComplexWriterImpl extends AbstractFieldWriter implements ComplexWriter { - - private NullableStructWriter structRoot; - private UnionListWriter listRoot; - private UnionListViewWriter listViewRoot; - private UnionMapWriter mapRoot; - private final NonNullableStructVector container; - - Mode mode = Mode.INIT; - private final String name; - private final boolean unionEnabled; - private final NullableStructWriterFactory nullableStructWriterFactory; - - private enum Mode { - INIT, - STRUCT, - LIST, - LISTVIEW, - MAP - } - - /** - * Constructs a new instance. - * - * @param name The name of the writer (for tracking). - * @param container A container for the data field to be written. - * @param unionEnabled Unused. - * @param caseSensitive Whether field names are case-sensitive (if false field names will be - * lowercase. - */ - public ComplexWriterImpl( - String name, NonNullableStructVector container, boolean unionEnabled, boolean caseSensitive) { - this.name = name; - this.container = container; - this.unionEnabled = unionEnabled; - nullableStructWriterFactory = - caseSensitive - ? 
NullableStructWriterFactory.getNullableCaseSensitiveStructWriterFactoryInstance() - : NullableStructWriterFactory.getNullableStructWriterFactoryInstance(); - } - - public ComplexWriterImpl(String name, NonNullableStructVector container, boolean unionEnabled) { - this(name, container, unionEnabled, false); - } - - public ComplexWriterImpl(String name, NonNullableStructVector container) { - this(name, container, false); - } - - @Override - public Field getField() { - return container.getField(); - } - - @Override - public int getValueCapacity() { - return container.getValueCapacity(); - } - - private void check(Mode... modes) { - StateTool.check(mode, modes); - } - - @Override - public void reset() { - setPosition(0); - } - - @Override - public void close() throws Exception { - clear(); - structRoot.close(); - if (listRoot != null) { - listRoot.close(); - } - if (listViewRoot != null) { - listViewRoot.close(); - } - } - - @Override - public void clear() { - switch (mode) { - case STRUCT: - structRoot.clear(); - break; - case LIST: - listRoot.clear(); - break; - case LISTVIEW: - listViewRoot.clear(); - break; - case MAP: - mapRoot.clear(); - break; - default: - break; - } - } - - @Override - public void setValueCount(int count) { - switch (mode) { - case STRUCT: - structRoot.setValueCount(count); - break; - case LIST: - listRoot.setValueCount(count); - break; - case LISTVIEW: - listViewRoot.setValueCount(count); - break; - case MAP: - mapRoot.setValueCount(count); - break; - default: - break; - } - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - switch (mode) { - case STRUCT: - structRoot.setPosition(index); - break; - case LIST: - listRoot.setPosition(index); - break; - case LISTVIEW: - listViewRoot.setPosition(index); - break; - case MAP: - mapRoot.setPosition(index); - break; - default: - break; - } - } - - /** - * Returns a StructWriter, initializing it necessary from the constructor this instance was - * constructed with. 
- */ - public StructWriter directStruct() { - Preconditions.checkArgument(name == null); - - switch (mode) { - case INIT: - structRoot = nullableStructWriterFactory.build((StructVector) container); - structRoot.setPosition(idx()); - mode = Mode.STRUCT; - break; - - case STRUCT: - break; - - default: - check(Mode.INIT, Mode.STRUCT); - } - - return structRoot; - } - - @Override - public StructWriter rootAsStruct() { - switch (mode) { - case INIT: - // TODO allow dictionaries in complex types - StructVector struct = container.addOrGetStruct(name); - structRoot = nullableStructWriterFactory.build(struct); - structRoot.setPosition(idx()); - mode = Mode.STRUCT; - break; - - case STRUCT: - break; - - default: - check(Mode.INIT, Mode.STRUCT); - } - - return structRoot; - } - - @Override - public void allocate() { - if (structRoot != null) { - structRoot.allocate(); - } else if (listRoot != null) { - listRoot.allocate(); - } - } - - @Override - public ListWriter rootAsList() { - switch (mode) { - case INIT: - int vectorCount = container.size(); - // TODO allow dictionaries in complex types - ListVector listVector = container.addOrGetList(name); - if (container.size() > vectorCount) { - listVector.allocateNew(); - } - listRoot = new UnionListWriter(listVector, nullableStructWriterFactory); - listRoot.setPosition(idx()); - mode = Mode.LIST; - break; - - case LIST: - break; - - default: - check(Mode.INIT, Mode.STRUCT); - } - - return listRoot; - } - - @Override - public ListWriter rootAsListView() { - switch (mode) { - case INIT: - int vectorCount = container.size(); - // TODO allow dictionaries in complex types - ListViewVector listVector = container.addOrGetListView(name); - if (container.size() > vectorCount) { - listVector.allocateNew(); - } - listViewRoot = new UnionListViewWriter(listVector, nullableStructWriterFactory); - listViewRoot.setPosition(idx()); - mode = Mode.LISTVIEW; - break; - - case LISTVIEW: - break; - - default: - check(Mode.INIT, Mode.STRUCT); - } - - 
return listViewRoot; - } - - @Override - public MapWriter rootAsMap(boolean keysSorted) { - switch (mode) { - case INIT: - int vectorCount = container.size(); - // TODO allow dictionaries in complex types - MapVector mapVector = container.addOrGetMap(name, keysSorted); - if (container.size() > vectorCount) { - mapVector.allocateNew(); - } - mapRoot = new UnionMapWriter(mapVector); - mapRoot.setPosition(idx()); - mode = Mode.MAP; - break; - - case MAP: - break; - - default: - check(Mode.INIT, Mode.STRUCT); - } - - return mapRoot; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructReaderImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructReaderImpl.java deleted file mode 100644 index 10dcadc4f9eac..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructReaderImpl.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex.impl; - -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.types.pojo.Field; - -/** - * An {@link org.apache.arrow.vector.complex.reader.FieldReader} for reading nullable struct - * vectors. - */ -public class NullableStructReaderImpl extends SingleStructReaderImpl { - - private StructVector nullableStructVector; - - public NullableStructReaderImpl(NonNullableStructVector vector) { - super(vector); - this.nullableStructVector = (StructVector) vector; - } - - @Override - public Field getField() { - return nullableStructVector.getField(); - } - - @Override - public void copyAsValue(StructWriter writer) { - NullableStructWriter impl = (NullableStructWriter) writer; - impl.container.copyFromSafe(idx(), impl.idx(), nullableStructVector); - } - - @Override - public void copyAsField(String name, StructWriter writer) { - NullableStructWriter impl = (NullableStructWriter) writer.struct(name); - impl.container.copyFromSafe(idx(), impl.idx(), nullableStructVector); - } - - @Override - public boolean isSet() { - return !nullableStructVector.isNull(idx()); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java deleted file mode 100644 index cc945bd779546..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex.impl; - -import org.apache.arrow.vector.complex.StructVector; - -/** - * A factory for {@link NullableStructWriter} instances. The factory allows for configuring if field - * names should be considered case-sensitive. - */ -public class NullableStructWriterFactory { - private final boolean caseSensitive; - private static final NullableStructWriterFactory nullableStructWriterFactory = - new NullableStructWriterFactory(false); - private static final NullableStructWriterFactory nullableCaseSensitiveWriterFactory = - new NullableStructWriterFactory(true); - - public NullableStructWriterFactory(boolean caseSensitive) { - this.caseSensitive = caseSensitive; - } - - /** Creates a new instance. */ - public NullableStructWriter build(StructVector container) { - return this.caseSensitive - ? 
new NullableCaseSensitiveStructWriter(container) - : new NullableStructWriter(container); - } - - public static NullableStructWriterFactory getNullableStructWriterFactoryInstance() { - return nullableStructWriterFactory; - } - - public static NullableStructWriterFactory getNullableCaseSensitiveStructWriterFactoryInstance() { - return nullableCaseSensitiveWriterFactory; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleListReaderImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleListReaderImpl.java deleted file mode 100644 index 9442afde34303..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleListReaderImpl.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex.impl; - -import org.apache.arrow.vector.complex.AbstractContainerVector; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.types.Types.MinorType; - -/** An implementation of {@link AbstractFieldReader} for lists vectors. */ -@SuppressWarnings("unused") -public class SingleListReaderImpl extends AbstractFieldReader { - - private final String name; - private final AbstractContainerVector container; - private FieldReader reader; - - /** - * Constructs a new instance. - * - * @param name The name of field to read in container. - * @param container The container holding a list. - */ - public SingleListReaderImpl(String name, AbstractContainerVector container) { - super(); - this.name = name; - this.container = container; - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - if (reader != null) { - reader.setPosition(index); - } - } - - @Override - public Object readObject() { - return reader.readObject(); - } - - @Override - public FieldReader reader() { - if (reader == null) { - reader = container.getChild(name).getReader(); - setPosition(idx()); - } - return reader; - } - - @Override - public MinorType getMinorType() { - return MinorType.LIST; - } - - @Override - public boolean isSet() { - return false; - } - - @Override - public void copyAsValue(ListWriter writer) { - throw new UnsupportedOperationException( - "Generic list copying not yet supported. Please resolve to typed list."); - } - - @Override - public void copyAsField(String name, StructWriter writer) { - throw new UnsupportedOperationException( - "Generic list copying not yet supported. 
Please resolve to typed list."); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleStructReaderImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleStructReaderImpl.java deleted file mode 100644 index 65e80fcfac06e..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleStructReaderImpl.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex.impl; - -import java.util.HashMap; -import java.util.Map; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; - -/** - * {@link FieldReader} for a single {@link org.apache.arrow.vector.complex.NonNullableStructVector}. 
- */ -@SuppressWarnings("unused") -public class SingleStructReaderImpl extends AbstractFieldReader { - - private final NonNullableStructVector vector; - private final Map fields = new HashMap<>(); - - public SingleStructReaderImpl(NonNullableStructVector vector) { - this.vector = vector; - } - - private void setChildrenPosition(int index) { - for (FieldReader r : fields.values()) { - r.setPosition(index); - } - } - - @Override - public Field getField() { - return vector.getField(); - } - - @Override - public FieldReader reader(String name) { - FieldReader reader = fields.get(name); - if (reader == null) { - ValueVector child = vector.getChild(name); - if (child == null) { - reader = NullReader.INSTANCE; - } else { - reader = child.getReader(); - } - fields.put(name, reader); - reader.setPosition(idx()); - } - return reader; - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - for (FieldReader r : fields.values()) { - r.setPosition(index); - } - } - - @Override - public Object readObject() { - return vector.getObject(idx()); - } - - @Override - public MinorType getMinorType() { - return MinorType.STRUCT; - } - - @Override - public boolean isSet() { - return true; - } - - @Override - public java.util.Iterator iterator() { - return vector.fieldNameIterator(); - } - - @Override - public void copyAsValue(StructWriter writer) { - SingleStructWriter impl = (SingleStructWriter) writer; - impl.container.copyFromSafe(idx(), impl.idx(), vector); - } - - @Override - public void copyAsField(String name, StructWriter writer) { - SingleStructWriter impl = (SingleStructWriter) writer.struct(name); - impl.container.copyFromSafe(idx(), impl.idx(), vector); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java deleted file mode 100644 index 7dbcbf8babe00..0000000000000 --- 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex.impl; - -import org.apache.arrow.vector.complex.writer.BaseWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructOrListWriter; -import org.apache.arrow.vector.complex.writer.BigIntWriter; -import org.apache.arrow.vector.complex.writer.BitWriter; -import org.apache.arrow.vector.complex.writer.Float4Writer; -import org.apache.arrow.vector.complex.writer.Float8Writer; -import org.apache.arrow.vector.complex.writer.IntWriter; -import org.apache.arrow.vector.complex.writer.VarBinaryWriter; -import org.apache.arrow.vector.complex.writer.VarCharWriter; - -/** Concrete implementation of {@link StructOrListWriter}. */ -public class StructOrListWriterImpl implements StructOrListWriter { - - public final BaseWriter.StructWriter struct; - public final BaseWriter.ListWriter list; - - /** - * Constructs a new instance using a {@link BaseWriter.StructWriter} (instead of an {@link - * BaseWriter.ListWriter}). 
- */ - public StructOrListWriterImpl(final BaseWriter.StructWriter writer) { - this.struct = writer; - this.list = null; - } - - /** - * Constructs a new instance using a {@link BaseWriter.ListWriter} (instead of a {@link - * BaseWriter.StructWriter}). - */ - public StructOrListWriterImpl(final BaseWriter.ListWriter writer) { - this.struct = null; - this.list = writer; - } - - /** Start writing to either the list or the struct. */ - public void start() { - if (struct != null) { - struct.start(); - } else { - list.startList(); - } - } - - /** Finish writing to the list or struct. */ - public void end() { - if (struct != null) { - struct.end(); - } else { - list.endList(); - } - } - - /** Creates a new writer for a struct with the given name. */ - public StructOrListWriter struct(final String name) { - assert struct != null; - return new StructOrListWriterImpl(struct.struct(name)); - } - - /** - * Creates a new writer for a list of structs. - * - * @param name Unused. - * @deprecated use {@link #listOfStruct(String)} instead. - */ - @Deprecated - public StructOrListWriter listoftstruct(final String name) { - return listOfStruct(name); - } - - /** - * Creates a new writer for a list of structs. - * - * @param name Unused. - */ - public StructOrListWriter listOfStruct(final String name) { - assert list != null; - return new StructOrListWriterImpl(list.struct()); - } - - public StructOrListWriter list(final String name) { - assert struct != null; - return new StructOrListWriterImpl(struct.list(name)); - } - - public boolean isStructWriter() { - return struct != null; - } - - public boolean isListWriter() { - return list != null; - } - - public VarCharWriter varChar(final String name) { - return (struct != null) ? struct.varChar(name) : list.varChar(); - } - - public IntWriter integer(final String name) { - return (struct != null) ? struct.integer(name) : list.integer(); - } - - public BigIntWriter bigInt(final String name) { - return (struct != null) ? 
struct.bigInt(name) : list.bigInt(); - } - - public Float4Writer float4(final String name) { - return (struct != null) ? struct.float4(name) : list.float4(); - } - - public Float8Writer float8(final String name) { - return (struct != null) ? struct.float8(name) : list.float8(); - } - - public BitWriter bit(final String name) { - return (struct != null) ? struct.bit(name) : list.bit(); - } - - public VarBinaryWriter binary(final String name) { - return (struct != null) ? struct.varBinary(name) : list.varBinary(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java deleted file mode 100644 index a2bad12183d69..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex.impl; - -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; -import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.holders.UnionHolder; -import org.apache.arrow.vector.types.Types.MinorType; - -/** Reader for fixed size list vectors. */ -public class UnionFixedSizeListReader extends AbstractFieldReader { - - private final FixedSizeListVector vector; - private final ValueVector data; - private final int listSize; - - private int currentOffset; - - /** Constructs a new instance that reads data in vector. */ - public UnionFixedSizeListReader(FixedSizeListVector vector) { - this.vector = vector; - this.data = vector.getDataVector(); - this.listSize = vector.getListSize(); - } - - @Override - public boolean isSet() { - return !vector.isNull(idx()); - } - - @Override - public FieldReader reader() { - return data.getReader(); - } - - @Override - public Object readObject() { - return vector.getObject(idx()); - } - - @Override - public MinorType getMinorType() { - return vector.getMinorType(); - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - data.getReader().setPosition(index * listSize); - currentOffset = 0; - } - - @Override - public void read(int index, UnionHolder holder) { - setPosition(idx()); - for (int i = -1; i < index; i++) { - if (!next()) { - throw new IndexOutOfBoundsException("Requested " + index + ", size " + listSize); - } - } - holder.reader = data.getReader(); - holder.isSet = vector.isNull(idx()) ? 
0 : 1; - } - - @Override - public int size() { - return listSize; - } - - @Override - public boolean next() { - if (currentOffset < listSize) { - data.getReader().setPosition(idx() * listSize + currentOffset++); - return true; - } else { - return false; - } - } - - @Override - public void copyAsValue(ListWriter writer) { - ComplexCopier.copy(this, (FieldWriter) writer); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java deleted file mode 100644 index be236c31662bb..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex.impl; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; - -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.holders.UnionHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; - -/** {@link FieldReader} for list of union types. */ -public class UnionLargeListReader extends AbstractFieldReader { - - private LargeListVector vector; - private ValueVector data; - private static final long OFFSET_WIDTH = 8L; - - public UnionLargeListReader(LargeListVector vector) { - this.vector = vector; - this.data = vector.getDataVector(); - } - - @Override - public Field getField() { - return vector.getField(); - } - - @Override - public boolean isSet() { - return !vector.isNull(idx()); - } - - private long currentOffset; - private long maxOffset; - - @Override - public void setPosition(int index) { - super.setPosition(index); - currentOffset = vector.getOffsetBuffer().getLong((long) index * OFFSET_WIDTH) - 1; - maxOffset = vector.getOffsetBuffer().getLong(((long) index + 1L) * OFFSET_WIDTH); - } - - @Override - public FieldReader reader() { - return data.getReader(); - } - - @Override - public Object readObject() { - return vector.getObject(idx()); - } - - @Override - public MinorType getMinorType() { - return MinorType.LARGELIST; - } - - @Override - public void read(int index, UnionHolder holder) { - setPosition(index); - for (int i = -1; i < index; i++) { - next(); - } - holder.reader = data.getReader(); - holder.isSet = data.getReader().isSet() ? 1 : 0; - } - - @Override - public int size() { - int size = - checkedCastToInt(maxOffset - currentOffset - 1); // todo revisit when int64 vectors are done - return size < 0 ? 
0 : size; - } - - @Override - public boolean next() { - if (currentOffset + 1 < maxOffset) { - data.getReader() - .setPosition( - checkedCastToInt(++currentOffset)); // todo revisit when int64 vectors are done - return true; - } else { - return false; - } - } - - public void copyAsValue(UnionLargeListWriter writer) { - ComplexCopier.copy(this, (FieldWriter) writer); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListViewReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListViewReader.java deleted file mode 100644 index 4bcd028de300d..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListViewReader.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex.impl; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; - -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector; -import org.apache.arrow.vector.complex.LargeListViewVector; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.UnionHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; - -/** {@link FieldReader} for largeListView of union types. */ -public class UnionLargeListViewReader extends AbstractFieldReader { - - private final LargeListViewVector vector; - private final ValueVector data; - private int currentOffset; - private int size; - - /** - * Constructor for UnionLargeListViewReader. - * - * @param vector the vector to read from - */ - public UnionLargeListViewReader(LargeListViewVector vector) { - this.vector = vector; - this.data = vector.getDataVector(); - } - - @Override - public Field getField() { - return vector.getField(); - } - - @Override - public boolean isSet() { - return !vector.isNull(idx()); - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - if (vector.getOffsetBuffer().capacity() == 0) { - currentOffset = 0; - size = 0; - } else { - currentOffset = - vector - .getOffsetBuffer() - .getInt(index * (long) BaseLargeRepeatedValueViewVector.OFFSET_WIDTH); - size = - vector.getSizeBuffer().getInt(index * (long) BaseLargeRepeatedValueViewVector.SIZE_WIDTH); - } - } - - @Override - public FieldReader reader() { - return data.getReader(); - } - - @Override - public Object readObject() { - return vector.getObject(idx()); - } - - @Override - public MinorType getMinorType() { - return MinorType.LISTVIEW; - } - - @Override - public void read(int index, UnionHolder holder) { - setPosition(idx()); - for (int i = -1; i < index; i++) { - next(); - } - holder.reader = 
data.getReader(); - holder.isSet = data.getReader().isSet() ? 1 : 0; - } - - @Override - public int size() { - return Math.max(size, 0); - } - - @Override - public boolean next() { - // Here, the currentOffSet keeps track of the current position in the vector inside the list at - // set position. - // And, size keeps track of the elements count in the list, so to make sure we traverse - // the full list, we need to check if the currentOffset is less than the currentOffset + size - if (currentOffset < currentOffset + size) { - data.getReader().setPosition(checkedCastToInt(currentOffset++)); - return true; - } else { - return false; - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java deleted file mode 100644 index 014608afeeb35..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex.impl; - -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; -import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.holders.UnionHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; - -/** {@link FieldReader} for list of union types. */ -public class UnionListReader extends AbstractFieldReader { - - private ListVector vector; - private ValueVector data; - private static final int OFFSET_WIDTH = 4; - - public UnionListReader(ListVector vector) { - this.vector = vector; - this.data = vector.getDataVector(); - } - - @Override - public Field getField() { - return vector.getField(); - } - - @Override - public boolean isSet() { - return !vector.isNull(idx()); - } - - private int currentOffset; - private int maxOffset; - - @Override - public void setPosition(int index) { - super.setPosition(index); - if (vector.getOffsetBuffer().capacity() == 0) { - currentOffset = 0; - maxOffset = 0; - } else { - currentOffset = vector.getOffsetBuffer().getInt(index * (long) OFFSET_WIDTH) - 1; - maxOffset = vector.getOffsetBuffer().getInt((index + 1) * (long) OFFSET_WIDTH); - } - } - - @Override - public FieldReader reader() { - return data.getReader(); - } - - @Override - public Object readObject() { - return vector.getObject(idx()); - } - - @Override - public MinorType getMinorType() { - return MinorType.LIST; - } - - @Override - public void read(int index, UnionHolder holder) { - setPosition(idx()); - for (int i = -1; i < index; i++) { - next(); - } - holder.reader = data.getReader(); - holder.isSet = data.getReader().isSet() ? 1 : 0; - } - - @Override - public int size() { - int size = maxOffset - currentOffset - 1; - return size < 0 ? 
0 : size; - } - - @Override - public boolean next() { - if (currentOffset + 1 < maxOffset) { - data.getReader().setPosition(++currentOffset); - return true; - } else { - return false; - } - } - - @Override - public void copyAsValue(ListWriter writer) { - ComplexCopier.copy(this, (FieldWriter) writer); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListViewReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListViewReader.java deleted file mode 100644 index 17ac1150fd412..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListViewReader.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex.impl; - -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.UnionHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; - -/** {@link FieldReader} for listview of union types. 
*/ -public class UnionListViewReader extends AbstractFieldReader { - - private final ListViewVector vector; - private final ValueVector data; - private int currentOffset; - private int size; - - /** - * Constructor for UnionListViewReader. - * - * @param vector the vector to read from - */ - public UnionListViewReader(ListViewVector vector) { - this.vector = vector; - this.data = vector.getDataVector(); - } - - @Override - public Field getField() { - return vector.getField(); - } - - @Override - public boolean isSet() { - return !vector.isNull(idx()); - } - - @Override - public void setPosition(int index) { - super.setPosition(index); - if (vector.getOffsetBuffer().capacity() == 0) { - currentOffset = 0; - size = 0; - } else { - currentOffset = - vector.getOffsetBuffer().getInt(index * (long) BaseRepeatedValueViewVector.OFFSET_WIDTH); - size = vector.getSizeBuffer().getInt(index * (long) BaseRepeatedValueViewVector.SIZE_WIDTH); - } - } - - @Override - public FieldReader reader() { - return data.getReader(); - } - - @Override - public Object readObject() { - return vector.getObject(idx()); - } - - @Override - public MinorType getMinorType() { - return MinorType.LISTVIEW; - } - - @Override - public void read(int index, UnionHolder holder) { - setPosition(idx()); - for (int i = -1; i < index; i++) { - next(); - } - holder.reader = data.getReader(); - holder.isSet = data.getReader().isSet() ? 1 : 0; - } - - @Override - public int size() { - return Math.max(size, 0); - } - - @Override - public boolean next() { - // Here, the currentOffSet keeps track of the current position in the vector inside the list at - // set position. 
- // And, size keeps track of the elements count in the list, so to make sure we traverse - // the full list, we need to check if the currentOffset is less than the currentOffset + size - if (currentOffset < currentOffset + size) { - data.getReader().setPosition(currentOffset++); - return true; - } else { - return false; - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionMapReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionMapReader.java deleted file mode 100644 index b3ee62e17ae78..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionMapReader.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex.impl; - -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.types.Types.MinorType; - -/** Reader for a MapVector. */ -public class UnionMapReader extends UnionListReader { - - private String keyName = MapVector.KEY_NAME; - private String valueName = MapVector.VALUE_NAME; - - /** - * Construct a new reader for the given vector. 
- * - * @param vector Vector to read from. - */ - public UnionMapReader(MapVector vector) { - super(vector); - } - - /** - * Set the key, value field names to read. - * - * @param key Field name for key. - * @param value Field name for value. - */ - public void setKeyValueNames(String key, String value) { - keyName = key; - valueName = value; - } - - /** - * Start reading a key from the map entry. - * - * @return reader that can be used to read the key. - */ - public FieldReader key() { - return reader().reader(keyName); - } - - /** - * Start reading a value element from the map entry. - * - * @return reader that can be used to read the value. - */ - public FieldReader value() { - return reader().reader(valueName); - } - - /** Return the MinorType of the reader as MAP. */ - @Override - public MinorType getMinorType() { - return MinorType.MAP; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java deleted file mode 100644 index a42d9404f4b61..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex.reader; - -import org.apache.arrow.vector.complex.reader.BaseReader.ListReader; -import org.apache.arrow.vector.complex.reader.BaseReader.MapReader; -import org.apache.arrow.vector.complex.reader.BaseReader.RepeatedListReader; -import org.apache.arrow.vector.complex.reader.BaseReader.RepeatedMapReader; -import org.apache.arrow.vector.complex.reader.BaseReader.RepeatedStructReader; -import org.apache.arrow.vector.complex.reader.BaseReader.ScalarReader; -import org.apache.arrow.vector.complex.reader.BaseReader.StructReader; - -/** - * Composite of all Reader types (e.g. {@link StructReader}, {@link ScalarReader}, etc). Each reader - * type is in essence a way of iterating over a {@link org.apache.arrow.vector.ValueVector}. - */ -public interface FieldReader - extends StructReader, - ListReader, - MapReader, - ScalarReader, - RepeatedStructReader, - RepeatedListReader, - RepeatedMapReader {} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java deleted file mode 100644 index 949eb35d8eb0b..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex.writer; - -import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.ScalarWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; - -/** - * Composite of all writer types. Writers are convenience classes for incrementally adding values to - * {@linkplain org.apache.arrow.vector.ValueVector}s. - */ -public interface FieldWriter extends StructWriter, ListWriter, MapWriter, ScalarWriter { - void allocate(); - - void clear(); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compression/AbstractCompressionCodec.java b/java/vector/src/main/java/org/apache/arrow/vector/compression/AbstractCompressionCodec.java deleted file mode 100644 index 58d9e4db9bb9d..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/compression/AbstractCompressionCodec.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.compression; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.MemoryUtil; -import org.apache.arrow.util.Preconditions; - -/** - * The base class for concrete compression codecs, providing common logic for all compression - * codecs. - */ -public abstract class AbstractCompressionCodec implements CompressionCodec { - - @Override - public ArrowBuf compress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) { - if (uncompressedBuffer.writerIndex() == 0L) { - // shortcut for empty buffer - ArrowBuf compressedBuffer = allocator.buffer(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH); - compressedBuffer.setLong(0, 0); - compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH); - uncompressedBuffer.close(); - return compressedBuffer; - } - - ArrowBuf compressedBuffer = doCompress(allocator, uncompressedBuffer); - long compressedLength = - compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH; - long uncompressedLength = uncompressedBuffer.writerIndex(); - - if (compressedLength > uncompressedLength) { - // compressed buffer is larger, send the raw buffer - compressedBuffer.close(); - // XXX: this makes a copy of uncompressedBuffer - compressedBuffer = CompressionUtil.packageRawBuffer(allocator, uncompressedBuffer); - } else { - writeUncompressedLength(compressedBuffer, uncompressedLength); - } - - uncompressedBuffer.close(); - return compressedBuffer; - } - - @Override - public ArrowBuf 
decompress(BufferAllocator allocator, ArrowBuf compressedBuffer) { - Preconditions.checkArgument( - compressedBuffer.writerIndex() >= CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, - "Not enough data to decompress."); - - long decompressedLength = readUncompressedLength(compressedBuffer); - - if (decompressedLength == 0L) { - // shortcut for empty buffer - compressedBuffer.close(); - return allocator.getEmpty(); - } - - if (decompressedLength == CompressionUtil.NO_COMPRESSION_LENGTH) { - // no compression - return CompressionUtil.extractUncompressedBuffer(compressedBuffer); - } - - ArrowBuf decompressedBuffer = doDecompress(allocator, compressedBuffer); - compressedBuffer.close(); - return decompressedBuffer; - } - - protected void writeUncompressedLength(ArrowBuf compressedBuffer, long uncompressedLength) { - if (!MemoryUtil.LITTLE_ENDIAN) { - uncompressedLength = Long.reverseBytes(uncompressedLength); - } - // first 8 bytes reserved for uncompressed length, according to the specification - compressedBuffer.setLong(0, uncompressedLength); - } - - protected long readUncompressedLength(ArrowBuf compressedBuffer) { - long decompressedLength = compressedBuffer.getLong(0); - if (!MemoryUtil.LITTLE_ENDIAN) { - decompressedLength = Long.reverseBytes(decompressedLength); - } - return decompressedLength; - } - - /** - * The method that actually performs the data compression. The layout of the returned compressed - * buffer is the compressed data, plus 8 bytes reserved at the beginning of the buffer for the - * uncompressed data size. - * - *

    Please note that this method is not responsible for releasing the uncompressed buffer. - */ - protected abstract ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBuffer); - - /** - * The method that actually performs the data decompression. The layout of the compressed buffer - * is the compressed data, plus 8 bytes at the beginning of the buffer storing the uncompressed - * data size. - * - *

    Please note that this method is not responsible for releasing the compressed buffer. - */ - protected abstract ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBuffer); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionCodec.java b/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionCodec.java deleted file mode 100644 index dd62108a84a6b..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionCodec.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.compression; - -import java.util.EnumMap; -import java.util.Map; -import java.util.ServiceLoader; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; - -/** The codec for compression/decompression. */ -public interface CompressionCodec { - - /** - * Compress a buffer. - * - * @param allocator the allocator for allocating memory for compressed buffer. - * @param uncompressedBuffer the buffer to compress. Implementation of this method should take - * care of releasing this buffer. 
- * @return the compressed buffer - */ - ArrowBuf compress(BufferAllocator allocator, ArrowBuf uncompressedBuffer); - - /** - * Decompress a buffer. - * - * @param allocator the allocator for allocating memory for decompressed buffer. - * @param compressedBuffer the buffer to be decompressed. Implementation of this method should - * take care of releasing this buffer. - * @return the decompressed buffer. - */ - ArrowBuf decompress(BufferAllocator allocator, ArrowBuf compressedBuffer); - - /** - * Gets the type of the codec. - * - * @return the type of the codec. - */ - CompressionUtil.CodecType getCodecType(); - - /** Factory to create compression codec. */ - interface Factory { - /** - * This combines all the available factories registered as service providers in the module path. - * For each {@link CompressionUtil.CodecType compression codec type}, it will use whatever - * factory supports it, i.e. doesn't throw on `createCodec(type)`. If multiple factories - * registered as service providers support the same codec type, the first one encountered while - * iterating over the {@link ServiceLoader} will be selected. A codec type that is not supported - * by any registered service provider will fall back to {@link - * NoCompressionCodec.Factory#INSTANCE} for backwards compatibility. - */ - Factory INSTANCE = bestEffort(); - - /** Creates the codec based on the codec type. */ - CompressionCodec createCodec(CompressionUtil.CodecType codecType); - - /** Creates the codec based on the codec type and compression level. 
*/ - CompressionCodec createCodec(CompressionUtil.CodecType codecType, int compressionLevel); - - private static Factory bestEffort() { - final ServiceLoader serviceLoader = ServiceLoader.load(Factory.class); - final Map factories = - new EnumMap<>(CompressionUtil.CodecType.class); - for (Factory factory : serviceLoader) { - for (CompressionUtil.CodecType codecType : CompressionUtil.CodecType.values()) { - try { - factory.createCodec(codecType); // will throw if not supported - factories.putIfAbsent(codecType, factory); - } catch (Throwable ignored) { - } - } - } - - final Factory fallback = NoCompressionCodec.Factory.INSTANCE; - return new Factory() { - @Override - public CompressionCodec createCodec(CompressionUtil.CodecType codecType) { - return factories.getOrDefault(codecType, fallback).createCodec(codecType); - } - - @Override - public CompressionCodec createCodec( - CompressionUtil.CodecType codecType, int compressionLevel) { - return factories - .getOrDefault(codecType, fallback) - .createCodec(codecType, compressionLevel); - } - }; - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionUtil.java b/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionUtil.java deleted file mode 100644 index 03763611e0729..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionUtil.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.compression; - -import org.apache.arrow.flatbuf.BodyCompressionMethod; -import org.apache.arrow.flatbuf.CompressionType; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.ipc.message.ArrowBodyCompression; - -/** Utilities for data compression/decompression. */ -public class CompressionUtil { - - /** - * Compression codec types corresponding to flat buffer implementation in {@link CompressionType}. - */ - public enum CodecType { - NO_COMPRESSION(NoCompressionCodec.COMPRESSION_TYPE), - - LZ4_FRAME(org.apache.arrow.flatbuf.CompressionType.LZ4_FRAME), - - ZSTD(org.apache.arrow.flatbuf.CompressionType.ZSTD); - - private final byte type; - - CodecType(byte type) { - this.type = type; - } - - public byte getType() { - return type; - } - - /** Gets the codec type from the compression type defined in {@link CompressionType}. */ - public static CodecType fromCompressionType(byte type) { - for (CodecType codecType : values()) { - if (codecType.type == type) { - return codecType; - } - } - return NO_COMPRESSION; - } - } - - public static final long SIZE_OF_UNCOMPRESSED_LENGTH = 8L; - - /** - * Special flag to indicate no compression. (e.g. when the compressed buffer has a larger size.) - */ - public static final long NO_COMPRESSION_LENGTH = -1L; - - private CompressionUtil() {} - - /** - * Creates the {@link ArrowBodyCompression} object, given the {@link CompressionCodec}. 
The - * implementation of this method should depend on the values of {@link - * org.apache.arrow.flatbuf.CompressionType#names}. - */ - public static ArrowBodyCompression createBodyCompression(CompressionCodec codec) { - return new ArrowBodyCompression(codec.getCodecType().getType(), BodyCompressionMethod.BUFFER); - } - - /** Process compression by compressing the buffer as is. */ - public static ArrowBuf packageRawBuffer(BufferAllocator allocator, ArrowBuf inputBuffer) { - ArrowBuf compressedBuffer = - allocator.buffer(SIZE_OF_UNCOMPRESSED_LENGTH + inputBuffer.writerIndex()); - compressedBuffer.setLong(0, NO_COMPRESSION_LENGTH); - compressedBuffer.setBytes( - SIZE_OF_UNCOMPRESSED_LENGTH, inputBuffer, 0, inputBuffer.writerIndex()); - compressedBuffer.writerIndex(SIZE_OF_UNCOMPRESSED_LENGTH + inputBuffer.writerIndex()); - return compressedBuffer; - } - - /** Process decompression by slicing the buffer that contains the uncompressed bytes. */ - public static ArrowBuf extractUncompressedBuffer(ArrowBuf inputBuffer) { - return inputBuffer.slice( - SIZE_OF_UNCOMPRESSED_LENGTH, inputBuffer.writerIndex() - SIZE_OF_UNCOMPRESSED_LENGTH); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compression/NoCompressionCodec.java b/java/vector/src/main/java/org/apache/arrow/vector/compression/NoCompressionCodec.java deleted file mode 100644 index 4debce335aa6f..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/compression/NoCompressionCodec.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.compression; - -import org.apache.arrow.flatbuf.BodyCompressionMethod; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.ipc.message.ArrowBodyCompression; - -/** The default compression codec that does no compression. */ -public class NoCompressionCodec implements CompressionCodec { - - public static final NoCompressionCodec INSTANCE = new NoCompressionCodec(); - - public static final byte COMPRESSION_TYPE = -1; - - public static final ArrowBodyCompression DEFAULT_BODY_COMPRESSION = - new ArrowBodyCompression(COMPRESSION_TYPE, BodyCompressionMethod.BUFFER); - - private NoCompressionCodec() {} - - @Override - public ArrowBuf compress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) { - return uncompressedBuffer; - } - - @Override - public ArrowBuf decompress(BufferAllocator allocator, ArrowBuf compressedBuffer) { - return compressedBuffer; - } - - @Override - public CompressionUtil.CodecType getCodecType() { - return CompressionUtil.CodecType.NO_COMPRESSION; - } - - /** The default factory that creates a {@link NoCompressionCodec}. 
*/ - public static class Factory implements CompressionCodec.Factory { - - public static final NoCompressionCodec.Factory INSTANCE = new NoCompressionCodec.Factory(); - - @Override - public CompressionCodec createCodec(CompressionUtil.CodecType codecType) { - switch (codecType) { - case NO_COMPRESSION: - return NoCompressionCodec.INSTANCE; - case LZ4_FRAME: - case ZSTD: - throw new IllegalArgumentException( - "Please add arrow-compression module to use CommonsCompressionFactory for " - + codecType); - default: - throw new IllegalArgumentException("Unsupported codec type: " + codecType); - } - } - - @Override - public CompressionCodec createCodec(CompressionUtil.CodecType codecType, int compressionLevel) { - return createCodec(codecType); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java deleted file mode 100644 index 4a6ea9613ba1b..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.dictionary; - -import java.util.Objects; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.compare.VectorEqualsVisitor; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; - -/** - * A dictionary (integer to Value mapping) that is used to facilitate dictionary encoding - * compression. - */ -public class Dictionary { - - private final DictionaryEncoding encoding; - private final FieldVector dictionary; - - public Dictionary(FieldVector dictionary, DictionaryEncoding encoding) { - this.dictionary = dictionary; - this.encoding = encoding; - } - - public FieldVector getVector() { - return dictionary; - } - - public DictionaryEncoding getEncoding() { - return encoding; - } - - public ArrowType getVectorType() { - return dictionary.getField().getType(); - } - - @Override - public String toString() { - return "Dictionary " + encoding + " " + dictionary; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof Dictionary)) { - return false; - } - Dictionary that = (Dictionary) o; - return Objects.equals(encoding, that.encoding) - && new VectorEqualsVisitor().vectorEquals(that.dictionary, dictionary); - } - - @Override - public int hashCode() { - return Objects.hash(encoding, dictionary); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java deleted file mode 100644 index 4af1a8693f9ec..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.dictionary; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.memory.util.hash.SimpleHasher; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * Encoder/decoder for Dictionary encoded {@link ValueVector}. Dictionary encoding produces an - * integer {@link ValueVector}. Each entry in the Vector is index into the dictionary which can hold - * values of any type. - */ -public class DictionaryEncoder { - - private final DictionaryHashTable hashTable; - private final Dictionary dictionary; - private final BufferAllocator allocator; - - /** Construct an instance. */ - public DictionaryEncoder(Dictionary dictionary, BufferAllocator allocator) { - this(dictionary, allocator, SimpleHasher.INSTANCE); - } - - /** Construct an instance. 
*/ - public DictionaryEncoder( - Dictionary dictionary, BufferAllocator allocator, ArrowBufHasher hasher) { - this.dictionary = dictionary; - this.allocator = allocator; - hashTable = new DictionaryHashTable(dictionary.getVector(), hasher); - } - - /** - * Dictionary encodes a vector with a provided dictionary. The dictionary must contain all values - * in the vector. - * - * @param vector vector to encode - * @param dictionary dictionary used for encoding - * @return dictionary encoded vector - */ - public static ValueVector encode(ValueVector vector, Dictionary dictionary) { - DictionaryEncoder encoder = new DictionaryEncoder(dictionary, vector.getAllocator()); - return encoder.encode(vector); - } - - /** - * Decodes a dictionary encoded array using the provided dictionary. - * - * @param indices dictionary encoded values, must be int type - * @param dictionary dictionary used to decode the values - * @return vector with values restored from dictionary - */ - public static ValueVector decode(ValueVector indices, Dictionary dictionary) { - return decode(indices, dictionary, indices.getAllocator()); - } - - /** - * Decodes a dictionary encoded array using the provided dictionary. 
- * - * @param indices dictionary encoded values, must be int type - * @param dictionary dictionary used to decode the values - * @param allocator allocator the decoded values use - * @return vector with values restored from dictionary - */ - public static ValueVector decode( - ValueVector indices, Dictionary dictionary, BufferAllocator allocator) { - int count = indices.getValueCount(); - ValueVector dictionaryVector = dictionary.getVector(); - int dictionaryCount = dictionaryVector.getValueCount(); - // copy the dictionary values into the decoded vector - TransferPair transfer = dictionaryVector.getTransferPair(allocator); - transfer.getTo().allocateNewSafe(); - try { - BaseIntVector baseIntVector = (BaseIntVector) indices; - retrieveIndexVector(baseIntVector, transfer, dictionaryCount, 0, count); - ValueVector decoded = transfer.getTo(); - decoded.setValueCount(count); - return decoded; - } catch (Exception e) { - AutoCloseables.close(e, transfer.getTo()); - throw e; - } - } - - /** - * Get the indexType according to the dictionary vector valueCount. - * - * @param valueCount dictionary vector valueCount. - * @return index type. - */ - @SuppressWarnings("ComparisonOutOfRange") - public static ArrowType.Int getIndexType(int valueCount) { - Preconditions.checkArgument(valueCount >= 0); - if (valueCount <= Byte.MAX_VALUE) { - return new ArrowType.Int(8, true); - } else if (valueCount <= Character.MAX_VALUE) { - return new ArrowType.Int(16, true); - } else if (valueCount <= Integer.MAX_VALUE) { // this comparison will always evaluate to true - return new ArrowType.Int(32, true); - } else { - return new ArrowType.Int(64, true); - } - } - - /** - * Populates indices between start and end with the encoded values of vector. 
- * - * @param vector the vector to encode - * @param indices the index vector - * @param encoding the hash table for encoding - * @param start the start index - * @param end the end index - */ - static void buildIndexVector( - ValueVector vector, BaseIntVector indices, DictionaryHashTable encoding, int start, int end) { - - for (int i = start; i < end; i++) { - if (!vector.isNull(i)) { - // if it's null leave it null - // note: this may fail if value was not included in the dictionary - int encoded = encoding.getIndex(i, vector); - if (encoded == -1) { - throw new IllegalArgumentException( - "Dictionary encoding not defined for value:" + vector.getObject(i)); - } - indices.setWithPossibleTruncate(i, encoded); - } - } - } - - /** - * Retrieve values to target vector from index vector. - * - * @param indices the index vector - * @param transfer the {@link TransferPair} to copy dictionary data into target vector. - * @param dictionaryCount the value count of dictionary vector. - * @param start the start index - * @param end the end index - */ - static void retrieveIndexVector( - BaseIntVector indices, TransferPair transfer, int dictionaryCount, int start, int end) { - for (int i = start; i < end; i++) { - if (!indices.isNull(i)) { - int indexAsInt = (int) indices.getValueAsLong(i); - if (indexAsInt > dictionaryCount) { - throw new IllegalArgumentException( - "Provided dictionary does not contain value for index " + indexAsInt); - } - transfer.copyValueSafe(indexAsInt, i); - } - } - } - - /** Encodes a vector with the built hash table in this encoder. 
*/ - public ValueVector encode(ValueVector vector) { - - Field valueField = vector.getField(); - FieldType indexFieldType = - new FieldType( - valueField.isNullable(), - dictionary.getEncoding().getIndexType(), - dictionary.getEncoding(), - valueField.getMetadata()); - Field indexField = new Field(valueField.getName(), indexFieldType, null); - - // vector to hold our indices (dictionary encoded values) - FieldVector createdVector = indexField.createVector(allocator); - if (!(createdVector instanceof BaseIntVector)) { - throw new IllegalArgumentException( - "Dictionary encoding does not have a valid int type:" + createdVector.getClass()); - } - - BaseIntVector indices = (BaseIntVector) createdVector; - indices.allocateNew(); - try { - buildIndexVector(vector, indices, hashTable, 0, vector.getValueCount()); - indices.setValueCount(vector.getValueCount()); - return indices; - } catch (Exception e) { - AutoCloseables.close(e, indices); - throw e; - } - } - - /** - * Decodes a vector with the dictionary in this encoder. - * - *

    {@link DictionaryEncoder#decode(ValueVector, Dictionary, BufferAllocator)} should be used - * instead if only decoding is required as it can avoid building the {@link DictionaryHashTable} - * which only makes sense when encoding. - */ - public ValueVector decode(ValueVector indices) { - return decode(indices, dictionary, allocator); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java deleted file mode 100644 index 57faf51845c4e..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java +++ /dev/null @@ -1,260 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.dictionary; - -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.memory.util.hash.SimpleHasher; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.compare.Range; -import org.apache.arrow.vector.compare.RangeEqualsVisitor; - -/** - * HashTable used for Dictionary encoding. 
It holds two vectors (the vector to encode and dictionary - * vector) It stores the index in dictionary vector and for a given index in encode vector, it could - * return dictionary index. - */ -public class DictionaryHashTable { - - /** Represents a null value in map. */ - static final int NULL_VALUE = -1; - - /** The default initial capacity - MUST be a power of two. */ - static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; - - /** - * The maximum capacity, used if a higher value is implicitly specified by either of the - * constructors with arguments. - */ - static final int MAXIMUM_CAPACITY = 1 << 30; - - /** The load factor used when none specified in constructor. */ - static final float DEFAULT_LOAD_FACTOR = 0.75f; - - static final DictionaryHashTable.Entry[] EMPTY_TABLE = {}; - - /** - * The table, initialized on first use, and resized as necessary. When allocated, length is always - * a power of two. - */ - transient DictionaryHashTable.Entry[] table = EMPTY_TABLE; - - /** The number of key-value mappings contained in this map. */ - transient int size; - - /** The next size value at which to resize (capacity * load factor). */ - int threshold; - - /** The load factor for the hash table. */ - final float loadFactor; - - private final ValueVector dictionary; - - private final ArrowBufHasher hasher; - - /** Constructs an empty map with the specified initial capacity and load factor. 
*/ - public DictionaryHashTable(int initialCapacity, ValueVector dictionary, ArrowBufHasher hasher) { - if (initialCapacity < 0) { - throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity); - } - if (initialCapacity > MAXIMUM_CAPACITY) { - initialCapacity = MAXIMUM_CAPACITY; - } - this.loadFactor = DEFAULT_LOAD_FACTOR; - this.threshold = initialCapacity; - - this.dictionary = dictionary; - - this.hasher = hasher; - - // build hash table - for (int i = 0; i < this.dictionary.getValueCount(); i++) { - put(i); - } - } - - public DictionaryHashTable(ValueVector dictionary, ArrowBufHasher hasher) { - this(DEFAULT_INITIAL_CAPACITY, dictionary, hasher); - } - - public DictionaryHashTable(ValueVector dictionary) { - this(dictionary, SimpleHasher.INSTANCE); - } - - /** Compute the capacity with given threshold and create init table. */ - private void inflateTable(int threshold) { - int capacity = roundUpToPowerOf2(threshold); - this.threshold = (int) Math.min(capacity * loadFactor, MAXIMUM_CAPACITY + 1); - table = new DictionaryHashTable.Entry[capacity]; - } - - /** Computes the storage location in an array for the given hashCode. */ - static int indexFor(int h, int length) { - return h & (length - 1); - } - - /** Returns a power of two size for the given size. */ - static final int roundUpToPowerOf2(int size) { - int n = size - 1; - n |= n >>> 1; - n |= n >>> 2; - n |= n >>> 4; - n |= n >>> 8; - n |= n >>> 16; - return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1; - } - - /** - * get the corresponding dictionary index with the given index in vector which to encode. - * - * @param indexInArray index in vector. - * @return dictionary vector index or -1 if no value equals. 
- */ - public int getIndex(int indexInArray, ValueVector toEncode) { - int hash = toEncode.hashCode(indexInArray, this.hasher); - int index = indexFor(hash, table.length); - - RangeEqualsVisitor equalVisitor = new RangeEqualsVisitor(dictionary, toEncode, null); - Range range = new Range(0, 0, 1); - - for (DictionaryHashTable.Entry e = table[index]; e != null; e = e.next) { - if (e.hash == hash) { - int dictIndex = e.index; - - range = range.setRightStart(indexInArray).setLeftStart(dictIndex); - if (equalVisitor.rangeEquals(range)) { - return dictIndex; - } - } - } - return NULL_VALUE; - } - - /** put the index of dictionary vector to build hash table. */ - private void put(int indexInDictionary) { - if (table == EMPTY_TABLE) { - inflateTable(threshold); - } - - int hash = dictionary.hashCode(indexInDictionary, this.hasher); - int i = indexFor(hash, table.length); - for (DictionaryHashTable.Entry e = table[i]; e != null; e = e.next) { - if (e.hash == hash && e.index == indexInDictionary) { - // already has this index, return - return; - } - } - - addEntry(hash, indexInDictionary, i); - } - - /** Create a new Entry at the specific position of table. */ - void createEntry(int hash, int index, int bucketIndex) { - DictionaryHashTable.Entry e = table[bucketIndex]; - table[bucketIndex] = new DictionaryHashTable.Entry(hash, index, e); - size++; - } - - /** Add Entry at the specified location of the table. */ - void addEntry(int hash, int index, int bucketIndex) { - if ((size >= threshold) && (null != table[bucketIndex])) { - resize(2 * table.length); - bucketIndex = indexFor(hash, table.length); - } - - createEntry(hash, index, bucketIndex); - } - - /** Resize table with given new capacity. 
*/ - void resize(int newCapacity) { - DictionaryHashTable.Entry[] oldTable = table; - int oldCapacity = oldTable.length; - if (oldCapacity == MAXIMUM_CAPACITY) { - threshold = Integer.MAX_VALUE; - return; - } - - DictionaryHashTable.Entry[] newTable = new DictionaryHashTable.Entry[newCapacity]; - transfer(newTable); - table = newTable; - threshold = (int) Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1); - } - - /** - * Transfer entries into new table from old table. - * - * @param newTable new table - */ - void transfer(DictionaryHashTable.Entry[] newTable) { - int newCapacity = newTable.length; - for (DictionaryHashTable.Entry e : table) { - while (null != e) { - DictionaryHashTable.Entry next = e.next; - int i = indexFor(e.hash, newCapacity); - e.next = newTable[i]; - newTable[i] = e; - e = next; - } - } - } - - /** Returns the number of mappings in this Map. */ - public int size() { - return size; - } - - /** Removes all elements from this map, leaving it empty. */ - public void clear() { - size = 0; - for (int i = 0; i < table.length; i++) { - table[i] = null; - } - } - - /** Class to keep dictionary index data within hash table. 
*/ - static class Entry { - // dictionary index - int index; - DictionaryHashTable.Entry next; - int hash; - - Entry(int hash, int index, DictionaryHashTable.Entry next) { - this.index = index; - this.hash = hash; - this.next = next; - } - - public final int getIndex() { - return this.index; - } - - @Override - public int hashCode() { - return hash; - } - - public final boolean equals(Object o) { - if (!(o instanceof DictionaryHashTable.Entry)) { - return false; - } - DictionaryHashTable.Entry e = (DictionaryHashTable.Entry) o; - if (index == e.getIndex()) { - return true; - } - return false; - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryProvider.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryProvider.java deleted file mode 100644 index 85ad9b4583e1b..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryProvider.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.dictionary; - -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.VisibleForTesting; - -/** A manager for association of dictionary IDs to their corresponding {@link Dictionary}. */ -public interface DictionaryProvider { - - /** Return the dictionary for the given ID. */ - Dictionary lookup(long id); - - /** Get all dictionary IDs. */ - Set getDictionaryIds(); - - /** Implementation of {@link DictionaryProvider} that is backed by a hash-map. */ - class MapDictionaryProvider implements AutoCloseable, DictionaryProvider { - - private final Map map; - - /** Constructs a new instance from the given dictionaries. */ - public MapDictionaryProvider(Dictionary... dictionaries) { - this.map = new HashMap<>(); - for (Dictionary dictionary : dictionaries) { - put(dictionary); - } - } - - /** - * Initialize the map structure from another provider, but with empty vectors. - * - * @param other the {@link DictionaryProvider} to copy the ids and fields from - * @param allocator allocator to create the empty vectors - */ - // This is currently called using JPype by the integration tests. 
- @VisibleForTesting - public void copyStructureFrom(DictionaryProvider other, BufferAllocator allocator) { - for (Long id : other.getDictionaryIds()) { - Dictionary otherDict = other.lookup(id); - Dictionary newDict = - new Dictionary( - otherDict.getVector().getField().createVector(allocator), otherDict.getEncoding()); - put(newDict); - } - } - - public void put(Dictionary dictionary) { - map.put(dictionary.getEncoding().getId(), dictionary); - } - - @Override - public final Set getDictionaryIds() { - return map.keySet(); - } - - @Override - public Dictionary lookup(long id) { - return map.get(id); - } - - @Override - public void close() { - for (Dictionary dictionary : map.values()) { - dictionary.getVector().close(); - } - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ListSubfieldEncoder.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ListSubfieldEncoder.java deleted file mode 100644 index 62b9628967251..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ListSubfieldEncoder.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.dictionary; - -import java.util.Collections; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.memory.util.hash.SimpleHasher; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.complex.BaseListVector; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** Sub fields encoder/decoder for Dictionary encoded {@link BaseListVector}. */ -public class ListSubfieldEncoder { - - private final DictionaryHashTable hashTable; - private final Dictionary dictionary; - private final BufferAllocator allocator; - - public ListSubfieldEncoder(Dictionary dictionary, BufferAllocator allocator) { - this(dictionary, allocator, SimpleHasher.INSTANCE); - } - - /** Construct an instance. 
*/ - public ListSubfieldEncoder( - Dictionary dictionary, BufferAllocator allocator, ArrowBufHasher hasher) { - this.dictionary = dictionary; - this.allocator = allocator; - BaseListVector dictVector = (BaseListVector) dictionary.getVector(); - hashTable = new DictionaryHashTable(getDataVector(dictVector), hasher); - } - - private static FieldVector getDataVector(BaseListVector vector) { - return vector.getChildrenFromFields().get(0); - } - - private static BaseListVector cloneVector(BaseListVector vector, BufferAllocator allocator) { - - final FieldType fieldType = vector.getField().getFieldType(); - BaseListVector cloned = - (BaseListVector) - fieldType.createNewSingleVector( - vector.getField().getName(), allocator, /*schemaCallBack=*/ null); - - final ArrowFieldNode fieldNode = - new ArrowFieldNode(vector.getValueCount(), vector.getNullCount()); - cloned.loadFieldBuffers(fieldNode, vector.getFieldBuffers()); - - return cloned; - } - - /** - * Dictionary encodes subfields for complex vector with a provided dictionary. The dictionary must - * contain all values in the sub fields vector. 
- * - * @param vector vector to encode - * @return dictionary encoded vector - */ - public BaseListVector encodeListSubField(BaseListVector vector) { - final int valueCount = vector.getValueCount(); - - FieldType indexFieldType = - new FieldType( - vector.getField().isNullable(), - dictionary.getEncoding().getIndexType(), - dictionary.getEncoding(), - vector.getField().getMetadata()); - Field valueField = new Field(vector.getField().getName(), indexFieldType, null); - - // clone list vector and initialize data vector - BaseListVector encoded = cloneVector(vector, allocator); - try { - encoded.initializeChildrenFromFields(Collections.singletonList(valueField)); - BaseIntVector indices = (BaseIntVector) getDataVector(encoded); - - ValueVector dataVector = getDataVector(vector); - for (int i = 0; i < valueCount; i++) { - if (!vector.isNull(i)) { - int start = vector.getElementStartIndex(i); - int end = vector.getElementEndIndex(i); - - DictionaryEncoder.buildIndexVector(dataVector, indices, hashTable, start, end); - } - } - - return encoded; - } catch (Exception e) { - AutoCloseables.close(e, encoded); - throw e; - } - } - - /** - * Decodes a dictionary subfields encoded vector using the provided dictionary. - * - *

    {@link ListSubfieldEncoder#decodeListSubField(BaseListVector, Dictionary, BufferAllocator)} - * should be used instead if only decoding is required as it can avoid building the {@link - * DictionaryHashTable} which only makes sense when encoding. - * - * @param vector dictionary encoded vector, its data vector must be int type - * @return vector with values restored from dictionary - */ - public BaseListVector decodeListSubField(BaseListVector vector) { - return decodeListSubField(vector, dictionary, allocator); - } - - /** - * Decodes a dictionary subfields encoded vector using the provided dictionary. - * - * @param vector dictionary encoded vector, its data vector must be int type - * @param dictionary dictionary used to decode the values - * @param allocator allocator the decoded values use - * @return vector with values restored from dictionary - */ - public static BaseListVector decodeListSubField( - BaseListVector vector, Dictionary dictionary, BufferAllocator allocator) { - int valueCount = vector.getValueCount(); - BaseListVector dictionaryVector = (BaseListVector) dictionary.getVector(); - int dictionaryValueCount = getDataVector(dictionaryVector).getValueCount(); - - // clone list vector and initialize data vector - BaseListVector decoded = cloneVector(vector, allocator); - try { - Field dataVectorField = getDataVector(dictionaryVector).getField(); - decoded.initializeChildrenFromFields(Collections.singletonList(dataVectorField)); - - // get data vector - ValueVector dataVector = getDataVector(decoded); - - TransferPair transfer = getDataVector(dictionaryVector).makeTransferPair(dataVector); - BaseIntVector indices = (BaseIntVector) getDataVector(vector); - - for (int i = 0; i < valueCount; i++) { - - if (!vector.isNull(i)) { - int start = vector.getElementStartIndex(i); - int end = vector.getElementEndIndex(i); - - DictionaryEncoder.retrieveIndexVector( - indices, transfer, dictionaryValueCount, start, end); - } - } - return decoded; - } catch 
(Exception e) { - AutoCloseables.close(e, decoded); - throw e; - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/StructSubfieldEncoder.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/StructSubfieldEncoder.java deleted file mode 100644 index dc25bc32685dd..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/StructSubfieldEncoder.java +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.dictionary; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.memory.util.hash.SimpleHasher; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; - -/** - * Sub fields encoder/decoder for Dictionary encoded {@link StructVector}. Notes that child vectors - * within struct vector can either be dictionary encodable or not. - */ -public class StructSubfieldEncoder { - - private final BufferAllocator allocator; - - private final DictionaryProvider.MapDictionaryProvider provider; - private final Map dictionaryIdToHashTable; - - /** Construct an instance. */ - public StructSubfieldEncoder( - BufferAllocator allocator, DictionaryProvider.MapDictionaryProvider provider) { - this(allocator, provider, SimpleHasher.INSTANCE); - } - - /** Construct an instance. 
*/ - public StructSubfieldEncoder( - BufferAllocator allocator, - DictionaryProvider.MapDictionaryProvider provider, - ArrowBufHasher hasher) { - - this.allocator = allocator; - this.provider = provider; - - this.dictionaryIdToHashTable = new HashMap<>(); - - provider - .getDictionaryIds() - .forEach( - id -> - dictionaryIdToHashTable.put( - id, new DictionaryHashTable(provider.lookup(id).getVector(), hasher))); - } - - private static FieldVector getChildVector(StructVector vector, int index) { - return vector.getChildrenFromFields().get(index); - } - - private static StructVector cloneVector(StructVector vector, BufferAllocator allocator) { - - final FieldType fieldType = vector.getField().getFieldType(); - StructVector cloned = - (StructVector) - fieldType.createNewSingleVector( - vector.getField().getName(), allocator, /*schemaCallback=*/ null); - - final ArrowFieldNode fieldNode = - new ArrowFieldNode(vector.getValueCount(), vector.getNullCount()); - cloned.loadFieldBuffers(fieldNode, vector.getFieldBuffers()); - - return cloned; - } - - /** - * Dictionary encodes subfields for complex vector with a provided dictionary. The dictionary must - * contain all values in the sub fields vector. - * - * @param vector vector to encode - * @param columnToDictionaryId the mappings between child vector index and dictionary id. A null - * dictionary id indicates the child vector is not encodable. - * @return dictionary encoded vector - */ - public StructVector encode(StructVector vector, Map columnToDictionaryId) { - final int valueCount = vector.getValueCount(); - final int childCount = vector.getChildrenFromFields().size(); - - List childrenFields = new ArrayList<>(); - - // initialize child fields. - for (int i = 0; i < childCount; i++) { - FieldVector childVector = getChildVector(vector, i); - Long dictionaryId = columnToDictionaryId.get(i); - // A null dictionaryId indicates the child vector shouldn't be encoded. 
- if (dictionaryId == null) { - childrenFields.add(childVector.getField()); - } else { - Dictionary dictionary = provider.lookup(dictionaryId); - Preconditions.checkNotNull(dictionary, "Dictionary not found with id:" + dictionaryId); - FieldType indexFieldType = - new FieldType( - childVector.getField().isNullable(), - dictionary.getEncoding().getIndexType(), - dictionary.getEncoding()); - childrenFields.add( - new Field(childVector.getField().getName(), indexFieldType, /*children=*/ null)); - } - } - - // clone list vector and initialize data vector - StructVector encoded = cloneVector(vector, allocator); - try { - encoded.initializeChildrenFromFields(childrenFields); - encoded.setValueCount(valueCount); - - for (int index = 0; index < childCount; index++) { - FieldVector childVector = getChildVector(vector, index); - FieldVector encodedChildVector = getChildVector(encoded, index); - Long dictionaryId = columnToDictionaryId.get(index); - if (dictionaryId != null) { - BaseIntVector indices = (BaseIntVector) encodedChildVector; - DictionaryEncoder.buildIndexVector( - childVector, indices, dictionaryIdToHashTable.get(dictionaryId), 0, valueCount); - } else { - childVector.makeTransferPair(encodedChildVector).splitAndTransfer(0, valueCount); - } - } - - return encoded; - } catch (Exception e) { - AutoCloseables.close(e, encoded); - throw e; - } - } - - /** - * Decodes a dictionary subfields encoded vector using the provided dictionary. - * - *

    {@link StructSubfieldEncoder#decode(StructVector, DictionaryProvider.MapDictionaryProvider, - * BufferAllocator)} should be used instead if only decoding is required as it can avoid building - * the {@link DictionaryHashTable} which only makes sense when encoding. - * - * @param vector dictionary encoded vector, its child vector must be int type - * @return vector with values restored from dictionary - */ - public StructVector decode(StructVector vector) { - return decode(vector, provider, allocator); - } - - /** - * Decodes a dictionary subfields encoded vector using the provided dictionary. - * - * @param vector dictionary encoded vector, its data vector must be int type - * @param provider dictionary provider used to decode the values - * @param allocator allocator the decoded values use - * @return vector with values restored from dictionary - */ - public static StructVector decode( - StructVector vector, - DictionaryProvider.MapDictionaryProvider provider, - BufferAllocator allocator) { - final int valueCount = vector.getValueCount(); - final int childCount = vector.getChildrenFromFields().size(); - - // clone list vector and initialize child vectors - StructVector decoded = cloneVector(vector, allocator); - try { - List childFields = new ArrayList<>(); - for (int i = 0; i < childCount; i++) { - FieldVector childVector = getChildVector(vector, i); - Dictionary dictionary = getChildVectorDictionary(childVector, provider); - // childVector is not encoded. 
- if (dictionary == null) { - childFields.add(childVector.getField()); - } else { - childFields.add(dictionary.getVector().getField()); - } - } - decoded.initializeChildrenFromFields(childFields); - decoded.setValueCount(valueCount); - - for (int index = 0; index < childCount; index++) { - // get child vector - FieldVector childVector = getChildVector(vector, index); - FieldVector decodedChildVector = getChildVector(decoded, index); - Dictionary dictionary = getChildVectorDictionary(childVector, provider); - if (dictionary == null) { - childVector.makeTransferPair(decodedChildVector).splitAndTransfer(0, valueCount); - } else { - TransferPair transfer = dictionary.getVector().makeTransferPair(decodedChildVector); - BaseIntVector indices = (BaseIntVector) childVector; - - DictionaryEncoder.retrieveIndexVector(indices, transfer, valueCount, 0, valueCount); - } - } - - return decoded; - } catch (Exception e) { - AutoCloseables.close(e, decoded); - throw e; - } - } - - /** Get the child vector dictionary, return null if not dictionary encoded. 
*/ - private static Dictionary getChildVectorDictionary( - FieldVector childVector, DictionaryProvider.MapDictionaryProvider provider) { - DictionaryEncoding dictionaryEncoding = childVector.getField().getDictionary(); - if (dictionaryEncoding != null) { - Dictionary dictionary = provider.lookup(dictionaryEncoding.getId()); - Preconditions.checkNotNull(dictionary, "Dictionary not found with id:" + dictionary); - return dictionary; - } - return null; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/extension/InvalidExtensionMetadataException.java b/java/vector/src/main/java/org/apache/arrow/vector/extension/InvalidExtensionMetadataException.java deleted file mode 100644 index 2349a7d4bc28d..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/extension/InvalidExtensionMetadataException.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.extension; - -/** The extension metadata was malformed. 
*/ -public class InvalidExtensionMetadataException extends RuntimeException { - public InvalidExtensionMetadataException(String message) { - super(message); - } - - public InvalidExtensionMetadataException(String message, Throwable cause) { - super(message, cause); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueType.java b/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueType.java deleted file mode 100644 index ca56214fdac77..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueType.java +++ /dev/null @@ -1,403 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.extension; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ObjectNode; -import java.util.Collections; -import java.util.Objects; -import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float2Vector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.VarBinaryVector; -import 
org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.ViewVarBinaryVector; -import org.apache.arrow.vector.ViewVarCharVector; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ExtensionTypeRegistry; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; - -/** - * Opaque is a placeholder for a type from an external (usually non-Arrow) system that could not be - * interpreted. - */ -public class OpaqueType extends ArrowType.ExtensionType { - private static final AtomicBoolean registered = new AtomicBoolean(false); - public static final String EXTENSION_NAME = "arrow.opaque"; - private final ArrowType storageType; - private final String typeName; - private final String vendorName; - - /** Register the extension type so it can be used globally. */ - public static void ensureRegistered() { - if (!registered.getAndSet(true)) { - // The values don't matter, we just need an instance - ExtensionTypeRegistry.register(new OpaqueType(Types.MinorType.NULL.getType(), "", "")); - } - } - - /** - * Create a new type instance. - * - * @param storageType The underlying Arrow type. - * @param typeName The name of the unknown type. - * @param vendorName The name of the originating system of the unknown type. 
- */ - public OpaqueType(ArrowType storageType, String typeName, String vendorName) { - this.storageType = Objects.requireNonNull(storageType, "storageType"); - this.typeName = Objects.requireNonNull(typeName, "typeName"); - this.vendorName = Objects.requireNonNull(vendorName, "vendorName"); - } - - @Override - public ArrowType storageType() { - return storageType; - } - - public String typeName() { - return typeName; - } - - public String vendorName() { - return vendorName; - } - - @Override - public String extensionName() { - return EXTENSION_NAME; - } - - @Override - public boolean extensionEquals(ExtensionType other) { - return other != null - && EXTENSION_NAME.equals(other.extensionName()) - && other instanceof OpaqueType - && storageType.equals(other.storageType()) - && typeName.equals(((OpaqueType) other).typeName()) - && vendorName.equals(((OpaqueType) other).vendorName()); - } - - @Override - public String serialize() { - ObjectMapper mapper = new ObjectMapper(); - ObjectNode object = mapper.createObjectNode(); - object.put("type_name", typeName); - object.put("vendor_name", vendorName); - try { - return mapper.writeValueAsString(object); - } catch (JsonProcessingException e) { - throw new RuntimeException("Could not serialize " + this, e); - } - } - - @Override - public ArrowType deserialize(ArrowType storageType, String serializedData) { - ObjectMapper mapper = new ObjectMapper(); - JsonNode object; - try { - object = mapper.readTree(serializedData); - } catch (JsonProcessingException e) { - throw new InvalidExtensionMetadataException("Extension metadata is invalid", e); - } - JsonNode typeName = object.get("type_name"); - JsonNode vendorName = object.get("vendor_name"); - if (typeName == null) { - throw new InvalidExtensionMetadataException("typeName is missing"); - } - if (vendorName == null) { - throw new InvalidExtensionMetadataException("vendorName is missing"); - } - if (!typeName.isTextual()) { - throw new 
InvalidExtensionMetadataException("typeName should be string, was " + typeName); - } - if (!vendorName.isTextual()) { - throw new InvalidExtensionMetadataException("vendorName should be string, was " + vendorName); - } - return new OpaqueType(storageType, typeName.asText(), vendorName.asText()); - } - - @Override - public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { - // XXX: fieldType is supposed to be the extension type - final Field field = new Field(name, fieldType, Collections.emptyList()); - final FieldVector underlyingVector = - storageType.accept(new UnderlyingVectorTypeVisitor(name, allocator)); - return new OpaqueVector(field, allocator, underlyingVector); - } - - @Override - public int hashCode() { - return Objects.hash(super.hashCode(), storageType, typeName, vendorName); - } - - @Override - public String toString() { - return "OpaqueType(" - + storageType - + ", typeName='" - + typeName - + '\'' - + ", vendorName='" - + vendorName - + '\'' - + ')'; - } - - private static class UnderlyingVectorTypeVisitor implements ArrowTypeVisitor { - private final String name; - private final BufferAllocator allocator; - - UnderlyingVectorTypeVisitor(String name, BufferAllocator allocator) { - this.name = name; - this.allocator = allocator; - } - - @Override - public FieldVector visit(Null type) { - return new NullVector(name); - } - - private RuntimeException unsupported(ArrowType type) { - throw new UnsupportedOperationException( - "OpaqueType#getUnderlyingVector is not supported for storage type: " + type); - } - - @Override - public FieldVector visit(Struct type) { - throw unsupported(type); - } - - @Override - public FieldVector visit(List type) { - throw unsupported(type); - } - - @Override - public FieldVector visit(LargeList type) { - throw unsupported(type); - } - - @Override - public FieldVector visit(FixedSizeList type) { - throw unsupported(type); - } - - @Override - public FieldVector visit(Union type) { - throw 
unsupported(type); - } - - @Override - public FieldVector visit(Map type) { - throw unsupported(type); - } - - @Override - public FieldVector visit(Int type) { - return new IntVector(name, allocator); - } - - @Override - public FieldVector visit(FloatingPoint type) { - switch (type.getPrecision()) { - case HALF: - return new Float2Vector(name, allocator); - case SINGLE: - return new Float4Vector(name, allocator); - case DOUBLE: - return new Float8Vector(name, allocator); - default: - throw unsupported(type); - } - } - - @Override - public FieldVector visit(Utf8 type) { - return new VarCharVector(name, allocator); - } - - @Override - public FieldVector visit(Utf8View type) { - return new ViewVarCharVector(name, allocator); - } - - @Override - public FieldVector visit(LargeUtf8 type) { - return new LargeVarCharVector(name, allocator); - } - - @Override - public FieldVector visit(Binary type) { - return new VarBinaryVector(name, allocator); - } - - @Override - public FieldVector visit(BinaryView type) { - return new ViewVarBinaryVector(name, allocator); - } - - @Override - public FieldVector visit(LargeBinary type) { - return new LargeVarBinaryVector(name, allocator); - } - - @Override - public FieldVector visit(FixedSizeBinary type) { - return new FixedSizeBinaryVector(Field.nullable(name, type), allocator); - } - - @Override - public FieldVector visit(Bool type) { - return new BitVector(name, allocator); - } - - @Override - public FieldVector visit(Decimal type) { - if (type.getBitWidth() == 128) { - return new DecimalVector(Field.nullable(name, type), allocator); - } else if (type.getBitWidth() == 256) { - return new Decimal256Vector(Field.nullable(name, type), allocator); - } - throw unsupported(type); - } - - @Override - public FieldVector visit(Date type) { - switch (type.getUnit()) { - case DAY: - return new DateDayVector(name, allocator); - case MILLISECOND: - return new DateMilliVector(name, allocator); - default: - throw unsupported(type); - } - } - - 
@Override - public FieldVector visit(Time type) { - switch (type.getUnit()) { - case SECOND: - return new TimeSecVector(name, allocator); - case MILLISECOND: - return new TimeMilliVector(name, allocator); - case MICROSECOND: - return new TimeMicroVector(name, allocator); - case NANOSECOND: - return new TimeNanoVector(name, allocator); - default: - throw unsupported(type); - } - } - - @Override - public FieldVector visit(Timestamp type) { - if (type.getTimezone() == null || type.getTimezone().isEmpty()) { - switch (type.getUnit()) { - case SECOND: - return new TimeStampSecVector(Field.nullable(name, type), allocator); - case MILLISECOND: - return new TimeStampMilliVector(Field.nullable(name, type), allocator); - case MICROSECOND: - return new TimeStampMicroVector(Field.nullable(name, type), allocator); - case NANOSECOND: - return new TimeStampNanoVector(Field.nullable(name, type), allocator); - default: - throw unsupported(type); - } - } - switch (type.getUnit()) { - case SECOND: - return new TimeStampSecTZVector(Field.nullable(name, type), allocator); - case MILLISECOND: - return new TimeStampMilliTZVector(Field.nullable(name, type), allocator); - case MICROSECOND: - return new TimeStampMicroTZVector(Field.nullable(name, type), allocator); - case NANOSECOND: - return new TimeStampNanoTZVector(Field.nullable(name, type), allocator); - default: - throw unsupported(type); - } - } - - @Override - public FieldVector visit(Interval type) { - switch (type.getUnit()) { - case YEAR_MONTH: - return new IntervalYearVector(name, allocator); - case DAY_TIME: - return new IntervalDayVector(name, allocator); - case MONTH_DAY_NANO: - return new IntervalMonthDayNanoVector(name, allocator); - default: - throw unsupported(type); - } - } - - @Override - public FieldVector visit(Duration type) { - return new DurationVector(Field.nullable(name, type), allocator); - } - - @Override - public FieldVector visit(ListView type) { - throw unsupported(type); - } - - @Override - public 
FieldVector visit(LargeListView type) { - throw unsupported(type); - } - - @Override - public FieldVector visit(RunEndEncoded type) { - throw unsupported(type); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueVector.java deleted file mode 100644 index 00eb9a984e6bf..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/extension/OpaqueVector.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.extension; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.types.pojo.Field; - -/** - * Opaque is a wrapper for (usually binary) data from an external (often non-Arrow) system that - * could not be interpreted. 
- */ -public class OpaqueVector extends ExtensionTypeVector - implements ValueIterableVector { - private final Field field; - - public OpaqueVector(Field field, BufferAllocator allocator, FieldVector underlyingVector) { - super(field, allocator, underlyingVector); - this.field = field; - } - - @Override - public Field getField() { - return field; - } - - @Override - public Object getObject(int index) { - return getUnderlyingVector().getObject(index); - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - return getUnderlyingVector().hashCode(index, hasher); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/holders/ComplexHolder.java b/java/vector/src/main/java/org/apache/arrow/vector/holders/ComplexHolder.java deleted file mode 100644 index 8c4ecd862819a..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/holders/ComplexHolder.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.holders; - -import org.apache.arrow.vector.complex.reader.FieldReader; - -/** Represents a single value of a complex type (e.g. 
Union, Struct). */ -public class ComplexHolder implements ValueHolder { - public FieldReader reader; - public int isSet; -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/holders/DenseUnionHolder.java b/java/vector/src/main/java/org/apache/arrow/vector/holders/DenseUnionHolder.java deleted file mode 100644 index 9de7053b980b3..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/holders/DenseUnionHolder.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.holders; - -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.types.Types; - -/** {@link ValueHolder} for Dense Union Vectors. 
*/ -public class DenseUnionHolder implements ValueHolder { - public FieldReader reader; - public int isSet; - public byte typeId; - - public Types.MinorType getMinorType() { - return reader.getMinorType(); - } - - public boolean isSet() { - return isSet == 1; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedListHolder.java b/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedListHolder.java deleted file mode 100644 index e96b86ce60e47..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedListHolder.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.holders; - -/** {@link ValueHolder} for a nested {@link org.apache.arrow.vector.complex.ListVector}. 
*/ -public final class RepeatedListHolder implements ValueHolder { - public int start; - public int end; -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedStructHolder.java b/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedStructHolder.java deleted file mode 100644 index e2cca96f54b79..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedStructHolder.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.holders; - -/** {@link ValueHolder} for a list of structs. */ -public final class RepeatedStructHolder implements ValueHolder { - public int start; - public int end; -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/holders/UnionHolder.java b/java/vector/src/main/java/org/apache/arrow/vector/holders/UnionHolder.java deleted file mode 100644 index af0b472c1be4a..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/holders/UnionHolder.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.holders; - -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.types.Types.MinorType; - -/** {@link ValueHolder} for Union Vectors. */ -public class UnionHolder implements ValueHolder { - public FieldReader reader; - public int isSet; - - public MinorType getMinorType() { - return reader.getMinorType(); - } - - public boolean isSet() { - return isSet == 1; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/holders/ValueHolder.java b/java/vector/src/main/java/org/apache/arrow/vector/holders/ValueHolder.java deleted file mode 100644 index c9cbbb5b439d6..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/holders/ValueHolder.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.holders; - -/** - * Wrapper object for an individual value in Arrow. - * - *

    ValueHolders are designed to be mutable wrapper objects for defining clean APIs that access - * data in Arrow. For performance, object creation is avoided at all costs throughout execution. For - * this reason, ValueHolders are disallowed from implementing any methods, this allows for them to - * be replaced by their java primitive inner members during optimization of run-time generated code. - */ -public interface ValueHolder {} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java deleted file mode 100644 index 7cac0a15a198e..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.SeekableByteChannel; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.arrow.flatbuf.Footer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.VisibleForTesting; -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.vector.ipc.message.ArrowBlock; -import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch; -import org.apache.arrow.vector.ipc.message.ArrowFooter; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.validate.MetadataV4UnionChecker; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** An implementation of {@link ArrowReader} that reads the standard arrow binary file format. 
*/ -public class ArrowFileReader extends ArrowReader { - - private static final Logger LOGGER = LoggerFactory.getLogger(ArrowFileReader.class); - - private SeekableReadChannel in; - private ArrowFooter footer; - private int currentDictionaryBatch = 0; - private int currentRecordBatch = 0; - - public ArrowFileReader( - SeekableReadChannel in, - BufferAllocator allocator, - CompressionCodec.Factory compressionFactory) { - super(allocator, compressionFactory); - this.in = in; - } - - public ArrowFileReader( - SeekableByteChannel in, - BufferAllocator allocator, - CompressionCodec.Factory compressionFactory) { - this(new SeekableReadChannel(in), allocator, compressionFactory); - } - - public ArrowFileReader(SeekableReadChannel in, BufferAllocator allocator) { - this(in, allocator, CompressionCodec.Factory.INSTANCE); - } - - public ArrowFileReader(SeekableByteChannel in, BufferAllocator allocator) { - this(new SeekableReadChannel(in), allocator); - } - - @Override - public long bytesRead() { - return in.bytesRead(); - } - - @Override - protected void closeReadSource() throws IOException { - in.close(); - } - - @Override - protected Schema readSchema() throws IOException { - if (footer == null) { - if (in.size() <= (ArrowMagic.MAGIC_LENGTH * 2 + 4)) { - throw new InvalidArrowFileException("file too small: " + in.size()); - } - ByteBuffer buffer = ByteBuffer.allocate(4 + ArrowMagic.MAGIC_LENGTH); - long footerLengthOffset = in.size() - buffer.remaining(); - in.setPosition(footerLengthOffset); - in.readFully(buffer); - buffer.flip(); - byte[] array = buffer.array(); - if (!ArrowMagic.validateMagic(Arrays.copyOfRange(array, 4, array.length))) { - throw new InvalidArrowFileException( - "missing Magic number " + Arrays.toString(buffer.array())); - } - int footerLength = MessageSerializer.bytesToInt(array); - if (footerLength <= 0 - || footerLength + ArrowMagic.MAGIC_LENGTH * 2 + 4 > in.size() - || footerLength > footerLengthOffset) { - throw new 
InvalidArrowFileException("invalid footer length: " + footerLength); - } - long footerOffset = footerLengthOffset - footerLength; - LOGGER.debug("Footer starts at {}, length: {}", footerOffset, footerLength); - ByteBuffer footerBuffer = ByteBuffer.allocate(footerLength); - in.setPosition(footerOffset); - in.readFully(footerBuffer); - footerBuffer.flip(); - Footer footerFB = Footer.getRootAsFooter(footerBuffer); - this.footer = new ArrowFooter(footerFB); - } - MetadataV4UnionChecker.checkRead(footer.getSchema(), footer.getMetadataVersion()); - return footer.getSchema(); - } - - @Override - public void initialize() throws IOException { - super.initialize(); - - // empty stream, has no dictionaries in IPC. - if (footer.getRecordBatches().size() == 0) { - return; - } - // Read and load all dictionaries from schema - for (int i = 0; i < dictionaries.size(); i++) { - ArrowDictionaryBatch dictionaryBatch = readDictionary(); - loadDictionary(dictionaryBatch); - } - } - - /** Get custom metadata. */ - public Map getMetaData() { - if (footer != null) { - return footer.getMetaData(); - } - return new HashMap<>(); - } - - /** - * Read a dictionary batch from the source, will be invoked after the schema has been read and - * called N times, where N is the number of dictionaries indicated by the schema Fields. - * - * @return the read ArrowDictionaryBatch - * @throws IOException on error - */ - public ArrowDictionaryBatch readDictionary() throws IOException { - if (currentDictionaryBatch >= footer.getDictionaries().size()) { - throw new IOException( - "Requested more dictionaries than defined in footer: " + currentDictionaryBatch); - } - ArrowBlock block = footer.getDictionaries().get(currentDictionaryBatch++); - return readDictionaryBatch(in, block, allocator); - } - - /** Returns true if a batch was read, false if no more batches. 
*/ - @Override - public boolean loadNextBatch() throws IOException { - prepareLoadNextBatch(); - - if (currentRecordBatch < footer.getRecordBatches().size()) { - ArrowBlock block = footer.getRecordBatches().get(currentRecordBatch++); - ArrowRecordBatch batch = readRecordBatch(in, block, allocator); - loadRecordBatch(batch); - return true; - } else { - return false; - } - } - - public List getDictionaryBlocks() throws IOException { - ensureInitialized(); - return footer.getDictionaries(); - } - - /** Returns the {@link ArrowBlock} metadata from the file. */ - public List getRecordBlocks() throws IOException { - ensureInitialized(); - return footer.getRecordBatches(); - } - - /** Loads record batch for the given block. */ - public boolean loadRecordBatch(ArrowBlock block) throws IOException { - ensureInitialized(); - int blockIndex = footer.getRecordBatches().indexOf(block); - if (blockIndex == -1) { - throw new IllegalArgumentException("Arrow block does not exist in record batches: " + block); - } - currentRecordBatch = blockIndex; - return loadNextBatch(); - } - - @VisibleForTesting - ArrowFooter getFooter() { - return footer; - } - - private ArrowDictionaryBatch readDictionaryBatch( - SeekableReadChannel in, ArrowBlock block, BufferAllocator allocator) throws IOException { - LOGGER.debug( - "DictionaryRecordBatch at {}, metadata: {}, body: {}", - block.getOffset(), - block.getMetadataLength(), - block.getBodyLength()); - in.setPosition(block.getOffset()); - ArrowDictionaryBatch batch = MessageSerializer.deserializeDictionaryBatch(in, block, allocator); - if (batch == null) { - throw new IOException("Invalid file. 
No batch at offset: " + block.getOffset()); - } - return batch; - } - - private ArrowRecordBatch readRecordBatch( - SeekableReadChannel in, ArrowBlock block, BufferAllocator allocator) throws IOException { - LOGGER.debug( - "RecordBatch at {}, metadata: {}, body: {}", - block.getOffset(), - block.getMetadataLength(), - block.getBodyLength()); - in.setPosition(block.getOffset()); - ArrowRecordBatch batch = MessageSerializer.deserializeRecordBatch(in, block, allocator); - if (batch == null) { - throw new IOException("Invalid file. No batch at offset: " + block.getOffset()); - } - return batch; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java deleted file mode 100644 index a873aaa860b26..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import java.io.IOException; -import java.nio.channels.WritableByteChannel; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import org.apache.arrow.util.VisibleForTesting; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.vector.compression.CompressionUtil; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.ArrowBlock; -import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch; -import org.apache.arrow.vector.ipc.message.ArrowFooter; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * {@link ArrowWriter} that writes out a Arrow files - * (https://arrow.apache.org/docs/format/IPC.html#file-format). - */ -public class ArrowFileWriter extends ArrowWriter { - - private static final Logger LOGGER = LoggerFactory.getLogger(ArrowFileWriter.class); - - // All ArrowBlocks written are saved in these lists to be passed to ArrowFooter in endInternal. 
- private final List dictionaryBlocks = new ArrayList<>(); - private final List recordBlocks = new ArrayList<>(); - - private Map metaData; - private boolean dictionariesWritten = false; - - public ArrowFileWriter( - VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out) { - super(root, provider, out); - } - - public ArrowFileWriter( - VectorSchemaRoot root, - DictionaryProvider provider, - WritableByteChannel out, - Map metaData) { - super(root, provider, out); - this.metaData = metaData; - } - - public ArrowFileWriter( - VectorSchemaRoot root, - DictionaryProvider provider, - WritableByteChannel out, - IpcOption option) { - super(root, provider, out, option); - } - - public ArrowFileWriter( - VectorSchemaRoot root, - DictionaryProvider provider, - WritableByteChannel out, - Map metaData, - IpcOption option) { - super(root, provider, out, option); - this.metaData = metaData; - } - - public ArrowFileWriter( - VectorSchemaRoot root, - DictionaryProvider provider, - WritableByteChannel out, - Map metaData, - IpcOption option, - CompressionCodec.Factory compressionFactory, - CompressionUtil.CodecType codecType) { - this(root, provider, out, metaData, option, compressionFactory, codecType, Optional.empty()); - } - - public ArrowFileWriter( - VectorSchemaRoot root, - DictionaryProvider provider, - WritableByteChannel out, - Map metaData, - IpcOption option, - CompressionCodec.Factory compressionFactory, - CompressionUtil.CodecType codecType, - Optional compressionLevel) { - super(root, provider, out, option, compressionFactory, codecType, compressionLevel); - this.metaData = metaData; - } - - @Override - protected void startInternal(WriteChannel out) throws IOException { - ArrowMagic.writeMagic(out, true); - } - - @Override - protected ArrowBlock writeDictionaryBatch(ArrowDictionaryBatch batch) throws IOException { - ArrowBlock block = super.writeDictionaryBatch(batch); - dictionaryBlocks.add(block); - return block; - } - - @Override - protected 
ArrowBlock writeRecordBatch(ArrowRecordBatch batch) throws IOException { - ArrowBlock block = super.writeRecordBatch(batch); - recordBlocks.add(block); - return block; - } - - @Override - protected void endInternal(WriteChannel out) throws IOException { - if (!option.write_legacy_ipc_format) { - out.writeIntLittleEndian(MessageSerializer.IPC_CONTINUATION_TOKEN); - } - out.writeIntLittleEndian(0); - - long footerStart = out.getCurrentPosition(); - out.write( - new ArrowFooter(schema, dictionaryBlocks, recordBlocks, metaData, option.metadataVersion), - false); - int footerLength = (int) (out.getCurrentPosition() - footerStart); - if (footerLength <= 0) { - throw new InvalidArrowFileException("invalid footer"); - } - out.writeIntLittleEndian(footerLength); - LOGGER.debug("Footer starts at {}, length: {}", footerStart, footerLength); - ArrowMagic.writeMagic(out, false); - LOGGER.debug("magic written, now at {}", out.getCurrentPosition()); - } - - @Override - protected void ensureDictionariesWritten(DictionaryProvider provider, Set dictionaryIdsUsed) - throws IOException { - if (dictionariesWritten) { - return; - } - dictionariesWritten = true; - // Write out all dictionaries required. - // Replacement dictionaries are not supported in the IPC file format. 
- for (long id : dictionaryIdsUsed) { - Dictionary dictionary = provider.lookup(id); - writeDictionaryBatch(dictionary); - } - } - - @VisibleForTesting - public List getRecordBlocks() { - return recordBlocks; - } - - @VisibleForTesting - public List getDictionaryBlocks() { - return dictionaryBlocks; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java deleted file mode 100644 index e6a9aa5177f86..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; - -/** - * Magic header/footer helpers for {@link ArrowFileWriter} and {@link ArrowFileReader} formatted - * files. 
- */ -class ArrowMagic { - private ArrowMagic() {} - - private static final byte[] MAGIC = "ARROW1".getBytes(StandardCharsets.UTF_8); - - public static final int MAGIC_LENGTH = MAGIC.length; - - public static void writeMagic(WriteChannel out, boolean align) throws IOException { - out.write(MAGIC); - if (align) { - out.align(); - } - } - - public static boolean validateMagic(byte[] array) { - return Arrays.equals(MAGIC, array); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java deleted file mode 100644 index 7f4addf2d0dea..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.DictionaryUtility; -import org.apache.arrow.vector.util.VectorBatchAppender; - -/** Abstract class to read Schema and ArrowRecordBatches. */ -public abstract class ArrowReader implements DictionaryProvider, AutoCloseable { - - protected final BufferAllocator allocator; - private VectorLoader loader; - private VectorSchemaRoot root; - protected Map dictionaries; - private boolean initialized = false; - - private final CompressionCodec.Factory compressionFactory; - - protected ArrowReader(BufferAllocator allocator) { - this(allocator, CompressionCodec.Factory.INSTANCE); - } - - protected ArrowReader(BufferAllocator allocator, CompressionCodec.Factory compressionFactory) { - this.allocator = allocator; - this.compressionFactory = compressionFactory; - } - - /** - * Returns the vector schema root. This will be loaded with new values on every call to - * loadNextBatch. 
- * - * @return the vector schema root - * @throws IOException if reading of schema fails - */ - public VectorSchemaRoot getVectorSchemaRoot() throws IOException { - ensureInitialized(); - return root; - } - - /** - * Returns any dictionaries that were loaded along with ArrowRecordBatches. - * - * @return Map of dictionaries to dictionary id, empty if no dictionaries loaded - * @throws IOException if reading of schema fails - */ - public Map getDictionaryVectors() throws IOException { - ensureInitialized(); - return dictionaries; - } - - /** - * Lookup a dictionary that has been loaded using the dictionary id. - * - * @param id Unique identifier for a dictionary - * @return the requested dictionary or null if not found - */ - @Override - public Dictionary lookup(long id) { - if (!initialized) { - throw new IllegalStateException("Unable to lookup until reader has been initialized"); - } - - return dictionaries.get(id); - } - - @Override - public Set getDictionaryIds() { - return dictionaries.keySet(); - } - - /** - * Load the next ArrowRecordBatch to the vector schema root if available. - * - * @return true if a batch was read, false on EOS - * @throws IOException on error - */ - public abstract boolean loadNextBatch() throws IOException; - - /** - * Return the number of bytes read from the ReadChannel. - * - * @return number of bytes read - */ - public abstract long bytesRead(); - - /** - * Close resources, including vector schema root and dictionary vectors, and the underlying read - * source. - * - * @throws IOException on error - */ - @Override - public void close() throws IOException { - close(true); - } - - /** - * Close resources, including vector schema root and dictionary vectors. If the flag - * closeReadChannel is true then close the underlying read source, otherwise leave it open. 
- * - * @param closeReadSource Flag to control if closing the underlying read source - * @throws IOException on error - */ - public void close(boolean closeReadSource) throws IOException { - if (initialized) { - root.close(); - for (Dictionary dictionary : dictionaries.values()) { - dictionary.getVector().close(); - } - } - - if (closeReadSource) { - closeReadSource(); - } - } - - /** - * Close the underlying read source. - * - * @throws IOException on error - */ - protected abstract void closeReadSource() throws IOException; - - /** - * Read the Schema from the source, will be invoked at the beginning the initialization. - * - * @return the read Schema - * @throws IOException on error - */ - protected abstract Schema readSchema() throws IOException; - - /** - * Initialize if not done previously. - * - * @throws IOException on error - */ - protected void ensureInitialized() throws IOException { - if (!initialized) { - initialize(); - initialized = true; - } - } - - /** Reads the schema and initializes the vectors. */ - protected void initialize() throws IOException { - Schema originalSchema = readSchema(); - List fields = new ArrayList<>(originalSchema.getFields().size()); - List vectors = new ArrayList<>(originalSchema.getFields().size()); - Map dictionaries = new HashMap<>(); - - // Convert fields with dictionaries to have the index type - for (Field field : originalSchema.getFields()) { - Field updated = DictionaryUtility.toMemoryFormat(field, allocator, dictionaries); - fields.add(updated); - vectors.add(updated.createVector(allocator)); - } - Schema schema = new Schema(fields, originalSchema.getCustomMetadata()); - - this.root = new VectorSchemaRoot(schema, vectors, 0); - this.loader = new VectorLoader(root, compressionFactory); - this.dictionaries = Collections.unmodifiableMap(dictionaries); - } - - /** - * Ensure the reader has been initialized and reset the VectorSchemaRoot row count to 0. 
- * - * @throws IOException on error - */ - protected void prepareLoadNextBatch() throws IOException { - ensureInitialized(); - root.setRowCount(0); - } - - /** - * Load an ArrowRecordBatch to the readers VectorSchemaRoot. - * - * @param batch the record batch to load - */ - protected void loadRecordBatch(ArrowRecordBatch batch) { - try { - loader.load(batch); - } finally { - batch.close(); - } - } - - /** - * Load an ArrowDictionaryBatch to the readers dictionary vectors. - * - * @param dictionaryBatch dictionary batch to load - */ - protected void loadDictionary(ArrowDictionaryBatch dictionaryBatch) { - long id = dictionaryBatch.getDictionaryId(); - Dictionary dictionary = dictionaries.get(id); - if (dictionary == null) { - throw new IllegalArgumentException("Dictionary ID " + id + " not defined in schema"); - } - FieldVector vector = dictionary.getVector(); - // if is deltaVector, concat it with non-delta vector with the same ID. - if (dictionaryBatch.isDelta()) { - try (FieldVector deltaVector = vector.getField().createVector(allocator)) { - load(dictionaryBatch, deltaVector); - VectorBatchAppender.batchAppend(vector, deltaVector); - } - return; - } - - load(dictionaryBatch, vector); - } - - private void load(ArrowDictionaryBatch dictionaryBatch, FieldVector vector) { - VectorSchemaRoot root = - new VectorSchemaRoot( - Collections.singletonList(vector.getField()), Collections.singletonList(vector), 0); - VectorLoader loader = new VectorLoader(root, this.compressionFactory); - try { - loader.load(dictionaryBatch.getDictionary()); - } finally { - dictionaryBatch.close(); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamReader.java deleted file mode 100644 index 69811dc71727c..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamReader.java +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Licensed to the Apache Software 
Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc; - -import java.io.IOException; -import java.io.InputStream; -import java.nio.channels.Channels; -import java.nio.channels.ReadableByteChannel; -import org.apache.arrow.flatbuf.MessageHeader; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.ipc.message.MessageChannelReader; -import org.apache.arrow.vector.ipc.message.MessageResult; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.MetadataVersion; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.validate.MetadataV4UnionChecker; - -/** This class reads from an input stream and produces ArrowRecordBatches. 
*/ -public class ArrowStreamReader extends ArrowReader { - - private MessageChannelReader messageReader; - - private int loadedDictionaryCount; - - /** - * Constructs a streaming reader using a MessageChannelReader. Non-blocking. - * - * @param messageReader reader used to get messages from a ReadChannel - * @param allocator to allocate new buffers - * @param compressionFactory the factory to create compression codec. - */ - public ArrowStreamReader( - MessageChannelReader messageReader, - BufferAllocator allocator, - CompressionCodec.Factory compressionFactory) { - super(allocator, compressionFactory); - this.messageReader = messageReader; - } - - /** - * Constructs a streaming reader using a MessageChannelReader. Non-blocking. - * - * @param messageReader reader used to get messages from a ReadChannel - * @param allocator to allocate new buffers - */ - public ArrowStreamReader(MessageChannelReader messageReader, BufferAllocator allocator) { - this(messageReader, allocator, CompressionCodec.Factory.INSTANCE); - } - - /** - * Constructs a streaming reader from a ReadableByteChannel input. Non-blocking. - * - * @param in ReadableByteChannel to read messages from - * @param allocator to allocate new buffers - * @param compressionFactory the factory to create compression codec. - */ - public ArrowStreamReader( - ReadableByteChannel in, - BufferAllocator allocator, - CompressionCodec.Factory compressionFactory) { - this(new MessageChannelReader(new ReadChannel(in), allocator), allocator, compressionFactory); - } - - /** - * Constructs a streaming reader from a ReadableByteChannel input. Non-blocking. - * - * @param in ReadableByteChannel to read messages from - * @param allocator to allocate new buffers - */ - public ArrowStreamReader(ReadableByteChannel in, BufferAllocator allocator) { - this(new MessageChannelReader(new ReadChannel(in), allocator), allocator); - } - - /** - * Constructs a streaming reader from a ReadableByteChannel input. Non-blocking. 
- * - * @param in InputStream to read messages from - * @param allocator to allocate new buffers - * @param compressionFactory the factory to create compression codec. - */ - public ArrowStreamReader( - InputStream in, BufferAllocator allocator, CompressionCodec.Factory compressionFactory) { - this(Channels.newChannel(in), allocator, compressionFactory); - } - - /** - * Constructs a streaming reader from an InputStream. Non-blocking. - * - * @param in InputStream to read messages from - * @param allocator to allocate new buffers - */ - public ArrowStreamReader(InputStream in, BufferAllocator allocator) { - this(Channels.newChannel(in), allocator); - } - - /** - * Get the number of bytes read from the stream since constructing the reader. - * - * @return number of bytes - */ - @Override - public long bytesRead() { - return messageReader.bytesRead(); - } - - /** - * Closes the underlying read source. - * - * @throws IOException on error - */ - @Override - protected void closeReadSource() throws IOException { - messageReader.close(); - } - - /** - * Load the next ArrowRecordBatch to the vector schema root if available. 
- * - * @return true if a batch was read, false on EOS - * @throws IOException on error - */ - public boolean loadNextBatch() throws IOException { - prepareLoadNextBatch(); - MessageResult result = messageReader.readNext(); - - // Reached EOS - if (result == null) { - return false; - } - - if (result.getMessage().headerType() == MessageHeader.RecordBatch) { - ArrowBuf bodyBuffer = result.getBodyBuffer(); - - // For zero-length batches, need an empty buffer to deserialize the batch - if (bodyBuffer == null) { - bodyBuffer = allocator.getEmpty(); - } - - ArrowRecordBatch batch = - MessageSerializer.deserializeRecordBatch(result.getMessage(), bodyBuffer); - loadRecordBatch(batch); - checkDictionaries(); - return true; - } else if (result.getMessage().headerType() == MessageHeader.DictionaryBatch) { - // if it's dictionary message, read dictionary message out and continue to read unless get a - // batch or eos. - ArrowDictionaryBatch dictionaryBatch = readDictionary(result); - loadDictionary(dictionaryBatch); - loadedDictionaryCount++; - return loadNextBatch(); - } else { - throw new IOException( - "Expected RecordBatch or DictionaryBatch but header was " - + result.getMessage().headerType()); - } - } - - /** When read a record batch, check whether its dictionaries are available. */ - private void checkDictionaries() throws IOException { - // if all dictionaries are loaded, return. - if (loadedDictionaryCount == dictionaries.size()) { - return; - } - for (FieldVector vector : getVectorSchemaRoot().getFieldVectors()) { - DictionaryEncoding encoding = vector.getField().getDictionary(); - if (encoding != null) { - // if the dictionaries it needs is not available and the vector is not all null, something - // was wrong. 
- if (!dictionaries.containsKey(encoding.getId()) - && vector.getNullCount() < vector.getValueCount()) { - throw new IOException("The dictionary was not available, id was:" + encoding.getId()); - } - } - } - } - - /** - * Reads the schema message from the beginning of the stream. - * - * @return the deserialized arrow schema - */ - @Override - protected Schema readSchema() throws IOException { - MessageResult result = messageReader.readNext(); - - if (result == null) { - throw new IOException("Unexpected end of input. Missing schema."); - } - - if (result.getMessage().headerType() != MessageHeader.Schema) { - throw new IOException("Expected schema but header was " + result.getMessage().headerType()); - } - - final Schema schema = MessageSerializer.deserializeSchema(result.getMessage()); - MetadataV4UnionChecker.checkRead( - schema, MetadataVersion.fromFlatbufID(result.getMessage().version())); - return schema; - } - - private ArrowDictionaryBatch readDictionary(MessageResult result) throws IOException { - - ArrowBuf bodyBuffer = result.getBodyBuffer(); - - // For zero-length batches, need an empty buffer to deserialize the batch - if (bodyBuffer == null) { - bodyBuffer = allocator.getEmpty(); - } - - return MessageSerializer.deserializeDictionaryBatch(result.getMessage(), bodyBuffer); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamWriter.java deleted file mode 100644 index 5e1a708653957..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamWriter.java +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc; - -import java.io.IOException; -import java.io.OutputStream; -import java.nio.channels.Channels; -import java.nio.channels.WritableByteChannel; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.compare.VectorEqualsVisitor; -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.vector.compression.CompressionUtil; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.ipc.message.MessageSerializer; - -/** Writer for the Arrow stream format to send ArrowRecordBatches over a WriteChannel. */ -public class ArrowStreamWriter extends ArrowWriter { - private final Map previousDictionaries = new HashMap<>(); - - /** - * Construct an ArrowStreamWriter with an optional DictionaryProvider for the OutputStream. - * - * @param root Existing VectorSchemaRoot with vectors to be written. - * @param provider DictionaryProvider for any vectors that are dictionary encoded. (Optional, can - * be null) - * @param out OutputStream for writing. 
- */ - public ArrowStreamWriter(VectorSchemaRoot root, DictionaryProvider provider, OutputStream out) { - this(root, provider, Channels.newChannel(out)); - } - - /** - * Construct an ArrowStreamWriter with an optional DictionaryProvider for the WritableByteChannel. - */ - public ArrowStreamWriter( - VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out) { - this(root, provider, out, IpcOption.DEFAULT); - } - - /** - * Construct an ArrowStreamWriter with an optional DictionaryProvider for the WritableByteChannel. - * - * @param root Existing VectorSchemaRoot with vectors to be written. - * @param provider DictionaryProvider for any vectors that are dictionary encoded. (Optional, can - * be null) - * @param option IPC write options - * @param out WritableByteChannel for writing. - */ - public ArrowStreamWriter( - VectorSchemaRoot root, - DictionaryProvider provider, - WritableByteChannel out, - IpcOption option) { - super(root, provider, out, option); - } - - /** - * Construct an ArrowStreamWriter with compression enabled. - * - * @param root Existing VectorSchemaRoot with vectors to be written. - * @param provider DictionaryProvider for any vectors that are dictionary encoded. (Optional, can - * be null) - * @param option IPC write options - * @param compressionFactory Compression codec factory - * @param codecType Codec type - * @param out WritableByteChannel for writing. - */ - public ArrowStreamWriter( - VectorSchemaRoot root, - DictionaryProvider provider, - WritableByteChannel out, - IpcOption option, - CompressionCodec.Factory compressionFactory, - CompressionUtil.CodecType codecType) { - this(root, provider, out, option, compressionFactory, codecType, Optional.empty()); - } - - /** - * Construct an ArrowStreamWriter with compression enabled. - * - * @param root Existing VectorSchemaRoot with vectors to be written. - * @param provider DictionaryProvider for any vectors that are dictionary encoded. 
(Optional, can - * be null) - * @param option IPC write options - * @param compressionFactory Compression codec factory - * @param codecType Codec type - * @param compressionLevel Compression level - * @param out WritableByteChannel for writing. - */ - public ArrowStreamWriter( - VectorSchemaRoot root, - DictionaryProvider provider, - WritableByteChannel out, - IpcOption option, - CompressionCodec.Factory compressionFactory, - CompressionUtil.CodecType codecType, - Optional compressionLevel) { - super(root, provider, out, option, compressionFactory, codecType, compressionLevel); - } - - /** - * Write an EOS identifier to the WriteChannel. - * - * @param out Open WriteChannel with an active Arrow stream. - * @param option IPC write option - * @throws IOException on error - */ - public static void writeEndOfStream(WriteChannel out, IpcOption option) throws IOException { - if (!option.write_legacy_ipc_format) { - out.writeIntLittleEndian(MessageSerializer.IPC_CONTINUATION_TOKEN); - } - out.writeIntLittleEndian(0); - } - - @Override - protected void endInternal(WriteChannel out) throws IOException { - writeEndOfStream(out, option); - } - - @Override - protected void ensureDictionariesWritten(DictionaryProvider provider, Set dictionaryIdsUsed) - throws IOException { - // write out any dictionaries that have changes - for (long id : dictionaryIdsUsed) { - Dictionary dictionary = provider.lookup(id); - FieldVector vector = dictionary.getVector(); - if (previousDictionaries.containsKey(id) - && VectorEqualsVisitor.vectorEquals(vector, previousDictionaries.get(id))) { - // Dictionary was previously written and hasn't changed - continue; - } - writeDictionaryBatch(dictionary); - // Store a copy of the vector in case it is later mutated - if (previousDictionaries.containsKey(id)) { - previousDictionaries.get(id).close(); - } - previousDictionaries.put(id, copyVector(vector)); - } - } - - @Override - public void close() { - super.close(); - try { - 
AutoCloseables.close(previousDictionaries.values()); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - private static FieldVector copyVector(FieldVector source) { - FieldVector copy = source.getField().createVector(source.getAllocator()); - copy.allocateNew(); - for (int i = 0; i < source.getValueCount(); i++) { - copy.copyFromSafe(i, i, source); - } - copy.setValueCount(source.getValueCount()); - return copy; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java deleted file mode 100644 index c0f2b113bcb54..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import java.io.IOException; -import java.nio.channels.WritableByteChannel; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Optional; -import java.util.Set; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.compression.CompressionCodec; -import org.apache.arrow.vector.compression.CompressionUtil; -import org.apache.arrow.vector.compression.NoCompressionCodec; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.ArrowBlock; -import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.DictionaryUtility; -import org.apache.arrow.vector.validate.MetadataV4UnionChecker; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Abstract base class for implementing Arrow writers for IPC over a WriteChannel. 
*/ -public abstract class ArrowWriter implements AutoCloseable { - - protected static final Logger LOGGER = LoggerFactory.getLogger(ArrowWriter.class); - - // schema with fields in message format, not memory format - protected final Schema schema; - protected final WriteChannel out; - - private final VectorUnloader unloader; - private final DictionaryProvider dictionaryProvider; - private final Set dictionaryIdsUsed = new HashSet<>(); - - private final CompressionCodec.Factory compressionFactory; - private final CompressionUtil.CodecType codecType; - private final Optional compressionLevel; - private boolean started = false; - private boolean ended = false; - - private final CompressionCodec codec; - - protected IpcOption option; - - protected ArrowWriter( - VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out) { - this(root, provider, out, IpcOption.DEFAULT); - } - - protected ArrowWriter( - VectorSchemaRoot root, - DictionaryProvider provider, - WritableByteChannel out, - IpcOption option) { - this( - root, - provider, - out, - option, - NoCompressionCodec.Factory.INSTANCE, - CompressionUtil.CodecType.NO_COMPRESSION, - Optional.empty()); - } - - /** - * Note: fields are not closed when the writer is closed. 
- * - * @param root the vectors to write to the output - * @param provider where to find the dictionaries - * @param out the output where to write - * @param option IPC write options - * @param compressionFactory Compression codec factory - * @param codecType Compression codec - * @param compressionLevel Compression level - */ - protected ArrowWriter( - VectorSchemaRoot root, - DictionaryProvider provider, - WritableByteChannel out, - IpcOption option, - CompressionCodec.Factory compressionFactory, - CompressionUtil.CodecType codecType, - Optional compressionLevel) { - this.out = new WriteChannel(out); - this.option = option; - this.dictionaryProvider = provider; - - this.compressionFactory = compressionFactory; - this.codecType = codecType; - this.compressionLevel = compressionLevel; - this.codec = - this.compressionLevel.isPresent() - ? this.compressionFactory.createCodec(this.codecType, this.compressionLevel.get()) - : this.compressionFactory.createCodec(this.codecType); - this.unloader = - new VectorUnloader(root, /*includeNullCount*/ true, codec, /*alignBuffers*/ true); - - List fields = new ArrayList<>(root.getSchema().getFields().size()); - - MetadataV4UnionChecker.checkForUnion( - root.getSchema().getFields().iterator(), option.metadataVersion); - // Convert fields with dictionaries to have dictionary type - for (Field field : root.getSchema().getFields()) { - fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIdsUsed)); - } - - this.schema = new Schema(fields, root.getSchema().getCustomMetadata()); - } - - public void start() throws IOException { - ensureStarted(); - } - - /** Writes the record batch currently loaded in this instance's VectorSchemaRoot. 
*/ - public void writeBatch() throws IOException { - ensureStarted(); - ensureDictionariesWritten(dictionaryProvider, dictionaryIdsUsed); - try (ArrowRecordBatch batch = unloader.getRecordBatch()) { - writeRecordBatch(batch); - } - } - - protected void writeDictionaryBatch(Dictionary dictionary) throws IOException { - FieldVector vector = dictionary.getVector(); - long id = dictionary.getEncoding().getId(); - int count = vector.getValueCount(); - VectorSchemaRoot dictRoot = - new VectorSchemaRoot( - Collections.singletonList(vector.getField()), Collections.singletonList(vector), count); - VectorUnloader unloader = - new VectorUnloader(dictRoot, /*includeNullCount*/ true, this.codec, /*alignBuffers*/ true); - ArrowRecordBatch batch = unloader.getRecordBatch(); - ArrowDictionaryBatch dictionaryBatch = new ArrowDictionaryBatch(id, batch, false); - try { - writeDictionaryBatch(dictionaryBatch); - } finally { - try { - dictionaryBatch.close(); - } catch (Exception e) { - throw new RuntimeException("Error occurred while closing dictionary.", e); - } - } - } - - protected ArrowBlock writeDictionaryBatch(ArrowDictionaryBatch batch) throws IOException { - ArrowBlock block = MessageSerializer.serialize(out, batch, option); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug( - "DictionaryRecordBatch at {}, metadata: {}, body: {}", - block.getOffset(), - block.getMetadataLength(), - block.getBodyLength()); - } - return block; - } - - protected ArrowBlock writeRecordBatch(ArrowRecordBatch batch) throws IOException { - ArrowBlock block = MessageSerializer.serialize(out, batch, option); - if (LOGGER.isDebugEnabled()) { - LOGGER.debug( - "RecordBatch at {}, metadata: {}, body: {}", - block.getOffset(), - block.getMetadataLength(), - block.getBodyLength()); - } - return block; - } - - public void end() throws IOException { - ensureStarted(); - ensureEnded(); - } - - public long bytesWritten() { - return out.getCurrentPosition(); - } - - private void ensureStarted() throws IOException 
{ - if (!started) { - started = true; - startInternal(out); - // write the schema - for file formats this is duplicated in the footer, but matches - // the streaming format - MessageSerializer.serialize(out, schema, option); - } - } - - /** - * Write dictionaries after schema and before recordBatches, dictionaries won't be written if - * empty stream (only has schema data in IPC). - */ - protected abstract void ensureDictionariesWritten( - DictionaryProvider provider, Set dictionaryIdsUsed) throws IOException; - - private void ensureEnded() throws IOException { - if (!ended) { - ended = true; - endInternal(out); - } - } - - protected void startInternal(WriteChannel out) throws IOException {} - - protected void endInternal(WriteChannel out) throws IOException {} - - @Override - public void close() { - try { - end(); - out.close(); - } catch (Exception e) { - throw new RuntimeException(e); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/InvalidArrowFileException.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/InvalidArrowFileException.java deleted file mode 100644 index 5e2531610c632..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/InvalidArrowFileException.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc; - -/** - * Exception indicating a problem with an Arrow File - * (https://arrow.apache.org/docs/format/IPC.html#file-format). - */ -public class InvalidArrowFileException extends RuntimeException { - private static final long serialVersionUID = 1L; - - public InvalidArrowFileException(String message) { - super(message); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java deleted file mode 100644 index fe0803d2984cb..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java +++ /dev/null @@ -1,990 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import static com.fasterxml.jackson.core.JsonToken.END_ARRAY; -import static com.fasterxml.jackson.core.JsonToken.END_OBJECT; -import static com.fasterxml.jackson.core.JsonToken.START_ARRAY; -import static com.fasterxml.jackson.core.JsonToken.START_OBJECT; -import static org.apache.arrow.vector.BufferLayout.BufferType.DATA; -import static org.apache.arrow.vector.BufferLayout.BufferType.OFFSET; -import static org.apache.arrow.vector.BufferLayout.BufferType.SIZE; -import static org.apache.arrow.vector.BufferLayout.BufferType.TYPE; -import static org.apache.arrow.vector.BufferLayout.BufferType.VALIDITY; -import static org.apache.arrow.vector.BufferLayout.BufferType.VARIADIC_DATA_BUFFERS; -import static org.apache.arrow.vector.BufferLayout.BufferType.VIEWS; - -import com.fasterxml.jackson.core.JsonParseException; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.JsonParser.Feature; -import com.fasterxml.jackson.core.JsonToken; -import com.fasterxml.jackson.databind.MapperFeature; -import com.fasterxml.jackson.databind.MappingJsonFactory; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.io.File; -import java.io.IOException; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BaseVariableWidthViewVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.BufferLayout.BufferType; -import org.apache.arrow.vector.Decimal256Vector; -import 
org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.TypeLayout; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.LargeListView; -import org.apache.arrow.vector.types.pojo.ArrowType.ListView; -import org.apache.arrow.vector.types.pojo.ArrowType.Union; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.DecimalUtility; -import org.apache.arrow.vector.util.DictionaryUtility; -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.binary.Hex; - -/** - * A reader for JSON files that translates them into vectors. This reader is used for integration - * tests. - * - *

    This class uses a streaming parser API, method naming tends to reflect this implementation - * detail. - */ -public class JsonFileReader implements AutoCloseable, DictionaryProvider { - private final JsonParser parser; - private final BufferAllocator allocator; - private Schema schema; - private Map dictionaries; - private Boolean started = false; - - /** - * Constructs a new instance. - * - * @param inputFile The file to read. - * @param allocator The allocator to use for allocating buffers. - */ - public JsonFileReader(File inputFile, BufferAllocator allocator) - throws JsonParseException, IOException { - super(); - this.allocator = allocator; - MappingJsonFactory jsonFactory = - new MappingJsonFactory( - new ObjectMapper() - // ignore case for enums - .configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS, true)); - this.parser = jsonFactory.createParser(inputFile); - // Allow reading NaN for floating point values - this.parser.configure(Feature.ALLOW_NON_NUMERIC_NUMBERS, true); - } - - @Override - public Dictionary lookup(long id) { - if (!started) { - throw new IllegalStateException("Unable to lookup until after read() has started"); - } - - return dictionaries.get(id); - } - - @Override - public Set getDictionaryIds() { - return dictionaries.keySet(); - } - - /** Reads the beginning (schema section) of the json file and returns it. 
*/ - public Schema start() throws JsonParseException, IOException { - readToken(START_OBJECT); - { - Schema originalSchema = readNextField("schema", Schema.class); - List fields = new ArrayList<>(); - dictionaries = new HashMap<>(); - - // Convert fields with dictionaries to have the index type - for (Field field : originalSchema.getFields()) { - fields.add(DictionaryUtility.toMemoryFormat(field, allocator, dictionaries)); - } - this.schema = new Schema(fields, originalSchema.getCustomMetadata()); - - if (!dictionaries.isEmpty()) { - nextFieldIs("dictionaries"); - readDictionaryBatches(); - } - - nextFieldIs("batches"); - readToken(START_ARRAY); - started = true; - return this.schema; - } - } - - private void readDictionaryBatches() throws JsonParseException, IOException { - readToken(START_ARRAY); - JsonToken token = parser.nextToken(); - boolean haveDictionaryBatch = token == START_OBJECT; - while (haveDictionaryBatch) { - - // Lookup what dictionary for the batch about to be read - long id = readNextField("id", Long.class); - Dictionary dict = dictionaries.get(id); - if (dict == null) { - throw new IllegalArgumentException( - "Dictionary with id: " + id + " missing encoding from schema Field"); - } - - // Read the dictionary record batch - nextFieldIs("data"); - FieldVector vector = dict.getVector(); - List fields = Collections.singletonList(vector.getField()); - List vectors = Collections.singletonList(vector); - VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector.getValueCount()); - read(root); - - readToken(END_OBJECT); - token = parser.nextToken(); - haveDictionaryBatch = token == START_OBJECT; - } - - if (token != END_ARRAY) { - throw new IllegalArgumentException( - "Invalid token: " + token + " expected end of array at " + parser.getTokenLocation()); - } - } - - /** Reads the next record batch from the file into root. 
*/ - public boolean read(VectorSchemaRoot root) throws IOException { - JsonToken t = parser.nextToken(); - if (t == START_OBJECT) { - { - int count = readNextField("count", Integer.class); - nextFieldIs("columns"); - readToken(START_ARRAY); - { - for (Field field : root.getSchema().getFields()) { - FieldVector vector = root.getVector(field); - readFromJsonIntoVector(field, vector); - } - } - readToken(END_ARRAY); - root.setRowCount(count); - } - readToken(END_OBJECT); - return true; - } else if (t == END_ARRAY) { - root.setRowCount(0); - return false; - } else { - throw new IllegalArgumentException("Invalid token: " + t); - } - } - - /** Returns the next record batch from the file. */ - public VectorSchemaRoot read() throws IOException { - JsonToken t = parser.nextToken(); - if (t == START_OBJECT) { - VectorSchemaRoot recordBatch = VectorSchemaRoot.create(schema, allocator); - { - int count = readNextField("count", Integer.class); - recordBatch.setRowCount(count); - nextFieldIs("columns"); - readToken(START_ARRAY); - { - for (Field field : schema.getFields()) { - FieldVector vector = recordBatch.getVector(field); - readFromJsonIntoVector(field, vector); - } - } - readToken(END_ARRAY); - } - readToken(END_OBJECT); - return recordBatch; - } else if (t == END_ARRAY) { - return null; - } else { - throw new IllegalArgumentException("Invalid token: " + t); - } - } - - /** - * Skips a number of record batches in the file. - * - * @param numBatches the number of batches to skip - * @return the actual number of skipped batches. - */ - // This is currently called using JPype by the integration tests. 
- public int skip(int numBatches) throws IOException { - for (int i = 0; i < numBatches; ++i) { - JsonToken t = parser.nextToken(); - if (t == START_OBJECT) { - parser.skipChildren(); - assert parser.getCurrentToken() == END_OBJECT; - } else if (t == END_ARRAY) { - return i; - } else { - throw new IllegalArgumentException("Invalid token: " + t); - } - } - return numBatches; - } - - private abstract class BufferReader { - protected abstract ArrowBuf read(BufferAllocator allocator, int count) throws IOException; - - ArrowBuf readBuffer(BufferAllocator allocator, int count) throws IOException { - readToken(START_ARRAY); - ArrowBuf buf = read(allocator, count); - readToken(END_ARRAY); - return buf; - } - } - - /** - * Read all the variadic data buffers from the parser. - * - * @param allocator BufferAllocator - * @param variadicBuffersCount Number of variadic buffers - * @return List of ArrowBuf - * @throws IOException throws IOException in a failure - */ - List readVariadicBuffers(BufferAllocator allocator, int variadicBuffersCount) - throws IOException { - readToken(START_ARRAY); - ArrayList dataBuffers = new ArrayList<>(variadicBuffersCount); - for (int i = 0; i < variadicBuffersCount; i++) { - parser.nextToken(); - final byte[] value; - - String variadicStr = parser.readValueAs(String.class); - if (variadicStr == null) { - value = new byte[0]; - } else { - value = decodeHexSafe(variadicStr); - } - - ArrowBuf buf = allocator.buffer(value.length); - buf.writeBytes(value); - dataBuffers.add(buf); - } - readToken(END_ARRAY); - return dataBuffers; - } - - private ArrowBuf readViewBuffers( - BufferAllocator allocator, int count, List variadicBufferIndices, MinorType type) - throws IOException { - readToken(START_ARRAY); - ArrayList values = new ArrayList<>(count); - long bufferSize = 0L; - for (int i = 0; i < count; i++) { - readToken(START_OBJECT); - final int length = readNextField("SIZE", Integer.class); - byte[] value; - if (length > 
BaseVariableWidthViewVector.INLINE_SIZE) { - // PREFIX_HEX - final byte[] prefix = decodeHexSafe(readNextField("PREFIX_HEX", String.class)); - // BUFFER_INDEX - final int bufferIndex = readNextField("BUFFER_INDEX", Integer.class); - if (variadicBufferIndices.isEmpty()) { - variadicBufferIndices.add(bufferIndex); - } else { - int lastBufferIndex = variadicBufferIndices.get(variadicBufferIndices.size() - 1); - if (lastBufferIndex != bufferIndex) { - variadicBufferIndices.add(bufferIndex); - } - } - - // OFFSET - final int offset = readNextField("OFFSET", Integer.class); - ByteBuffer buffer = - ByteBuffer.allocate(BaseVariableWidthViewVector.ELEMENT_SIZE) - .order(ByteOrder.LITTLE_ENDIAN); // Allocate a ByteBuffer of size 16 bytes - buffer.putInt(length); // Write 'length' to bytes 0-3 - buffer.put(prefix); // Write 'prefix' to bytes 4-7 - buffer.putInt(bufferIndex); // Write 'bufferIndex' to bytes 8-11 - buffer.putInt(offset); // Write 'offset' to bytes 12-15 - value = buffer.array(); // Convert the ByteBuffer to a byte array - } else { - // in-line - ByteBuffer buffer = - ByteBuffer.allocate(BaseVariableWidthViewVector.ELEMENT_SIZE) - .order(ByteOrder.LITTLE_ENDIAN); // Allocate a ByteBuffer of size 16 bytes - buffer.putInt(length); // Write 'length' to bytes 0-3 - // INLINE - if (type == MinorType.VIEWVARCHAR) { - buffer.put(readNextField("INLINED", String.class).getBytes(StandardCharsets.UTF_8)); - } else { - String inlined = readNextField("INLINED", String.class); - if (inlined == null) { - buffer.put(new byte[length]); - } else { - buffer.put(decodeHexSafe(inlined)); - } - } - value = buffer.array(); // Convert the ByteBuffer to a byte array - } - values.add(value); - bufferSize += value.length; - readToken(END_OBJECT); - } - - ArrowBuf buf = allocator.buffer(bufferSize); - - for (byte[] value : values) { - buf.writeBytes(value); - } - readToken(END_ARRAY); - return buf; - } - - private class BufferHelper { - - BufferReader BIT = - new BufferReader() { - 
@Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - final int bufferSize = BitVectorHelper.getValidityBufferSize(count); - ArrowBuf buf = allocator.buffer(bufferSize); - - // C++ integration test fails without this. - buf.setZero(0, bufferSize); - - for (int i = 0; i < count; i++) { - parser.nextToken(); - BitVectorHelper.setValidityBit(buf, i, parser.readValueAs(Boolean.class) ? 1 : 0); - } - - buf.writerIndex(bufferSize); - return buf; - } - }; - - BufferReader DAY_MILLIS = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - final long size = (long) count * IntervalDayVector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - readToken(START_OBJECT); - buf.writeInt(readNextField("days", Integer.class)); - buf.writeInt(readNextField("milliseconds", Integer.class)); - readToken(END_OBJECT); - } - - return buf; - } - }; - - BufferReader MONTH_DAY_NANOS = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - final long size = (long) count * IntervalMonthDayNanoVector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - readToken(START_OBJECT); - buf.writeInt(readNextField("months", Integer.class)); - buf.writeInt(readNextField("days", Integer.class)); - buf.writeLong(readNextField("nanoseconds", Long.class)); - readToken(END_OBJECT); - } - - return buf; - } - }; - - BufferReader INT1 = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - final long size = (long) count * TinyIntVector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - parser.nextToken(); - buf.writeByte(parser.getByteValue()); - } - - return buf; - } - }; - - BufferReader INT2 = - new BufferReader() { - @Override - protected ArrowBuf 
read(BufferAllocator allocator, int count) throws IOException { - final long size = (long) count * SmallIntVector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - parser.nextToken(); - buf.writeShort(parser.getShortValue()); - } - - return buf; - } - }; - - BufferReader INT4 = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - final long size = (long) count * IntVector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - parser.nextToken(); - buf.writeInt(parser.getIntValue()); - } - - return buf; - } - }; - - BufferReader INT8 = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - final long size = (long) count * BigIntVector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - parser.nextToken(); - String value = parser.getValueAsString(); - buf.writeLong(Long.valueOf(value)); - } - - return buf; - } - }; - - BufferReader UINT1 = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - final long size = (long) count * TinyIntVector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - parser.nextToken(); - buf.writeByte(parser.getShortValue() & 0xFF); - } - - return buf; - } - }; - - BufferReader UINT2 = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - final long size = (long) count * SmallIntVector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - parser.nextToken(); - buf.writeShort(parser.getIntValue() & 0xFFFF); - } - - return buf; - } - }; - - BufferReader UINT4 = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - 
final long size = (long) count * IntVector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - parser.nextToken(); - buf.writeInt((int) parser.getLongValue()); - } - - return buf; - } - }; - - BufferReader UINT8 = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - final long size = (long) count * BigIntVector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - parser.nextToken(); - BigInteger value = new BigInteger(parser.getValueAsString()); - buf.writeLong(value.longValue()); - } - - return buf; - } - }; - - BufferReader FLOAT4 = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - final long size = (long) count * Float4Vector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - parser.nextToken(); - buf.writeFloat(parser.getFloatValue()); - } - - return buf; - } - }; - - BufferReader FLOAT8 = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - final long size = (long) count * Float8Vector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - parser.nextToken(); - buf.writeDouble(parser.getDoubleValue()); - } - - return buf; - } - }; - - BufferReader DECIMAL = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - final long size = (long) count * DecimalVector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - parser.nextToken(); - BigDecimal decimalValue = new BigDecimal(parser.readValueAs(String.class)); - DecimalUtility.writeBigDecimalToArrowBuf( - decimalValue, buf, i, DecimalVector.TYPE_WIDTH); - } - - buf.writerIndex(size); - return buf; - } - }; - - BufferReader DECIMAL256 = - new 
BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - final long size = (long) count * Decimal256Vector.TYPE_WIDTH; - ArrowBuf buf = allocator.buffer(size); - - for (int i = 0; i < count; i++) { - parser.nextToken(); - BigDecimal decimalValue = new BigDecimal(parser.readValueAs(String.class)); - DecimalUtility.writeBigDecimalToArrowBuf( - decimalValue, buf, i, Decimal256Vector.TYPE_WIDTH); - } - - buf.writerIndex(size); - return buf; - } - }; - - ArrowBuf readBinaryValues(BufferAllocator allocator, int count) throws IOException { - ArrayList values = new ArrayList<>(count); - long bufferSize = 0L; - for (int i = 0; i < count; i++) { - parser.nextToken(); - final byte[] value = decodeHexSafe(parser.readValueAs(String.class)); - values.add(value); - bufferSize += value.length; - } - - ArrowBuf buf = allocator.buffer(bufferSize); - - for (byte[] value : values) { - buf.writeBytes(value); - } - - return buf; - } - - ArrowBuf readStringValues(BufferAllocator allocator, int count) throws IOException { - ArrayList values = new ArrayList<>(count); - long bufferSize = 0L; - for (int i = 0; i < count; i++) { - parser.nextToken(); - final byte[] value = parser.getValueAsString().getBytes(StandardCharsets.UTF_8); - values.add(value); - bufferSize += value.length; - } - - ArrowBuf buf = allocator.buffer(bufferSize); - - for (byte[] value : values) { - buf.writeBytes(value); - } - - return buf; - } - - BufferReader FIXEDSIZEBINARY = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - return readBinaryValues(allocator, count); - } - }; - - BufferReader VARCHAR = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - return readStringValues(allocator, count); - } - }; - - BufferReader LARGEVARCHAR = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, 
int count) throws IOException { - return readStringValues(allocator, count); - } - }; - - BufferReader VARBINARY = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - return readBinaryValues(allocator, count); - } - }; - - BufferReader LARGEVARBINARY = - new BufferReader() { - @Override - protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { - return readBinaryValues(allocator, count); - } - }; - } - - private List readIntoBuffer( - BufferAllocator allocator, - BufferType bufferType, - MinorType type, - int count, - List variadicBufferIndices) - throws IOException { - ArrowBuf buf; - - BufferHelper helper = new BufferHelper(); - BufferReader reader; - - if (bufferType.equals(VALIDITY)) { - reader = helper.BIT; - } else if (bufferType.equals(OFFSET) || bufferType.equals(SIZE)) { - if (type == MinorType.LARGELIST - || type == MinorType.LARGEVARCHAR - || type == MinorType.LARGEVARBINARY - || type == MinorType.LARGELISTVIEW) { - reader = helper.INT8; - } else { - reader = helper.INT4; - } - } else if (bufferType.equals(TYPE)) { - reader = helper.INT1; - } else if (bufferType.equals(DATA)) { - switch (type) { - case BIT: - reader = helper.BIT; - break; - case TINYINT: - reader = helper.INT1; - break; - case SMALLINT: - reader = helper.INT2; - break; - case INT: - reader = helper.INT4; - break; - case BIGINT: - reader = helper.INT8; - break; - case UINT1: - reader = helper.UINT1; - break; - case UINT2: - reader = helper.UINT2; - break; - case UINT4: - reader = helper.UINT4; - break; - case UINT8: - reader = helper.UINT8; - break; - case FLOAT4: - reader = helper.FLOAT4; - break; - case FLOAT8: - reader = helper.FLOAT8; - break; - case DECIMAL: - reader = helper.DECIMAL; - break; - case DECIMAL256: - reader = helper.DECIMAL256; - break; - case FIXEDSIZEBINARY: - reader = helper.FIXEDSIZEBINARY; - break; - case VARCHAR: - reader = helper.VARCHAR; - break; - case LARGEVARCHAR: 
- reader = helper.LARGEVARCHAR; - break; - case VARBINARY: - reader = helper.VARBINARY; - break; - case LARGEVARBINARY: - reader = helper.LARGEVARBINARY; - break; - case DATEDAY: - reader = helper.INT4; - break; - case DATEMILLI: - reader = helper.INT8; - break; - case TIMESEC: - case TIMEMILLI: - reader = helper.INT4; - break; - case TIMEMICRO: - case TIMENANO: - reader = helper.INT8; - break; - case TIMESTAMPNANO: - case TIMESTAMPMICRO: - case TIMESTAMPMILLI: - case TIMESTAMPSEC: - case TIMESTAMPNANOTZ: - case TIMESTAMPMICROTZ: - case TIMESTAMPMILLITZ: - case TIMESTAMPSECTZ: - reader = helper.INT8; - break; - case INTERVALYEAR: - reader = helper.INT4; - break; - case INTERVALDAY: - reader = helper.DAY_MILLIS; - break; - case INTERVALMONTHDAYNANO: - reader = helper.MONTH_DAY_NANOS; - break; - case DURATION: - reader = helper.INT8; - break; - default: - throw new UnsupportedOperationException("Cannot read array of type " + type); - } - } else if (bufferType.equals(VIEWS)) { - return Collections.singletonList( - readViewBuffers(allocator, count, variadicBufferIndices, type)); - } else if (bufferType.equals(VARIADIC_DATA_BUFFERS)) { - return readVariadicBuffers(allocator, variadicBufferIndices.size()); - } else { - throw new InvalidArrowFileException("Unrecognized buffer type " + bufferType); - } - - buf = reader.readBuffer(allocator, count); - Preconditions.checkNotNull(buf); - return Collections.singletonList(buf); - } - - private void readFromJsonIntoVector(Field field, FieldVector vector) throws IOException { - ArrowType type = field.getType(); - TypeLayout typeLayout = TypeLayout.getTypeLayout(type); - List vectorTypes = typeLayout.getBufferTypes(); - List vectorBuffers = new ArrayList<>(vectorTypes.size()); - List variadicBufferIndices = new ArrayList<>(); - - if (!typeLayout.isFixedBufferCount()) { - vectorTypes.add(VARIADIC_DATA_BUFFERS); - } - /* - * The order of inner buffers is: - * Fixed width vector: - * -- validity buffer - * -- data buffer - * Variable 
width vector: - * -- validity buffer - * -- offset buffer - * -- data buffer - * - * This is similar to what getFieldInnerVectors() used to give but now that we don't have - * inner vectors anymore, we will work directly at the buffer level -- populate buffers - * locally as we read from Json parser and do loadFieldBuffers on the vector followed by - * releasing the local buffers. - */ - readToken(START_OBJECT); - { - // If currently reading dictionaries, field name is not important so don't check - String name = readNextField("name", String.class); - if (started && !Objects.equals(field.getName(), name)) { - throw new IllegalArgumentException( - "Expected field " + field.getName() + " but got " + name); - } - - /* Initialize the vector with required capacity but don't allocateNew since we would - * be doing loadFieldBuffers. - */ - int valueCount = readNextField("count", Integer.class); - - vector.setInitialCapacity(valueCount); - - for (int v = 0; v < vectorTypes.size(); v++) { - BufferType bufferType = vectorTypes.get(v); - nextFieldIs(bufferType.getName()); - int innerBufferValueCount = valueCount; - if (bufferType.equals(OFFSET) - && !(type instanceof Union) - && !(type instanceof ListView) - && !(type instanceof LargeListView)) { - /* offset buffer has 1 additional value capacity except for dense unions and ListView */ - innerBufferValueCount = valueCount + 1; - } - - vectorBuffers.addAll( - readIntoBuffer( - allocator, - bufferType, - vector.getMinorType(), - innerBufferValueCount, - variadicBufferIndices)); - } - - int nullCount; - if (type instanceof ArrowType.RunEndEncoded || type instanceof Union) { - nullCount = 0; - } else if (type instanceof ArrowType.Null) { - nullCount = valueCount; - } else { - nullCount = BitVectorHelper.getNullCount(vectorBuffers.get(0), valueCount); - } - final ArrowFieldNode fieldNode = new ArrowFieldNode(valueCount, nullCount); - vector.loadFieldBuffers(fieldNode, vectorBuffers); - - /* read child vectors (if any) */ - List 
fields = field.getChildren(); - if (!fields.isEmpty()) { - List vectorChildren = vector.getChildrenFromFields(); - if (fields.size() != vectorChildren.size()) { - throw new IllegalArgumentException( - "fields and children are not the same size: " - + fields.size() - + " != " - + vectorChildren.size()); - } - nextFieldIs("children"); - readToken(START_ARRAY); - for (int i = 0; i < fields.size(); i++) { - Field childField = fields.get(i); - FieldVector childVector = vectorChildren.get(i); - readFromJsonIntoVector(childField, childVector); - } - readToken(END_ARRAY); - } - } - - readToken(END_OBJECT); - - for (ArrowBuf buffer : vectorBuffers) { - buffer.getReferenceManager().release(); - } - } - - private byte[] decodeHexSafe(String hexString) throws IOException { - try { - return Hex.decodeHex(hexString.toCharArray()); - } catch (DecoderException e) { - throw new IOException("Unable to decode hex string: " + hexString, e); - } - } - - @Override - public void close() throws IOException { - parser.close(); - if (dictionaries != null) { - for (Dictionary dictionary : dictionaries.values()) { - dictionary.getVector().close(); - } - } - } - - private T readNextField(String expectedFieldName, Class c) - throws IOException, JsonParseException { - nextFieldIs(expectedFieldName); - parser.nextToken(); - return parser.readValueAs(c); - } - - private void nextFieldIs(String expectedFieldName) throws IOException, JsonParseException { - String name = parser.nextFieldName(); - if (name == null || !name.equals(expectedFieldName)) { - throw new IllegalStateException("Expected " + expectedFieldName + " but got " + name); - } - } - - private void readToken(JsonToken expected) throws JsonParseException, IOException { - JsonToken t = parser.nextToken(); - if (t != expected) { - throw new IllegalStateException("Expected " + expected + " but got " + t); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java 
b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java deleted file mode 100644 index 68700fe6afd25..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java +++ /dev/null @@ -1,607 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import static org.apache.arrow.vector.BufferLayout.BufferType.*; - -import com.fasterxml.jackson.core.JsonEncoding; -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.util.DefaultPrettyPrinter; -import com.fasterxml.jackson.core.util.DefaultPrettyPrinter.NopIndenter; -import com.fasterxml.jackson.databind.MappingJsonFactory; -import java.io.File; -import java.io.IOException; -import java.math.BigDecimal; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BaseLargeVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthViewVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.BufferLayout.BufferType; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import 
org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.TypeLayout; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector; -import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.DecimalUtility; -import org.apache.arrow.vector.util.DictionaryUtility; -import org.apache.commons.codec.binary.Hex; - -/** - * A writer that converts binary Vectors into an internal, unstable JSON format suitable - * for integration testing. - * - *

    This writer does NOT implement a JSON dataset format like JSONL. - */ -public class JsonFileWriter implements AutoCloseable { - - /** Configuration POJO for writing JSON files. */ - public static final class JSONWriteConfig { - private final boolean pretty; - - private JSONWriteConfig(boolean pretty) { - this.pretty = pretty; - } - - private JSONWriteConfig() { - this.pretty = false; - } - - public JSONWriteConfig pretty(boolean pretty) { - return new JSONWriteConfig(pretty); - } - } - - public static JSONWriteConfig config() { - return new JSONWriteConfig(); - } - - private final JsonGenerator generator; - private Schema schema; - - /** Constructs a new writer that will output to outputFile. */ - public JsonFileWriter(File outputFile) throws IOException { - this(outputFile, config()); - } - - /** Constructs a new writer that will output to outputFile with the given options. */ - public JsonFileWriter(File outputFile, JSONWriteConfig config) throws IOException { - MappingJsonFactory jsonFactory = new MappingJsonFactory(); - this.generator = jsonFactory.createGenerator(outputFile, JsonEncoding.UTF8); - if (config.pretty) { - DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter(); - prettyPrinter.indentArraysWith(NopIndenter.instance); - this.generator.setPrettyPrinter(prettyPrinter); - } - // Allow writing of floating point NaN values not as strings - this.generator.configure(JsonGenerator.Feature.QUOTE_NON_NUMERIC_NUMBERS, false); - } - - /** Writes out the "header" of the file including the schema and any dictionaries required. 
*/ - public void start(Schema schema, DictionaryProvider provider) throws IOException { - List fields = new ArrayList<>(schema.getFields().size()); - Set dictionaryIdsUsed = new HashSet<>(); - this.schema = schema; // Store original Schema to ensure batches written match - - // Convert fields with dictionaries to have dictionary type - for (Field field : schema.getFields()) { - fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIdsUsed)); - } - Schema updatedSchema = new Schema(fields, schema.getCustomMetadata()); - - generator.writeStartObject(); - generator.writeObjectField("schema", updatedSchema); - - // Write all dictionaries that were used - if (!dictionaryIdsUsed.isEmpty()) { - writeDictionaryBatches(generator, dictionaryIdsUsed, provider); - } - - // Start writing of record batches - generator.writeArrayFieldStart("batches"); - } - - private void writeDictionaryBatches( - JsonGenerator generator, Set dictionaryIdsUsed, DictionaryProvider provider) - throws IOException { - generator.writeArrayFieldStart("dictionaries"); - for (Long id : dictionaryIdsUsed) { - generator.writeStartObject(); - generator.writeObjectField("id", id); - - generator.writeFieldName("data"); - Dictionary dictionary = provider.lookup(id); - FieldVector vector = dictionary.getVector(); - List fields = Collections.singletonList(vector.getField()); - List vectors = Collections.singletonList(vector); - VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector.getValueCount()); - writeBatch(root); - - generator.writeEndObject(); - } - generator.writeEndArray(); - } - - /** Writes the record batch to the JSON file. 
*/ - public void write(VectorSchemaRoot recordBatch) throws IOException { - if (!recordBatch.getSchema().equals(schema)) { - throw new IllegalArgumentException("record batches must have the same schema: " + schema); - } - writeBatch(recordBatch); - } - - private void writeBatch(VectorSchemaRoot recordBatch) throws IOException { - generator.writeStartObject(); - { - generator.writeObjectField("count", recordBatch.getRowCount()); - generator.writeArrayFieldStart("columns"); - for (Field field : recordBatch.getSchema().getFields()) { - FieldVector vector = recordBatch.getVector(field); - writeFromVectorIntoJson(field, vector); - } - generator.writeEndArray(); - } - generator.writeEndObject(); - } - - private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOException { - TypeLayout typeLayout = TypeLayout.getTypeLayout(field.getType()); - List vectorTypes = typeLayout.getBufferTypes(); - List vectorBuffers = vector.getFieldBuffers(); - - if (typeLayout.isFixedBufferCount()) { - if (vectorTypes.size() != vectorBuffers.size()) { - throw new IllegalArgumentException( - "vector types and inner vector buffers are not the same size: " - + vectorTypes.size() - + " != " - + vectorBuffers.size()); - } - } else { - vectorTypes.add(VARIADIC_DATA_BUFFERS); - } - - generator.writeStartObject(); - { - generator.writeObjectField("name", field.getName()); - int valueCount = vector.getValueCount(); - generator.writeObjectField("count", valueCount); - - for (int v = 0; v < vectorTypes.size(); v++) { - BufferType bufferType = vectorTypes.get(v); - ArrowBuf vectorBuffer = vectorBuffers.get(v); - // Note that in JSON format we cannot have VARIADIC_DATA_BUFFERS repeated, - // thus the values are only written to a single entity. 
- generator.writeArrayFieldStart(bufferType.getName()); - final int bufferValueCount = - (bufferType.equals(OFFSET) - && vector.getMinorType() != MinorType.DENSEUNION - && vector.getMinorType() != MinorType.LISTVIEW - && vector.getMinorType() != MinorType.LARGELISTVIEW) - ? valueCount + 1 - : valueCount; - for (int i = 0; i < bufferValueCount; i++) { - if (bufferType.equals(DATA) - && (vector.getMinorType() == MinorType.VARCHAR - || vector.getMinorType() == MinorType.VARBINARY)) { - writeValueToGenerator(bufferType, vectorBuffer, vectorBuffers.get(v - 1), vector, i); - } else if (bufferType.equals(VIEWS) - && (vector.getMinorType() == MinorType.VIEWVARCHAR - || vector.getMinorType() == MinorType.VIEWVARBINARY)) { - // writing views - ArrowBuf viewBuffer = vectorBuffers.get(1); - List dataBuffers = vectorBuffers.subList(v + 1, vectorBuffers.size()); - writeValueToViewGenerator(bufferType, viewBuffer, dataBuffers, vector, i); - } else if (bufferType.equals(VARIADIC_DATA_BUFFERS) - && (vector.getMinorType() == MinorType.VIEWVARCHAR - || vector.getMinorType() == MinorType.VIEWVARBINARY)) { - ArrowBuf viewBuffer = vectorBuffers.get(1); // check if this is v-1 - List dataBuffers = vectorBuffers.subList(v, vectorBuffers.size()); - if (!dataBuffers.isEmpty()) { - writeValueToDataBufferGenerator(bufferType, viewBuffer, dataBuffers, vector); - // The variadic buffers are written at once and doesn't require iterating for - // each index. - // So, break the loop. 
- break; - } - } else if (bufferType.equals(OFFSET) - && vector.getValueCount() == 0 - && (vector.getMinorType() == MinorType.LIST - || vector.getMinorType() == MinorType.LISTVIEW - || vector.getMinorType() == MinorType.MAP - || vector.getMinorType() == MinorType.VARBINARY - || vector.getMinorType() == MinorType.VARCHAR)) { - // Empty vectors may not have allocated an offsets buffer - try (ArrowBuf vectorBufferTmp = vector.getAllocator().buffer(4)) { - vectorBufferTmp.setInt(0, 0); - writeValueToGenerator(bufferType, vectorBufferTmp, null, vector, i); - } - } else if (bufferType.equals(OFFSET) - && vector.getValueCount() == 0 - && (vector.getMinorType() == MinorType.LARGELIST - || vector.getMinorType() == MinorType.LARGELISTVIEW - || vector.getMinorType() == MinorType.LARGEVARBINARY - || vector.getMinorType() == MinorType.LARGEVARCHAR)) { - // Empty vectors may not have allocated an offsets buffer - try (ArrowBuf vectorBufferTmp = vector.getAllocator().buffer(8)) { - vectorBufferTmp.setLong(0, 0); - writeValueToGenerator(bufferType, vectorBufferTmp, null, vector, i); - } - } else { - writeValueToGenerator(bufferType, vectorBuffer, null, vector, i); - } - } - generator.writeEndArray(); - } - - List fields = field.getChildren(); - List children = vector.getChildrenFromFields(); - if (fields.size() != children.size()) { - throw new IllegalArgumentException( - "fields and children are not the same size: " - + fields.size() - + " != " - + children.size()); - } - if (fields.size() > 0) { - generator.writeArrayFieldStart("children"); - for (int i = 0; i < fields.size(); i++) { - Field childField = fields.get(i); - FieldVector childVector = children.get(i); - writeFromVectorIntoJson(childField, childVector); - } - generator.writeEndArray(); - } - } - generator.writeEndObject(); - } - - /** - * Get data of a view by index. 
- * - * @param viewBuffer view buffer - * @param dataBuffers data buffers - * @param index index of the view - * @return byte array of the view - */ - private byte[] getView(final ArrowBuf viewBuffer, final List dataBuffers, int index) { - final int dataLength = - viewBuffer.getInt((long) index * BaseVariableWidthViewVector.ELEMENT_SIZE); - byte[] result = new byte[dataLength]; - - final int inlineSize = BaseVariableWidthViewVector.INLINE_SIZE; - final int elementSize = BaseVariableWidthViewVector.ELEMENT_SIZE; - final int lengthWidth = BaseVariableWidthViewVector.LENGTH_WIDTH; - final int prefixWidth = BaseVariableWidthViewVector.PREFIX_WIDTH; - final int bufIndexWidth = BaseVariableWidthViewVector.BUF_INDEX_WIDTH; - - if (dataLength > inlineSize) { - // data is in the data buffer - // get buffer index - final int bufferIndex = - viewBuffer.getInt(((long) index * elementSize) + lengthWidth + prefixWidth); - // get data offset - final int dataOffset = - viewBuffer.getInt( - ((long) index * elementSize) + lengthWidth + prefixWidth + bufIndexWidth); - dataBuffers.get(bufferIndex).getBytes(dataOffset, result, 0, dataLength); - } else { - // data is in the view buffer - viewBuffer.getBytes((long) index * elementSize + lengthWidth, result, 0, dataLength); - } - return result; - } - - private void writeValueToViewGenerator( - BufferType bufferType, - ArrowBuf viewBuffer, - List dataBuffers, - FieldVector vector, - final int index) - throws IOException { - Preconditions.checkNotNull(viewBuffer); - byte[] b = getView(viewBuffer, dataBuffers, index); - final int elementSize = BaseVariableWidthViewVector.ELEMENT_SIZE; - final int lengthWidth = BaseVariableWidthViewVector.LENGTH_WIDTH; - final int prefixWidth = BaseVariableWidthViewVector.PREFIX_WIDTH; - final int bufIndexWidth = BaseVariableWidthViewVector.BUF_INDEX_WIDTH; - final int length = viewBuffer.getInt((long) index * elementSize); - generator.writeStartObject(); - generator.writeFieldName("SIZE"); - 
generator.writeObject(length); - if (length > 12) { - byte[] prefix = Arrays.copyOfRange(b, 0, prefixWidth); - final int bufferIndex = - viewBuffer.getInt(((long) index * elementSize) + lengthWidth + prefixWidth); - // get data offset - final int dataOffset = - viewBuffer.getInt( - ((long) index * elementSize) + lengthWidth + prefixWidth + bufIndexWidth); - generator.writeFieldName("PREFIX_HEX"); - generator.writeString(Hex.encodeHexString(prefix)); - generator.writeFieldName("BUFFER_INDEX"); - generator.writeObject(bufferIndex); - generator.writeFieldName("OFFSET"); - generator.writeObject(dataOffset); - } else { - generator.writeFieldName("INLINED"); - if (vector.getMinorType() == MinorType.VIEWVARCHAR) { - generator.writeString(new String(b, "UTF-8")); - } else { - generator.writeString(Hex.encodeHexString(b)); - } - } - generator.writeEndObject(); - } - - private void writeValueToDataBufferGenerator( - BufferType bufferType, ArrowBuf viewBuffer, List dataBuffers, FieldVector vector) - throws IOException { - if (bufferType.equals(VARIADIC_DATA_BUFFERS)) { - Preconditions.checkNotNull(viewBuffer); - Preconditions.checkArgument(!dataBuffers.isEmpty()); - - for (int i = 0; i < dataBuffers.size(); i++) { - ArrowBuf dataBuf = dataBuffers.get(i); - byte[] result = new byte[(int) dataBuf.writerIndex()]; - dataBuf.getBytes(0, result); - if (result != null) { - generator.writeString(Hex.encodeHexString(result)); - } - } - } - } - - private void writeValueToGenerator( - BufferType bufferType, - ArrowBuf buffer, - ArrowBuf offsetBuffer, - FieldVector vector, - final int index) - throws IOException { - if (bufferType.equals(TYPE)) { - generator.writeNumber(buffer.getByte(index * TinyIntVector.TYPE_WIDTH)); - } else if (bufferType.equals(OFFSET)) { - switch (vector.getMinorType()) { - case VARCHAR: - case VARBINARY: - case LIST: - case MAP: - generator.writeNumber(buffer.getInt((long) index * BaseVariableWidthVector.OFFSET_WIDTH)); - break; - case LISTVIEW: - 
generator.writeNumber( - buffer.getInt((long) index * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - break; - case LARGELISTVIEW: - generator.writeNumber( - buffer.getInt((long) index * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - break; - case LARGELIST: - case LARGEVARBINARY: - case LARGEVARCHAR: - generator.writeNumber( - buffer.getLong((long) index * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - break; - default: - throw new IllegalArgumentException("Type has no offset buffer: " + vector.getField()); - } - } else if (bufferType.equals(VALIDITY)) { - generator.writeNumber(vector.isNull(index) ? 0 : 1); - } else if (bufferType.equals(DATA)) { - switch (vector.getMinorType()) { - case TINYINT: - generator.writeNumber(TinyIntVector.get(buffer, index)); - break; - case SMALLINT: - generator.writeNumber(SmallIntVector.get(buffer, index)); - break; - case INT: - generator.writeNumber(IntVector.get(buffer, index)); - break; - case BIGINT: - generator.writeString(String.valueOf(BigIntVector.get(buffer, index))); - break; - case UINT1: - generator.writeNumber(UInt1Vector.getNoOverflow(buffer, index)); - break; - case UINT2: - generator.writeNumber(UInt2Vector.get(buffer, index)); - break; - case UINT4: - generator.writeNumber(UInt4Vector.getNoOverflow(buffer, index)); - break; - case UINT8: - generator.writeString(UInt8Vector.getNoOverflow(buffer, index).toString()); - break; - case FLOAT4: - generator.writeNumber(Float4Vector.get(buffer, index)); - break; - case FLOAT8: - generator.writeNumber(Float8Vector.get(buffer, index)); - break; - case DATEDAY: - generator.writeNumber(DateDayVector.get(buffer, index)); - break; - case DATEMILLI: - generator.writeNumber(DateMilliVector.get(buffer, index)); - break; - case TIMESEC: - generator.writeNumber(TimeSecVector.get(buffer, index)); - break; - case TIMEMILLI: - generator.writeNumber(TimeMilliVector.get(buffer, index)); - break; - case TIMEMICRO: - generator.writeNumber(TimeMicroVector.get(buffer, index)); - break; - 
case TIMENANO: - generator.writeNumber(TimeNanoVector.get(buffer, index)); - break; - case TIMESTAMPSEC: - generator.writeNumber(TimeStampSecVector.get(buffer, index)); - break; - case TIMESTAMPMILLI: - generator.writeNumber(TimeStampMilliVector.get(buffer, index)); - break; - case TIMESTAMPMICRO: - generator.writeNumber(TimeStampMicroVector.get(buffer, index)); - break; - case TIMESTAMPNANO: - generator.writeNumber(TimeStampNanoVector.get(buffer, index)); - break; - case TIMESTAMPSECTZ: - generator.writeNumber(TimeStampSecTZVector.get(buffer, index)); - break; - case TIMESTAMPMILLITZ: - generator.writeNumber(TimeStampMilliTZVector.get(buffer, index)); - break; - case TIMESTAMPMICROTZ: - generator.writeNumber(TimeStampMicroTZVector.get(buffer, index)); - break; - case TIMESTAMPNANOTZ: - generator.writeNumber(TimeStampNanoTZVector.get(buffer, index)); - break; - case DURATION: - generator.writeNumber(DurationVector.get(buffer, index)); - break; - case INTERVALYEAR: - generator.writeNumber(IntervalYearVector.getTotalMonths(buffer, index)); - break; - case INTERVALDAY: - generator.writeStartObject(); - generator.writeObjectField("days", IntervalDayVector.getDays(buffer, index)); - generator.writeObjectField( - "milliseconds", IntervalDayVector.getMilliseconds(buffer, index)); - generator.writeEndObject(); - break; - case INTERVALMONTHDAYNANO: - generator.writeStartObject(); - generator.writeObjectField("months", IntervalMonthDayNanoVector.getMonths(buffer, index)); - generator.writeObjectField("days", IntervalMonthDayNanoVector.getDays(buffer, index)); - generator.writeObjectField( - "nanoseconds", IntervalMonthDayNanoVector.getNanoseconds(buffer, index)); - generator.writeEndObject(); - break; - case BIT: - generator.writeNumber(BitVectorHelper.get(buffer, index)); - break; - case VARBINARY: - { - Preconditions.checkNotNull(offsetBuffer); - String hexString = - Hex.encodeHexString(BaseVariableWidthVector.get(buffer, offsetBuffer, index)); - 
generator.writeObject(hexString); - break; - } - case FIXEDSIZEBINARY: - int byteWidth = ((FixedSizeBinaryVector) vector).getByteWidth(); - String fixedSizeHexString = - Hex.encodeHexString(FixedSizeBinaryVector.get(buffer, index, byteWidth)); - generator.writeObject(fixedSizeHexString); - break; - case VARCHAR: - { - Preconditions.checkNotNull(offsetBuffer); - byte[] b = (BaseVariableWidthVector.get(buffer, offsetBuffer, index)); - generator.writeString(new String(b, "UTF-8")); - break; - } - case DECIMAL: - { - int scale = ((DecimalVector) vector).getScale(); - BigDecimal decimalValue = - DecimalUtility.getBigDecimalFromArrowBuf( - buffer, index, scale, DecimalVector.TYPE_WIDTH); - // We write the unscaled value, because the scale is stored in the type metadata. - generator.writeString(decimalValue.unscaledValue().toString()); - break; - } - case DECIMAL256: - { - int scale = ((Decimal256Vector) vector).getScale(); - BigDecimal decimalValue = - DecimalUtility.getBigDecimalFromArrowBuf( - buffer, index, scale, Decimal256Vector.TYPE_WIDTH); - // We write the unscaled value, because the scale is stored in the type metadata. 
- generator.writeString(decimalValue.unscaledValue().toString()); - break; - } - - default: - throw new UnsupportedOperationException("minor type: " + vector.getMinorType()); - } - } else if (bufferType.equals(SIZE)) { - if (vector.getMinorType() == MinorType.LISTVIEW) { - generator.writeNumber(buffer.getInt((long) index * BaseRepeatedValueViewVector.SIZE_WIDTH)); - } else { - generator.writeNumber( - buffer.getInt((long) index * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - } - } - } - - @Override - public void close() throws IOException { - generator.writeEndArray(); - generator.writeEndObject(); - generator.close(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ReadChannel.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/ReadChannel.java deleted file mode 100644 index d2b5e70c4e64c..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/ReadChannel.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.ReadableByteChannel; -import org.apache.arrow.memory.ArrowBuf; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Adapter around {@link ReadableByteChannel} that reads into {@linkplain ArrowBuf}s. */ -public class ReadChannel implements AutoCloseable { - - private static final Logger LOGGER = LoggerFactory.getLogger(ReadChannel.class); - - private ReadableByteChannel in; - private long bytesRead = 0; - - public ReadChannel(ReadableByteChannel in) { - this.in = in; - } - - public long bytesRead() { - return bytesRead; - } - - /** - * Reads bytes into buffer until it is full (buffer.remaining() == 0). Returns the number of bytes - * read which can be less than full if there are no more. - * - * @param buffer The buffer to read to - * @return the number of byte read - * @throws IOException if nit enough bytes left to read - */ - public int readFully(ByteBuffer buffer) throws IOException { - if (LOGGER.isDebugEnabled()) { - LOGGER.debug("Reading buffer with size: {}", buffer.remaining()); - } - int totalRead = 0; - while (buffer.remaining() != 0) { - int read = in.read(buffer); - if (read == -1) { - this.bytesRead += totalRead; - return totalRead; - } - totalRead += read; - if (read == 0) { - break; - } - } - this.bytesRead += totalRead; - return totalRead; - } - - /** - * Reads up to len into buffer. Returns bytes read. 
- * - * @param buffer the buffer to read to - * @param length the amount of bytes to read - * @return the number of bytes read - * @throws IOException if nit enough bytes left to read - */ - public long readFully(ArrowBuf buffer, long length) throws IOException { - boolean fullRead = true; - long bytesLeft = length; - while (fullRead && bytesLeft > 0) { - int bytesToRead = (int) Math.min(bytesLeft, Integer.MAX_VALUE); - int n = readFully(buffer.nioBuffer(buffer.writerIndex(), bytesToRead)); - buffer.writerIndex(buffer.writerIndex() + n); - fullRead = n == bytesToRead; - bytesLeft -= n; - } - return length - bytesLeft; - } - - @Override - public void close() throws IOException { - if (this.in != null) { - in.close(); - in = null; - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/SeekableReadChannel.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/SeekableReadChannel.java deleted file mode 100644 index f127d2cf262e1..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/SeekableReadChannel.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import java.io.IOException; -import java.nio.channels.SeekableByteChannel; - -/** An {@link ReadChannel} that supports seeking to a random position. */ -public class SeekableReadChannel extends ReadChannel { - - private final SeekableByteChannel in; - - public SeekableReadChannel(SeekableByteChannel in) { - super(in); - this.in = in; - } - - public void setPosition(long position) throws IOException { - in.position(position); - } - - public long size() throws IOException { - return in.size(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java deleted file mode 100644 index eeb2eaf566d6e..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import com.google.flatbuffers.FlatBufferBuilder; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.WritableByteChannel; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.ipc.message.FBSerializable; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Wrapper around a WritableByteChannel that maintains the position as well adding some common - * serialization utilities. - * - *

    All write methods in this class follow full write semantics, i.e., write calls only return - * after requested data has been fully written. Note this is different from java WritableByteChannel - * interface where partial write is allowed - * - *

    Please note that objects of this class are not thread-safe. - */ -public class WriteChannel implements AutoCloseable { - private static final Logger LOGGER = LoggerFactory.getLogger(WriteChannel.class); - - private static final byte[] ZERO_BYTES = new byte[8]; - - private final byte[] intBuf = new byte[4]; - - private long currentPosition = 0; - - private final WritableByteChannel out; - - public WriteChannel(WritableByteChannel out) { - this.out = out; - } - - @Override - public void close() throws IOException { - out.close(); - } - - public long getCurrentPosition() { - return currentPosition; - } - - public long write(byte[] buffer) throws IOException { - return write(ByteBuffer.wrap(buffer)); - } - - long write(byte[] buffer, int offset, int length) throws IOException { - return write(ByteBuffer.wrap(buffer, offset, length)); - } - - /** Writes zeroCount zeros the underlying channel. */ - public long writeZeros(long zeroCount) throws IOException { - long bytesWritten = 0; - long wholeWordsEnd = zeroCount - 8; - while (bytesWritten <= wholeWordsEnd) { - bytesWritten += write(ZERO_BYTES); - } - - if (bytesWritten < zeroCount) { - bytesWritten += write(ZERO_BYTES, 0, (int) (zeroCount - bytesWritten)); - } - return bytesWritten; - } - - /** Writes enough bytes to align the channel to an 8-byte boundary. */ - public long align() throws IOException { - int trailingByteSize = (int) (currentPosition % 8); - if (trailingByteSize != 0) { // align on 8 byte boundaries - return writeZeros(8 - trailingByteSize); - } - return 0; - } - - /** Writes all data from buffer to the underlying channel. */ - public long write(ByteBuffer buffer) throws IOException { - long length = buffer.remaining(); - while (buffer.hasRemaining()) { - out.write(buffer); - } - currentPosition += length; - return length; - } - - /** Writes v in little-endian format to the underlying channel. 
*/ - public long writeIntLittleEndian(int v) throws IOException { - MessageSerializer.intToBytes(v, intBuf); - return write(intBuf); - } - - /** Writes the buffer to the underlying channel. */ - public void write(ArrowBuf buffer) throws IOException { - long bytesWritten = 0; - while (bytesWritten < buffer.readableBytes()) { - int bytesToWrite = (int) Math.min(Integer.MAX_VALUE, buffer.readableBytes() - bytesWritten); - ByteBuffer nioBuffer = buffer.nioBuffer(buffer.readerIndex() + bytesWritten, bytesToWrite); - write(nioBuffer); - bytesWritten += bytesToWrite; - } - } - - /** - * Writes the serialized flatbuffer to the underlying channel. If withSizePrefix is true then the - * length in bytes of the buffer will first be written in little endian format. - */ - public long write(FBSerializable writer, boolean withSizePrefix) throws IOException { - ByteBuffer buffer = serialize(writer); - if (withSizePrefix) { - writeIntLittleEndian(buffer.remaining()); - } - return write(buffer); - } - - /** Serializes writer to a ByteBuffer. */ - public static ByteBuffer serialize(FBSerializable writer) { - FlatBufferBuilder builder = new FlatBufferBuilder(); - int root = writer.writeTo(builder); - builder.finish(root); - return builder.dataBuffer(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBlock.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBlock.java deleted file mode 100644 index 455229cc6dda5..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBlock.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import com.google.flatbuffers.FlatBufferBuilder; -import org.apache.arrow.flatbuf.Block; - -/** Metadata for an arrow message in a channel. */ -public class ArrowBlock implements FBSerializable { - - private final long offset; - private final int metadataLength; - private final long bodyLength; - - /** - * Constructs a new instance. - * - * @param offset The offset into the channel file where the block was written. - * @param metadataLength The length of the flatbuffer metadata in the block. - * @param bodyLength The length of data in the block. 
- */ - public ArrowBlock(long offset, int metadataLength, long bodyLength) { - super(); - this.offset = offset; - this.metadataLength = metadataLength; - this.bodyLength = bodyLength; - } - - public long getOffset() { - return offset; - } - - public int getMetadataLength() { - return metadataLength; - } - - public long getBodyLength() { - return bodyLength; - } - - @Override - public int writeTo(FlatBufferBuilder builder) { - return Block.createBlock(builder, offset, metadataLength, bodyLength); - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + (int) (bodyLength ^ (bodyLength >>> 32)); - result = prime * result + metadataLength; - result = prime * result + (int) (offset ^ (offset >>> 32)); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - ArrowBlock other = (ArrowBlock) obj; - if (bodyLength != other.bodyLength) { - return false; - } - if (metadataLength != other.metadataLength) { - return false; - } - if (offset != other.offset) { - return false; - } - return true; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBodyCompression.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBodyCompression.java deleted file mode 100644 index b076b38fa755e..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBodyCompression.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import com.google.flatbuffers.FlatBufferBuilder; -import org.apache.arrow.flatbuf.BodyCompression; - -/** Compression information about data written to a channel. */ -public class ArrowBodyCompression implements FBSerializable { - - private final byte codec; - - private final byte method; - - public ArrowBodyCompression(byte codec, byte method) { - this.codec = codec; - this.method = method; - } - - @Override - public int writeTo(FlatBufferBuilder builder) { - return BodyCompression.createBodyCompression(builder, codec, method); - } - - public byte getCodec() { - return codec; - } - - public byte getMethod() { - return method; - } - - @Override - public String toString() { - return "ArrowBodyCompression [codec=" + codec + ", method=" + method + "]"; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBuffer.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBuffer.java deleted file mode 100644 index cebddeb660e25..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBuffer.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import com.google.flatbuffers.FlatBufferBuilder; -import org.apache.arrow.flatbuf.Buffer; - -/** Metadata for a buffer written to a channel. */ -public class ArrowBuffer implements FBSerializable { - - private long offset; - private long size; - - /** - * Constructs a new instance. - * - * @param offset The offset to the start of the buffer in the channel. - * @param size The size of the buffer. - */ - public ArrowBuffer(long offset, long size) { - super(); - this.offset = offset; - this.size = size; - } - - public long getOffset() { - return offset; - } - - public long getSize() { - return size; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + (int) (offset ^ (offset >>> 32)); - result = prime * result + (int) (size ^ (size >>> 32)); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - ArrowBuffer other = (ArrowBuffer) obj; - if (offset != other.offset) { - return false; - } - if (size != other.size) { - return false; - } - return true; - } - - @Override - public int writeTo(FlatBufferBuilder builder) { - return Buffer.createBuffer(builder, offset, size); - } - - @Override - public String toString() { - return "ArrowBuffer [offset=" + offset + ", size=" + size + "]"; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowDictionaryBatch.java 
b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowDictionaryBatch.java deleted file mode 100644 index cee76433ea4c7..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowDictionaryBatch.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import com.google.flatbuffers.FlatBufferBuilder; -import org.apache.arrow.flatbuf.DictionaryBatch; -import org.apache.arrow.flatbuf.MessageHeader; - -/** - * POJO wrapper around a Dictionary Batch IPC messages. - * (https://arrow.apache.org/docs/format/IPC.html#dictionary-batches) - */ -public class ArrowDictionaryBatch implements ArrowMessage { - - private final long dictionaryId; - private final ArrowRecordBatch dictionary; - private final boolean isDelta; - - @Deprecated - public ArrowDictionaryBatch(long dictionaryId, ArrowRecordBatch dictionary) { - this(dictionaryId, dictionary, false); - } - - /** Constructs new instance. 
*/ - public ArrowDictionaryBatch(long dictionaryId, ArrowRecordBatch dictionary, boolean isDelta) { - this.dictionaryId = dictionaryId; - this.dictionary = dictionary; - this.isDelta = isDelta; - } - - public boolean isDelta() { - return isDelta; - } - - public byte getMessageType() { - return MessageHeader.DictionaryBatch; - } - - public long getDictionaryId() { - return dictionaryId; - } - - public ArrowRecordBatch getDictionary() { - return dictionary; - } - - @Override - public int writeTo(FlatBufferBuilder builder) { - int dataOffset = dictionary.writeTo(builder); - DictionaryBatch.startDictionaryBatch(builder); - DictionaryBatch.addId(builder, dictionaryId); - DictionaryBatch.addData(builder, dataOffset); - DictionaryBatch.addIsDelta(builder, isDelta); - return DictionaryBatch.endDictionaryBatch(builder); - } - - @Override - public long computeBodyLength() { - return dictionary.computeBodyLength(); - } - - @Override - public T accepts(ArrowMessageVisitor visitor) { - return visitor.visit(this); - } - - @Override - public String toString() { - return "ArrowDictionaryBatch [dictionaryId=" - + dictionaryId - + ", dictionary=" - + dictionary - + "]"; - } - - @Override - public void close() { - dictionary.close(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFieldNode.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFieldNode.java deleted file mode 100644 index ad3f434e12a1d..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFieldNode.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; - -import com.google.flatbuffers.FlatBufferBuilder; -import org.apache.arrow.flatbuf.FieldNode; - -/** Metadata about Vectors/Arrays that is written to a channel. */ -public class ArrowFieldNode implements FBSerializable { - - private final int length; - private final int nullCount; - - /** - * Constructs a new instance. - * - * @param length The number of values written. - * @param nullCount The number of null values. - */ - public ArrowFieldNode(long length, long nullCount) { - super(); - this.length = checkedCastToInt(length); - this.nullCount = checkedCastToInt(nullCount); - } - - @Override - public int writeTo(FlatBufferBuilder builder) { - return FieldNode.createFieldNode(builder, length, nullCount); - } - - public int getNullCount() { - return nullCount; - } - - public int getLength() { - return length; - } - - @Override - public String toString() { - return "ArrowFieldNode [length=" + length + ", nullCount=" + nullCount + "]"; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFooter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFooter.java deleted file mode 100644 index bb2b87113faca..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFooter.java +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import static org.apache.arrow.vector.ipc.message.FBSerializables.writeAllStructsToVector; -import static org.apache.arrow.vector.ipc.message.FBSerializables.writeKeyValues; - -import com.google.flatbuffers.FlatBufferBuilder; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.arrow.flatbuf.Block; -import org.apache.arrow.flatbuf.Footer; -import org.apache.arrow.flatbuf.KeyValue; -import org.apache.arrow.vector.types.MetadataVersion; -import org.apache.arrow.vector.types.pojo.Schema; - -/** Footer metadata for the arrow file format. */ -public class ArrowFooter implements FBSerializable { - - private final Schema schema; - - private final List dictionaries; - - private final List recordBatches; - - private final Map metaData; - - private final MetadataVersion metadataVersion; - - public ArrowFooter(Schema schema, List dictionaries, List recordBatches) { - this(schema, dictionaries, recordBatches, null); - } - - /** - * Constructs a new instance. - * - * @param schema The schema for record batches in the file. - * @param dictionaries The dictionaries relevant to the file. - * @param recordBatches The recordBatches written to the file. 
- * @param metaData user-defined k-v meta data. - */ - public ArrowFooter( - Schema schema, - List dictionaries, - List recordBatches, - Map metaData) { - this(schema, dictionaries, recordBatches, metaData, MetadataVersion.DEFAULT); - } - - /** - * Constructs a new instance. - * - * @param schema The schema for record batches in the file. - * @param dictionaries The dictionaries relevant to the file. - * @param recordBatches The recordBatches written to the file. - * @param metaData user-defined k-v meta data. - * @param metadataVersion The Arrow metadata version. - */ - public ArrowFooter( - Schema schema, - List dictionaries, - List recordBatches, - Map metaData, - MetadataVersion metadataVersion) { - this.schema = schema; - this.dictionaries = dictionaries; - this.recordBatches = recordBatches; - this.metaData = metaData; - this.metadataVersion = metadataVersion; - } - - /** Constructs from the corresponding Flatbuffer message. */ - public ArrowFooter(Footer footer) { - this( - Schema.convertSchema(footer.schema()), - dictionaries(footer), - recordBatches(footer), - metaData(footer), - MetadataVersion.fromFlatbufID(footer.version())); - } - - private static List recordBatches(Footer footer) { - List recordBatches = new ArrayList<>(); - Block tempBlock = new Block(); - int recordBatchesLength = footer.recordBatchesLength(); - for (int i = 0; i < recordBatchesLength; i++) { - Block block = footer.recordBatches(tempBlock, i); - recordBatches.add(new ArrowBlock(block.offset(), block.metaDataLength(), block.bodyLength())); - } - return recordBatches; - } - - private static List dictionaries(Footer footer) { - List dictionaries = new ArrayList<>(); - Block tempBlock = new Block(); - - int dictionariesLength = footer.dictionariesLength(); - for (int i = 0; i < dictionariesLength; i++) { - Block block = footer.dictionaries(tempBlock, i); - dictionaries.add(new ArrowBlock(block.offset(), block.metaDataLength(), block.bodyLength())); - } - return dictionaries; - } - - 
private static Map metaData(Footer footer) { - Map metaData = new HashMap<>(); - - int metaDataLength = footer.customMetadataLength(); - for (int i = 0; i < metaDataLength; i++) { - KeyValue kv = footer.customMetadata(i); - metaData.put(kv.key(), kv.value()); - } - - return metaData; - } - - public Schema getSchema() { - return schema; - } - - public List getDictionaries() { - return dictionaries; - } - - public List getRecordBatches() { - return recordBatches; - } - - public Map getMetaData() { - return metaData; - } - - public MetadataVersion getMetadataVersion() { - return metadataVersion; - } - - @Override - public int writeTo(FlatBufferBuilder builder) { - int schemaIndex = schema.getSchema(builder); - Footer.startDictionariesVector(builder, dictionaries.size()); - int dicsOffset = writeAllStructsToVector(builder, dictionaries); - Footer.startRecordBatchesVector(builder, recordBatches.size()); - int rbsOffset = writeAllStructsToVector(builder, recordBatches); - - int metaDataOffset = 0; - if (metaData != null) { - metaDataOffset = writeKeyValues(builder, metaData); - } - - Footer.startFooter(builder); - Footer.addSchema(builder, schemaIndex); - Footer.addDictionaries(builder, dicsOffset); - Footer.addRecordBatches(builder, rbsOffset); - Footer.addCustomMetadata(builder, metaDataOffset); - Footer.addVersion(builder, metadataVersion.toFlatbufID()); - return Footer.endFooter(builder); - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((dictionaries == null) ? 0 : dictionaries.hashCode()); - result = prime * result + ((recordBatches == null) ? 0 : recordBatches.hashCode()); - result = prime * result + ((schema == null) ? 
0 : schema.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - ArrowFooter other = (ArrowFooter) obj; - if (dictionaries == null) { - if (other.dictionaries != null) { - return false; - } - } else if (!dictionaries.equals(other.dictionaries)) { - return false; - } - if (recordBatches == null) { - if (other.recordBatches != null) { - return false; - } - } else if (!recordBatches.equals(other.recordBatches)) { - return false; - } - if (schema == null) { - if (other.schema != null) { - return false; - } - } else if (!schema.equals(other.schema)) { - return false; - } - return true; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowMessage.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowMessage.java deleted file mode 100644 index 6f8e893405f15..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowMessage.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc.message; - -/** Interface for Arrow IPC messages (https://arrow.apache.org/docs/format/IPC.html). */ -public interface ArrowMessage extends FBSerializable, AutoCloseable { - - long computeBodyLength(); - - T accepts(ArrowMessageVisitor visitor); - - /** Returns the flatbuffer enum value indicating the type of the message. */ - byte getMessageType(); - - /** - * Visitor interface for implementations of {@link ArrowMessage}. - * - * @param The type of value to return after visiting. - */ - interface ArrowMessageVisitor { - T visit(ArrowDictionaryBatch message); - - T visit(ArrowRecordBatch message); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java deleted file mode 100644 index bc6bfa8c868f7..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java +++ /dev/null @@ -1,386 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc.message; - -import com.google.flatbuffers.FlatBufferBuilder; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.arrow.flatbuf.RecordBatch; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.compression.NoCompressionCodec; -import org.apache.arrow.vector.util.DataSizeRoundingUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * POJO representation of a RecordBatch IPC message - * (https://arrow.apache.org/docs/format/Columnar.html#recordbatch-message). - */ -public class ArrowRecordBatch implements ArrowMessage { - - private static final Logger LOGGER = LoggerFactory.getLogger(ArrowRecordBatch.class); - - /** Number of records. */ - private final int length; - - /** Nodes correspond to the pre-ordered flattened logical schema. */ - private final List nodes; - - private final List buffers; - - private final ArrowBodyCompression bodyCompression; - - private final List buffersLayout; - - private final List variadicBufferCounts; - - private boolean closed = false; - - public ArrowRecordBatch(int length, List nodes, List buffers) { - this(length, nodes, buffers, NoCompressionCodec.DEFAULT_BODY_COMPRESSION, null, true); - } - - public ArrowRecordBatch( - int length, - List nodes, - List buffers, - ArrowBodyCompression bodyCompression) { - this(length, nodes, buffers, bodyCompression, null, true); - } - - /** - * Construct a record batch from nodes. - * - * @param length how many rows in this batch - * @param nodes field level info - * @param buffers will be retained until this recordBatch is closed - * @param bodyCompression compression info. - * @param alignBuffers Whether to align buffers to an 8 byte boundary. 
- */ - public ArrowRecordBatch( - int length, - List nodes, - List buffers, - ArrowBodyCompression bodyCompression, - boolean alignBuffers) { - this(length, nodes, buffers, bodyCompression, null, alignBuffers, /*retainBuffers*/ true); - } - - /** - * Construct a record batch from nodes. - * - * @param length how many rows in this batch - * @param nodes field level info - * @param buffers will be retained until this recordBatch is closed - * @param bodyCompression compression info. - * @param alignBuffers Whether to align buffers to an 8 byte boundary. - * @param retainBuffers Whether to retain() each source buffer in the constructor. If false, the - * caller is responsible for retaining the buffers beforehand. - */ - public ArrowRecordBatch( - int length, - List nodes, - List buffers, - ArrowBodyCompression bodyCompression, - boolean alignBuffers, - boolean retainBuffers) { - this(length, nodes, buffers, bodyCompression, null, alignBuffers, retainBuffers); - } - - /** - * Construct a record batch from nodes. - * - * @param length how many rows in this batch - * @param nodes field level info - * @param buffers will be retained until this recordBatch is closed - * @param bodyCompression compression info. - * @param variadicBufferCounts the number of buffers in each variadic section. - * @param alignBuffers Whether to align buffers to an 8 byte boundary. - */ - public ArrowRecordBatch( - int length, - List nodes, - List buffers, - ArrowBodyCompression bodyCompression, - List variadicBufferCounts, - boolean alignBuffers) { - this( - length, - nodes, - buffers, - bodyCompression, - variadicBufferCounts, - alignBuffers, /*retainBuffers*/ - true); - } - - /** - * Construct a record batch from nodes. - * - * @param length how many rows in this batch - * @param nodes field level info - * @param buffers will be retained until this recordBatch is closed - * @param bodyCompression compression info. - * @param variadicBufferCounts the number of buffers in each variadic section. 
- * @param alignBuffers Whether to align buffers to an 8 byte boundary. - * @param retainBuffers Whether to retain() each source buffer in the constructor. If false, the - * caller is responsible for retaining the buffers beforehand. - */ - public ArrowRecordBatch( - int length, - List nodes, - List buffers, - ArrowBodyCompression bodyCompression, - List variadicBufferCounts, - boolean alignBuffers, - boolean retainBuffers) { - super(); - this.length = length; - this.nodes = nodes; - this.buffers = buffers; - Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null"); - this.bodyCompression = bodyCompression; - this.variadicBufferCounts = variadicBufferCounts; - List arrowBuffers = new ArrayList<>(buffers.size()); - long offset = 0; - for (ArrowBuf arrowBuf : buffers) { - if (retainBuffers) { - arrowBuf.getReferenceManager().retain(); - } - long size = arrowBuf.readableBytes(); - arrowBuffers.add(new ArrowBuffer(offset, size)); - if (LOGGER.isTraceEnabled()) { - LOGGER.trace("Buffer in RecordBatch at {}, length: {}", offset, size); - } - offset += size; - if (alignBuffers) { // align on 8 byte boundaries - offset = DataSizeRoundingUtil.roundUpTo8Multiple(offset); - } - } - this.buffersLayout = Collections.unmodifiableList(arrowBuffers); - } - - // clone constructor - // this constructor is different from the public ones in that the reference manager's - // retain method is not called, so the first dummy parameter is used - // to distinguish this from the public constructor. 
- private ArrowRecordBatch( - boolean dummy, - int length, - List nodes, - List buffers, - ArrowBodyCompression bodyCompression, - List variadicBufferCounts) { - this.length = length; - this.nodes = nodes; - this.buffers = buffers; - Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null"); - this.bodyCompression = bodyCompression; - this.variadicBufferCounts = variadicBufferCounts; - this.closed = false; - List arrowBuffers = new ArrayList<>(); - long offset = 0; - for (ArrowBuf arrowBuf : buffers) { - long size = arrowBuf.readableBytes(); - arrowBuffers.add(new ArrowBuffer(offset, size)); - offset += size; - } - this.buffersLayout = Collections.unmodifiableList(arrowBuffers); - } - - public byte getMessageType() { - return org.apache.arrow.flatbuf.MessageHeader.RecordBatch; - } - - public int getLength() { - return length; - } - - public ArrowBodyCompression getBodyCompression() { - return bodyCompression; - } - - /** - * Get the nodes in this record batch. - * - * @return the FieldNodes corresponding to the schema - */ - public List getNodes() { - return nodes; - } - - /** - * Get the record batch buffers. - * - * @return the buffers containing the data - */ - public List getBuffers() { - if (closed) { - throw new IllegalStateException("already closed"); - } - return buffers; - } - - /** - * Get the record batch variadic buffer counts. - * - * @return the variadic buffer counts - */ - public List getVariadicBufferCounts() { - return variadicBufferCounts; - } - - /** - * Create a new ArrowRecordBatch which has the same information as this batch but whose buffers - * are owned by that Allocator. - * - *

    This will also close this record batch and make it no longer useful. - * - * @return A cloned ArrowRecordBatch - */ - public ArrowRecordBatch cloneWithTransfer(final BufferAllocator allocator) { - final List newBufs = - buffers.stream() - .map( - buf -> - (buf.getReferenceManager() - .transferOwnership(buf, allocator) - .getTransferredBuffer()) - .writerIndex(buf.writerIndex())) - .collect(Collectors.toList()); - close(); - return new ArrowRecordBatch( - false, length, nodes, newBufs, bodyCompression, variadicBufferCounts); - } - - /** - * Get the serialized layout. - * - * @return the serialized layout if we send the buffers on the wire - */ - public List getBuffersLayout() { - return buffersLayout; - } - - @Override - public int writeTo(FlatBufferBuilder builder) { - RecordBatch.startNodesVector(builder, nodes.size()); - int nodesOffset = FBSerializables.writeAllStructsToVector(builder, nodes); - RecordBatch.startBuffersVector(builder, buffers.size()); - int buffersOffset = FBSerializables.writeAllStructsToVector(builder, buffersLayout); - int compressOffset = 0; - if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) { - compressOffset = bodyCompression.writeTo(builder); - } - - // Start the variadicBufferCounts vector. - int variadicBufferCountsOffset = 0; - if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { - variadicBufferCountsOffset = variadicBufferCounts.size(); - int elementSizeInBytes = 8; // Size of long in bytes - builder.startVector(elementSizeInBytes, variadicBufferCountsOffset, elementSizeInBytes); - - // Add each long to the builder. Note that elements should be added in reverse order. - for (int i = variadicBufferCounts.size() - 1; i >= 0; i--) { - long value = variadicBufferCounts.get(i); - builder.addLong(value); - } - - // End the vector. This returns an offset that you can use to refer to the vector. 
- variadicBufferCountsOffset = builder.endVector(); - } - - RecordBatch.startRecordBatch(builder); - RecordBatch.addLength(builder, length); - RecordBatch.addNodes(builder, nodesOffset); - RecordBatch.addBuffers(builder, buffersOffset); - if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) { - RecordBatch.addCompression(builder, compressOffset); - } - - // Add the variadicBufferCounts to the RecordBatch - if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { - RecordBatch.addVariadicBufferCounts(builder, variadicBufferCountsOffset); - } - - return RecordBatch.endRecordBatch(builder); - } - - @Override - public T accepts(ArrowMessageVisitor visitor) { - return visitor.visit(this); - } - - /** Releases the buffers. */ - @Override - public void close() { - if (!closed) { - closed = true; - for (ArrowBuf arrowBuf : buffers) { - arrowBuf.getReferenceManager().release(); - } - } - } - - @Override - public String toString() { - int variadicBufCount = 0; - if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { - variadicBufCount = variadicBufferCounts.size(); - } - return "ArrowRecordBatch [length=" - + length - + ", nodes=" - + nodes - + ", #buffers=" - + buffers.size() - + ", #variadicBufferCounts=" - + variadicBufCount - + ", buffersLayout=" - + buffersLayout - + ", closed=" - + closed - + "]"; - } - - /** Computes the size of the serialized body for this recordBatch. 
*/ - @Override - public long computeBodyLength() { - long size = 0; - - List buffers = getBuffers(); - List buffersLayout = getBuffersLayout(); - if (buffers.size() != buffersLayout.size()) { - throw new IllegalStateException( - "the layout does not match: " + buffers.size() + " != " + buffersLayout.size()); - } - - for (int i = 0; i < buffers.size(); i++) { - ArrowBuf buffer = buffers.get(i); - ArrowBuffer layout = buffersLayout.get(i); - size = layout.getOffset() + buffer.readableBytes(); - - // round up size to the next multiple of 8 - size = DataSizeRoundingUtil.roundUpTo8Multiple(size); - } - return size; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializable.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializable.java deleted file mode 100644 index 1b3f4f0ede39e..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializable.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import com.google.flatbuffers.FlatBufferBuilder; - -/** Interface for serializing to FlatBuffers. 
*/ -public interface FBSerializable { - /** Returns the number of bytes taken to serialize the data in builder after writing to it. */ - int writeTo(FlatBufferBuilder builder); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java deleted file mode 100644 index 755efc692d26d..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import com.google.flatbuffers.FlatBufferBuilder; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import org.apache.arrow.flatbuf.KeyValue; - -/** Utility methods for {@linkplain org.apache.arrow.vector.ipc.message.FBSerializable}s. */ -public class FBSerializables { - private FBSerializables() {} - - /** - * Writes every element of all to builder and calls {@link FlatBufferBuilder#endVector()} - * afterwards. Returns the number of result of calling endVector. 
- */ - public static int writeAllStructsToVector( - FlatBufferBuilder builder, List all) { - // struct vectors have to be created in reverse order - List reversed = new ArrayList<>(all); - Collections.reverse(reversed); - for (FBSerializable element : reversed) { - element.writeTo(builder); - } - return builder.endVector(); - } - - /** Writes map data with string type. */ - public static int writeKeyValues(FlatBufferBuilder builder, Map metaData) { - int[] metadataOffsets = new int[metaData.size()]; - Iterator> metadataIterator = metaData.entrySet().iterator(); - for (int i = 0; i < metadataOffsets.length; i++) { - Map.Entry kv = metadataIterator.next(); - int keyOffset = builder.createString(kv.getKey()); - int valueOffset = builder.createString(kv.getValue()); - KeyValue.startKeyValue(builder); - KeyValue.addKey(builder, keyOffset); - KeyValue.addValue(builder, valueOffset); - metadataOffsets[i] = KeyValue.endKeyValue(builder); - } - return org.apache.arrow.flatbuf.Field.createCustomMetadataVector(builder, metadataOffsets); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/IpcOption.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/IpcOption.java deleted file mode 100644 index 5b93faa47f000..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/IpcOption.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import org.apache.arrow.vector.types.MetadataVersion; - -/** IPC options, now only use for write. */ -public class IpcOption { - - // Write the pre-0.15.0 encapsulated IPC message format - // consisting of a 4-byte prefix instead of 8 byte - public final boolean write_legacy_ipc_format; - - // The metadata version. Defaults to V5. - public final MetadataVersion metadataVersion; - - public IpcOption() { - this(false, MetadataVersion.DEFAULT); - } - - public IpcOption(boolean writeLegacyIpcFormat, MetadataVersion metadataVersion) { - this.write_legacy_ipc_format = writeLegacyIpcFormat; - this.metadataVersion = metadataVersion; - } - - public static final IpcOption DEFAULT = new IpcOption(); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageChannelReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageChannelReader.java deleted file mode 100644 index 2b8bf57005548..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageChannelReader.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import java.io.IOException; -import org.apache.arrow.flatbuf.Message; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.ipc.ReadChannel; - -/** Reads a sequence of messages using a ReadChannel. */ -public class MessageChannelReader implements AutoCloseable { - protected ReadChannel in; - protected BufferAllocator allocator; - - /** - * Construct a MessageReader to read streaming messages from an existing ReadChannel. - * - * @param in Channel to read messages from - * @param allocator BufferAllocator used to read Message body into an ArrowBuf. - */ - public MessageChannelReader(ReadChannel in, BufferAllocator allocator) { - this.in = in; - this.allocator = allocator; - } - - /** - * Read a message from the ReadChannel and return a MessageResult containing the Message metadata - * and optional message body data. Once the end-of-stream has been reached, a null value will be - * returned. If the message has no body, then MessageResult.getBodyBuffer() returns null. 
- * - * @return MessageResult or null if reached end-of-stream - * @throws IOException on error - */ - public MessageResult readNext() throws IOException { - - // Read the flatbuf message and check for end-of-stream - MessageMetadataResult result = MessageSerializer.readMessage(in); - if (result == null) { - return null; - } - Message message = result.getMessage(); - ArrowBuf bodyBuffer = null; - - // Read message body data if defined in message - if (result.messageHasBody()) { - long bodyLength = result.getMessageBodyLength(); - bodyBuffer = MessageSerializer.readMessageBody(in, bodyLength, allocator); - } - - return new MessageResult(message, bodyBuffer); - } - - /** - * Get the number of bytes read from the ReadChannel. - * - * @return number of bytes - */ - public long bytesRead() { - return in.bytesRead(); - } - - /** - * Close the ReadChannel. - * - * @throws IOException on error - */ - @Override - public void close() throws IOException { - in.close(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageMetadataResult.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageMetadataResult.java deleted file mode 100644 index bfde5eba1b78c..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageMetadataResult.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import java.nio.ByteBuffer; -import org.apache.arrow.flatbuf.Message; - -/** - * Class to hold resulting Message metadata and buffer containing the serialized Flatbuffer message - * when reading messages from a ReadChannel. This handles Message metadata only and does not include - * the message body data, which should be subsequently read into an ArrowBuf. - */ -public class MessageMetadataResult { - - /** - * Construct a container to hold a deserialized Message metadata, and buffer with the serialized - * Message as read from a ReadChannel. - * - * @param messageLength the length of the serialized Flatbuffer message in bytes - * @param messageBuffer contains the serialized Flatbuffer Message metadata - * @param message the deserialized Flatbuffer Message metadata description - */ - MessageMetadataResult(int messageLength, ByteBuffer messageBuffer, Message message) { - this.messageLength = messageLength; - this.messageBuffer = messageBuffer; - this.message = message; - } - - /** - * Creates a new {@link MessageMetadataResult} by parsing it from the beginning of the buffer. - * - * @param messageLength The length of the serialized flatbuffer message in bytes (might not be - * equal to the buffer size). - */ - public static MessageMetadataResult create(ByteBuffer buffer, int messageLength) { - return new MessageMetadataResult(messageLength, buffer, Message.getRootAsMessage(buffer)); - } - - /** - * Get the length of the message metadata in bytes, not including the body length. 
- * - * @return number of bytes in the message metadata buffer. - */ - public int getMessageLength() { - return messageLength; - } - - /** - * Get the buffer containing the raw message metadata bytes, not including the message body data. - * - * @return buffer containing the message metadata. - */ - public ByteBuffer getMessageBuffer() { - return messageBuffer; - } - - /** Returns the bytes remaining in the buffer after parsing the message from it. */ - public int bytesAfterMessage() { - return message.getByteBuffer().remaining(); - } - - public byte headerType() { - return message.headerType(); - } - - /** - * Check if the message is followed by a body. This will be true if the message has a body length - * > 0, which indicates that a message body needs to be read from the input source. - * - * @return true if message has a defined body - */ - public boolean messageHasBody() { - return message.bodyLength() > 0; - } - - /** - * Get the length of the message body. - * - * @return number of bytes of the message body - */ - public long getMessageBodyLength() { - return message.bodyLength(); - } - - /** - * Get the realized flatbuf Message metadata description. - * - * @return Message metadata - */ - public Message getMessage() { - return message; - } - - private final int messageLength; - private final ByteBuffer messageBuffer; - private final Message message; -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageResult.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageResult.java deleted file mode 100644 index 20f7508dc0394..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageResult.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import org.apache.arrow.flatbuf.Message; -import org.apache.arrow.memory.ArrowBuf; - -/** - * Class to hold the Message metadata and body data when reading messages through a - * MessageChannelReader. - */ -public class MessageResult { - - /** - * Construct with a valid Message metadata and optional ArrowBuf containing message body data, if - * any. - * - * @param message Deserialized Flatbuffer Message metadata description - * @param bodyBuffer Optional ArrowBuf containing message body data, null if message has no body - */ - MessageResult(Message message, ArrowBuf bodyBuffer) { - this.message = message; - this.bodyBuffer = bodyBuffer; - } - - /** - * Get the Message metadata. - * - * @return the Flatbuffer Message metadata - */ - public Message getMessage() { - return message; - } - - /** - * Get the message body data. 
- * - * @return an ArrowBuf containing the message body data or null if the message has no body - */ - public ArrowBuf getBodyBuffer() { - return bodyBuffer; - } - - private final Message message; - private final ArrowBuf bodyBuffer; -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java deleted file mode 100644 index 36f6ea449b03c..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java +++ /dev/null @@ -1,757 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc.message; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; - -import com.google.flatbuffers.FlatBufferBuilder; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.flatbuf.Buffer; -import org.apache.arrow.flatbuf.DictionaryBatch; -import org.apache.arrow.flatbuf.FieldNode; -import org.apache.arrow.flatbuf.Message; -import org.apache.arrow.flatbuf.MessageHeader; -import org.apache.arrow.flatbuf.MetadataVersion; -import org.apache.arrow.flatbuf.RecordBatch; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.compression.NoCompressionCodec; -import org.apache.arrow.vector.ipc.ReadChannel; -import org.apache.arrow.vector.ipc.WriteChannel; -import org.apache.arrow.vector.types.pojo.Schema; - -/** - * Utility class for serializing Messages. Messages are all serialized a similar way. 1. 4 byte - * little endian message header prefix 2. FB serialized Message: This includes it the body length, - * which is the serialized body and the type of the message. 3. Serialized message. - * - *

    For schema messages, the serialization is simply the FB serialized Schema. - * - *

    For RecordBatch messages the serialization is: 1. 4 byte little endian batch metadata header - * 2. FB serialized RowBatch 3. Padding to align to 8 byte boundary. 4. serialized RowBatch buffers. - */ -public class MessageSerializer { - - // This 0xFFFFFFFF value is the first 4 bytes of a valid IPC message - public static final int IPC_CONTINUATION_TOKEN = -1; - - /** - * Convert an array of 4 bytes in little-endian to an native-endian i32 value. - * - * @param bytes byte array with minimum length of 4 in little-endian - * @return converted an native-endian 32-bit integer - */ - public static int bytesToInt(byte[] bytes) { - return ((bytes[3] & 255) << 24) - + ((bytes[2] & 255) << 16) - + ((bytes[1] & 255) << 8) - + ((bytes[0] & 255)); - } - - /** - * Convert an integer to a little endian 4 byte array. - * - * @param value integer value input - * @param bytes existing byte array with minimum length of 4 to contain the conversion output - */ - public static void intToBytes(int value, byte[] bytes) { - bytes[3] = (byte) (value >>> 24); - bytes[2] = (byte) (value >>> 16); - bytes[1] = (byte) (value >>> 8); - bytes[0] = (byte) (value); - } - - /** - * Convert a long to a little-endian 8 byte array. 
- * - * @param value long value input - * @param bytes existing byte array with minimum length of 8 to contain the conversion output - */ - public static void longToBytes(long value, byte[] bytes) { - bytes[7] = (byte) (value >>> 56); - bytes[6] = (byte) (value >>> 48); - bytes[5] = (byte) (value >>> 40); - bytes[4] = (byte) (value >>> 32); - bytes[3] = (byte) (value >>> 24); - bytes[2] = (byte) (value >>> 16); - bytes[1] = (byte) (value >>> 8); - bytes[0] = (byte) (value); - } - - public static int writeMessageBuffer( - WriteChannel out, int messageLength, ByteBuffer messageBuffer) throws IOException { - return writeMessageBuffer(out, messageLength, messageBuffer, IpcOption.DEFAULT); - } - - /** - * Write the serialized Message metadata, prefixed by the length, to the output Channel. This - * ensures that it aligns to an 8 byte boundary and will adjust the message length to include any - * padding used for alignment. - * - * @param out Output Channel - * @param messageLength Number of bytes in the message buffer, written as little Endian prefix - * @param messageBuffer Message metadata buffer to be written, this does not include any message - * body data which should be subsequently written to the Channel - * @param option IPC write options - * @return Number of bytes written - * @throws IOException on error - */ - public static int writeMessageBuffer( - WriteChannel out, int messageLength, ByteBuffer messageBuffer, IpcOption option) - throws IOException { - - // if write the pre-0.15.0 encapsulated IPC message format consisting of a 4-byte prefix instead - // of 8 byte - int prefixSize = option.write_legacy_ipc_format ? 
4 : 8; - - // ensure that message aligns to 8 byte padding - prefix_size bytes, then message body - if ((messageLength + prefixSize) % 8 != 0) { - messageLength += 8 - (messageLength + prefixSize) % 8; - } - if (!option.write_legacy_ipc_format) { - out.writeIntLittleEndian(IPC_CONTINUATION_TOKEN); - } - out.writeIntLittleEndian(messageLength); - out.write(messageBuffer); - out.align(); - - // any bytes written are already captured by our size modification above - return messageLength + prefixSize; - } - - /** Serialize a schema object. */ - public static long serialize(WriteChannel out, Schema schema) throws IOException { - return serialize(out, schema, IpcOption.DEFAULT); - } - - /** - * Serialize a schema object. - * - * @param out where to write the schema - * @param schema the object to serialize to out - * @return the number of bytes written - * @throws IOException if something went wrong - */ - public static long serialize(WriteChannel out, Schema schema, IpcOption option) - throws IOException { - long start = out.getCurrentPosition(); - Preconditions.checkArgument(start % 8 == 0, "out is not aligned"); - - ByteBuffer serializedMessage = serializeMetadata(schema, option); - - int messageLength = serializedMessage.remaining(); - - int bytesWritten = writeMessageBuffer(out, messageLength, serializedMessage, option); - Preconditions.checkArgument(bytesWritten % 8 == 0, "out is not aligned"); - return bytesWritten; - } - - /** Returns the serialized flatbuffer bytes of the schema wrapped in a message table. */ - @Deprecated - public static ByteBuffer serializeMetadata(Schema schema) { - return serializeMetadata(schema, IpcOption.DEFAULT); - } - - /** Returns the serialized flatbuffer bytes of the schema wrapped in a message table. 
*/ - public static ByteBuffer serializeMetadata(Schema schema, IpcOption writeOption) { - FlatBufferBuilder builder = new FlatBufferBuilder(); - int schemaOffset = schema.getSchema(builder); - return MessageSerializer.serializeMessage( - builder, org.apache.arrow.flatbuf.MessageHeader.Schema, schemaOffset, 0, writeOption); - } - - /** - * Deserializes an Arrow Schema object from a schema message. Format is from serialize(). - * - * @param schemaMessage a Message of type MessageHeader.Schema - * @return the deserialized Arrow Schema - */ - public static Schema deserializeSchema(Message schemaMessage) { - Preconditions.checkArgument( - schemaMessage.headerType() == MessageHeader.Schema, - "Expected schema but result was: %s", - schemaMessage.headerType()); - return Schema.convertSchema( - (org.apache.arrow.flatbuf.Schema) - schemaMessage.header(new org.apache.arrow.flatbuf.Schema())); - } - - /** - * Deserializes an Arrow Schema read from the input channel. Format is from serialize(). - * - * @param in the channel to deserialize from - * @return the deserialized Arrow Schema - * @throws IOException if something went wrong - */ - public static Schema deserializeSchema(ReadChannel in) throws IOException { - MessageMetadataResult result = readMessage(in); - if (result == null) { - throw new IOException("Unexpected end of input when reading Schema"); - } - if (result.getMessage().headerType() != MessageHeader.Schema) { - throw new IOException("Expected schema but header was " + result.getMessage().headerType()); - } - return deserializeSchema(result); - } - - /** - * Deserializes an Arrow Schema object from a {@link MessageMetadataResult}. Format is from - * serialize(). - * - * @param message a Message of type MessageHeader.Schema - * @return the deserialized Arrow Schema - */ - public static Schema deserializeSchema(MessageMetadataResult message) { - return deserializeSchema(message.getMessage()); - } - - /** Serializes an ArrowRecordBatch. 
Returns the offset and length of the written batch. */ - public static ArrowBlock serialize(WriteChannel out, ArrowRecordBatch batch) throws IOException { - return serialize(out, batch, IpcOption.DEFAULT); - } - - /** - * Serializes an ArrowRecordBatch. Returns the offset and length of the written batch. - * - * @param out where to write the batch - * @param batch the object to serialize to out - * @return the serialized block metadata - * @throws IOException if something went wrong - */ - public static ArrowBlock serialize(WriteChannel out, ArrowRecordBatch batch, IpcOption option) - throws IOException { - - long start = out.getCurrentPosition(); - long bodyLength = batch.computeBodyLength(); - Preconditions.checkArgument(bodyLength % 8 == 0, "batch is not aligned"); - - ByteBuffer serializedMessage = serializeMetadata(batch, option); - - int metadataLength = serializedMessage.remaining(); - - int prefixSize = 4; - if (!option.write_legacy_ipc_format) { - out.writeIntLittleEndian(IPC_CONTINUATION_TOKEN); - prefixSize = 8; - } - - // calculate alignment bytes so that metadata length points to the correct location after - // alignment - int padding = (int) ((start + metadataLength + prefixSize) % 8); - if (padding != 0) { - metadataLength += (8 - padding); - } - - out.writeIntLittleEndian(metadataLength); - out.write(serializedMessage); - - // Align the output to 8 byte boundary. - out.align(); - - long bufferLength = writeBatchBuffers(out, batch); - Preconditions.checkArgument(bufferLength % 8 == 0, "out is not aligned"); - - // Metadata size in the Block account for the size prefix - return new ArrowBlock(start, metadataLength + prefixSize, bufferLength); - } - - /** - * Write the Arrow buffers of the record batch to the output channel. 
- * - * @param out the output channel to write the buffers to - * @param batch an ArrowRecordBatch containing buffers to be written - * @return the number of bytes written - * @throws IOException on error - */ - public static long writeBatchBuffers(WriteChannel out, ArrowRecordBatch batch) - throws IOException { - long bufferStart = out.getCurrentPosition(); - List buffers = batch.getBuffers(); - List buffersLayout = batch.getBuffersLayout(); - - for (int i = 0; i < buffers.size(); i++) { - ArrowBuf buffer = buffers.get(i); - ArrowBuffer layout = buffersLayout.get(i); - long startPosition = bufferStart + layout.getOffset(); - if (startPosition != out.getCurrentPosition()) { - out.writeZeros(startPosition - out.getCurrentPosition()); - } - out.write(buffer); - if (out.getCurrentPosition() != startPosition + layout.getSize()) { - throw new IllegalStateException( - "wrong buffer size: " - + out.getCurrentPosition() - + " != " - + startPosition - + layout.getSize()); - } - } - - out.align(); - return out.getCurrentPosition() - bufferStart; - } - - /** - * Returns the serialized form of {@link RecordBatch} wrapped in a {@link - * org.apache.arrow.flatbuf.Message}. - */ - @Deprecated - public static ByteBuffer serializeMetadata(ArrowMessage message) { - return serializeMetadata(message, IpcOption.DEFAULT); - } - - /** - * Returns the serialized form of {@link RecordBatch} wrapped in a {@link - * org.apache.arrow.flatbuf.Message}. - */ - public static ByteBuffer serializeMetadata(ArrowMessage message, IpcOption writeOption) { - FlatBufferBuilder builder = new FlatBufferBuilder(); - int batchOffset = message.writeTo(builder); - return serializeMessage( - builder, message.getMessageType(), batchOffset, message.computeBodyLength(), writeOption); - } - - /** - * Deserializes an ArrowRecordBatch from a record batch message and data in an ArrowBuf. 
- * - * @param recordBatchMessage a Message of type MessageHeader.RecordBatch - * @param bodyBuffer Arrow buffer containing the RecordBatch data - * @return the deserialized ArrowRecordBatch - * @throws IOException if something went wrong - */ - public static ArrowRecordBatch deserializeRecordBatch( - Message recordBatchMessage, ArrowBuf bodyBuffer) throws IOException { - RecordBatch recordBatchFB = (RecordBatch) recordBatchMessage.header(new RecordBatch()); - return deserializeRecordBatch(recordBatchFB, bodyBuffer); - } - - /** - * Deserializes an ArrowRecordBatch read from the input channel. This uses the given allocator to - * create an ArrowBuf for the batch body data. - * - * @param in Channel to read a RecordBatch message and data from - * @param allocator BufferAllocator to allocate an Arrow buffer to read message body data - * @return the deserialized ArrowRecordBatch - * @throws IOException on error - */ - public static ArrowRecordBatch deserializeRecordBatch(ReadChannel in, BufferAllocator allocator) - throws IOException { - MessageMetadataResult result = readMessage(in); - if (result == null) { - throw new IOException("Unexpected end of input when reading a RecordBatch"); - } - if (result.getMessage().headerType() != MessageHeader.RecordBatch) { - throw new IOException( - "Expected RecordBatch but header was " + result.getMessage().headerType()); - } - long bodyLength = result.getMessageBodyLength(); - ArrowBuf bodyBuffer = readMessageBody(in, bodyLength, allocator); - return deserializeRecordBatch(result.getMessage(), bodyBuffer); - } - - /** - * Deserializes an ArrowRecordBatch knowing the size of the entire message up front. This - * minimizes the number of reads to the underlying stream. 
- * - * @param in the channel to deserialize from - * @param block the object to deserialize to - * @param alloc to allocate buffers - * @return the deserialized ArrowRecordBatch - * @throws IOException if something went wrong - */ - public static ArrowRecordBatch deserializeRecordBatch( - ReadChannel in, ArrowBlock block, BufferAllocator alloc) throws IOException { - // Metadata length contains prefix_size bytes plus byte padding - long totalLen = block.getMetadataLength() + block.getBodyLength(); - - ArrowBuf buffer = alloc.buffer(totalLen); - if (in.readFully(buffer, totalLen) != totalLen) { - throw new IOException("Unexpected end of input trying to read batch."); - } - - int prefixSize = buffer.getInt(0) == IPC_CONTINUATION_TOKEN ? 8 : 4; - - ArrowBuf metadataBuffer = buffer.slice(prefixSize, block.getMetadataLength() - prefixSize); - - Message messageFB = Message.getRootAsMessage(metadataBuffer.nioBuffer().asReadOnlyBuffer()); - - RecordBatch recordBatchFB = (RecordBatch) messageFB.header(new RecordBatch()); - - // Now read the body - final ArrowBuf body = - buffer.slice(block.getMetadataLength(), totalLen - block.getMetadataLength()); - return deserializeRecordBatch(recordBatchFB, body); - } - - /** - * Deserializes an ArrowRecordBatch given the Flatbuffer metadata and in-memory body. 
- * - * @param recordBatchFB Deserialized FlatBuffer record batch - * @param body Read body of the record batch - * @return ArrowRecordBatch from metadata and in-memory body - * @throws IOException on error - */ - public static ArrowRecordBatch deserializeRecordBatch(RecordBatch recordBatchFB, ArrowBuf body) - throws IOException { - // Now read the body - int nodesLength = recordBatchFB.nodesLength(); - List nodes = new ArrayList<>(); - for (int i = 0; i < nodesLength; ++i) { - FieldNode node = recordBatchFB.nodes(i); - if ((int) node.length() != node.length() || (int) node.nullCount() != node.nullCount()) { - throw new IOException( - "Cannot currently deserialize record batches with " - + "node length larger than INT_MAX records."); - } - nodes.add(new ArrowFieldNode(node.length(), node.nullCount())); - } - List buffers = new ArrayList<>(); - for (int i = 0; i < recordBatchFB.buffersLength(); ++i) { - Buffer bufferFB = recordBatchFB.buffers(i); - ArrowBuf vectorBuffer = body.slice(bufferFB.offset(), bufferFB.length()); - buffers.add(vectorBuffer); - } - - ArrowBodyCompression bodyCompression = - recordBatchFB.compression() == null - ? 
NoCompressionCodec.DEFAULT_BODY_COMPRESSION - : new ArrowBodyCompression( - recordBatchFB.compression().codec(), recordBatchFB.compression().method()); - - List variadicBufferCounts = new ArrayList<>(); - for (int i = 0; i < recordBatchFB.variadicBufferCountsLength(); i++) { - variadicBufferCounts.add(recordBatchFB.variadicBufferCounts(i)); - } - - if ((int) recordBatchFB.length() != recordBatchFB.length()) { - throw new IOException( - "Cannot currently deserialize record batches with more than INT_MAX records."); - } - ArrowRecordBatch arrowRecordBatch = - new ArrowRecordBatch( - checkedCastToInt(recordBatchFB.length()), - nodes, - buffers, - bodyCompression, - variadicBufferCounts, - /*alignBuffers*/ true); - body.getReferenceManager().release(); - return arrowRecordBatch; - } - - /** - * Reads a record batch based on the metadata in serializedMessage and the underlying data buffer. - */ - public static ArrowRecordBatch deserializeRecordBatch( - MessageMetadataResult serializedMessage, ArrowBuf underlying) throws IOException { - return deserializeRecordBatch(serializedMessage.getMessage(), underlying); - } - - public static ArrowBlock serialize(WriteChannel out, ArrowDictionaryBatch batch) - throws IOException { - return serialize(out, batch, IpcOption.DEFAULT); - } - - /** - * Serializes a dictionary ArrowRecordBatch. Returns the offset and length of the written batch. 
- * - * @param out where to serialize - * @param batch the batch to serialize - * @param option options for IPC - * @return the metadata of the serialized block - * @throws IOException if something went wrong - */ - public static ArrowBlock serialize(WriteChannel out, ArrowDictionaryBatch batch, IpcOption option) - throws IOException { - long start = out.getCurrentPosition(); - - long bodyLength = batch.computeBodyLength(); - Preconditions.checkArgument(bodyLength % 8 == 0, "batch is not aligned"); - - ByteBuffer serializedMessage = serializeMetadata(batch, option); - - int metadataLength = serializedMessage.remaining(); - - int prefixSize = 4; - if (!option.write_legacy_ipc_format) { - out.writeIntLittleEndian(IPC_CONTINUATION_TOKEN); - prefixSize = 8; - } - - // calculate alignment bytes so that metadata length points to the correct location after - // alignment - int padding = (int) ((start + metadataLength + prefixSize) % 8); - if (padding != 0) { - metadataLength += (8 - padding); - } - - out.writeIntLittleEndian(metadataLength); - out.write(serializedMessage); - - // Align the output to 8 byte boundary. - out.align(); - - // write the embedded record batch - long bufferLength = writeBatchBuffers(out, batch.getDictionary()); - Preconditions.checkArgument(bufferLength % 8 == 0, "out is not aligned"); - - // Metadata size in the Block account for the size prefix - return new ArrowBlock(start, metadataLength + prefixSize, bufferLength); - } - - /** - * Deserializes an ArrowDictionaryBatch from a dictionary batch Message and data in an ArrowBuf. 
- * - * @param message a message of type MessageHeader.DictionaryBatch - * @param bodyBuffer Arrow buffer containing the DictionaryBatch data of type - * MessageHeader.DictionaryBatch - * @return the deserialized ArrowDictionaryBatch - * @throws IOException if something went wrong - */ - public static ArrowDictionaryBatch deserializeDictionaryBatch( - Message message, ArrowBuf bodyBuffer) throws IOException { - DictionaryBatch dictionaryBatchFB = (DictionaryBatch) message.header(new DictionaryBatch()); - ArrowRecordBatch recordBatch = deserializeRecordBatch(dictionaryBatchFB.data(), bodyBuffer); - return new ArrowDictionaryBatch( - dictionaryBatchFB.id(), recordBatch, dictionaryBatchFB.isDelta()); - } - - /** - * Deserializes an ArrowDictionaryBatch from a dictionary batch Message and data in an ArrowBuf. - * - * @param message a message of type MessageHeader.DictionaryBatch - * @param bodyBuffer Arrow buffer containing the DictionaryBatch data of type - * MessageHeader.DictionaryBatch - * @return the deserialized ArrowDictionaryBatch - * @throws IOException if something went wrong - */ - public static ArrowDictionaryBatch deserializeDictionaryBatch( - MessageMetadataResult message, ArrowBuf bodyBuffer) throws IOException { - return deserializeDictionaryBatch(message.getMessage(), bodyBuffer); - } - - /** - * Deserializes an ArrowDictionaryBatch read from the input channel. This uses the given allocator - * to create an ArrowBuf for the batch body data. 
- * - * @param in Channel to read a DictionaryBatch message and data from - * @param allocator BufferAllocator to allocate an Arrow buffer to read message body data - * @return the deserialized ArrowDictionaryBatch - * @throws IOException on error - */ - public static ArrowDictionaryBatch deserializeDictionaryBatch( - ReadChannel in, BufferAllocator allocator) throws IOException { - MessageMetadataResult result = readMessage(in); - if (result == null) { - throw new IOException("Unexpected end of input when reading a DictionaryBatch"); - } - if (result.getMessage().headerType() != MessageHeader.DictionaryBatch) { - throw new IOException( - "Expected DictionaryBatch but header was " + result.getMessage().headerType()); - } - long bodyLength = result.getMessageBodyLength(); - ArrowBuf bodyBuffer = readMessageBody(in, bodyLength, allocator); - return deserializeDictionaryBatch(result.getMessage(), bodyBuffer); - } - - /** - * Deserializes a DictionaryBatch knowing the size of the entire message up front. This minimizes - * the number of reads to the underlying stream. - * - * @param in where to read from - * @param block block metadata for deserializing - * @param alloc to allocate new buffers - * @return the deserialized ArrowDictionaryBatch - * @throws IOException if something went wrong - */ - public static ArrowDictionaryBatch deserializeDictionaryBatch( - ReadChannel in, ArrowBlock block, BufferAllocator alloc) throws IOException { - // Metadata length contains integer prefix plus byte padding - long totalLen = block.getMetadataLength() + block.getBodyLength(); - - ArrowBuf buffer = alloc.buffer(totalLen); - if (in.readFully(buffer, totalLen) != totalLen) { - throw new IOException("Unexpected end of input trying to read batch."); - } - - int prefixSize = buffer.getInt(0) == IPC_CONTINUATION_TOKEN ? 
8 : 4; - - ArrowBuf metadataBuffer = buffer.slice(prefixSize, block.getMetadataLength() - prefixSize); - - Message messageFB = Message.getRootAsMessage(metadataBuffer.nioBuffer().asReadOnlyBuffer()); - - DictionaryBatch dictionaryBatchFB = (DictionaryBatch) messageFB.header(new DictionaryBatch()); - - // Now read the body - final ArrowBuf body = - buffer.slice(block.getMetadataLength(), totalLen - block.getMetadataLength()); - ArrowRecordBatch recordBatch = deserializeRecordBatch(dictionaryBatchFB.data(), body); - return new ArrowDictionaryBatch( - dictionaryBatchFB.id(), recordBatch, dictionaryBatchFB.isDelta()); - } - - /** - * Deserialize a message that is either an ArrowDictionaryBatch or ArrowRecordBatch. - * - * @param reader MessageChannelReader to read a sequence of messages from a ReadChannel - * @return The deserialized record batch - * @throws IOException if the message is not an ArrowDictionaryBatch or ArrowRecordBatch - */ - public static ArrowMessage deserializeMessageBatch(MessageChannelReader reader) - throws IOException { - MessageResult result = reader.readNext(); - if (result == null) { - return null; - } else if (result.getMessage().bodyLength() > Integer.MAX_VALUE) { - throw new IOException("Cannot currently deserialize record batches over 2GB"); - } - - if (result.getMessage().version() != MetadataVersion.V4 - && result.getMessage().version() != MetadataVersion.V5) { - throw new IOException( - "Received metadata with an incompatible version number: " - + result.getMessage().version()); - } - - switch (result.getMessage().headerType()) { - case MessageHeader.RecordBatch: - return deserializeRecordBatch(result.getMessage(), result.getBodyBuffer()); - case MessageHeader.DictionaryBatch: - return deserializeDictionaryBatch(result.getMessage(), result.getBodyBuffer()); - default: - throw new IOException("Unexpected message header type " + result.getMessage().headerType()); - } - } - - /** - * Deserialize a message that is either an 
ArrowDictionaryBatch or ArrowRecordBatch. - * - * @param in ReadChannel to read messages from - * @param alloc Allocator for message data - * @return The deserialized record batch - * @throws IOException if the message is not an ArrowDictionaryBatch or ArrowRecordBatch - */ - public static ArrowMessage deserializeMessageBatch(ReadChannel in, BufferAllocator alloc) - throws IOException { - return deserializeMessageBatch(new MessageChannelReader(in, alloc)); - } - - @Deprecated - public static ByteBuffer serializeMessage( - FlatBufferBuilder builder, byte headerType, int headerOffset, long bodyLength) { - return serializeMessage(builder, headerType, headerOffset, bodyLength, IpcOption.DEFAULT); - } - - /** - * Serializes a message header. - * - * @param builder to write the flatbuf to - * @param headerType headerType field - * @param headerOffset header offset field - * @param bodyLength body length field - * @param writeOption IPC write options - * @return the corresponding ByteBuffer - */ - public static ByteBuffer serializeMessage( - FlatBufferBuilder builder, - byte headerType, - int headerOffset, - long bodyLength, - IpcOption writeOption) { - Message.startMessage(builder); - Message.addHeaderType(builder, headerType); - Message.addHeader(builder, headerOffset); - Message.addVersion(builder, writeOption.metadataVersion.toFlatbufID()); - Message.addBodyLength(builder, bodyLength); - builder.finish(Message.endMessage(builder)); - return builder.dataBuffer(); - } - - /** - * Read a Message from the input channel and return a MessageMetadataResult that contains the - * Message metadata, buffer containing the serialized Message metadata as read, and length of the - * Message in bytes. Returns null if the end-of-stream has been reached. 
- * - * @param in ReadChannel to read messages from - * @return MessageMetadataResult with deserialized Message metadata and message information if a - * valid Message was read, or null if end-of-stream - * @throws IOException on error - */ - public static MessageMetadataResult readMessage(ReadChannel in) throws IOException { - - // Read the message size. There is an i32 little endian prefix. - ByteBuffer buffer = ByteBuffer.allocate(4); - if (in.readFully(buffer) == 4) { - - int messageLength = MessageSerializer.bytesToInt(buffer.array()); - if (messageLength == IPC_CONTINUATION_TOKEN) { - // Avoid breaking change in signature of ByteBuffer.clear() in JDK9+ - ((java.nio.Buffer) buffer).clear(); - // ARROW-6313, if the first 4 bytes are continuation message, read the next 4 for the length - if (in.readFully(buffer) == 4) { - messageLength = MessageSerializer.bytesToInt(buffer.array()); - } - } - - // Length of 0 indicates end of stream - if (messageLength != 0) { - - // Read the message into the buffer. - ByteBuffer messageBuffer = ByteBuffer.allocate(messageLength); - if (in.readFully(messageBuffer) != messageLength) { - throw new IOException("Unexpected end of stream trying to read message."); - } - // see https://github.com/apache/arrow/issues/41717 for reason why we cast to - // java.nio.Buffer - ByteBuffer rewindBuffer = (ByteBuffer) ((java.nio.Buffer) messageBuffer).rewind(); - - // Load the message. - Message message = Message.getRootAsMessage(messageBuffer); - - return new MessageMetadataResult(messageLength, messageBuffer, message); - } - } - return null; - } - - /** - * Read a Message body from the in channel into an ArrowBuf. 
- * - * @param in ReadChannel to read message body from - * @param bodyLength Length in bytes of the message body to read - * @param allocator Allocate the ArrowBuf to contain message body data - * @return an ArrowBuf containing the message body data - * @throws IOException on error - */ - public static ArrowBuf readMessageBody(ReadChannel in, long bodyLength, BufferAllocator allocator) - throws IOException { - ArrowBuf bodyBuffer = allocator.buffer(bodyLength); - try { - if (in.readFully(bodyBuffer, bodyLength) != bodyLength) { - throw new IOException("Unexpected end of input trying to read batch."); - } - } catch (RuntimeException | IOException e) { - bodyBuffer.close(); - throw e; - } - return bodyBuffer; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/table/BaseTable.java b/java/vector/src/main/java/org/apache/arrow/vector/table/BaseTable.java deleted file mode 100644 index 86f3b5dca918b..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/table/BaseTable.java +++ /dev/null @@ -1,433 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.table; - -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryEncoder; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.TransferPair; - -/** - * Abstract base class for Table. - * - *

    This API is EXPERIMENTAL. - */ -public abstract class BaseTable implements AutoCloseable { - - /** The field vectors holding the data in this table. */ - protected final List fieldVectors; - - /** - * An optional DictionaryProvider. One must be present if any vector in the table is dictionary - * encoded. - */ - protected DictionaryProvider dictionaryProvider; - - /** A map of Fields to FieldVectors used to select Fields. */ - protected final Map fieldVectorsMap = new LinkedHashMap<>(); - - /** The schema for the table. */ - protected Schema schema; - - /** - * The number of rows of data in the table; not necessarily the same as the table row capacity. - */ - protected int rowCount; - - /** - * Constructs new instance with the given rowCount, and containing the schema and each of the - * given vectors. - * - * @param fieldVectors the FieldVectors containing the table's data - * @param rowCount the number of rows in the table - * @param provider a dictionary provider, may be null if none of the vectors in the table are - * encoded - */ - public BaseTable(List fieldVectors, int rowCount, DictionaryProvider provider) { - - this.dictionaryProvider = provider; - this.rowCount = rowCount; - this.fieldVectors = new ArrayList<>(); - List fields = new ArrayList<>(); - for (FieldVector fv : fieldVectors) { - TransferPair transferPair = fv.getTransferPair(fv.getAllocator()); - transferPair.transfer(); - FieldVector newVector = (FieldVector) transferPair.getTo(); - newVector.setValueCount(rowCount); - - Field newField = newVector.getField(); - this.fieldVectors.add(newVector); - fields.add(newField); - fieldVectorsMap.put(newField, newVector); - } - this.schema = new Schema(fields); - } - - BaseTable() { - this.fieldVectors = new ArrayList<>(); - } - - /** - * Returns a FieldReader for the vector with the given name. 
- * - * @param name The name of a vector in this Table (case-sensitive) - * @return A FieldReader for the named FieldVector - */ - public FieldReader getReader(String name) { - for (Map.Entry entry : fieldVectorsMap.entrySet()) { - if (entry.getKey().getName().equals(name)) { - return entry.getValue().getReader(); - } - } - return null; - } - - /** - * Returns a FieldReader for the given field. - * - * @param field The field to be read - * @return A FieldReader for the given field - */ - public FieldReader getReader(Field field) { - return fieldVectorsMap.get(field).getReader(); - } - - /** - * Returns a FieldReader for the field at the given vector index. - * - * @param index The 0-based index of the field desired. - * @return A FieldReader for the requested field - */ - public FieldReader getReader(int index) { - Preconditions.checkArgument(index >= 0 && index < fieldVectors.size()); - return fieldVectors.get(index).getReader(); - } - - /** Returns the schema for this Table. */ - public Schema getSchema() { - return schema; - } - - /** - * Returns the Field with the given name if one exists in this table. - * - * @param fieldName the name of the field to return - * @return a field with the given name if one is present - * @throws IllegalArgumentException – if the field was not found - */ - public Field getField(String fieldName) { - return getSchema().findField(fieldName); - } - - /** - * Returns a list of Field created by adding the given vector to the vectors in this Table. - * - * @param index field index - * @param vector vector to be added. 
- * @return out List of FieldVectors with vector added - */ - List insertVector(int index, FieldVector vector) { - Preconditions.checkNotNull(vector); - Preconditions.checkArgument(index >= 0 && index <= fieldVectors.size()); - List newVectors = new ArrayList<>(); - if (index == fieldVectors.size()) { - newVectors.addAll(fieldVectors); - newVectors.add(vector); - } else { - for (int i = 0; i < fieldVectors.size(); i++) { - if (i == index) { - newVectors.add(vector); - } - newVectors.add(fieldVectors.get(i)); - } - } - return newVectors; - } - - /** - * Returns a new List of FieldVectors created by removing the selected Vector from the list in - * this Table. - * - * @param index field index - * @return out List of FieldVectors like the list in this table, but with the argument removed - */ - List extractVector(int index) { - Preconditions.checkArgument(index >= 0 && index < fieldVectors.size()); - List newVectors = new ArrayList<>(); - for (int i = 0; i < fieldVectors.size(); i++) { - if (i != index) { - newVectors.add(fieldVectors.get(i)); - } - } - return newVectors; - } - - /** Returns the number of vectors (columns) in this table. */ - public int getVectorCount() { - return fieldVectors.size(); - } - - /** - * Closes all the vectors holding data for this table and sets the rowcount to 0, preventing - * enumeration. - */ - void clear() { - close(); - rowCount = 0; - } - - /** Closes all the vectors holding data for this table. */ - @Override - public void close() { - try { - AutoCloseables.close(fieldVectors); - } catch (RuntimeException ex) { - throw ex; - } catch (Exception ex) { - // should never happen since FieldVector.close() doesn't throw IOException - throw new RuntimeException(ex); - } - } - - /** Returns the number of rows in this table. */ - public long getRowCount() { - return rowCount; - } - - /** - * Returns a new VectorSchemaRoot with the data and schema from this table. 
Data is transferred to - * the new VectorSchemaRoot, so this table is cleared and the rowCount is set to 0; - * - * @return a new VectorSchemaRoot - */ - public VectorSchemaRoot toVectorSchemaRoot() { - VectorSchemaRoot vsr = - new VectorSchemaRoot( - fieldVectors.stream() - .map( - v -> { - TransferPair transferPair = v.getTransferPair(v.getAllocator()); - transferPair.transfer(); - return (FieldVector) transferPair.getTo(); - }) - .collect(Collectors.toList())); - clear(); - return vsr; - } - - /** - * Returns the vector with the given name, or throws IllegalArgumentException if the name is not - * found. Names are case-sensitive. - * - * @param columnName The name of the vector - * @return the Vector with the given name, or null - * @throws IllegalArgumentException if the name is not the name of a vector in the table. - */ - FieldVector getVector(String columnName) { - for (Map.Entry entry : fieldVectorsMap.entrySet()) { - if (entry.getKey().getName().equals(columnName)) { - return entry.getValue(); - } - } - throw new IllegalArgumentException( - String.format("No vector named '%s' is present in the table", columnName)); - } - - /** - * Returns the vector at the given position. - * - * @param columnIndex The 0-based position of the vector - */ - FieldVector getVector(int columnIndex) { - return fieldVectors.get(columnIndex); - } - - /** - * Returns a copy of the vector with the given name, or throws IllegalArgumentException if the - * name is not found. Names are case-sensitive. - * - * @param columnName The name of the vector to copy - * @return A copy of the Vector with the given name - * @throws IllegalArgumentException if the name is not the name of a vector in the table. 
- */ - public FieldVector getVectorCopy(String columnName) { - FieldVector source; - for (Map.Entry entry : fieldVectorsMap.entrySet()) { - if (entry.getKey().getName().equals(columnName)) { - source = entry.getValue(); - FieldVector copy = source.getField().createVector(source.getAllocator()); - copy.allocateNew(); - for (int i = 0; i < source.getValueCount(); i++) { - copy.copyFromSafe(i, i, source); - } - copy.setValueCount(source.getValueCount()); - return copy; - } - } - throw new IllegalStateException( - String.format("No vector named '%s' is present in the table", columnName)); - } - - /** - * Returns a copy of the vector at the given position. - * - * @param columnIndex The 0-based position of the vector to be copied - */ - public FieldVector getVectorCopy(int columnIndex) { - FieldVector source = fieldVectors.get(columnIndex); - FieldVector copy = source.getField().createVector(source.getAllocator()); - copy.allocateNew(); - for (int i = 0; i < source.getValueCount(); i++) { - copy.copyFromSafe(i, i, source); - } - copy.setValueCount(source.getValueCount()); - return copy; - } - - /** - * Returns an immutable Row object holding a reference to this table. The default character - * encoding used by the cursor to decode Strings will be StandardCharsets.UTF_8 as this is the - * only charset supported in Arrow format. - */ - public Row immutableRow() { - return new Row(this); - } - - /** Returns a tab separated value of vectors (based on their java object representation). */ - public String contentToTSVString() { - StringBuilder sb = new StringBuilder(); - List row = new ArrayList<>(schema.getFields().size()); - for (Field field : schema.getFields()) { - row.add(field.getName()); - } - printRow(sb, row); - for (int i = 0; i < rowCount; i++) { - row.clear(); - for (FieldVector v : fieldVectors) { - row.add(v.getObject(i)); - } - printRow(sb, row); - } - return sb.toString(); - } - - /** - * Prints a single row without a header to the given StringBuilder. 
- * - * @param sb the StringBuilder to write to - * @param row the row to write - */ - private void printRow(StringBuilder sb, List row) { - boolean first = true; - for (Object v : row) { - if (first) { - first = false; - } else { - sb.append("\t"); - } - sb.append(v); - } - sb.append("\n"); - } - - /** - * Returns true if the row at the given index has been deleted and false otherwise. - * - *

    If the index is larger than the number of rows, the method returns true. - * - * @param rowNumber The 0-based index of the possibly deleted row - * @return true if the row at the index was deleted; false otherwise - */ - public boolean isRowDeleted(int rowNumber) { - return false; - } - - /** Returns the DictionaryProvider for this table. It can be used to decode an encoded values */ - public DictionaryProvider getDictionaryProvider() { - return dictionaryProvider; - } - - /** - * Returns a ValueVector containing the decoded version of the vector with the given name. - * - * @param vectorName The name of the vector to decode - * @param dictionaryId The identifier for the dictionary to use when decoding. Must match the id - * returned by the dictionary's getId() method. - * @return A ValueVector - */ - public ValueVector decode(String vectorName, long dictionaryId) { - Dictionary dictionary = getDictionary(dictionaryId); - - FieldVector vector = getVector(vectorName); - if (vector == null) { - throw new IllegalArgumentException( - String.format("No vector with name '%s' is present in table", vectorName)); - } - - DictionaryEncoder decoder = new DictionaryEncoder(dictionary, vector.getAllocator()); - return decoder.decode(vector); - } - - /** - * Returns a ValueVector containing the encoded version of the vector with the given name. - * - * @param vectorName The name of the vector to encode - * @param dictionaryId The identifier for the dictionary to use when encoding. Must match the id - * returned by the dictionary's getId() method. 
- * @return A ValueVector - */ - public ValueVector encode(String vectorName, long dictionaryId) { - Dictionary dictionary = getDictionary(dictionaryId); - FieldVector vector = getVector(vectorName); - if (vector == null) { - throw new IllegalArgumentException( - String.format("No vector with name '%s' is present in table", vectorName)); - } - DictionaryEncoder decoder = new DictionaryEncoder(dictionary, vector.getAllocator()); - return decoder.encode(vector); - } - - /** - * Returns the dictionary with given id. - * - * @param dictionaryId A long integer that is the id returned by the dictionary's getId() method - */ - private Dictionary getDictionary(long dictionaryId) { - if (dictionaryProvider == null) { - throw new IllegalStateException("No dictionary provider is present in table."); - } - - Dictionary dictionary = dictionaryProvider.lookup(dictionaryId); - if (dictionary == null) { - throw new IllegalArgumentException("No dictionary with id '%n' exists in the table"); - } - return dictionary; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/table/Row.java b/java/vector/src/main/java/org/apache/arrow/vector/table/Row.java deleted file mode 100644 index b89159b5ee754..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/table/Row.java +++ /dev/null @@ -1,1944 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.table; - -import java.math.BigDecimal; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.time.Duration; -import java.time.LocalDateTime; -import java.time.Period; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.PeriodDuration; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import 
org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.holders.NullableBigIntHolder; -import org.apache.arrow.vector.holders.NullableBitHolder; -import org.apache.arrow.vector.holders.NullableDateDayHolder; -import org.apache.arrow.vector.holders.NullableDateMilliHolder; -import org.apache.arrow.vector.holders.NullableDecimalHolder; -import org.apache.arrow.vector.holders.NullableDurationHolder; -import org.apache.arrow.vector.holders.NullableFloat4Holder; -import org.apache.arrow.vector.holders.NullableFloat8Holder; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.holders.NullableIntervalDayHolder; -import org.apache.arrow.vector.holders.NullableIntervalMonthDayNanoHolder; -import org.apache.arrow.vector.holders.NullableIntervalYearHolder; -import org.apache.arrow.vector.holders.NullableSmallIntHolder; -import org.apache.arrow.vector.holders.NullableTimeMicroHolder; -import org.apache.arrow.vector.holders.NullableTimeMilliHolder; -import org.apache.arrow.vector.holders.NullableTimeNanoHolder; -import org.apache.arrow.vector.holders.NullableTimeSecHolder; -import 
org.apache.arrow.vector.holders.NullableTimeStampMicroHolder; -import org.apache.arrow.vector.holders.NullableTimeStampMicroTZHolder; -import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder; -import org.apache.arrow.vector.holders.NullableTimeStampMilliTZHolder; -import org.apache.arrow.vector.holders.NullableTimeStampNanoHolder; -import org.apache.arrow.vector.holders.NullableTimeStampNanoTZHolder; -import org.apache.arrow.vector.holders.NullableTimeStampSecHolder; -import org.apache.arrow.vector.holders.NullableTimeStampSecTZHolder; -import org.apache.arrow.vector.holders.NullableTinyIntHolder; -import org.apache.arrow.vector.holders.NullableUInt1Holder; -import org.apache.arrow.vector.holders.NullableUInt2Holder; -import org.apache.arrow.vector.holders.NullableUInt4Holder; -import org.apache.arrow.vector.holders.NullableUInt8Holder; - -/** - * Row is a positionable, immutable cursor backed by a {@link Table}. - * - *

    Getters are provided for most vector types. The exceptions being {@link - * org.apache.arrow.vector.NullVector}, which only contains null values and has no getter, and - * {@link org.apache.arrow.vector.ZeroVector}, which is a zero-length vector of any type - * - *

    This API is EXPERIMENTAL. - */ -public class Row implements Iterator { - - /** - * Returns the standard character set to use for decoding strings. The Arrow format only supports - * UTF-8. - */ - private static final Charset DEFAULT_CHARACTER_SET = StandardCharsets.UTF_8; - - /** The table we're enumerating. */ - protected final BaseTable table; - /** the current row number. */ - protected int rowNumber = -1; - /** Indicates whether the next non-deleted row has been determined yet. */ - private boolean nextRowSet; - - /** - * An iterator that returns every row in the table, deleted or not. The implemented next() and - * hasNext() methods in Row wrap it with a filter to get only the non-deleted ones. - */ - private final Iterator iterator = intIterator(); - - /** - * Constructs a new Row backed by the given table. - * - * @param table the table that this Row object represents - */ - public Row(BaseTable table) { - this.table = table; - } - - /** Resets the current row to -1 and returns this object. */ - public Row resetPosition() { - rowNumber = -1; - return this; - } - - /** - * Moves this Row to the given 0-based row index. - * - * @return this Row for chaining - */ - public Row setPosition(int rowNumber) { - this.rowNumber = rowNumber; - this.nextRowSet = false; - return this; - } - - /** - * For vectors other than Union and DenseUnion, returns true if the value at columnName is null, - * and false otherwise. - * - *

    UnionVector#isNull always returns false, but the underlying vector may hold null values. - */ - public boolean isNull(String columnName) { - ValueVector vector = table.getVector(columnName); - return vector.isNull(rowNumber); - } - - /** - * For vectors other than Union and DenseUnion, returns true if the value at columnIndex is null, - * and false otherwise. - * - *

    UnionVector#isNull always returns false, but the underlying vector may hold null values. - */ - public boolean isNull(int columnIndex) { - ValueVector vector = table.getVector(columnIndex); - return vector.isNull(rowNumber); - } - - /** - * Returns an object representing the value in the ExtensionTypeVector at the currentRow and - * vectorIndex. An IllegalArgumentException is thrown if the column is not present in the Row and - * a ClassCastException is thrown if the type is incorrect. - */ - public Object getExtensionType(int vectorIndex) { - FieldVector vector = table.getVector(vectorIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns an object representing the value in the named ExtensionTypeVector at the currentRow. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type. - * - * @param columnName The name of the vector providing the result - * @return The object in the named column at the current row - */ - public Object getExtensionType(String columnName) { - FieldVector vector = table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns a Map from the column of the given vectorIndex at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type. - */ - public List getMap(int vectorIndex) { - ListVector vector = (ListVector) table.getVector(vectorIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns a Map from the column of the given name at the current row. 
An IllegalArgumentException - * is thrown if the column is not present in the Row and a ClassCastException is thrown if it has - * a different type - */ - public List getMap(String columnName) { - ListVector vector = (ListVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns an Object from the column at vectorIndex at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - */ - public Object getStruct(int vectorIndex) { - StructVector vector = (StructVector) table.getVector(vectorIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns an Object from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - */ - public Object getStruct(String columnName) { - StructVector vector = (StructVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns an Object from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - */ - public Object getUnion(int vectorIndex) { - UnionVector vector = (UnionVector) table.getVector(vectorIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns an Object from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - */ - public Object getUnion(String columnName) { - UnionVector vector = (UnionVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns an Object from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - */ - public Object getDenseUnion(String columnName) { - DenseUnionVector vector = (DenseUnionVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns an Object from the column with the given vectorIndex at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - */ - public Object getDenseUnion(int vectorIndex) { - DenseUnionVector vector = (DenseUnionVector) table.getVector(vectorIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns a List from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - */ - public List getList(String columnName) { - ListVector vector = (ListVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns a List from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present and a ClassCastException is - * thrown if it has a different type - */ - public List getList(int columnIndex) { - ListVector vector = (ListVector) table.getVector(columnIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns an int from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - */ - public int getInt(String columnName) { - IntVector vector = (IntVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns an int from the column with the given index at the current row. 
An - * IllegalArgumentException is thrown if the column is not present and a ClassCastException is - * thrown if it has a different type - */ - public int getInt(int columnIndex) { - IntVector vector = (IntVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value in the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - */ - public void getInt(String columnName, NullableIntHolder holder) { - IntVector vector = (IntVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value in the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present and a ClassCastException is - * thrown if it has a different type - */ - public void getInt(int columnIndex, NullableIntHolder holder) { - IntVector vector = (IntVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns an int from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - */ - public int getUInt4(String columnName) { - UInt4Vector vector = (UInt4Vector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns an int from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present and a ClassCastException is - * thrown if it has a different type - */ - public int getUInt4(int columnIndex) { - UInt4Vector vector = (UInt4Vector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value at the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - */ - public void getUInt4(String columnName, NullableUInt4Holder holder) { - UInt4Vector vector = (UInt4Vector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value at the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present and a ClassCastException is - * thrown if it has a different type - */ - public void getUInt4(int columnIndex, NullableUInt4Holder holder) { - UInt4Vector vector = (UInt4Vector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a short from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public short getSmallInt(String columnName) { - SmallIntVector vector = (SmallIntVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a short from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public short getSmallInt(int columnIndex) { - SmallIntVector vector = (SmallIntVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value in the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getSmallInt(String columnName, NullableSmallIntHolder holder) { - SmallIntVector vector = (SmallIntVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value in the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getSmallInt(int columnIndex, NullableSmallIntHolder holder) { - SmallIntVector vector = (SmallIntVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a char from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public char getUInt2(String columnName) { - UInt2Vector vector = (UInt2Vector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a char from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public char getUInt2(int columnIndex) { - UInt2Vector vector = (UInt2Vector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value in the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getUInt2(String columnName, NullableUInt2Holder holder) { - UInt2Vector vector = (UInt2Vector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value in the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getUInt2(int columnIndex, NullableUInt2Holder holder) { - UInt2Vector vector = (UInt2Vector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a byte from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public byte getTinyInt(String columnName) { - TinyIntVector vector = (TinyIntVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a byte from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public byte getTinyInt(int columnIndex) { - TinyIntVector vector = (TinyIntVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value in the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTinyInt(String columnName, NullableTinyIntHolder holder) { - TinyIntVector vector = (TinyIntVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value in the column at the given index and current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTinyInt(int columnIndex, NullableTinyIntHolder holder) { - TinyIntVector vector = (TinyIntVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a byte from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public byte getUInt1(String columnName) { - UInt1Vector vector = (UInt1Vector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a byte from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public byte getUInt1(int columnIndex) { - UInt1Vector vector = (UInt1Vector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value in the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getUInt1(String columnName, NullableUInt1Holder holder) { - UInt1Vector vector = (UInt1Vector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value in the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getUInt1(int columnIndex, NullableUInt1Holder holder) { - UInt1Vector vector = (UInt1Vector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a long from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getBigInt(String columnName) { - BigIntVector vector = (BigIntVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a long from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getBigInt(int columnIndex) { - BigIntVector vector = (BigIntVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value in the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getBigInt(String columnName, NullableBigIntHolder holder) { - BigIntVector vector = (BigIntVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value in the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getBigInt(int columnIndex, NullableBigIntHolder holder) { - BigIntVector vector = (BigIntVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a long from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getUInt8(String columnName) { - UInt8Vector vector = (UInt8Vector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a long from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getUInt8(int columnIndex) { - UInt8Vector vector = (UInt8Vector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value in the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getUInt8(String columnName, NullableUInt8Holder holder) { - UInt8Vector vector = (UInt8Vector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value in the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getUInt8(int columnIndex, NullableUInt8Holder holder) { - UInt8Vector vector = (UInt8Vector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a float from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public float getFloat4(String columnName) { - Float4Vector vector = (Float4Vector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a float from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public float getFloat4(int columnIndex) { - Float4Vector vector = (Float4Vector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value in the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getFloat4(String columnName, NullableFloat4Holder holder) { - Float4Vector vector = (Float4Vector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value in the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getFloat4(int columnIndex, NullableFloat4Holder holder) { - Float4Vector vector = (Float4Vector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a double from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public double getFloat8(String columnName) { - Float8Vector vector = (Float8Vector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a double from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public double getFloat8(int columnIndex) { - Float8Vector vector = (Float8Vector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value in the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getFloat8(String columnName, NullableFloat8Holder holder) { - Float8Vector vector = (Float8Vector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value in the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getFloat8(int columnIndex, NullableFloat8Holder holder) { - Float8Vector vector = (Float8Vector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns an int from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public int getBit(String columnName) { - BitVector vector = (BitVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns an int from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public int getBit(int columnIndex) { - BitVector vector = (BitVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value in the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getBit(String columnName, NullableBitHolder holder) { - BitVector vector = (BitVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value in the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getBit(int columnIndex, NullableBitHolder holder) { - BitVector vector = (BitVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a long from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public long getDateMilli(String columnName) { - DateMilliVector vector = (DateMilliVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a long from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public long getDateMilli(int columnIndex) { - DateMilliVector vector = (DateMilliVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value in the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getDateMilli(String columnName, NullableDateMilliHolder holder) { - DateMilliVector vector = (DateMilliVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value in the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getDateMilli(int columnIndex, NullableDateMilliHolder holder) { - DateMilliVector vector = (DateMilliVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns an int from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public int getDateDay(String columnName) { - DateDayVector vector = (DateDayVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns an int from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public int getDateDay(int columnIndex) { - DateDayVector vector = (DateDayVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value in the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getDateDay(String columnName, NullableDateDayHolder holder) { - DateDayVector vector = (DateDayVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value in the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getDateDay(int columnIndex, NullableDateDayHolder holder) { - DateDayVector vector = (DateDayVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a long from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeNano(String columnName) { - TimeNanoVector vector = (TimeNanoVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a long from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeNano(int columnIndex) { - TimeNanoVector vector = (TimeNanoVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value in the column with the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeNano(String columnName, NullableTimeNanoHolder holder) { - TimeNanoVector vector = (TimeNanoVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value in the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public void getTimeNano(int columnIndex, NullableTimeNanoHolder holder) { - TimeNanoVector vector = (TimeNanoVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a long from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public long getTimeMicro(String columnName) { - TimeMicroVector vector = (TimeMicroVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a long from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public long getTimeMicro(int columnIndex) { - TimeMicroVector vector = (TimeMicroVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. 
- */ - public void getTimeMicro(String columnName, NullableTimeMicroHolder holder) { - TimeMicroVector vector = (TimeMicroVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public void getTimeMicro(int columnIndex, NullableTimeMicroHolder holder) { - TimeMicroVector vector = (TimeMicroVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns an int from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public int getTimeMilli(String columnName) { - TimeMilliVector vector = (TimeMilliVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns an int from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public int getTimeMilli(int columnIndex) { - TimeMilliVector vector = (TimeMilliVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public void getTimeMilli(String columnName, NullableTimeMilliHolder holder) { - TimeMilliVector vector = (TimeMilliVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public void getTimeMilli(int columnIndex, NullableTimeMilliHolder holder) { - TimeMilliVector vector = (TimeMilliVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a LocalDateTime from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public LocalDateTime getTimeMilliObj(String columnName) { - TimeMilliVector vector = (TimeMilliVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns a LocalDateTime from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public LocalDateTime getTimeMilliObj(int columnIndex) { - TimeMilliVector vector = (TimeMilliVector) table.getVector(columnIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns an int from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public int getTimeSec(String columnName) { - TimeSecVector vector = (TimeSecVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns an int from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. 
- */ - public int getTimeSec(int columnIndex) { - TimeSecVector vector = (TimeSecVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public void getTimeSec(String columnName, NullableTimeSecHolder holder) { - TimeSecVector vector = (TimeSecVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public void getTimeSec(int columnIndex, NullableTimeSecHolder holder) { - TimeSecVector vector = (TimeSecVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a long from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type. - */ - public long getTimeStampSec(String columnName) { - TimeStampSecVector vector = (TimeStampSecVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a long from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampSec(int columnIndex) { - TimeStampSecVector vector = (TimeStampSecVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampSec(String columnName, NullableTimeStampSecHolder holder) { - TimeStampSecVector vector = (TimeStampSecVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampSec(int columnIndex, NullableTimeStampSecHolder holder) { - TimeStampSecVector vector = (TimeStampSecVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a LocalDateTime from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public LocalDateTime getTimeStampSecObj(String columnName) { - TimeStampSecVector vector = (TimeStampSecVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns a LocalDateTime from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public LocalDateTime getTimeStampSecObj(int columnIndex) { - TimeStampSecVector vector = (TimeStampSecVector) table.getVector(columnIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns a long from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampSecTZ(String columnName) { - TimeStampSecTZVector vector = (TimeStampSecTZVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a long from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampSecTZ(int columnIndex) { - TimeStampSecTZVector vector = (TimeStampSecTZVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampSecTZ(String columnName, NullableTimeStampSecTZHolder holder) { - TimeStampSecTZVector vector = (TimeStampSecTZVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampSecTZ(int columnIndex, NullableTimeStampSecTZHolder holder) { - TimeStampSecTZVector vector = (TimeStampSecTZVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a long from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampNano(String columnName) { - TimeStampNanoVector vector = (TimeStampNanoVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a long from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampNano(int columnIndex) { - TimeStampNanoVector vector = (TimeStampNanoVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampNano(String columnName, NullableTimeStampNanoHolder holder) { - TimeStampNanoVector vector = (TimeStampNanoVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampNano(int columnIndex, NullableTimeStampNanoHolder holder) { - TimeStampNanoVector vector = (TimeStampNanoVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a LocalDateTime from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public LocalDateTime getTimeStampNanoObj(String columnName) { - TimeStampNanoVector vector = (TimeStampNanoVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns a LocalDateTime from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public LocalDateTime getTimeStampNanoObj(int columnIndex) { - TimeStampNanoVector vector = (TimeStampNanoVector) table.getVector(columnIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns a long from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampNanoTZ(String columnName) { - TimeStampNanoTZVector vector = (TimeStampNanoTZVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a long from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampNanoTZ(int columnIndex) { - TimeStampNanoTZVector vector = (TimeStampNanoTZVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampNanoTZ(String columnName, NullableTimeStampNanoTZHolder holder) { - TimeStampNanoTZVector vector = (TimeStampNanoTZVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampNanoTZ(int columnIndex, NullableTimeStampNanoTZHolder holder) { - TimeStampNanoTZVector vector = (TimeStampNanoTZVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a long from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampMilli(String columnName) { - TimeStampMilliVector vector = (TimeStampMilliVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a long from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampMilli(int columnIndex) { - TimeStampMilliVector vector = (TimeStampMilliVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampMilli(String columnName, NullableTimeStampMilliHolder holder) { - TimeStampMilliVector vector = (TimeStampMilliVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampMilli(int columnIndex, NullableTimeStampMilliHolder holder) { - TimeStampMilliVector vector = (TimeStampMilliVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a LocalDateTime from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public LocalDateTime getTimeStampMilliObj(String columnName) { - TimeStampMilliVector vector = (TimeStampMilliVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns a LocalDateTime from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public LocalDateTime getTimeStampMilliObj(int columnIndex) { - TimeStampMilliVector vector = (TimeStampMilliVector) table.getVector(columnIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns a long from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampMilliTZ(String columnName) { - TimeStampMilliTZVector vector = (TimeStampMilliTZVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a long from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampMilliTZ(int columnIndex) { - TimeStampMilliTZVector vector = (TimeStampMilliTZVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampMilliTZ(String columnName, NullableTimeStampMilliTZHolder holder) { - TimeStampMilliTZVector vector = (TimeStampMilliTZVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampMilliTZ(int columnIndex, NullableTimeStampMilliTZHolder holder) { - TimeStampMilliTZVector vector = (TimeStampMilliTZVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a long from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampMicro(String columnName) { - TimeStampMicroVector vector = (TimeStampMicroVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a long from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampMicro(int columnIndex) { - TimeStampMicroVector vector = (TimeStampMicroVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampMicro(String columnName, NullableTimeStampMicroHolder holder) { - TimeStampMicroVector vector = (TimeStampMicroVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampMicro(int columnIndex, NullableTimeStampMicroHolder holder) { - TimeStampMicroVector vector = (TimeStampMicroVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a LocalDateTime from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public LocalDateTime getTimeStampMicroObj(String columnName) { - TimeStampMicroVector vector = (TimeStampMicroVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns a LocalDateTime from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public LocalDateTime getTimeStampMicroObj(int columnIndex) { - TimeStampMicroVector vector = (TimeStampMicroVector) table.getVector(columnIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns a long from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampMicroTZ(String columnName) { - TimeStampMicroTZVector vector = (TimeStampMicroTZVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a long from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public long getTimeStampMicroTZ(int columnIndex) { - TimeStampMicroTZVector vector = (TimeStampMicroTZVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampMicroTZ(String columnName, NullableTimeStampMicroTZHolder holder) { - TimeStampMicroTZVector vector = (TimeStampMicroTZVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getTimeStampMicroTZ(int columnIndex, NullableTimeStampMicroTZHolder holder) { - TimeStampMicroTZVector vector = (TimeStampMicroTZVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a Duration from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public Duration getDurationObj(String columnName) { - DurationVector vector = (DurationVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns a Duration from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public Duration getDurationObj(int columnIndex) { - DurationVector vector = (DurationVector) table.getVector(columnIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns an ArrowBuf from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public ArrowBuf getDuration(String columnName) { - DurationVector vector = (DurationVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns an ArrowBuf from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public ArrowBuf getDuration(int columnIndex) { - DurationVector vector = (DurationVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getDuration(String columnName, NullableDurationHolder holder) { - DurationVector vector = (DurationVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getDuration(int columnIndex, NullableDurationHolder holder) { - DurationVector vector = (DurationVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a PeriodDuration from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public PeriodDuration getIntervalMonthDayNanoObj(String columnName) { - IntervalMonthDayNanoVector vector = (IntervalMonthDayNanoVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns a PeriodDuration from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public PeriodDuration getIntervalMonthDayNanoObj(int columnIndex) { - IntervalMonthDayNanoVector vector = (IntervalMonthDayNanoVector) table.getVector(columnIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns an ArrowBuf from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public ArrowBuf getIntervalMonthDayNano(String columnName) { - IntervalMonthDayNanoVector vector = (IntervalMonthDayNanoVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns an ArrowBuf from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public ArrowBuf getIntervalMonthDayNano(int columnIndex) { - IntervalMonthDayNanoVector vector = (IntervalMonthDayNanoVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getIntervalMonthDayNano( - String columnName, NullableIntervalMonthDayNanoHolder holder) { - IntervalMonthDayNanoVector vector = (IntervalMonthDayNanoVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getIntervalMonthDayNano(int columnIndex, NullableIntervalMonthDayNanoHolder holder) { - IntervalMonthDayNanoVector vector = (IntervalMonthDayNanoVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns an ArrowBuf from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public ArrowBuf getIntervalDay(String columnName) { - IntervalDayVector vector = (IntervalDayVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns an ArrowBuf from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public ArrowBuf getIntervalDay(int columnIndex) { - IntervalDayVector vector = (IntervalDayVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getIntervalDay(String columnName, NullableIntervalDayHolder holder) { - IntervalDayVector vector = (IntervalDayVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getIntervalDay(int columnIndex, NullableIntervalDayHolder holder) { - IntervalDayVector vector = (IntervalDayVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Returns a Duration from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public Duration getIntervalDayObj(int columnIndex) { - IntervalDayVector vector = (IntervalDayVector) table.getVector(columnIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns a Duration from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public Duration getIntervalDayObj(String columnName) { - IntervalDayVector vector = (IntervalDayVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns a Period from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - * - * @return a Period of n MONTHS, not YEARS - */ - public Period getIntervalYearObj(String columnName) { - IntervalYearVector vector = (IntervalYearVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns a Period from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - * - * @return a Period of n MONTHS, not YEARS - */ - public Period getIntervalYearObj(int columnIndex) { - IntervalYearVector vector = (IntervalYearVector) table.getVector(columnIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns an int from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - * - * @return the number of MONTHS in the interval (not YEARS) - */ - public int getIntervalYear(String columnName) { - IntervalYearVector vector = (IntervalYearVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns an int from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - * - * @return the number of MONTHS in the interval (not YEARS) - */ - public int getIntervalYear(int columnIndex) { - IntervalYearVector vector = (IntervalYearVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Updates the holder with the value from the column of the given name at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - * - * @param holder a holder to store the interval. Note that the value of the holder represents - * MONTHS not years - */ - public void getIntervalYear(String columnName, NullableIntervalYearHolder holder) { - IntervalYearVector vector = (IntervalYearVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Updates the holder with the value from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - * - * @param holder a holder to store the interval. Note that the value of the holder represents - * MONTHS not years - */ - public void getIntervalYear(int columnIndex, NullableIntervalYearHolder holder) { - IntervalYearVector vector = (IntervalYearVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Updates the value of the holder with data from vector at the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getDecimal(int columnIndex, NullableDecimalHolder holder) { - DecimalVector vector = (DecimalVector) table.getVector(columnIndex); - vector.get(rowNumber, holder); - } - - /** - * Updates the value of the holder with data from the vector with given name at the current row. 
- * An IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public void getDecimal(String columnName, NullableDecimalHolder holder) { - DecimalVector vector = (DecimalVector) table.getVector(columnName); - vector.get(rowNumber, holder); - } - - /** - * Returns a BigDecimal from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public BigDecimal getDecimalObj(String columnName) { - DecimalVector vector = (DecimalVector) table.getVector(columnName); - return vector.getObject(rowNumber); - } - - /** - * Returns a BigDecimal from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public BigDecimal getDecimalObj(int columnIndex) { - DecimalVector vector = (DecimalVector) table.getVector(columnIndex); - return vector.getObject(rowNumber); - } - - /** - * Returns an ArrowBuf from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public ArrowBuf getDecimal(String columnName) { - DecimalVector vector = (DecimalVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns an ArrowBuf from the column with the given index at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public ArrowBuf getDecimal(int columnIndex) { - DecimalVector vector = (DecimalVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Returns a byte[] from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public byte[] getVarBinary(String columnName) { - VarBinaryVector vector = (VarBinaryVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a byte[] from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public byte[] getVarBinary(int columnIndex) { - VarBinaryVector vector = (VarBinaryVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Returns a byte[] from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public byte[] getFixedSizeBinary(String columnName) { - FixedSizeBinaryVector vector = (FixedSizeBinaryVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a byte[] from the column with the given index at the current row. 
An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public byte[] getFixedSizeBinary(int columnIndex) { - FixedSizeBinaryVector vector = (FixedSizeBinaryVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Returns a byte[] from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public byte[] getLargeVarBinary(String columnName) { - LargeVarBinaryVector vector = (LargeVarBinaryVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a byte[] from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present, and a ClassCastException is - * thrown if it is present but has a different type - */ - public byte[] getLargeVarBinary(int columnIndex) { - LargeVarBinaryVector vector = (LargeVarBinaryVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Returns a String from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - * - *

    StandardCharsets.UTF_8 is used as the charset - */ - public String getVarCharObj(String columnName) { - VarCharVector vector = (VarCharVector) table.getVector(columnName); - return new String(vector.get(rowNumber), getDefaultCharacterSet()); - } - - /** - * Returns a String from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - * - * @param columnIndex the index of the FieldVector holding the value - */ - public String getVarCharObj(int columnIndex) { - VarCharVector vector = (VarCharVector) table.getVector(columnIndex); - return new String(vector.get(rowNumber), getDefaultCharacterSet()); - } - - /** - * Returns a byte[] from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - * - *

    StandardCharsets.UTF_8 is used as the charset - */ - public byte[] getVarChar(String columnName) { - VarCharVector vector = (VarCharVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a byte[] from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - * - * @param columnIndex the index of the FieldVector holding the value - */ - public byte[] getVarChar(int columnIndex) { - VarCharVector vector = (VarCharVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Returns a String from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - * - *

    StandardCharsets.UTF_8 is used as the charset, unless this cursor was created with a default - * Charset - */ - public String getLargeVarCharObj(String columnName) { - LargeVarCharVector vector = (LargeVarCharVector) table.getVector(columnName); - return new String(vector.get(rowNumber), getDefaultCharacterSet()); - } - - /** - * Returns a String from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - */ - public String getLargeVarCharObj(int columnIndex) { - LargeVarCharVector vector = (LargeVarCharVector) table.getVector(columnIndex); - return new String(vector.get(rowNumber), getDefaultCharacterSet()); - } - - /** - * Returns a byte[] from the column of the given name at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - * - *

    StandardCharsets.UTF_8 is used as the charset, unless this cursor was created with a default - * Charset - */ - public byte[] getLargeVarChar(String columnName) { - LargeVarCharVector vector = (LargeVarCharVector) table.getVector(columnName); - return vector.get(rowNumber); - } - - /** - * Returns a byte[] from the column with the given index at the current row. An - * IllegalArgumentException is thrown if the column is not present in the Row and a - * ClassCastException is thrown if it has a different type - */ - public byte[] getLargeVarChar(int columnIndex) { - LargeVarCharVector vector = (LargeVarCharVector) table.getVector(columnIndex); - return vector.get(rowNumber); - } - - /** - * Returns true if there is at least one more non-deleted row in the table that has yet to be - * processed. - */ - @Override - public boolean hasNext() { - return nextRowSet || setNextObject(); - } - - /** - * Returns the next non-deleted row in the table. - * - * @throws NoSuchElementException if there are no more rows - */ - @Override - public Row next() { - if (!nextRowSet && !setNextObject()) { - throw new NoSuchElementException(); - } - nextRowSet = false; - return this; - } - - /** - * Set rowNumber to the next non-deleted row. If there are no more rows return false. Otherwise, - * return true. - */ - private boolean setNextObject() { - while (iterator.hasNext()) { - final int row = iterator.next(); - if (!rowIsDeleted(row)) { - rowNumber = row; - nextRowSet = true; - return true; - } - } - return false; - } - - /** - * Returns new internal iterator that processes every row, deleted or not. Use the wrapping next() - * and hasNext() methods rather than using this iterator directly, unless you want to see any - * deleted rows. 
- */ - private Iterator intIterator() { - return new Iterator() { - - @Override - public boolean hasNext() { - return rowNumber < table.getRowCount() - 1; - } - - @Override - public Integer next() { - rowNumber++; - return rowNumber; - } - }; - } - - public int getRowNumber() { - return rowNumber; - } - - private boolean rowIsDeleted(int rowNumber) { - return table.isRowDeleted(rowNumber); - } - - /** Returns the default character set for use with character vectors. */ - public Charset getDefaultCharacterSet() { - return DEFAULT_CHARACTER_SET; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/table/Table.java b/java/vector/src/main/java/org/apache/arrow/vector/table/Table.java deleted file mode 100644 index fa269a70995fc..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/table/Table.java +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.table; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.util.TransferPair; - -/** - * Table is an immutable tabular data structure. - * - *

    See {@link VectorSchemaRoot} for batch processing use cases - * - *

    This API is EXPERIMENTAL. - */ -public class Table extends BaseTable implements Iterable { - - /** Constructs new instance containing each of the given vectors. */ - public Table(Iterable vectors) { - this(StreamSupport.stream(vectors.spliterator(), false).collect(Collectors.toList())); - } - - /** Constructs a new instance from vectors. */ - public static Table of(FieldVector... vectors) { - return new Table(Arrays.stream(vectors).collect(Collectors.toList())); - } - - /** - * Constructs a new instance with the number of rows set to the value count of the first - * FieldVector. - * - *

    All vectors must have the same value count. Although this is not checked, inconsistent - * counts may lead to exceptions or other undefined behavior later. - * - * @param fieldVectors The data vectors (must be equal in size to fields. - */ - public Table(List fieldVectors) { - this(fieldVectors, fieldVectors.size() == 0 ? 0 : fieldVectors.get(0).getValueCount()); - } - - /** - * Constructs a new instance. - * - * @param fieldVectors The data vectors. - * @param rowCount The number of rows - */ - public Table(List fieldVectors, int rowCount) { - super(fieldVectors, rowCount, null); - } - - /** - * Constructs a new instance. - * - * @param fieldVectors The data vectors. - * @param rowCount The number of rows - * @param provider A dictionary provider. May be null if none of the vectors is dictionary encoded - */ - public Table(List fieldVectors, int rowCount, DictionaryProvider provider) { - super(fieldVectors, rowCount, provider); - } - - /** - * Constructs a new instance containing the data from the argument. Vectors are shared between the - * Table and VectorSchemaRoot. Direct modification of those vectors is unsafe and should be - * avoided. - * - * @param vsr The VectorSchemaRoot providing data for this Table - */ - public Table(VectorSchemaRoot vsr) { - this(vsr.getFieldVectors(), vsr.getRowCount()); - vsr.clear(); - } - - /** Returns a deep copy of this table. 
*/ - public Table copy() { - List vectorCopies = new ArrayList<>(); - for (int i = 0; i < getVectorCount(); i++) { - vectorCopies.add(getVectorCopy(i)); - } - DictionaryProvider providerCopy = null; - if (dictionaryProvider != null) { - Set ids = dictionaryProvider.getDictionaryIds(); - Dictionary[] dictionaryCopies = new Dictionary[ids.size()]; - int i = 0; - for (Long id : ids) { - Dictionary src = dictionaryProvider.lookup(id); - FieldVector srcVector = src.getVector(); - FieldVector destVector = srcVector.getField().createVector(srcVector.getAllocator()); - destVector.copyFromSafe( - 0, srcVector.getValueCount(), srcVector); // TODO: Remove safe copy for perf - DictionaryEncoding srcEncoding = src.getEncoding(); - Dictionary dest = - new Dictionary( - destVector, - new DictionaryEncoding( - srcEncoding.getId(), srcEncoding.isOrdered(), srcEncoding.getIndexType())); - dictionaryCopies[i] = dest; - i++; - } - providerCopy = new DictionaryProvider.MapDictionaryProvider(dictionaryCopies); - } - return new Table(vectorCopies, (int) getRowCount(), providerCopy); - } - - /** - * Returns a new Table created by adding the given vector to the vectors in this Table. - * - * @param index field index - * @param vector vector to be added. - * @return out a new Table with vector added - */ - public Table addVector(int index, FieldVector vector) { - return new Table(insertVector(index, vector)); - } - - /** - * Returns a new Table created by removing the selected Vector from this Table. - * - * @param index field index - * @return out a new Table with vector removed - */ - public Table removeVector(int index) { - return new Table(extractVector(index)); - } - - /** - * Slice this table from desired index. Memory is NOT transferred from the vectors in this table - * to new vectors in the target table. This table is unchanged. 
- * - * @param index start position of the slice - * @return the sliced table - */ - public Table slice(int index) { - return slice(index, this.rowCount - index); - } - - /** - * Slice this table at desired index and length. Memory is NOT transferred from the vectors in - * this table to new vectors in the target table. This table is unchanged. - * - * @param index start position of the slice - * @param length length of the slice - * @return the sliced table - */ - public Table slice(int index, int length) { - Preconditions.checkArgument(index >= 0, "expecting non-negative index"); - Preconditions.checkArgument(length >= 0, "expecting non-negative length"); - Preconditions.checkArgument(index + length <= rowCount, "index + length should <= rowCount"); - - if (index == 0 && length == rowCount) { - return this; - } - - List sliceVectors = - fieldVectors.stream() - .map( - v -> { - TransferPair transferPair = v.getTransferPair(v.getAllocator()); - transferPair.splitAndTransfer(index, length); - return (FieldVector) transferPair.getTo(); - }) - .collect(Collectors.toList()); - - return new Table(sliceVectors); - } - - /** Returns a Row iterator for this Table. */ - @Override - public Iterator iterator() { - - return new Iterator() { - - private final Row row = new Row(Table.this); - - @Override - public Row next() { - row.next(); - return row; - } - - @Override - public boolean hasNext() { - return row.hasNext(); - } - }; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/table/package-info.java b/java/vector/src/main/java/org/apache/arrow/vector/table/package-info.java deleted file mode 100644 index b11ada51292f9..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/table/package-info.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.arrow.vector.table; - -/* - * Support for Table, an immutable, columnar, tabular data structure based on FieldVectors. - * See the Arrow Java documentation for details: Table - */ diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/DateUnit.java b/java/vector/src/main/java/org/apache/arrow/vector/types/DateUnit.java deleted file mode 100644 index a11034bfa1cc3..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/DateUnit.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.types; - -/** Resolutions that dates can be stored at. */ -public enum DateUnit { - /** Days since epoch. */ - DAY(org.apache.arrow.flatbuf.DateUnit.DAY), - /** Milliseconds since epoch. */ - MILLISECOND(org.apache.arrow.flatbuf.DateUnit.MILLISECOND); - - private static final DateUnit[] valuesByFlatbufId = new DateUnit[DateUnit.values().length]; - - static { - for (DateUnit v : DateUnit.values()) { - valuesByFlatbufId[v.flatbufID] = v; - } - } - - private final short flatbufID; - - DateUnit(short flatbufID) { - this.flatbufID = flatbufID; - } - - public short getFlatbufID() { - return flatbufID; - } - - public static DateUnit fromFlatbufID(short id) { - return valuesByFlatbufId[id]; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/FloatingPointPrecision.java b/java/vector/src/main/java/org/apache/arrow/vector/types/FloatingPointPrecision.java deleted file mode 100644 index ddce41e8b3497..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/FloatingPointPrecision.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.types; - -import org.apache.arrow.flatbuf.Precision; - -/** Precisions of primitive floating point numbers. */ -public enum FloatingPointPrecision { - /** 16-bit (not a standard java type). */ - HALF(Precision.HALF), - /** 32-bit (i.e. float in java). */ - SINGLE(Precision.SINGLE), - /** 64-bit (i.e. double in java). */ - DOUBLE(Precision.DOUBLE); - - private static final FloatingPointPrecision[] valuesByFlatbufId = - new FloatingPointPrecision[FloatingPointPrecision.values().length]; - - static { - for (FloatingPointPrecision v : FloatingPointPrecision.values()) { - valuesByFlatbufId[v.flatbufID] = v; - } - } - - private final short flatbufID; - - private FloatingPointPrecision(short flatbufID) { - this.flatbufID = flatbufID; - } - - public short getFlatbufID() { - return flatbufID; - } - - public static FloatingPointPrecision fromFlatbufID(short id) { - return valuesByFlatbufId[id]; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/IntervalUnit.java b/java/vector/src/main/java/org/apache/arrow/vector/types/IntervalUnit.java deleted file mode 100644 index 22d8a047032a6..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/IntervalUnit.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.types; - -/** Resolutions for Interval Vectors. */ -public enum IntervalUnit { - /** - * Values are stored as number of months (which can be converted into years and months via - * division). - */ - YEAR_MONTH(org.apache.arrow.flatbuf.IntervalUnit.YEAR_MONTH), - /** Values are stored as some number of days and some number of milliseconds within that day. */ - DAY_TIME(org.apache.arrow.flatbuf.IntervalUnit.DAY_TIME), - /** Values are stored as number of months, days and nanoseconds. */ - MONTH_DAY_NANO(org.apache.arrow.flatbuf.IntervalUnit.MONTH_DAY_NANO); - - private static final IntervalUnit[] valuesByFlatbufId = - new IntervalUnit[IntervalUnit.values().length]; - - static { - for (IntervalUnit v : IntervalUnit.values()) { - valuesByFlatbufId[v.flatbufID] = v; - } - } - - private final short flatbufID; - - private IntervalUnit(short flatbufID) { - this.flatbufID = flatbufID; - } - - public short getFlatbufID() { - return flatbufID; - } - - public static IntervalUnit fromFlatbufID(short id) { - return valuesByFlatbufId[id]; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/MetadataVersion.java b/java/vector/src/main/java/org/apache/arrow/vector/types/MetadataVersion.java deleted file mode 100644 index f8418afaa4fb3..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/MetadataVersion.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.types; - -/** Metadata version for Arrow metadata. */ -public enum MetadataVersion { - /// 0.1.0 - V1(org.apache.arrow.flatbuf.MetadataVersion.V1), - - /// 0.2.0 - V2(org.apache.arrow.flatbuf.MetadataVersion.V2), - - /// 0.3.0 to 0.7.1 - V3(org.apache.arrow.flatbuf.MetadataVersion.V3), - - /// 0.8.0 to 0.17.1 - V4(org.apache.arrow.flatbuf.MetadataVersion.V4), - - /// >= 1.0.0 - V5(org.apache.arrow.flatbuf.MetadataVersion.V5), - ; - - public static final MetadataVersion DEFAULT = V5; - - private static final MetadataVersion[] valuesByFlatbufId = - new MetadataVersion[MetadataVersion.values().length]; - - static { - for (MetadataVersion v : MetadataVersion.values()) { - valuesByFlatbufId[v.flatbufID] = v; - } - } - - private final short flatbufID; - - MetadataVersion(short flatbufID) { - this.flatbufID = flatbufID; - } - - public short toFlatbufID() { - return flatbufID; - } - - public static MetadataVersion fromFlatbufID(short id) { - return valuesByFlatbufId[id]; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/TimeUnit.java b/java/vector/src/main/java/org/apache/arrow/vector/types/TimeUnit.java deleted file mode 100644 index b0eacb56b43dc..0000000000000 --- 
a/java/vector/src/main/java/org/apache/arrow/vector/types/TimeUnit.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.types; - -/** Resolutions that times can be stored with. 
*/ -public enum TimeUnit { - SECOND(org.apache.arrow.flatbuf.TimeUnit.SECOND), - MILLISECOND(org.apache.arrow.flatbuf.TimeUnit.MILLISECOND), - MICROSECOND(org.apache.arrow.flatbuf.TimeUnit.MICROSECOND), - NANOSECOND(org.apache.arrow.flatbuf.TimeUnit.NANOSECOND); - - private static final TimeUnit[] valuesByFlatbufId = new TimeUnit[TimeUnit.values().length]; - - static { - for (TimeUnit v : TimeUnit.values()) { - valuesByFlatbufId[v.flatbufID] = v; - } - } - - private final short flatbufID; - - TimeUnit(short flatbufID) { - this.flatbufID = flatbufID; - } - - public short getFlatbufID() { - return flatbufID; - } - - public static TimeUnit fromFlatbufID(short id) { - return valuesByFlatbufId[id]; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java deleted file mode 100644 index e9b963b62c13b..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ /dev/null @@ -1,1046 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.types; - -import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; -import static org.apache.arrow.vector.types.FloatingPointPrecision.HALF; -import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; -import static org.apache.arrow.vector.types.UnionMode.Dense; -import static org.apache.arrow.vector.types.UnionMode.Sparse; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float2Vector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; 
-import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.ViewVarBinaryVector; -import org.apache.arrow.vector.ViewVarCharVector; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.LargeListViewVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.RunEndEncodedVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.impl.BigIntWriterImpl; -import org.apache.arrow.vector.complex.impl.BitWriterImpl; -import org.apache.arrow.vector.complex.impl.DateDayWriterImpl; -import org.apache.arrow.vector.complex.impl.DateMilliWriterImpl; -import org.apache.arrow.vector.complex.impl.Decimal256WriterImpl; -import org.apache.arrow.vector.complex.impl.DecimalWriterImpl; -import org.apache.arrow.vector.complex.impl.DenseUnionWriter; -import org.apache.arrow.vector.complex.impl.DurationWriterImpl; -import org.apache.arrow.vector.complex.impl.FixedSizeBinaryWriterImpl; -import org.apache.arrow.vector.complex.impl.Float2WriterImpl; -import org.apache.arrow.vector.complex.impl.Float4WriterImpl; -import org.apache.arrow.vector.complex.impl.Float8WriterImpl; -import org.apache.arrow.vector.complex.impl.IntWriterImpl; -import 
org.apache.arrow.vector.complex.impl.IntervalDayWriterImpl; -import org.apache.arrow.vector.complex.impl.IntervalMonthDayNanoWriterImpl; -import org.apache.arrow.vector.complex.impl.IntervalYearWriterImpl; -import org.apache.arrow.vector.complex.impl.LargeVarBinaryWriterImpl; -import org.apache.arrow.vector.complex.impl.LargeVarCharWriterImpl; -import org.apache.arrow.vector.complex.impl.NullableStructWriter; -import org.apache.arrow.vector.complex.impl.SmallIntWriterImpl; -import org.apache.arrow.vector.complex.impl.TimeMicroWriterImpl; -import org.apache.arrow.vector.complex.impl.TimeMilliWriterImpl; -import org.apache.arrow.vector.complex.impl.TimeNanoWriterImpl; -import org.apache.arrow.vector.complex.impl.TimeSecWriterImpl; -import org.apache.arrow.vector.complex.impl.TimeStampMicroTZWriterImpl; -import org.apache.arrow.vector.complex.impl.TimeStampMicroWriterImpl; -import org.apache.arrow.vector.complex.impl.TimeStampMilliTZWriterImpl; -import org.apache.arrow.vector.complex.impl.TimeStampMilliWriterImpl; -import org.apache.arrow.vector.complex.impl.TimeStampNanoTZWriterImpl; -import org.apache.arrow.vector.complex.impl.TimeStampNanoWriterImpl; -import org.apache.arrow.vector.complex.impl.TimeStampSecTZWriterImpl; -import org.apache.arrow.vector.complex.impl.TimeStampSecWriterImpl; -import org.apache.arrow.vector.complex.impl.TinyIntWriterImpl; -import org.apache.arrow.vector.complex.impl.UInt1WriterImpl; -import org.apache.arrow.vector.complex.impl.UInt2WriterImpl; -import org.apache.arrow.vector.complex.impl.UInt4WriterImpl; -import org.apache.arrow.vector.complex.impl.UInt8WriterImpl; -import org.apache.arrow.vector.complex.impl.UnionLargeListViewWriter; -import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.complex.impl.UnionWriter; -import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl; -import 
org.apache.arrow.vector.complex.impl.VarCharWriterImpl; -import org.apache.arrow.vector.complex.impl.ViewVarBinaryWriterImpl; -import org.apache.arrow.vector.complex.impl.ViewVarCharWriterImpl; -import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor; -import org.apache.arrow.vector.types.pojo.ArrowType.Binary; -import org.apache.arrow.vector.types.pojo.ArrowType.BinaryView; -import org.apache.arrow.vector.types.pojo.ArrowType.Bool; -import org.apache.arrow.vector.types.pojo.ArrowType.Date; -import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; -import org.apache.arrow.vector.types.pojo.ArrowType.Duration; -import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; -import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary; -import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; -import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; -import org.apache.arrow.vector.types.pojo.ArrowType.Int; -import org.apache.arrow.vector.types.pojo.ArrowType.Interval; -import org.apache.arrow.vector.types.pojo.ArrowType.LargeBinary; -import org.apache.arrow.vector.types.pojo.ArrowType.LargeListView; -import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8; -import org.apache.arrow.vector.types.pojo.ArrowType.List; -import org.apache.arrow.vector.types.pojo.ArrowType.ListView; -import org.apache.arrow.vector.types.pojo.ArrowType.Map; -import org.apache.arrow.vector.types.pojo.ArrowType.Null; -import org.apache.arrow.vector.types.pojo.ArrowType.RunEndEncoded; -import org.apache.arrow.vector.types.pojo.ArrowType.Struct; -import org.apache.arrow.vector.types.pojo.ArrowType.Time; -import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; -import org.apache.arrow.vector.types.pojo.ArrowType.Union; -import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; -import 
org.apache.arrow.vector.types.pojo.ArrowType.Utf8View; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; - -/** An enumeration of all logical types supported by this library. */ -public class Types { - - /** The actual enumeration of types. */ - public enum MinorType { - NULL(Null.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new NullVector(field.getName()); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return null; - } - }, - STRUCT(Struct.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new StructVector( - field.getName(), allocator, field.getFieldType(), schemaChangeCallback); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new NullableStructWriter((StructVector) vector); - } - }, - TINYINT(new Int(8, true)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TinyIntVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TinyIntWriterImpl((TinyIntVector) vector); - } - }, - SMALLINT(new Int(16, true)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new SmallIntVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new SmallIntWriterImpl((SmallIntVector) vector); - } - }, - INT(new Int(32, true)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new IntVector(field, allocator); - } - - @Override - public FieldWriter 
getNewFieldWriter(ValueVector vector) { - return new IntWriterImpl((IntVector) vector); - } - }, - BIGINT(new Int(64, true)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new BigIntVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new BigIntWriterImpl((BigIntVector) vector); - } - }, - DATEDAY(new Date(DateUnit.DAY)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new DateDayVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new DateDayWriterImpl((DateDayVector) vector); - } - }, - DATEMILLI(new Date(DateUnit.MILLISECOND)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new DateMilliVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new DateMilliWriterImpl((DateMilliVector) vector); - } - }, - TIMESEC(new Time(TimeUnit.SECOND, 32)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeSecVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TimeSecWriterImpl((TimeSecVector) vector); - } - }, - TIMEMILLI(new Time(TimeUnit.MILLISECOND, 32)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeMilliVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TimeMilliWriterImpl((TimeMilliVector) vector); - } - }, - TIMEMICRO(new Time(TimeUnit.MICROSECOND, 64)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator 
allocator, CallBack schemaChangeCallback) { - return new TimeMicroVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TimeMicroWriterImpl((TimeMicroVector) vector); - } - }, - TIMENANO(new Time(TimeUnit.NANOSECOND, 64)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeNanoVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TimeNanoWriterImpl((TimeNanoVector) vector); - } - }, - // time in second from the Unix epoch, 00:00:00 on 1 January 1970, UTC. - TIMESTAMPSEC(new Timestamp(org.apache.arrow.vector.types.TimeUnit.SECOND, null)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampSecVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TimeStampSecWriterImpl((TimeStampSecVector) vector); - } - }, - // time in millis from the Unix epoch, 00:00:00.000 on 1 January 1970, UTC. - TIMESTAMPMILLI(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, null)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampMilliVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TimeStampMilliWriterImpl((TimeStampMilliVector) vector); - } - }, - // time in microsecond from the Unix epoch, 00:00:00.000000 on 1 January 1970, UTC. 
- TIMESTAMPMICRO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, null)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampMicroVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TimeStampMicroWriterImpl((TimeStampMicroVector) vector); - } - }, - // time in nanosecond from the Unix epoch, 00:00:00.000000000 on 1 January 1970, UTC. - TIMESTAMPNANO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.NANOSECOND, null)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampNanoVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TimeStampNanoWriterImpl((TimeStampNanoVector) vector); - } - }, - INTERVALDAY(new Interval(IntervalUnit.DAY_TIME)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new IntervalDayVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new IntervalDayWriterImpl((IntervalDayVector) vector); - } - }, - INTERVALMONTHDAYNANO(new Interval(IntervalUnit.MONTH_DAY_NANO)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new IntervalMonthDayNanoVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new IntervalMonthDayNanoWriterImpl((IntervalMonthDayNanoVector) vector); - } - }, - DURATION(null) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new DurationVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - 
return new DurationWriterImpl((DurationVector) vector); - } - }, - - INTERVALYEAR(new Interval(IntervalUnit.YEAR_MONTH)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new IntervalYearVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new IntervalYearWriterImpl((IntervalYearVector) vector); - } - }, - FLOAT2(new FloatingPoint(HALF)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new Float2Vector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new Float2WriterImpl((Float2Vector) vector); - } - }, - // 4 byte ieee 754 - FLOAT4(new FloatingPoint(SINGLE)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new Float4Vector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new Float4WriterImpl((Float4Vector) vector); - } - }, - // 8 byte ieee 754 - FLOAT8(new FloatingPoint(DOUBLE)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new Float8Vector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new Float8WriterImpl((Float8Vector) vector); - } - }, - BIT(Bool.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new BitVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new BitWriterImpl((BitVector) vector); - } - }, - VARCHAR(Utf8.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack 
schemaChangeCallback) { - return new VarCharVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new VarCharWriterImpl((VarCharVector) vector); - } - }, - VIEWVARCHAR(Utf8View.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new ViewVarCharVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new ViewVarCharWriterImpl((ViewVarCharVector) vector); - } - }, - LARGEVARCHAR(LargeUtf8.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new LargeVarCharVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new LargeVarCharWriterImpl((LargeVarCharVector) vector); - } - }, - LARGEVARBINARY(LargeBinary.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new LargeVarBinaryVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new LargeVarBinaryWriterImpl((LargeVarBinaryVector) vector); - } - }, - VARBINARY(Binary.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new VarBinaryVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new VarBinaryWriterImpl((VarBinaryVector) vector); - } - }, - VIEWVARBINARY(BinaryView.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new ViewVarBinaryVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new 
ViewVarBinaryWriterImpl((ViewVarBinaryVector) vector); - } - }, - DECIMAL(null) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new DecimalVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new DecimalWriterImpl((DecimalVector) vector); - } - }, - DECIMAL256(null) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new Decimal256Vector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new Decimal256WriterImpl((Decimal256Vector) vector); - } - }, - FIXEDSIZEBINARY(null) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new FixedSizeBinaryVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new FixedSizeBinaryWriterImpl((FixedSizeBinaryVector) vector); - } - }, - UINT1(new Int(8, false)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new UInt1Vector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new UInt1WriterImpl((UInt1Vector) vector); - } - }, - UINT2(new Int(16, false)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new UInt2Vector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new UInt2WriterImpl((UInt2Vector) vector); - } - }, - UINT4(new Int(32, false)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new UInt4Vector(field, allocator); - } - - @Override - public 
FieldWriter getNewFieldWriter(ValueVector vector) { - return new UInt4WriterImpl((UInt4Vector) vector); - } - }, - UINT8(new Int(64, false)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new UInt8Vector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new UInt8WriterImpl((UInt8Vector) vector); - } - }, - LIST(List.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new ListVector( - field.getName(), allocator, field.getFieldType(), schemaChangeCallback); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new UnionListWriter((ListVector) vector); - } - }, - LISTVIEW(ListView.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new ListViewVector( - field.getName(), allocator, field.getFieldType(), schemaChangeCallback); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new UnionListWriter((ListVector) vector); - } - }, - LARGELIST(ArrowType.LargeList.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new LargeListVector( - field.getName(), allocator, field.getFieldType(), schemaChangeCallback); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new UnionLargeListWriter((LargeListVector) vector); - } - }, - LARGELISTVIEW(ArrowType.LargeListView.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new LargeListViewVector( - field.getName(), allocator, field.getFieldType(), schemaChangeCallback); - } - - @Override - public FieldWriter 
getNewFieldWriter(ValueVector vector) { - return new UnionLargeListViewWriter((LargeListViewVector) vector); - } - }, - FIXED_SIZE_LIST(null) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new FixedSizeListVector( - field.getName(), allocator, field.getFieldType(), schemaChangeCallback); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - throw new UnsupportedOperationException( - "FieldWriter not implemented for FixedSizeList " + "type"); - } - }, - UNION(new Union(Sparse, null)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - if (field.getFieldType().getDictionary() != null) { - throw new UnsupportedOperationException( - "Dictionary encoding not supported for complex " + "types"); - } - return new UnionVector( - field.getName(), allocator, field.getFieldType(), schemaChangeCallback); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new UnionWriter((UnionVector) vector); - } - }, - DENSEUNION(new Union(Dense, null)) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - if (field.getFieldType().getDictionary() != null) { - throw new UnsupportedOperationException( - "Dictionary encoding not supported for complex " + "types"); - } - return new DenseUnionVector( - field.getName(), allocator, field.getFieldType(), schemaChangeCallback); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new DenseUnionWriter((DenseUnionVector) vector); - } - }, - MAP(null) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new MapVector( - field.getName(), allocator, field.getFieldType(), schemaChangeCallback); - } - - @Override - public FieldWriter 
getNewFieldWriter(ValueVector vector) { - return new UnionListWriter((MapVector) vector); - } - }, - TIMESTAMPSECTZ(null) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampSecTZVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TimeStampSecTZWriterImpl((TimeStampSecTZVector) vector); - } - }, - TIMESTAMPMILLITZ(null) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampMilliTZVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TimeStampMilliTZWriterImpl((TimeStampMilliTZVector) vector); - } - }, - TIMESTAMPMICROTZ(null) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampMicroTZVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TimeStampMicroTZWriterImpl((TimeStampMicroTZVector) vector); - } - }, - TIMESTAMPNANOTZ(null) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampNanoTZVector(field, allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return new TimeStampNanoTZWriterImpl((TimeStampNanoTZVector) vector); - } - }, - EXTENSIONTYPE(null) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return ((ExtensionType) field.getFieldType().getType()) - .getNewVector(field.getName(), field.getFieldType(), allocator); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - return ((ExtensionTypeVector) vector) - .getUnderlyingVector() - .getMinorType() - 
.getNewFieldWriter(vector); - } - }, - RUNENDENCODED(RunEndEncoded.INSTANCE) { - @Override - public FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new RunEndEncodedVector(field, allocator, schemaChangeCallback); - } - - @Override - public FieldWriter getNewFieldWriter(ValueVector vector) { - throw new UnsupportedOperationException( - "FieldWriter for run-end encoded vector is not implemented yet."); - } - }, - ; - - private final ArrowType type; - - MinorType(ArrowType type) { - this.type = type; - } - - /** Returns the {@link ArrowType} equivalent of this type. */ - public final ArrowType getType() { - if (type == null) { - throw new UnsupportedOperationException("Cannot get simple type for type " + name()); - } - return type; - } - - /** Constructs a new vector for the given type. */ - public final FieldVector getNewVector( - String name, - FieldType fieldType, - BufferAllocator allocator, - CallBack schemaChangeCallback) { - return getNewVector(new Field(name, fieldType, null), allocator, schemaChangeCallback); - } - - /** Constructs a new vector for the given type. */ - public abstract FieldVector getNewVector( - Field field, BufferAllocator allocator, CallBack schemaChangeCallback); - - public abstract FieldWriter getNewFieldWriter(ValueVector vector); - } - - /** Maps the ArrowType to the java implementations MinorType. 
*/ - public static MinorType getMinorTypeForArrowType(ArrowType arrowType) { - return arrowType.accept( - new ArrowTypeVisitor() { - @Override - public MinorType visit(Null type) { - return MinorType.NULL; - } - - @Override - public MinorType visit(Struct type) { - return MinorType.STRUCT; - } - - @Override - public MinorType visit(List type) { - return MinorType.LIST; - } - - @Override - public MinorType visit(FixedSizeList type) { - return MinorType.FIXED_SIZE_LIST; - } - - @Override - public MinorType visit(Union type) { - switch (type.getMode()) { - case Sparse: - return MinorType.UNION; - case Dense: - return MinorType.DENSEUNION; - default: - throw new IllegalArgumentException( - "only Dense or Sparse unions supported: " + type); - } - } - - @Override - public MinorType visit(Map type) { - return MinorType.MAP; - } - - @Override - public MinorType visit(ArrowType.LargeList type) { - return MinorType.LARGELIST; - } - - @Override - public MinorType visit(Int type) { - switch (type.getBitWidth()) { - case 8: - return type.getIsSigned() ? MinorType.TINYINT : MinorType.UINT1; - case 16: - return type.getIsSigned() ? MinorType.SMALLINT : MinorType.UINT2; - case 32: - return type.getIsSigned() ? MinorType.INT : MinorType.UINT4; - case 64: - return type.getIsSigned() ? 
MinorType.BIGINT : MinorType.UINT8; - default: - throw new IllegalArgumentException("only 8, 16, 32, 64 supported: " + type); - } - } - - @Override - public MinorType visit(FloatingPoint type) { - switch (type.getPrecision()) { - case HALF: - return MinorType.FLOAT2; - case SINGLE: - return MinorType.FLOAT4; - case DOUBLE: - return MinorType.FLOAT8; - default: - throw new IllegalArgumentException("unknown precision: " + type); - } - } - - @Override - public MinorType visit(Utf8 type) { - return MinorType.VARCHAR; - } - - @Override - public MinorType visit(Utf8View type) { - return MinorType.VIEWVARCHAR; - } - - @Override - public Types.MinorType visit(LargeUtf8 type) { - return MinorType.LARGEVARCHAR; - } - - @Override - public MinorType visit(Binary type) { - return MinorType.VARBINARY; - } - - @Override - public MinorType visit(BinaryView type) { - return MinorType.VIEWVARBINARY; - } - - @Override - public MinorType visit(LargeBinary type) { - return MinorType.LARGEVARBINARY; - } - - @Override - public MinorType visit(Bool type) { - return MinorType.BIT; - } - - @Override - public MinorType visit(Decimal type) { - if (type.getBitWidth() == 256) { - return MinorType.DECIMAL256; - } - return MinorType.DECIMAL; - } - - @Override - public MinorType visit(FixedSizeBinary type) { - return MinorType.FIXEDSIZEBINARY; - } - - @Override - public MinorType visit(Date type) { - switch (type.getUnit()) { - case DAY: - return MinorType.DATEDAY; - case MILLISECOND: - return MinorType.DATEMILLI; - default: - throw new IllegalArgumentException("unknown unit: " + type); - } - } - - @Override - public MinorType visit(Time type) { - switch (type.getUnit()) { - case SECOND: - return MinorType.TIMESEC; - case MILLISECOND: - return MinorType.TIMEMILLI; - case MICROSECOND: - return MinorType.TIMEMICRO; - case NANOSECOND: - return MinorType.TIMENANO; - default: - throw new IllegalArgumentException("unknown unit: " + type); - } - } - - @Override - public MinorType visit(Timestamp type) { 
- String tz = type.getTimezone(); - switch (type.getUnit()) { - case SECOND: - return tz == null ? MinorType.TIMESTAMPSEC : MinorType.TIMESTAMPSECTZ; - case MILLISECOND: - return tz == null ? MinorType.TIMESTAMPMILLI : MinorType.TIMESTAMPMILLITZ; - case MICROSECOND: - return tz == null ? MinorType.TIMESTAMPMICRO : MinorType.TIMESTAMPMICROTZ; - case NANOSECOND: - return tz == null ? MinorType.TIMESTAMPNANO : MinorType.TIMESTAMPNANOTZ; - default: - throw new IllegalArgumentException("unknown unit: " + type); - } - } - - @Override - public MinorType visit(Interval type) { - switch (type.getUnit()) { - case DAY_TIME: - return MinorType.INTERVALDAY; - case YEAR_MONTH: - return MinorType.INTERVALYEAR; - case MONTH_DAY_NANO: - return MinorType.INTERVALMONTHDAYNANO; - default: - throw new IllegalArgumentException("unknown unit: " + type); - } - } - - @Override - public MinorType visit(Duration type) { - return MinorType.DURATION; - } - - @Override - public MinorType visit(ListView type) { - return MinorType.LISTVIEW; - } - - @Override - public MinorType visit(LargeListView type) { - return MinorType.LARGELISTVIEW; - } - - @Override - public MinorType visit(ExtensionType type) { - return MinorType.EXTENSIONTYPE; - } - - @Override - public MinorType visit(RunEndEncoded type) { - return MinorType.RUNENDENCODED; - } - }); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/UnionMode.java b/java/vector/src/main/java/org/apache/arrow/vector/types/UnionMode.java deleted file mode 100644 index f1fc35a8ed49e..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/UnionMode.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.types; - -/** Different memory layouts for Union Vectors. */ -public enum UnionMode { - /** - * Each child vector is the same length as the overall vector, and there is one 8-bit integer - * buffer to indicate the index of a child vector to use at any given position. - */ - Sparse(org.apache.arrow.flatbuf.UnionMode.Sparse), - /** - * Each child vector is of variable width. The parent vector contains both a child index vector - * (like in {@link #Sparse}) and in addition a slot index buffer to determine the offset into the - * child vector indicated by the index vector. 
- */ - Dense(org.apache.arrow.flatbuf.UnionMode.Dense); - - private static final UnionMode[] valuesByFlatbufId = new UnionMode[UnionMode.values().length]; - - static { - for (UnionMode v : UnionMode.values()) { - valuesByFlatbufId[v.flatbufID] = v; - } - } - - private final short flatbufID; - - private UnionMode(short flatbufID) { - this.flatbufID = flatbufID; - } - - public short getFlatbufID() { - return flatbufID; - } - - public static UnionMode fromFlatbufID(short id) { - return valuesByFlatbufId[id]; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java deleted file mode 100644 index b5e5d8c3aeef3..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.types.pojo; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonGetter; -import com.fasterxml.jackson.annotation.JsonProperty; -import java.util.Objects; -import org.apache.arrow.vector.types.pojo.ArrowType.Int; - -/** A POJO representation of Arrow Dictionary metadata. */ -public class DictionaryEncoding { - - private final long id; - private final boolean ordered; - private final Int indexType; - - /** - * Constructs a new instance. - * - * @param id The ID of the dictionary to use for encoding. - * @param ordered Whether the keys in values in the dictionary are ordered. - * @param indexType (nullable). The integer type to use for indexing in the dictionary. Defaults - * to a signed 32 bit integer. - */ - @JsonCreator - public DictionaryEncoding( - @JsonProperty("id") long id, - @JsonProperty("isOrdered") boolean ordered, - @JsonProperty("indexType") Int indexType) { - this.id = id; - this.ordered = ordered; - this.indexType = indexType == null ? 
new Int(32, true) : indexType; - } - - public long getId() { - return id; - } - - @JsonGetter("isOrdered") - public boolean isOrdered() { - return ordered; - } - - public Int getIndexType() { - return indexType; - } - - @Override - public String toString() { - return "DictionaryEncoding[id=" + id + ",ordered=" + ordered + ",indexType=" + indexType + "]"; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } else if (!(o instanceof DictionaryEncoding)) { - return false; - } - DictionaryEncoding that = (DictionaryEncoding) o; - return id == that.id && ordered == that.ordered && Objects.equals(indexType, that.indexType); - } - - @Override - public int hashCode() { - return Objects.hash(id, ordered, indexType); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/ExtensionTypeRegistry.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/ExtensionTypeRegistry.java deleted file mode 100644 index e323bf67d914c..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/ExtensionTypeRegistry.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.types.pojo; - -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; - -/** A registry of recognized extension types. */ -public final class ExtensionTypeRegistry { - private static final ConcurrentMap registry = new ConcurrentHashMap<>(); - - public static void register(ExtensionType type) { - registry.put(type.extensionName(), type); - } - - public static void unregister(ExtensionType type) { - registry.remove(type.extensionName()); - } - - public static ExtensionType lookup(String name) { - return registry.get(name); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java deleted file mode 100644 index d65ef1bee75fa..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.types.pojo; - -import static org.apache.arrow.util.Preconditions.checkNotNull; -import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.DATA_VECTOR_NAME; -import static org.apache.arrow.vector.types.pojo.ArrowType.getTypeForField; -import static org.apache.arrow.vector.types.pojo.Schema.convertMetadata; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonInclude.Include; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.flatbuffers.FlatBufferBuilder; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Objects; -import java.util.stream.Collectors; -import org.apache.arrow.flatbuf.KeyValue; -import org.apache.arrow.flatbuf.Type; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Collections2; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** A POJO abstraction for the Flatbuffer description of Vector Type. 
*/ -public class Field { - - private static final Logger logger = LoggerFactory.getLogger(Field.class); - - public static Field nullablePrimitive(String name, ArrowType.PrimitiveType type) { - return nullable(name, type); - } - - public static Field nullable(String name, ArrowType type) { - return new Field(name, FieldType.nullable(type), null); - } - - public static Field notNullable(String name, ArrowType type) { - return new Field(name, FieldType.notNullable(type), null); - } - - private final String name; - private final FieldType fieldType; - private final List children; - - private Field( - String name, - boolean nullable, - ArrowType type, - DictionaryEncoding dictionary, - List children, - Map metadata) { - this(name, new FieldType(nullable, type, dictionary, metadata), children); - } - - @JsonCreator - private Field( - @JsonProperty("name") String name, - @JsonProperty("nullable") boolean nullable, - @JsonProperty("type") ArrowType type, - @JsonProperty("dictionary") DictionaryEncoding dictionary, - @JsonProperty("children") List children, - @JsonProperty("metadata") List> metadata) { - this(name, new FieldType(nullable, type, dictionary, convertMetadata(metadata)), children); - } - - /** - * Constructs a new Field object. - * - * @param name name of the field - * @param fieldType type of the field - * @param children child fields, if any - */ - public Field(String name, FieldType fieldType, List children) { - this.name = name; - this.fieldType = checkNotNull(fieldType); - this.children = - children == null ? Collections.emptyList() : Collections2.toImmutableList(children); - } - - /** Construct a new vector of this type using the given allocator. */ - public FieldVector createVector(BufferAllocator allocator) { - FieldVector vector = fieldType.createNewSingleVector(this, allocator, null); - vector.initializeChildrenFromFields(children); - return vector; - } - - /** Constructs a new instance from a flatbuffer representation of the field. 
*/ - public static Field convertField(org.apache.arrow.flatbuf.Field field) { - Map metadata = new HashMap<>(); - for (int i = 0; i < field.customMetadataLength(); i++) { - KeyValue kv = field.customMetadata(i); - String key = kv.key(); - String value = kv.value(); - metadata.put(key == null ? "" : key, value == null ? "" : value); - } - metadata = Collections.unmodifiableMap(metadata); - - String name = field.name(); - boolean nullable = field.nullable(); - ArrowType type = getTypeForField(field); - - if (metadata.containsKey(ExtensionType.EXTENSION_METADATA_KEY_NAME)) { - final String extensionName = metadata.get(ExtensionType.EXTENSION_METADATA_KEY_NAME); - final String extensionMetadata = - metadata.getOrDefault(ExtensionType.EXTENSION_METADATA_KEY_METADATA, ""); - ExtensionType extensionType = ExtensionTypeRegistry.lookup(extensionName); - if (extensionType != null) { - type = extensionType.deserialize(type, extensionMetadata); - } else { - // Otherwise, we haven't registered the type - logger.info("Unrecognized extension type: {}", extensionName); - } - } - - DictionaryEncoding dictionary = null; - org.apache.arrow.flatbuf.DictionaryEncoding dictionaryFB = field.dictionary(); - if (dictionaryFB != null) { - ArrowType.Int indexType = null; - org.apache.arrow.flatbuf.Int indexTypeFB = dictionaryFB.indexType(); - if (indexTypeFB != null) { - indexType = new ArrowType.Int(indexTypeFB.bitWidth(), indexTypeFB.isSigned()); - } - dictionary = new DictionaryEncoding(dictionaryFB.id(), dictionaryFB.isOrdered(), indexType); - } - List children = new ArrayList<>(); - for (int i = 0; i < field.childrenLength(); i++) { - Field childField = convertField(field.children(i)); - childField = mutateOriginalNameIfNeeded(field, childField); - children.add(childField); - } - children = Collections.unmodifiableList(children); - return new Field(name, nullable, type, dictionary, children, metadata); - } - - /** - * Helper method to ensure backward compatibility with schemas generated 
prior to ARROW-1347, - * ARROW-1663. - * - * @param field the field to check - * @param originalChildField original field which name might be mutated - * @return original or mutated field - */ - private static Field mutateOriginalNameIfNeeded( - org.apache.arrow.flatbuf.Field field, Field originalChildField) { - if ((field.typeType() == Type.List || field.typeType() == Type.FixedSizeList) - && originalChildField.getName().equals("[DEFAULT]")) { - return new Field( - DATA_VECTOR_NAME, - originalChildField.isNullable(), - originalChildField.getType(), - originalChildField.getDictionary(), - originalChildField.getChildren(), - originalChildField.getMetadata()); - } - return originalChildField; - } - - /** - * Puts this object into builder and returns the length of the serialized flatbuffer. - */ - public int getField(FlatBufferBuilder builder) { - int nameOffset = name == null ? -1 : builder.createString(name); - int typeOffset = getType().getType(builder); - int dictionaryOffset = -1; - DictionaryEncoding dictionary = getDictionary(); - if (dictionary != null) { - int dictionaryType = dictionary.getIndexType().getType(builder); - org.apache.arrow.flatbuf.DictionaryEncoding.startDictionaryEncoding(builder); - org.apache.arrow.flatbuf.DictionaryEncoding.addId(builder, dictionary.getId()); - org.apache.arrow.flatbuf.DictionaryEncoding.addIsOrdered(builder, dictionary.isOrdered()); - org.apache.arrow.flatbuf.DictionaryEncoding.addIndexType(builder, dictionaryType); - dictionaryOffset = org.apache.arrow.flatbuf.DictionaryEncoding.endDictionaryEncoding(builder); - } - int[] childrenData = new int[children.size()]; - for (int i = 0; i < children.size(); i++) { - childrenData[i] = children.get(i).getField(builder); - } - int childrenOffset = org.apache.arrow.flatbuf.Field.createChildrenVector(builder, childrenData); - int[] metadataOffsets = new int[getMetadata().size()]; - Iterator> metadataIterator = getMetadata().entrySet().iterator(); - for (int i = 0; i < 
metadataOffsets.length; i++) { - Entry kv = metadataIterator.next(); - int keyOffset = builder.createString(kv.getKey()); - int valueOffset = builder.createString(kv.getValue()); - KeyValue.startKeyValue(builder); - KeyValue.addKey(builder, keyOffset); - KeyValue.addValue(builder, valueOffset); - metadataOffsets[i] = KeyValue.endKeyValue(builder); - } - int metadataOffset = - org.apache.arrow.flatbuf.Field.createCustomMetadataVector(builder, metadataOffsets); - org.apache.arrow.flatbuf.Field.startField(builder); - if (name != null) { - org.apache.arrow.flatbuf.Field.addName(builder, nameOffset); - } - org.apache.arrow.flatbuf.Field.addNullable(builder, isNullable()); - org.apache.arrow.flatbuf.Field.addTypeType(builder, getType().getTypeID().getFlatbufID()); - org.apache.arrow.flatbuf.Field.addType(builder, typeOffset); - org.apache.arrow.flatbuf.Field.addChildren(builder, childrenOffset); - org.apache.arrow.flatbuf.Field.addCustomMetadata(builder, metadataOffset); - if (dictionary != null) { - org.apache.arrow.flatbuf.Field.addDictionary(builder, dictionaryOffset); - } - return org.apache.arrow.flatbuf.Field.endField(builder); - } - - public String getName() { - return name; - } - - public boolean isNullable() { - return fieldType.isNullable(); - } - - public ArrowType getType() { - return fieldType.getType(); - } - - @JsonIgnore - public FieldType getFieldType() { - return fieldType; - } - - @JsonInclude(Include.NON_NULL) - public DictionaryEncoding getDictionary() { - return fieldType.getDictionary(); - } - - public List getChildren() { - return children; - } - - @JsonIgnore - public Map getMetadata() { - return fieldType.getMetadata(); - } - - @JsonProperty("metadata") - @JsonInclude(Include.NON_EMPTY) - List> getMetadataForJson() { - return convertMetadata(getMetadata()); - } - - @Override - public int hashCode() { - return Objects.hash(name, isNullable(), getType(), getDictionary(), getMetadata(), children); - } - - @Override - public boolean equals(Object 
obj) { - if (!(obj instanceof Field)) { - return false; - } - Field that = (Field) obj; - return Objects.equals(this.name, that.name) - && this.isNullable() == that.isNullable() - && Objects.equals(this.getType(), that.getType()) - && Objects.equals(this.getDictionary(), that.getDictionary()) - && Objects.equals(this.getMetadata(), that.getMetadata()) - && Objects.equals(this.children, that.children); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - if (name != null) { - sb.append(name).append(": "); - } - sb.append(getType()); - if (getDictionary() != null) { - sb.append("[dictionary: ").append(getDictionary().getId()).append("]"); - } - if (!children.isEmpty()) { - sb.append("<") - .append(children.stream().map(t -> t.toString()).collect(Collectors.joining(", "))) - .append(">"); - } - if (!isNullable()) { - sb.append(" not null"); - } - return sb.toString(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java deleted file mode 100644 index 1d598f4f9111b..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.types.pojo; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Objects; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Collections2; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; -import org.apache.arrow.vector.util.CallBack; - -/** - * POJO representation of an Arrow field type. It consists of a logical type, nullability and - * whether the field (column) is dictionary encoded. - */ -public class FieldType { - - public static FieldType nullable(ArrowType type) { - return new FieldType(true, type, null, null); - } - - public static FieldType notNullable(ArrowType type) { - return new FieldType(false, type, null, null); - } - - private final boolean nullable; - private final ArrowType type; - private final DictionaryEncoding dictionary; - private final Map metadata; - - public FieldType(boolean nullable, ArrowType type, DictionaryEncoding dictionary) { - this(nullable, type, dictionary, null); - } - - /** - * Constructs a new instance. - * - * @param nullable Whether the Vector is nullable - * @param type The logical arrow type of the field. - * @param dictionary The dictionary encoding of the field. - * @param metadata Custom metadata for the field. 
- */ - public FieldType( - boolean nullable, - ArrowType type, - DictionaryEncoding dictionary, - Map metadata) { - super(); - this.nullable = nullable; - this.type = Preconditions.checkNotNull(type); - this.dictionary = dictionary; - if (type instanceof ExtensionType) { - // Save the extension type name/metadata - final Map extensionMetadata = new HashMap<>(); - extensionMetadata.put( - ExtensionType.EXTENSION_METADATA_KEY_NAME, ((ExtensionType) type).extensionName()); - extensionMetadata.put( - ExtensionType.EXTENSION_METADATA_KEY_METADATA, ((ExtensionType) type).serialize()); - if (metadata != null) { - extensionMetadata.putAll(metadata); - } - this.metadata = Collections.unmodifiableMap(extensionMetadata); - } else { - this.metadata = - metadata == null - ? java.util.Collections.emptyMap() - : Collections2.immutableMapCopy(metadata); - } - } - - public boolean isNullable() { - return nullable; - } - - public ArrowType getType() { - return type; - } - - public DictionaryEncoding getDictionary() { - return dictionary; - } - - public Map getMetadata() { - return metadata; - } - - public FieldVector createNewSingleVector( - String name, BufferAllocator allocator, CallBack schemaCallBack) { - MinorType minorType = Types.getMinorTypeForArrowType(type); - return minorType.getNewVector(name, this, allocator, schemaCallBack); - } - - public FieldVector createNewSingleVector( - Field field, BufferAllocator allocator, CallBack schemaCallBack) { - MinorType minorType = Types.getMinorTypeForArrowType(type); - return minorType.getNewVector(field, allocator, schemaCallBack); - } - - @Override - public int hashCode() { - return Objects.hash(nullable, type, dictionary, metadata); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof FieldType)) { - return false; - } - FieldType that = (FieldType) obj; - return this.isNullable() == that.isNullable() - && Objects.equals(this.getType(), that.getType()) - && Objects.equals(this.getDictionary(), 
that.getDictionary()) - && Objects.equals(this.getMetadata(), that.getMetadata()); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java deleted file mode 100644 index 293f1499df218..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.types.pojo; - -import static org.apache.arrow.vector.types.pojo.Field.convertField; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonInclude.Include; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.ObjectReader; -import com.fasterxml.jackson.databind.ObjectWriter; -import com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream; -import com.google.flatbuffers.FlatBufferBuilder; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.channels.Channels; -import java.util.AbstractMap; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.stream.Collectors; -import org.apache.arrow.flatbuf.Endianness; -import org.apache.arrow.flatbuf.KeyValue; -import org.apache.arrow.util.Collections2; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.ipc.ReadChannel; -import org.apache.arrow.vector.ipc.WriteChannel; -import org.apache.arrow.vector.ipc.message.FBSerializables; -import org.apache.arrow.vector.ipc.message.MessageSerializer; - -/** An Arrow Schema. */ -public class Schema { - - /** - * Search for a field by name in given the list of fields. 
- * - * @param fields the list of the fields - * @param name the name of the field to return - * @return the corresponding field - * @throws IllegalArgumentException if the field was not found - */ - public static Field findField(List fields, String name) { - for (Field field : fields) { - if (field.getName().equals(name)) { - return field; - } - } - throw new IllegalArgumentException(String.format("field %s not found in %s", name, fields)); - } - - static final String METADATA_KEY = "key"; - static final String METADATA_VALUE = "value"; - - private static final ObjectMapper mapper = new ObjectMapper(); - private static final ObjectWriter writer = mapper.writerWithDefaultPrettyPrinter(); - private static final ObjectReader reader = mapper.readerFor(Schema.class); - private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN; - - public static Schema fromJSON(String json) throws IOException { - return reader.readValue(Preconditions.checkNotNull(json)); - } - - /** - * Deserialize a schema that has been serialized using {@link #toByteArray()}. - * - * @param buffer the bytes to deserialize. - * @return The deserialized schema. - */ - @Deprecated - public static Schema deserialize(ByteBuffer buffer) { - return convertSchema(org.apache.arrow.flatbuf.Schema.getRootAsSchema(buffer)); - } - - /** - * Deserialize a schema that has been serialized as a message using {@link #serializeAsMessage()}. - * - * @param buffer the bytes to deserialize. - * @return The deserialized schema. - */ - public static Schema deserializeMessage(ByteBuffer buffer) { - ByteBufferBackedInputStream stream = new ByteBufferBackedInputStream(buffer); - try (ReadChannel channel = new ReadChannel(Channels.newChannel(stream))) { - return MessageSerializer.deserializeSchema(channel); - } catch (IOException ex) { - throw new RuntimeException(ex); - } - } - - /** Converts a flatbuffer schema to its POJO representation. 
*/ - public static Schema convertSchema(org.apache.arrow.flatbuf.Schema schema) { - List fields = new ArrayList<>(); - for (int i = 0; i < schema.fieldsLength(); i++) { - fields.add(convertField(schema.fields(i))); - } - Map metadata = new HashMap<>(); - for (int i = 0; i < schema.customMetadataLength(); i++) { - KeyValue kv = schema.customMetadata(i); - String key = kv.key(); - String value = kv.value(); - metadata.put(key == null ? "" : key, value == null ? "" : value); - } - return new Schema( - true, Collections.unmodifiableList(fields), Collections.unmodifiableMap(metadata)); - } - - private final List fields; - private final Map metadata; - - public Schema(Iterable fields) { - this(fields, (Map) null); - } - - /** Constructor with metadata. */ - public Schema(Iterable fields, Map metadata) { - this( - true, - Collections2.toImmutableList(fields), - metadata == null ? Collections.emptyMap() : Collections2.immutableMapCopy(metadata)); - } - - /** Constructor used for JSON deserialization. */ - @JsonCreator - private Schema( - @JsonProperty("fields") Iterable fields, - @JsonProperty("metadata") List> metadata) { - this(fields, convertMetadata(metadata)); - } - - /** - * Private constructor to bypass automatic collection copy. - * - * @param ignored an ignored argument. Its only purpose is to prevent using the constructor by - * accident because of type collisions (List vs Iterable). - */ - private Schema(boolean ignored, List fields, Map metadata) { - this.fields = fields; - this.metadata = metadata; - } - - static Map convertMetadata(List> metadata) { - return (metadata == null) - ? null - : metadata.stream() - .map( - e -> - new AbstractMap.SimpleImmutableEntry<>( - e.get(METADATA_KEY), e.get(METADATA_VALUE))) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - } - - static List> convertMetadata(Map metadata) { - return (metadata == null) - ? 
null - : metadata.entrySet().stream() - .map(Schema::convertEntryToKeyValueMap) - .collect(Collectors.toList()); - } - - private static Map convertEntryToKeyValueMap(Map.Entry entry) { - Map map = new HashMap<>(2); - map.put(METADATA_KEY, entry.getKey()); - map.put(METADATA_VALUE, entry.getValue()); - return Collections.unmodifiableMap(map); - } - - public List getFields() { - return fields; - } - - @JsonIgnore - public Map getCustomMetadata() { - return metadata; - } - - @JsonProperty("metadata") - @JsonInclude(Include.NON_EMPTY) - List> getCustomMetadataForJson() { - return convertMetadata(getCustomMetadata()); - } - - /** - * Search for a field by name in this Schema. - * - * @param name the name of the field to return - * @return the corresponding field - * @throws IllegalArgumentException if the field was not found - */ - public Field findField(String name) { - return findField(getFields(), name); - } - - /** Returns the JSON string representation of this schema. */ - public String toJson() { - try { - return writer.writeValueAsString(this); - } catch (JsonProcessingException e) { - // this should not happen - throw new RuntimeException(e); - } - } - - /** Adds this schema to the builder returning the size of the builder after adding. */ - public int getSchema(FlatBufferBuilder builder) { - int[] fieldOffsets = new int[fields.size()]; - for (int i = 0; i < fields.size(); i++) { - fieldOffsets[i] = fields.get(i).getField(builder); - } - int fieldsOffset = org.apache.arrow.flatbuf.Schema.createFieldsVector(builder, fieldOffsets); - int metadataOffset = FBSerializables.writeKeyValues(builder, metadata); - org.apache.arrow.flatbuf.Schema.startSchema(builder); - org.apache.arrow.flatbuf.Schema.addEndianness( - builder, (LITTLE_ENDIAN ? 
Endianness.Little : Endianness.Big)); - org.apache.arrow.flatbuf.Schema.addFields(builder, fieldsOffset); - org.apache.arrow.flatbuf.Schema.addCustomMetadata(builder, metadataOffset); - return org.apache.arrow.flatbuf.Schema.endSchema(builder); - } - - /** - * Returns the serialized flatbuffer bytes of the schema wrapped in a message table. Use {@link - * #deserializeMessage(ByteBuffer)} to rebuild the Schema. - */ - public byte[] serializeAsMessage() { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - try (WriteChannel channel = new WriteChannel(Channels.newChannel(out))) { - MessageSerializer.serialize(channel, this); - return out.toByteArray(); - } catch (IOException ex) { - throw new RuntimeException(ex); - } - } - - /** - * Returns the serialized flatbuffer representation of this schema. - * - * @deprecated This method does not encapsulate the schema in a Message payload which is - * incompatible with other languages. Use {@link #serializeAsMessage()} instead. - */ - @Deprecated - public byte[] toByteArray() { - FlatBufferBuilder builder = new FlatBufferBuilder(); - int schemaOffset = this.getSchema(builder); - builder.finish(schemaOffset); - ByteBuffer bb = builder.dataBuffer(); - byte[] bytes = new byte[bb.remaining()]; - bb.get(bytes); - return bytes; - } - - @Override - public int hashCode() { - return Objects.hash(fields, metadata); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof Schema)) { - return false; - } - return Objects.equals(this.fields, ((Schema) obj).fields) - && Objects.equals(this.metadata, ((Schema) obj).metadata); - } - - @Override - public String toString() { - String meta = metadata.isEmpty() ? 
"" : "(metadata: " + metadata.toString() + ")"; - return "Schema<" - + fields.stream().map(t -> t.toString()).collect(Collectors.joining(", ")) - + ">" - + meta; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/ByteArrayReadableSeekableByteChannel.java b/java/vector/src/main/java/org/apache/arrow/vector/util/ByteArrayReadableSeekableByteChannel.java deleted file mode 100644 index eaba29c3281e3..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/ByteArrayReadableSeekableByteChannel.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.SeekableByteChannel; - -/** A {@link SeekableByteChannel} backed by a byte array. */ -public class ByteArrayReadableSeekableByteChannel implements SeekableByteChannel { - private byte[] byteArray; - private int position = 0; - - /** Construct a new object using the given byteArray as a backing store. 
*/ - public ByteArrayReadableSeekableByteChannel(byte[] byteArray) { - if (byteArray == null) { - throw new NullPointerException(); - } - this.byteArray = byteArray; - } - - @Override - public boolean isOpen() { - return byteArray != null; - } - - @Override - public void close() throws IOException { - byteArray = null; - } - - @Override - public int read(final ByteBuffer dst) throws IOException { - int remainingInBuf = byteArray.length - this.position; - int length = Math.min(dst.remaining(), remainingInBuf); - dst.put(this.byteArray, this.position, length); - this.position += length; - return length; - } - - @Override - public long position() throws IOException { - return this.position; - } - - @Override - public SeekableByteChannel position(final long newPosition) throws IOException { - this.position = (int) newPosition; - return this; - } - - @Override - public long size() throws IOException { - return this.byteArray.length; - } - - @Override - public int write(final ByteBuffer src) throws IOException { - throw new UnsupportedOperationException("Read only"); - } - - @Override - public SeekableByteChannel truncate(final long size) throws IOException { - throw new UnsupportedOperationException("Read only"); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/CallBack.java b/java/vector/src/main/java/org/apache/arrow/vector/util/CallBack.java deleted file mode 100644 index 635afb92e30ce..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/CallBack.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -/** Generic callback interface to be notified of events on value vectors. */ -public interface CallBack { - void doWork(); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java deleted file mode 100644 index d62d67890e334..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -/** Utilities for rounding data size. */ -public final class DataSizeRoundingUtil { - - /** The mask for rounding an integer to a multiple of 8. (i.e. 
clear the lowest 3 bits) */ - public static int ROUND_8_MASK_INT = 0xFFFFFFF8; - - /** The mask for rounding a long integer to a multiple of 8. (i.e. clear the lowest 3 bits) */ - public static long ROUND_8_MASK_LONG = 0xFFFFFFFFFFFFFFF8L; - - /** The number of bits to shift for dividing by 8. */ - public static int DIVIDE_BY_8_SHIFT_BITS = 3; - - /** - * Round up the number to the nearest multiple of 8. - * - * @param input the number to round. - * @return the rounded number. - */ - public static int roundUpTo8Multiple(int input) { - return (input + 7) & ROUND_8_MASK_INT; - } - - /** - * Round up the number to the nearest multiple of 8. - * - * @param input the number to round. - * @return the rounded number - */ - public static long roundUpTo8Multiple(long input) { - return (input + 7L) & ROUND_8_MASK_LONG; - } - - /** - * Round down the number to the nearest multiple of 8. - * - * @param input the number to round. - * @return the rounded number. - */ - public static int roundDownTo8Multiple(int input) { - return input & ROUND_8_MASK_INT; - } - - /** - * Round down the number to the nearest multiple of 8. - * - * @param input the number to round. - * @return the rounded number - */ - public static long roundDownTo8Multiple(long input) { - return input & ROUND_8_MASK_LONG; - } - - /** - * A fast way to compute Math.ceil(input / 8.0). - * - * @param input the input number. - * @return the computed number. - */ - public static int divideBy8Ceil(int input) { - return (input + 7) >>> DIVIDE_BY_8_SHIFT_BITS; - } - - /** - * A fast way to compute Math.ceil(input / 8.0). - * - * @param input the input number. - * @return the computed number. 
- */ - public static long divideBy8Ceil(long input) { - return (input + 7) >>> (long) DIVIDE_BY_8_SHIFT_BITS; - } - - public static long roundUpToMultipleOf16(long num) { - return (num + 15) & 0xFFFFFFFFFFFFFFF0L; - } - - private DataSizeRoundingUtil() {} -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java deleted file mode 100644 index 6f9ccd1cfa1cf..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import java.time.Instant; -import java.time.LocalDateTime; -import java.time.format.DateTimeFormatter; -import java.time.format.DateTimeFormatterBuilder; -import java.time.temporal.ChronoUnit; -import java.util.TimeZone; - -/** Utility class for Date, DateTime, TimeStamp, Interval data types. 
*/ -public class DateUtility { - private DateUtility() {} - - private static final String UTC = "UTC"; - - public static final DateTimeFormatter formatDate = DateTimeFormatter.ofPattern("yyyy-MM-dd"); - public static final DateTimeFormatter formatTimeStampMilli = - DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS"); - public static final DateTimeFormatter formatTimeStampTZ = - DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS ZZZ"); - public static final DateTimeFormatter formatTime = DateTimeFormatter.ofPattern("HH:mm:ss.SSS"); - - public static DateTimeFormatter dateTimeTZFormat = null; - public static DateTimeFormatter timeFormat = null; - - public static final int yearsToMonths = 12; - public static final int hoursToMillis = 60 * 60 * 1000; - public static final int minutesToMillis = 60 * 1000; - public static final int secondsToMillis = 1000; - public static final int monthToStandardDays = 30; - public static final long monthsToMillis = 2592000000L; // 30 * 24 * 60 * 60 * 1000 - public static final int daysToStandardMillis = 24 * 60 * 60 * 1000; - - /** Returns the date time formatter used to parse date strings. */ - public static DateTimeFormatter getDateTimeFormatter() { - - if (dateTimeTZFormat == null) { - DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); - DateTimeFormatter optionalTime = DateTimeFormatter.ofPattern(" HH:mm:ss"); - DateTimeFormatter optionalSec = DateTimeFormatter.ofPattern(".SSS"); - DateTimeFormatter optionalZone = DateTimeFormatter.ofPattern(" ZZZ"); - - dateTimeTZFormat = - new DateTimeFormatterBuilder() - .append(dateFormatter) - .appendOptional(optionalTime) - .appendOptional(optionalSec) - .appendOptional(optionalZone) - .toFormatter(); - } - - return dateTimeTZFormat; - } - - /** Returns time formatter used to parse time strings. 
*/ - public static DateTimeFormatter getTimeFormatter() { - if (timeFormat == null) { - DateTimeFormatter timeFormatter = DateTimeFormatter.ofPattern("HH:mm:ss"); - DateTimeFormatter optionalSec = DateTimeFormatter.ofPattern(".SSS"); - timeFormat = - new DateTimeFormatterBuilder() - .append(timeFormatter) - .appendOptional(optionalSec) - .toFormatter(); - } - return timeFormat; - } - - /** - * Convert milliseconds from epoch to a LocalDateTime with timeZone offset. - * - * @param epochMillis milliseconds from epoch - * @param timeZone current timeZone - * @return LocalDateTime object with timeZone offset - */ - public static LocalDateTime getLocalDateTimeFromEpochMilli(long epochMillis, String timeZone) { - final LocalDateTime localDateTime = - LocalDateTime.ofInstant( - Instant.ofEpochMilli(epochMillis), TimeZone.getTimeZone(timeZone).toZoneId()); - return localDateTime; - } - - /** Convert milliseconds from epoch to a LocalDateTime with UTC offset. */ - public static LocalDateTime getLocalDateTimeFromEpochMilli(long epochMillis) { - return getLocalDateTimeFromEpochMilli(epochMillis, UTC); - } - - /** - * Convert microseconds from epoch to a LocalDateTime with timeZone offset. - * - * @param epochMicros microseconds from epoch - * @param timeZone current timeZone - * @return LocalDateTime object with timeZone offset - */ - public static LocalDateTime getLocalDateTimeFromEpochMicro(long epochMicros, String timeZone) { - final long millis = java.util.concurrent.TimeUnit.MICROSECONDS.toMillis(epochMicros); - final long addl_micros = epochMicros - (millis * 1000); - return DateUtility.getLocalDateTimeFromEpochMilli(millis, timeZone) - .plus(addl_micros, ChronoUnit.MICROS); - } - - /** Convert microseconds from epoch to a LocalDateTime with UTC offset. 
*/ - public static LocalDateTime getLocalDateTimeFromEpochMicro(long epochMicros) { - return getLocalDateTimeFromEpochMicro(epochMicros, UTC); - } - - /** - * Convert nanoseconds from epoch to a LocalDateTime with timeZone offset. - * - * @param epochNanos nanoseconds from epoch - * @param timeZone current timeZone - * @return LocalDateTime object with timeZone offset - */ - public static LocalDateTime getLocalDateTimeFromEpochNano(long epochNanos, String timeZone) { - final long millis = java.util.concurrent.TimeUnit.NANOSECONDS.toMillis(epochNanos); - final long addl_nanos = epochNanos - (millis * 1000 * 1000); - return DateUtility.getLocalDateTimeFromEpochMilli(millis, timeZone).plusNanos(addl_nanos); - } - - /** Convert nanoseconds from epoch to a LocalDateTime with UTC offset. */ - public static LocalDateTime getLocalDateTimeFromEpochNano(long epochNanos) { - return getLocalDateTimeFromEpochNano(epochNanos, UTC); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java deleted file mode 100644 index 31b79fe53a4a5..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import java.math.BigDecimal; -import java.math.BigInteger; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.util.MemoryUtil; - -/** Utility methods for configurable precision Decimal values (e.g. {@link BigDecimal}). */ -public class DecimalUtility { - private DecimalUtility() {} - - public static final byte[] zeroes = - new byte[] { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - public static final byte[] minus_one = - new byte[] { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 - }; - private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN; - - /** - * Read an ArrowType.Decimal at the given value index in the ArrowBuf and convert to a BigDecimal - * with the given scale. - */ - public static BigDecimal getBigDecimalFromArrowBuf( - ArrowBuf bytebuf, int index, int scale, int byteWidth) { - byte[] value = new byte[byteWidth]; - byte temp; - final long startIndex = (long) index * byteWidth; - - bytebuf.getBytes(startIndex, value, 0, byteWidth); - if (LITTLE_ENDIAN) { - // Decimal stored as native endian, need to swap bytes to make BigDecimal if native endian is - // LE - int stop = byteWidth / 2; - for (int i = 0, j; i < stop; i++) { - temp = value[i]; - j = (byteWidth - 1) - i; - value[i] = value[j]; - value[j] = temp; - } - } - BigInteger unscaledValue = new BigInteger(value); - return new BigDecimal(unscaledValue, scale); - } - - /** - * Read an ArrowType.Decimal from the ByteBuffer and convert to a BigDecimal with the given scale. 
- */ - public static BigDecimal getBigDecimalFromByteBuffer( - ByteBuffer bytebuf, int scale, int byteWidth) { - byte[] value = new byte[byteWidth]; - bytebuf.get(value); - BigInteger unscaledValue = new BigInteger(value); - return new BigDecimal(unscaledValue, scale); - } - - /** - * Read an ArrowType.Decimal from the ArrowBuf at the given value index and return it as a byte - * array. - */ - public static byte[] getByteArrayFromArrowBuf(ArrowBuf bytebuf, int index, int byteWidth) { - final byte[] value = new byte[byteWidth]; - final long startIndex = (long) index * byteWidth; - bytebuf.getBytes(startIndex, value, 0, byteWidth); - return value; - } - - /** - * Check that the BigDecimal scale equals the vectorScale and that the BigDecimal precision is - * less than or equal to the vectorPrecision. If not, then an UnsupportedOperationException is - * thrown, otherwise returns true. - */ - public static boolean checkPrecisionAndScale( - BigDecimal value, int vectorPrecision, int vectorScale) { - if (value.scale() != vectorScale) { - throw new UnsupportedOperationException( - "BigDecimal scale must equal that in the Arrow vector: " - + value.scale() - + " != " - + vectorScale); - } - if (value.precision() > vectorPrecision) { - throw new UnsupportedOperationException( - "BigDecimal precision cannot be greater than that in the Arrow " - + "vector: " - + value.precision() - + " > " - + vectorPrecision); - } - return true; - } - - /** - * Check that the BigDecimal scale equals the vectorScale and that the BigDecimal precision is - * less than or equal to the vectorPrecision. Return true if so, otherwise return false. - */ - public static boolean checkPrecisionAndScaleNoThrow( - BigDecimal value, int vectorPrecision, int vectorScale) { - return value.scale() == vectorScale && value.precision() < vectorPrecision; - } - - /** - * Check that the decimal scale equals the vectorScale and that the decimal precision is less than - * or equal to the vectorPrecision. 
If not, then an UnsupportedOperationException is thrown, - * otherwise returns true. - */ - public static boolean checkPrecisionAndScale( - int decimalPrecision, int decimalScale, int vectorPrecision, int vectorScale) { - if (decimalScale != vectorScale) { - throw new UnsupportedOperationException( - "BigDecimal scale must equal that in the Arrow vector: " - + decimalScale - + " != " - + vectorScale); - } - if (decimalPrecision > vectorPrecision) { - throw new UnsupportedOperationException( - "BigDecimal precision cannot be greater than that in the Arrow " - + "vector: " - + decimalPrecision - + " > " - + vectorPrecision); - } - return true; - } - - /** - * Write the given BigDecimal to the ArrowBuf at the given value index. Will throw an - * UnsupportedOperationException if the decimal size is greater than the Decimal vector byte - * width. - */ - public static void writeBigDecimalToArrowBuf( - BigDecimal value, ArrowBuf bytebuf, int index, int byteWidth) { - final byte[] bytes = value.unscaledValue().toByteArray(); - writeByteArrayToArrowBufHelper(bytes, bytebuf, index, byteWidth); - } - - /** - * Write the given long to the ArrowBuf at the given value index. This routine extends the - * original sign bit to a new upper area in 128-bit or 256-bit. - */ - public static void writeLongToArrowBuf(long value, ArrowBuf bytebuf, int index, int byteWidth) { - if (byteWidth != 16 && byteWidth != 32) { - throw new UnsupportedOperationException( - "DecimalUtility.writeLongToArrowBuf() currently supports " - + "128-bit or 256-bit width data"); - } - final long addressOfValue = bytebuf.memoryAddress() + (long) index * byteWidth; - final long padValue = Long.signum(value) == -1 ? 
-1L : 0L; - if (LITTLE_ENDIAN) { - MemoryUtil.putLong(addressOfValue, value); - for (int i = 1; i <= (byteWidth - 8) / 8; i++) { - MemoryUtil.putLong(addressOfValue + Long.BYTES * i, padValue); - } - } else { - for (int i = 0; i < (byteWidth - 8) / 8; i++) { - MemoryUtil.putLong(addressOfValue + Long.BYTES * i, padValue); - } - MemoryUtil.putLong(addressOfValue + Long.BYTES * (byteWidth - 8) / 8, value); - } - } - - /** - * Write the given byte array to the ArrowBuf at the given value index. Will throw an - * UnsupportedOperationException if the decimal size is greater than the Decimal vector byte - * width. - */ - public static void writeByteArrayToArrowBuf( - byte[] bytes, ArrowBuf bytebuf, int index, int byteWidth) { - writeByteArrayToArrowBufHelper(bytes, bytebuf, index, byteWidth); - } - - private static void writeByteArrayToArrowBufHelper( - byte[] bytes, ArrowBuf bytebuf, int index, int byteWidth) { - final long startIndex = (long) index * byteWidth; - if (bytes.length > byteWidth) { - throw new UnsupportedOperationException( - "Decimal size greater than " + byteWidth + " bytes: " + bytes.length); - } - - byte[] padBytes = bytes[0] < 0 ? 
minus_one : zeroes; - if (LITTLE_ENDIAN) { - // Decimal stored as native-endian, need to swap data bytes before writing to ArrowBuf if LE - byte[] bytesLE = new byte[bytes.length]; - for (int i = 0; i < bytes.length; i++) { - bytesLE[i] = bytes[bytes.length - 1 - i]; - } - - // Write LE data - bytebuf.setBytes(startIndex, bytesLE, 0, bytes.length); - bytebuf.setBytes(startIndex + bytes.length, padBytes, 0, byteWidth - bytes.length); - } else { - // Write BE data - bytebuf.setBytes(startIndex + byteWidth - bytes.length, bytes, 0, bytes.length); - bytebuf.setBytes(startIndex, padBytes, 0, byteWidth - bytes.length); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java deleted file mode 100644 index d4182af904f83..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.util; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; - -/** Utility methods for working with Dictionaries used in Dictionary encodings. */ -public class DictionaryUtility { - private DictionaryUtility() {} - - /** - * Convert field and child fields that have a dictionary encoding to message format, so fields - * have the dictionary type. - * - *

    NOTE: in the message format, fields have the dictionary type in the memory format, they have - * the index type - */ - public static Field toMessageFormat( - Field field, DictionaryProvider provider, Set dictionaryIdsUsed) { - if (!needConvertToMessageFormat(field)) { - return field; - } - DictionaryEncoding encoding = field.getDictionary(); - List children; - - ArrowType type; - if (encoding == null) { - type = field.getType(); - children = field.getChildren(); - } else { - long id = encoding.getId(); - Dictionary dictionary = provider.lookup(id); - if (dictionary == null) { - throw new IllegalArgumentException("Could not find dictionary with ID " + id); - } - type = dictionary.getVectorType(); - children = dictionary.getVector().getField().getChildren(); - - dictionaryIdsUsed.add(id); - } - - final List updatedChildren = new ArrayList<>(children.size()); - for (Field child : children) { - updatedChildren.add(toMessageFormat(child, provider, dictionaryIdsUsed)); - } - - return new Field( - field.getName(), - new FieldType(field.isNullable(), type, encoding, field.getMetadata()), - updatedChildren); - } - - /** - * Checks if it is required to convert the field to message format. - * - * @param field the field to check. - * @return true if a conversion is required, and false otherwise. - */ - public static boolean needConvertToMessageFormat(Field field) { - DictionaryEncoding encoding = field.getDictionary(); - - if (encoding != null) { - // when encoding is not null, the type must be determined from the - // dictionary, so conversion must be performed. - return true; - } - - List children = field.getChildren(); - for (Field child : children) { - if (needConvertToMessageFormat(child)) { - return true; - } - } - return false; - } - - /** - * Convert field and child fields that have a dictionary encoding to memory format, so fields have - * the index type. 
- */ - public static Field toMemoryFormat( - Field field, BufferAllocator allocator, Map dictionaries) { - DictionaryEncoding encoding = field.getDictionary(); - List children = field.getChildren(); - - if (encoding == null && children.isEmpty()) { - return field; - } - - List updatedChildren = new ArrayList<>(children.size()); - for (Field child : children) { - updatedChildren.add(toMemoryFormat(child, allocator, dictionaries)); - } - - ArrowType type; - List fieldChildren = null; - if (encoding == null) { - type = field.getType(); - fieldChildren = updatedChildren; - } else { - // re-type the field for in-memory format - type = encoding.getIndexType(); - if (type == null) { - type = new ArrowType.Int(32, true); - } - // get existing or create dictionary vector - if (!dictionaries.containsKey(encoding.getId())) { - // create a new dictionary vector for the values - String dictName = "DICT" + encoding.getId(); - Field dictionaryField = - new Field( - dictName, - new FieldType(field.isNullable(), field.getType(), null, null), - updatedChildren); - FieldVector dictionaryVector = dictionaryField.createVector(allocator); - dictionaries.put(encoding.getId(), new Dictionary(dictionaryVector, encoding)); - } - } - - return new Field( - field.getName(), - new FieldType(field.isNullable(), type, encoding, field.getMetadata()), - fieldChildren); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/ElementAddressableVectorIterator.java b/java/vector/src/main/java/org/apache/arrow/vector/util/ElementAddressableVectorIterator.java deleted file mode 100644 index 6d8ab9cafb487..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/ElementAddressableVectorIterator.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import java.util.Iterator; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.memory.util.hash.SimpleHasher; -import org.apache.arrow.vector.ElementAddressableVector; - -/** - * Iterator for traversing elements of a {@link ElementAddressableVector}. - * - * @param vector type. - */ -public class ElementAddressableVectorIterator - implements Iterator { - - private final T vector; - - /** Index of the next element to access. */ - private int index = 0; - - private final ArrowBufPointer reusablePointer; - - /** - * Constructs an iterator for the {@link ElementAddressableVector}. - * - * @param vector the vector to iterate. - */ - public ElementAddressableVectorIterator(T vector) { - this(vector, SimpleHasher.INSTANCE); - } - - /** - * Constructs an iterator for the {@link ElementAddressableVector}. - * - * @param vector the vector to iterate. - * @param hasher the hasher to calculate the hash code. - */ - public ElementAddressableVectorIterator(T vector, ArrowBufHasher hasher) { - this.vector = vector; - reusablePointer = new ArrowBufPointer(hasher); - } - - @Override - public boolean hasNext() { - return index < vector.getValueCount(); - } - - /** - * Retrieves the next pointer from the vector. 
- * - * @return the pointer pointing to the next element in the vector. Note that the returned pointer - * is only valid before the next call to this method. - */ - @Override - public ArrowBufPointer next() { - vector.getDataPointer(index, reusablePointer); - index += 1; - return reusablePointer; - } - - /** - * Retrieves the next pointer from the vector. - * - * @param outPointer the pointer to populate. - */ - public void next(ArrowBufPointer outPointer) { - vector.getDataPointer(index, outPointer); - index += 1; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/IntObjectHashMap.java b/java/vector/src/main/java/org/apache/arrow/vector/util/IntObjectHashMap.java deleted file mode 100644 index b625f602caa9c..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/IntObjectHashMap.java +++ /dev/null @@ -1,717 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.util; - -import java.util.AbstractCollection; -import java.util.AbstractSet; -import java.util.Arrays; -import java.util.Collection; -import java.util.Iterator; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.Set; - -/** - * A vendored specialized copy of Netty's IntObjectHashMap for use within Arrow. Avoids requiring - * Netty in the Arrow core just for this one class. - * - * @param The value type stored in the map. - */ -class IntObjectHashMap implements IntObjectMap { - - /** Default initial capacity. Used if not specified in the constructor */ - public static final int DEFAULT_CAPACITY = 8; - - /** Default load factor. Used if not specified in the constructor */ - public static final float DEFAULT_LOAD_FACTOR = 0.5f; - - /** - * Placeholder for null values, so we can use the actual null to mean available. (Better than - * using a placeholder for available: less references for GC processing.) - */ - private static final Object NULL_VALUE = new Object(); - - /** The maximum number of elements allowed without allocating more space. */ - private int maxSize; - - /** The load factor for the map. Used to calculate {@link #maxSize}. 
*/ - private final float loadFactor; - - private int[] keys; - private V[] values; - private int size; - private int mask; - - private final Set keySet = new KeySet(); - private final Set> entrySet = new EntrySet(); - private final Iterable> entries = - new Iterable>() { - @Override - public Iterator> iterator() { - return new PrimitiveIterator(); - } - }; - - public IntObjectHashMap() { - this(DEFAULT_CAPACITY, DEFAULT_LOAD_FACTOR); - } - - public IntObjectHashMap(int initialCapacity) { - this(initialCapacity, DEFAULT_LOAD_FACTOR); - } - - public IntObjectHashMap(int initialCapacity, float loadFactor) { - if (loadFactor <= 0.0f || loadFactor > 1.0f) { - // Cannot exceed 1 because we can never store more than capacity elements; - // using a bigger loadFactor would trigger rehashing before the desired load is reached. - throw new IllegalArgumentException("loadFactor must be > 0 and <= 1"); - } - - this.loadFactor = loadFactor; - - // Adjust the initial capacity if necessary. - int capacity = safeFindNextPositivePowerOfTwo(initialCapacity); - mask = capacity - 1; - - // Allocate the arrays. - keys = new int[capacity]; - @SuppressWarnings({"unchecked", "SuspiciousArrayCast"}) - V[] temp = (V[]) new Object[capacity]; - values = temp; - - // Initialize the maximum size value. - maxSize = calcMaxSize(capacity); - } - - private static T toExternal(T value) { - assert value != null : "null is not a legitimate internal value. Concurrent Modification?"; - return value == NULL_VALUE ? null : value; - } - - @SuppressWarnings("unchecked") - private static T toInternal(T value) { - return value == null ? (T) NULL_VALUE : value; - } - - @Override - public V get(int key) { - int index = indexOf(key); - return index == -1 ? null : toExternal(values[index]); - } - - @Override - public V put(int key, V value) { - int startIndex = hashIndex(key); - int index = startIndex; - - for (; ; ) { - if (values[index] == null) { - // Found empty slot, use it. 
- keys[index] = key; - values[index] = toInternal(value); - growSize(); - return null; - } - if (keys[index] == key) { - // Found existing entry with this key, just replace the value. - V previousValue = values[index]; - values[index] = toInternal(value); - return toExternal(previousValue); - } - - // Conflict, keep probing ... - if ((index = probeNext(index)) == startIndex) { - // Can only happen if the map was full at MAX_ARRAY_SIZE and couldn't grow. - throw new IllegalStateException("Unable to insert"); - } - } - } - - @Override - public void putAll(Map sourceMap) { - if (sourceMap instanceof IntObjectHashMap) { - // Optimization - iterate through the arrays. - @SuppressWarnings("unchecked") - IntObjectHashMap source = (IntObjectHashMap) sourceMap; - for (int i = 0; i < source.values.length; ++i) { - V sourceValue = source.values[i]; - if (sourceValue != null) { - put(source.keys[i], sourceValue); - } - } - return; - } - - // Otherwise, just add each entry. - for (Entry entry : sourceMap.entrySet()) { - put(entry.getKey(), entry.getValue()); - } - } - - @Override - public V remove(int key) { - int index = indexOf(key); - if (index == -1) { - return null; - } - - V prev = values[index]; - removeAt(index); - return toExternal(prev); - } - - @Override - public int size() { - return size; - } - - @Override - public boolean isEmpty() { - return size == 0; - } - - @Override - public void clear() { - Arrays.fill(keys, (int) 0); - Arrays.fill(values, null); - size = 0; - } - - @Override - public boolean containsKey(int key) { - return indexOf(key) >= 0; - } - - @Override - public boolean containsValue(Object value) { - @SuppressWarnings("unchecked") - V v1 = toInternal((V) value); - for (V v2 : values) { - // The map supports null values; this will be matched as NULL_VALUE.equals(NULL_VALUE). 
- if (v2 != null && v2.equals(v1)) { - return true; - } - } - return false; - } - - @Override - public Iterable> entries() { - return entries; - } - - @Override - public Collection values() { - return new AbstractCollection() { - @Override - public Iterator iterator() { - return new Iterator() { - final PrimitiveIterator iter = new PrimitiveIterator(); - - @Override - public boolean hasNext() { - return iter.hasNext(); - } - - @Override - public V next() { - return iter.next().value(); - } - - @Override - public void remove() { - iter.remove(); - } - }; - } - - @Override - public int size() { - return size; - } - }; - } - - @Override - public int hashCode() { - // Hashcode is based on all non-zero, valid keys. We have to scan the whole keys - // array, which may have different lengths for two maps of same size(), so the - // capacity cannot be used as input for hashing but the size can. - int hash = size; - for (int key : keys) { - // 0 can be a valid key or unused slot, but won't impact the hashcode in either case. - // This way we can use a cheap loop without conditionals, or hard-to-unroll operations, - // or the devastatingly bad memory locality of visiting value objects. - // Also, it's important to use a hash function that does not depend on the ordering - // of terms, only their values; since the map is an unordered collection and - // entries can end up in different positions in different maps that have the same - // elements, but with different history of puts/removes, due to conflicts. 
- hash ^= hashCode(key); - } - return hash; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (!(obj instanceof IntObjectMap)) { - return false; - } - @SuppressWarnings("rawtypes") - IntObjectMap other = (IntObjectMap) obj; - if (size != other.size()) { - return false; - } - for (int i = 0; i < values.length; ++i) { - V value = values[i]; - if (value != null) { - int key = keys[i]; - Object otherValue = other.get(key); - if (value == NULL_VALUE) { - if (otherValue != null) { - return false; - } - } else if (!value.equals(otherValue)) { - return false; - } - } - } - return true; - } - - @Override - public boolean containsKey(Object key) { - return containsKey(objectToKey(key)); - } - - @Override - public V get(Object key) { - return get(objectToKey(key)); - } - - @Override - public V put(Integer key, V value) { - return put(objectToKey(key), value); - } - - @Override - public V remove(Object key) { - return remove(objectToKey(key)); - } - - @Override - public Set keySet() { - return keySet; - } - - @Override - public Set> entrySet() { - return entrySet; - } - - private int objectToKey(Object key) { - return (int) (Integer) key; - } - - /** - * Locates the index for the given key. This method probes using double hashing. - * - * @param key the key for an entry in the map. - * @return the index where the key was found, or {@code -1} if no entry is found for that key. - */ - private int indexOf(int key) { - int startIndex = hashIndex(key); - int index = startIndex; - - for (; ; ) { - if (values[index] == null) { - // It's available, so no chance that this value exists anywhere in the map. - return -1; - } - if (key == keys[index]) { - return index; - } - - // Conflict, keep probing ... - if ((index = probeNext(index)) == startIndex) { - return -1; - } - } - } - - /** Returns the hashed index for the given key. 
*/ - private int hashIndex(int key) { - // The array lengths are always a power of two, so we can use a bitmask to stay inside the array - // bounds. - return hashCode(key) & mask; - } - - /** Returns the hash code for the key. */ - private static int hashCode(int key) { - return key; - } - - /** Get the next sequential index after {@code index} and wraps if necessary. */ - private int probeNext(int index) { - // The array lengths are always a power of two, so we can use a bitmask to stay inside the array - // bounds. - return (index + 1) & mask; - } - - /** Grows the map size after an insertion. If necessary, performs a rehash of the map. */ - private void growSize() { - size++; - - if (size > maxSize) { - if (keys.length == Integer.MAX_VALUE) { - throw new IllegalStateException("Max capacity reached at size=" + size); - } - - // Double the capacity. - rehash(keys.length << 1); - } - } - - /** - * Removes entry at the given index position. Also performs opportunistic, incremental rehashing - * if necessary to not break conflict chains. - * - * @param index the index position of the element to remove. - * @return {@code true} if the next item was moved back. {@code false} otherwise. - */ - private boolean removeAt(final int index) { - --size; - // Clearing the key is not strictly necessary (for GC like in a regular collection), - // but recommended for security. The memory location is still fresh in the cache anyway. - keys[index] = 0; - values[index] = null; - - // In the interval from index to the next available entry, the arrays may have entries - // that are displaced from their base position due to prior conflicts. Iterate these - // entries and move them back if possible, optimizing future lookups. - // Knuth Section 6.4 Algorithm R, also used by the JDK's IdentityHashMap. 
- - int nextFree = index; - int i = probeNext(index); - for (V value = values[i]; value != null; value = values[i = probeNext(i)]) { - int key = keys[i]; - int bucket = hashIndex(key); - if (i < bucket && (bucket <= nextFree || nextFree <= i) - || bucket <= nextFree && nextFree <= i) { - // Move the displaced entry "back" to the first available position. - keys[nextFree] = key; - values[nextFree] = value; - // Put the first entry after the displaced entry - keys[i] = 0; - values[i] = null; - nextFree = i; - } - } - return nextFree != index; - } - - /** Calculates the maximum size allowed before rehashing. */ - private int calcMaxSize(int capacity) { - // Clip the upper bound so that there will always be at least one available slot. - int upperBound = capacity - 1; - return Math.min(upperBound, (int) (capacity * loadFactor)); - } - - /** - * Rehashes the map for the given capacity. - * - * @param newCapacity the new capacity for the map. - */ - private void rehash(int newCapacity) { - int[] oldKeys = keys; - V[] oldVals = values; - - keys = new int[newCapacity]; - @SuppressWarnings({"unchecked", "SuspiciousArrayCast"}) - V[] temp = (V[]) new Object[newCapacity]; - values = temp; - - maxSize = calcMaxSize(newCapacity); - mask = newCapacity - 1; - - // Insert to the new arrays. - for (int i = 0; i < oldVals.length; ++i) { - V oldVal = oldVals[i]; - if (oldVal != null) { - // Inlined put(), but much simpler: we don't need to worry about - // duplicated keys, growing/rehashing, or failing to insert. - int oldKey = oldKeys[i]; - int index = hashIndex(oldKey); - - for (; ; ) { - if (values[index] == null) { - keys[index] = oldKey; - values[index] = oldVal; - break; - } - - // Conflict, keep probing. Can wrap around, but never reaches startIndex again. 
- index = probeNext(index); - } - } - } - } - - @Override - public String toString() { - if (isEmpty()) { - return "{}"; - } - StringBuilder sb = new StringBuilder(4 * size); - sb.append('{'); - boolean first = true; - for (int i = 0; i < values.length; ++i) { - V value = values[i]; - if (value != null) { - if (!first) { - sb.append(", "); - } - sb.append(keyToString(keys[i])) - .append('=') - .append(value == this ? "(this Map)" : toExternal(value)); - first = false; - } - } - return sb.append('}').toString(); - } - - /** - * Helper method called by {@link #toString()} in order to convert a single map key into a string. - * This is protected to allow subclasses to override the appearance of a given key. - */ - protected String keyToString(int key) { - return Integer.toString(key); - } - - /** Set implementation for iterating over the entries of the map. */ - private final class EntrySet extends AbstractSet> { - @Override - public Iterator> iterator() { - return new MapIterator(); - } - - @Override - public int size() { - return IntObjectHashMap.this.size(); - } - } - - /** Set implementation for iterating over the keys. 
*/ - private final class KeySet extends AbstractSet { - @Override - public int size() { - return IntObjectHashMap.this.size(); - } - - @Override - public boolean contains(Object o) { - return IntObjectHashMap.this.containsKey(o); - } - - @Override - public boolean remove(Object o) { - return IntObjectHashMap.this.remove(o) != null; - } - - @Override - public boolean retainAll(Collection retainedKeys) { - boolean changed = false; - for (Iterator> iter = entries().iterator(); iter.hasNext(); ) { - PrimitiveEntry entry = iter.next(); - if (!retainedKeys.contains(entry.key())) { - changed = true; - iter.remove(); - } - } - return changed; - } - - @Override - public void clear() { - IntObjectHashMap.this.clear(); - } - - @Override - public Iterator iterator() { - return new Iterator() { - private final Iterator> iter = entrySet.iterator(); - - @Override - public boolean hasNext() { - return iter.hasNext(); - } - - @Override - public Integer next() { - return iter.next().getKey(); - } - - @Override - public void remove() { - iter.remove(); - } - }; - } - } - - /** - * Iterator over primitive entries. Entry key/values are overwritten by each call to {@link - * #next()}. - */ - private final class PrimitiveIterator implements Iterator>, PrimitiveEntry { - private int prevIndex = -1; - private int nextIndex = -1; - private int entryIndex = -1; - - private void scanNext() { - while (++nextIndex != values.length && values[nextIndex] == null) {} - } - - @Override - public boolean hasNext() { - if (nextIndex == -1) { - scanNext(); - } - return nextIndex != values.length; - } - - @Override - public PrimitiveEntry next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - - prevIndex = nextIndex; - scanNext(); - - // Always return the same Entry object, just change its index each time. 
- entryIndex = prevIndex; - return this; - } - - @Override - public void remove() { - if (prevIndex == -1) { - throw new IllegalStateException("next must be called before each remove."); - } - if (removeAt(prevIndex)) { - // removeAt may move elements "back" in the array if they have been displaced because their - // spot in the - // array was occupied when they were inserted. If this occurs then the nextIndex is now - // invalid and - // should instead point to the prevIndex which now holds an element which was "moved back". - nextIndex = prevIndex; - } - prevIndex = -1; - } - - // Entry implementation. Since this implementation uses a single Entry, we coalesce that - // into the Iterator object (potentially making loop optimization much easier). - - @Override - public int key() { - return keys[entryIndex]; - } - - @Override - public V value() { - return toExternal(values[entryIndex]); - } - - @Override - public void setValue(V value) { - values[entryIndex] = toInternal(value); - } - } - - /** Iterator used by the {@link Map} interface. */ - private final class MapIterator implements Iterator> { - private final PrimitiveIterator iter = new PrimitiveIterator(); - - @Override - public boolean hasNext() { - return iter.hasNext(); - } - - @Override - public Entry next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - - iter.next(); - - return new MapEntry(iter.entryIndex); - } - - @Override - public void remove() { - iter.remove(); - } - } - - /** A single entry in the map. 
*/ - final class MapEntry implements Entry { - private final int entryIndex; - - MapEntry(int entryIndex) { - this.entryIndex = entryIndex; - } - - @Override - public Integer getKey() { - verifyExists(); - return keys[entryIndex]; - } - - @Override - public V getValue() { - verifyExists(); - return toExternal(values[entryIndex]); - } - - @Override - public V setValue(V value) { - verifyExists(); - V prevValue = toExternal(values[entryIndex]); - values[entryIndex] = toInternal(value); - return prevValue; - } - - private void verifyExists() { - if (values[entryIndex] == null) { - throw new IllegalStateException("The map entry has been removed"); - } - } - } - - static int safeFindNextPositivePowerOfTwo(final int value) { - return value <= 0 ? 1 : value >= 0x40000000 ? 0x40000000 : findNextPositivePowerOfTwo(value); - } - - static int findNextPositivePowerOfTwo(final int value) { - assert value > Integer.MIN_VALUE && value < 0x40000000; - return 1 << (32 - Integer.numberOfLeadingZeros(value - 1)); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/IntObjectMap.java b/java/vector/src/main/java/org/apache/arrow/vector/util/IntObjectMap.java deleted file mode 100644 index 0de31b56dfa63..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/IntObjectMap.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import java.util.Iterator; -import java.util.Map; - -/** - * A vendored specialized copy of Netty's IntObjectMap for use within Arrow. Avoids requiring Netty - * in the Arrow core just for this one class. - * - * @param the value type stored in the map. - */ -interface IntObjectMap extends Map { - - /** - * A primitive entry in the map, provided by the iterator from {@link #entries()}. - * - * @param the value type stored in the map. - */ - interface PrimitiveEntry { - /** Gets the key for this entry. */ - int key(); - - /** Gets the value for this entry. */ - V value(); - - /** Sets the value for this entry. */ - void setValue(V value); - } - - /** - * Gets the value in the map with the specified key. - * - * @param key the key whose associated value is to be returned. - * @return the value or {@code null} if the key was not found in the map. - */ - V get(int key); - - /** - * Puts the given entry into the map. - * - * @param key the key of the entry. - * @param value the value of the entry. - * @return the previous value for this key or {@code null} if there was no previous mapping. - */ - V put(int key, V value); - - /** - * Removes the entry with the specified key. - * - * @param key the key for the entry to be removed from this map. - * @return the previous value for the key, or {@code null} if there was no mapping. - */ - V remove(int key); - - /** - * Gets an iterable to traverse over the primitive entries contained in this map. 
As an - * optimization, the {@link PrimitiveEntry}s returned by the {@link Iterator} may change as the - * {@link Iterator} progresses. The caller should not rely on {@link PrimitiveEntry} key/value - * stability. - */ - Iterable> entries(); - - /** Indicates whether or not this map contains a value for the specified key. */ - boolean containsKey(int key); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java b/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java deleted file mode 100644 index 119d1dfc2357a..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.util.ArrayList; - -/** - * Extension of {@link ArrayList} that {@link #toString()} method returns the serialized JSON - * version of its members (or throws an exception if they can't be converted to JSON). - * - * @param Type of value held in the list. 
- */ -public class JsonStringArrayList extends ArrayList { - - private static final ObjectMapper MAPPER = ObjectMapperFactory.newObjectMapper(); - - public JsonStringArrayList() { - super(); - } - - public JsonStringArrayList(int size) { - super(size); - } - - @Override - public final String toString() { - try { - return MAPPER.writeValueAsString(this); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Cannot serialize array list to JSON string", e); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java b/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java deleted file mode 100644 index 109df55153565..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.util; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.util.LinkedHashMap; - -/** - * Simple class that extends the regular java.util.HashMap but overrides the toString() method of - * the HashMap class to produce a JSON string instead - * - * @param The type of the key for the map. - * @param The type of the value for the map. - */ -public class JsonStringHashMap extends LinkedHashMap { - - private static final ObjectMapper MAPPER = ObjectMapperFactory.newObjectMapper(); - - @Override - public final String toString() { - try { - return MAPPER.writeValueAsString(this); - } catch (JsonProcessingException e) { - throw new IllegalStateException("Cannot serialize hash map to JSON string", e); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java deleted file mode 100644 index 72977db091949..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.util; - -import java.util.Collection; -import java.util.Map; -import java.util.Set; - -/** - * An implementation of a map that supports constant time look-up by a generic key or an ordinal. - * - *

    This class extends the functionality a regular {@link Map} with ordinal lookup support. Upon - * insertion an unused ordinal is assigned to the inserted (key, value) tuple. Upon update the same - * ordinal id is re-used while value is replaced. Upon deletion of an existing item, its - * corresponding ordinal is recycled and could be used by another item. - * - *

    For any instance with N items, this implementation guarantees that ordinals are in the range - * of [0, N). However, the ordinal assignment is dynamic and may change after an insertion or - * deletion. Consumers of this class are responsible for explicitly checking the ordinal - * corresponding to a key via {@link MultiMapWithOrdinal#getOrdinal(Object)} before attempting to - * execute a lookup with an ordinal. - * - * @param key type - * @param value type - */ -public interface MapWithOrdinal { - V getByOrdinal(int id); - - int getOrdinal(K key); - - int size(); - - boolean isEmpty(); - - V get(K key); - - Collection getAll(K key); - - boolean put(K key, V value, boolean overwrite); - - Collection values(); - - boolean containsKey(K key); - - boolean remove(K key, V value); - - boolean removeAll(K key); - - void clear(); - - Set keys(); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java deleted file mode 100644 index 15b6bc5a63c54..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import java.util.AbstractMap; -import java.util.ArrayList; -import java.util.Collection; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -/** - * An implementation of map that supports constant time look-up by a generic key or an ordinal. - * - *

    This class extends the functionality a regular {@link Map} with ordinal lookup support. Upon - * insertion an unused ordinal is assigned to the inserted (key, value) tuple. Upon update the same - * ordinal id is re-used while value is replaced. Upon deletion of an existing item, its - * corresponding ordinal is recycled and could be used by another item. - * - *

    For any instance with N items, this implementation guarantees that ordinals are in the range - * of [0, N). However, the ordinal assignment is dynamic and may change after an insertion or - * deletion. Consumers of this class are responsible for explicitly checking the ordinal - * corresponding to a key via {@link MapWithOrdinalImpl#getOrdinal(Object)} before attempting to - * execute a lookup with an ordinal. - * - * @param key type - * @param value type - */ -public class MapWithOrdinalImpl implements MapWithOrdinal { - - private final Map> primary = new LinkedHashMap<>(); - private final IntObjectHashMap secondary = new IntObjectHashMap<>(); - - private final Map delegate = - new Map() { - @Override - public boolean isEmpty() { - return size() == 0; - } - - @Override - public int size() { - return primary.size(); - } - - @Override - public boolean containsKey(Object key) { - return primary.containsKey(key); - } - - @Override - public boolean containsValue(Object value) { - return primary.containsValue(value); - } - - @Override - public V get(Object key) { - Entry pair = primary.get(key); - if (pair != null) { - return pair.getValue(); - } - return null; - } - - @Override - public V put(K key, V value) { - final Entry oldPair = primary.get(key); - // if key exists try replacing otherwise, assign a new ordinal identifier - final int ordinal = oldPair == null ? primary.size() : oldPair.getKey(); - primary.put(key, new AbstractMap.SimpleImmutableEntry<>(ordinal, value)); - secondary.put(ordinal, value); - return oldPair == null ? 
null : oldPair.getValue(); - } - - @Override - public V remove(Object key) { - final Entry oldPair = primary.remove(key); - if (oldPair != null) { - final int lastOrdinal = secondary.size(); - final V last = secondary.get(lastOrdinal); - // normalize mappings so that all numbers until primary.size() is assigned - // swap the last element with the deleted one - secondary.put(oldPair.getKey(), last); - primary.put((K) key, new AbstractMap.SimpleImmutableEntry<>(oldPair.getKey(), last)); - } - return oldPair == null ? null : oldPair.getValue(); - } - - @Override - public void putAll(Map m) { - throw new UnsupportedOperationException(); - } - - @Override - public void clear() { - primary.clear(); - secondary.clear(); - } - - @Override - public Set keySet() { - return primary.keySet(); - } - - @Override - public Collection values() { - return secondary.values(); - } - - @Override - public Set> entrySet() { - return primary.entrySet().stream() - .map( - entry -> - new AbstractMap.SimpleImmutableEntry<>( - entry.getKey(), entry.getValue().getValue())) - .collect(Collectors.toSet()); - } - }; - - /** - * Returns the value corresponding to the given ordinal. - * - * @param id ordinal value for lookup - * @return an instance of V - */ - @Override - public V getByOrdinal(int id) { - return secondary.get(id); - } - - /** - * Returns the ordinal corresponding to the given key. 
- * - * @param key key for ordinal lookup - * @return ordinal value corresponding to key if it exists or -1 - */ - @Override - public int getOrdinal(K key) { - Map.Entry pair = primary.get(key); - if (pair != null) { - return pair.getKey(); - } - return -1; - } - - @Override - public int size() { - return delegate.size(); - } - - @Override - public boolean isEmpty() { - return delegate.isEmpty(); - } - - @Override - public Collection getAll(K key) { - if (delegate.containsKey(key)) { - List list = new ArrayList<>(1); - list.add(get(key)); - return list; - } - return null; - } - - @Override - public V get(K key) { - return delegate.get(key); - } - - /** - * Inserts the tuple (key, value) into the map extending the semantics of {@link Map#put} with - * automatic ordinal assignment. A new ordinal is assigned if key does not exists. Otherwise the - * same ordinal is re-used but the value is replaced. - * - * @see java.util.Map#put - */ - @Override - public boolean put(K key, V value, boolean overwrite) { - return delegate.put(key, value) != null; - } - - @Override - public Collection values() { - return delegate.values(); - } - - @Override - public boolean remove(K key, V value) { - return false; - } - - @Override - public boolean containsKey(Object key) { - return delegate.containsKey(key); - } - - /** - * Removes the element corresponding to the key if exists extending the semantics of {@link - * java.util.Map#remove} with ordinal re-cycling. The ordinal corresponding to the given key may - * be re-assigned to another tuple. It is important that consumer checks the ordinal value via - * {@link MapWithOrdinalImpl#getOrdinal(Object)} before attempting to look-up by ordinal. 
- * - * @see java.util.Map#remove - */ - @Override - public boolean removeAll(K key) { - return delegate.remove(key) != null; - } - - @Override - public void clear() { - delegate.clear(); - } - - @Override - public Set keys() { - return delegate.keySet(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java b/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java deleted file mode 100644 index 7b9ad62bea68f..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import java.util.Collection; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; - -/** - * An implementation of a multimap that supports constant time look-up by a generic key or an - * ordinal. - * - *

    This class extends the functionality a regular {@link Map} with ordinal lookup support. Upon - * insertion an unused ordinal is assigned to the inserted (key, value) tuple. Upon update the same - * ordinal id is re-used while value is replaced. Upon deletion of an existing item, its - * corresponding ordinal is recycled and could be used by another item. - * - *

    For any instance with N items, this implementation guarantees that ordinals are in the range - * of [0, N). However, the ordinal assignment is dynamic and may change after an insertion or - * deletion. Consumers of this class are responsible for explicitly checking the ordinal - * corresponding to a key via {@link MultiMapWithOrdinal#getOrdinal(Object)} before attempting to - * execute a lookup with an ordinal. - * - * @param key type - * @param value type - */ -public class MultiMapWithOrdinal implements MapWithOrdinal { - - private final Map> keyToOrdinal = new LinkedHashMap<>(); - private final IntObjectHashMap ordinalToValue = new IntObjectHashMap<>(); - - /** - * Returns the value corresponding to the given ordinal. - * - * @param id ordinal value for lookup - * @return an instance of V - */ - @Override - public V getByOrdinal(int id) { - return ordinalToValue.get(id); - } - - /** - * Returns the ordinal corresponding to the given key. - * - * @param key key for ordinal lookup - * @return ordinal value corresponding to key if it exists or -1 - */ - @Override - public int getOrdinal(K key) { - Set pair = getOrdinals(key); - if (!pair.isEmpty()) { - return pair.iterator().next(); - } - return -1; - } - - private Set getOrdinals(K key) { - return keyToOrdinal.getOrDefault(key, new HashSet<>()); - } - - @Override - public int size() { - return ordinalToValue.size(); - } - - @Override - public boolean isEmpty() { - return ordinalToValue.isEmpty(); - } - - /** get set of values for key. */ - @Override - public V get(K key) { - Set ordinals = keyToOrdinal.get(key); - if (ordinals == null) { - return null; - } - return ordinals.stream().map(ordinalToValue::get).collect(Collectors.toList()).get(0); - } - - /** get set of values for key. 
*/ - @Override - public Collection getAll(K key) { - Set ordinals = keyToOrdinal.get(key); - if (ordinals == null) { - return null; - } - return ordinals.stream().map(ordinalToValue::get).collect(Collectors.toList()); - } - - /** - * Inserts the tuple (key, value) into the multimap with automatic ordinal assignment. - * - *

    A new ordinal is assigned if key/value pair does not exists. - * - *

    If overwrite is true the existing key will be overwritten with value else value will be - * appended to the multimap. - */ - @Override - public boolean put(K key, V value, boolean overwrite) { - if (overwrite) { - removeAll(key); - } - Set ordinalSet = getOrdinals(key); - int nextOrdinal = ordinalToValue.size(); - ordinalToValue.put(nextOrdinal, value); - boolean changed = ordinalSet.add(nextOrdinal); - keyToOrdinal.put(key, ordinalSet); - return changed; - } - - @Override - public Collection values() { - return ordinalToValue.values(); - } - - @Override - public boolean containsKey(K key) { - return keyToOrdinal.containsKey(key); - } - - /** - * Removes the element corresponding to the key/value if exists with ordinal re-cycling. - * - *

    The ordinal corresponding to the given key may be re-assigned to another tuple. It is - * important that consumer checks the ordinal value via {@link - * MultiMapWithOrdinal#getOrdinal(Object)} before attempting to look-up by ordinal. - * - *

    If the multimap is changed return true. - */ - @Override - public synchronized boolean remove(K key, V value) { - Set removalSet = getOrdinals(key); - if (removalSet.isEmpty()) { - return false; - } - Optional removeValue = - removalSet.stream().map(ordinalToValue::get).filter(value::equals).findFirst(); - if (!removeValue.isPresent()) { - return false; - } - int removalOrdinal = removeKv(removalSet, key, value); - int lastOrdinal = ordinalToValue.size(); - if (lastOrdinal != removalOrdinal) { // we didn't remove the last ordinal - swapOrdinal(lastOrdinal, removalOrdinal); - } - return true; - } - - private void swapOrdinal(int lastOrdinal, int removalOrdinal) { - V swapOrdinalValue = ordinalToValue.remove(lastOrdinal); - ordinalToValue.put(removalOrdinal, swapOrdinalValue); - K swapOrdinalKey = - keyToOrdinal.entrySet().stream() - .filter(kv -> kv.getValue().stream().anyMatch(o -> o == lastOrdinal)) - .map(Map.Entry::getKey) - .findFirst() - .orElseThrow(() -> new IllegalStateException("MultimapWithOrdinal in bad state")); - ordinalToValue.put(removalOrdinal, swapOrdinalValue); - Set swapSet = getOrdinals(swapOrdinalKey); - swapSet.remove(lastOrdinal); - swapSet.add(removalOrdinal); - keyToOrdinal.put(swapOrdinalKey, swapSet); - } - - private int removeKv(Set removalSet, K key, V value) { - Integer removalOrdinal = - removalSet.stream() - .filter(i -> ordinalToValue.get(i).equals(value)) - .findFirst() - .orElseThrow(() -> new IllegalStateException("MultimapWithOrdinal in bad state")); - ordinalToValue.remove(removalOrdinal); - removalSet.remove(removalOrdinal); - if (removalSet.isEmpty()) { - keyToOrdinal.remove(key); - } else { - keyToOrdinal.put(key, removalSet); - } - return removalOrdinal; - } - - /** remove all entries of key. 
*/ - @Override - public synchronized boolean removeAll(K key) { - Collection values = this.getAll(key); - if (values == null) { - return false; - } - for (V v : values) { - this.remove(key, v); - } - return true; - } - - @Override - public void clear() { - ordinalToValue.clear(); - keyToOrdinal.clear(); - } - - @Override - public Set keys() { - return keyToOrdinal.keySet(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/ObjectMapperFactory.java b/java/vector/src/main/java/org/apache/arrow/vector/util/ObjectMapperFactory.java deleted file mode 100644 index 5d004ef36b332..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/ObjectMapperFactory.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.json.JsonMapper; -import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; - -/** A {@link ObjectMapper} factory to read/write JSON. */ -public final class ObjectMapperFactory { - - private ObjectMapperFactory() {} - - /** Creates a new {@link ObjectMapper} instance. 
*/ - public static ObjectMapper newObjectMapper() { - return JsonMapper.builder().addModule(new JavaTimeModule()).build(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java b/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java deleted file mode 100644 index 11964595709d4..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -/** - * An exception that is used to signal that allocation request in bytes is greater than the maximum - * allowed by {@link org.apache.arrow.memory.BufferAllocator#buffer(int) allocator}. - * - *

    Operators should handle this exception to split the batch and later resume the execution on - * the next iteration. - */ -public class OversizedAllocationException extends RuntimeException { - public OversizedAllocationException() { - super(); - } - - public OversizedAllocationException( - String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { - super(message, cause, enableSuppression, writableStackTrace); - } - - public OversizedAllocationException(String message, Throwable cause) { - super(message, cause); - } - - public OversizedAllocationException(String message) { - super(message); - } - - public OversizedAllocationException(Throwable cause) { - super(cause); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java b/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java deleted file mode 100644 index acc49ab34fde0..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.util; - -import java.util.Collection; -import java.util.Set; -import org.apache.arrow.vector.complex.AbstractStructVector; - -/** - * Implementation of MapWithOrdinal that allows for promotion to multimap when duplicate fields - * exist. - * - * @param key type - * @param value type - */ -public class PromotableMultiMapWithOrdinal implements MapWithOrdinal { - private final MapWithOrdinalImpl mapWithOrdinal = new MapWithOrdinalImpl<>(); - private final MultiMapWithOrdinal multiMapWithOrdinal = new MultiMapWithOrdinal<>(); - private final boolean promotable; - private AbstractStructVector.ConflictPolicy conflictPolicy; - private MapWithOrdinal delegate; - - /** - * Create promotable map. - * - * @param promotable if promotion is allowed, otherwise delegate to MapWithOrdinal. - * @param conflictPolicy how to handle name conflicts. - */ - public PromotableMultiMapWithOrdinal( - boolean promotable, AbstractStructVector.ConflictPolicy conflictPolicy) { - this.promotable = promotable; - this.conflictPolicy = conflictPolicy; - delegate = mapWithOrdinal; - } - - private void promote() { - if (delegate == multiMapWithOrdinal - || !promotable - || conflictPolicy.equals(AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE)) { - return; - } - for (K key : mapWithOrdinal.keys()) { - V value = mapWithOrdinal.get(key); - multiMapWithOrdinal.put(key, value, false); - } - mapWithOrdinal.clear(); - delegate = multiMapWithOrdinal; - } - - @Override - public V getByOrdinal(int id) { - return delegate.getByOrdinal(id); - } - - @Override - public int getOrdinal(K key) { - return delegate.getOrdinal(key); - } - - @Override - public int size() { - return delegate.size(); - } - - @Override - public boolean isEmpty() { - return delegate.isEmpty(); - } - - @Override - public V get(K key) { - return delegate.get(key); - } - - @Override - public Collection getAll(K key) { - return delegate.getAll(key); - } - - @Override - public boolean put(K key, V value, 
boolean overwrite) { - if (delegate.containsKey(key)) { - promote(); - } - return delegate.put(key, value, overwrite); - } - - @Override - public Collection values() { - return delegate.values(); - } - - @Override - public boolean containsKey(K key) { - return delegate.containsKey(key); - } - - @Override - public boolean remove(K key, V value) { - return delegate.remove(key, value); - } - - @Override - public boolean removeAll(K key) { - return delegate.removeAll(key); - } - - @Override - public void clear() { - delegate.clear(); - } - - @Override - public Set keys() { - return delegate.keys(); - } - - public void setConflictPolicy(AbstractStructVector.ConflictPolicy conflictPolicy) { - this.conflictPolicy = conflictPolicy; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java b/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java deleted file mode 100644 index df7be1f0296a0..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.util; - -import java.util.Arrays; -import java.util.Base64; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.ReusableBuffer; - -/** A wrapper around byte arrays for repeated writing. */ -public class ReusableByteArray implements ReusableBuffer { - - protected static final byte[] EMPTY_BYTES = new byte[0]; - - protected byte[] bytes; - protected int length; - - public ReusableByteArray() { - bytes = EMPTY_BYTES; - } - - public ReusableByteArray(byte[] data) { - bytes = Arrays.copyOfRange(data, 0, data.length); - length = data.length; - } - - /** - * Get the number of bytes in the byte array. - * - * @return the number of bytes in the byte array - */ - @Override - public long getLength() { - return length; - } - - @Override - public byte[] getBuffer() { - return bytes; - } - - @Override - public void set(ArrowBuf srcBytes, long start, long len) { - setCapacity((int) len, false); - srcBytes.getBytes(start, bytes, 0, (int) len); - length = (int) len; - } - - @Override - public void set(byte[] srcBytes, long start, long len) { - setCapacity((int) len, false); - System.arraycopy(srcBytes, (int) start, bytes, 0, (int) len); - length = (int) len; - } - - @Override - public boolean equals(Object o) { - if (o == this) { - return true; - } else if (o == null) { - return false; - } - if (!(o instanceof ReusableByteArray)) { - return false; - } - - final ReusableByteArray that = (ReusableByteArray) o; - if (this.getLength() != that.getLength()) { - return false; - } - - for (int i = 0; i < length; i++) { - if (bytes[i] != that.bytes[i]) { - return false; - } - } - - return true; - } - - @Override - public int hashCode() { - if (bytes == null) { - return 0; - } - - int result = 1; - for (int i = 0; i < length; i++) { - result = 31 * result + bytes[i]; - } - - return result; - } - - @Override - public String toString() { - return Base64.getEncoder().encodeToString(Arrays.copyOfRange(bytes, 0, length)); - } - - /** - * 
Sets the capacity of this object to at least len bytes. If the current - * buffer is longer, then the capacity and existing content of the buffer are unchanged. If - * len is larger than the current capacity, the Text object's capacity is increased to - * match. - * - * @param len the number of bytes we need - * @param keepData should the old data be kept - */ - protected void setCapacity(int len, boolean keepData) { - if (bytes == null || bytes.length < len) { - if (bytes != null && keepData) { - bytes = Arrays.copyOf(bytes, Math.max(len, length << 1)); - } else { - bytes = new byte[len]; - } - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java b/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java deleted file mode 100644 index 18a158b921c4a..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -/** Thrown when child vectors (e.g. in lists) don't match the expected type. 
*/ -public class SchemaChangeRuntimeException extends RuntimeException { - public SchemaChangeRuntimeException() { - super(); - } - - public SchemaChangeRuntimeException( - String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { - super(message, cause, enableSuppression, writableStackTrace); - } - - public SchemaChangeRuntimeException(String message, Throwable cause) { - super(message, cause); - } - - public SchemaChangeRuntimeException(String message) { - super(message); - } - - public SchemaChangeRuntimeException(Throwable cause) { - super(cause); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java deleted file mode 100644 index 83d2f328d34f1..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.util; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.ipc.ReadChannel; -import org.apache.arrow.vector.ipc.WriteChannel; -import org.apache.arrow.vector.ipc.message.MessageChannelReader; -import org.apache.arrow.vector.ipc.message.MessageResult; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.pojo.Schema; - -/** Schema utility class including serialization and deserialization. */ -public class SchemaUtility { - private SchemaUtility() {} - - /** Deserialize Arrow schema from byte array. */ - public static Schema deserialize(byte[] bytes, BufferAllocator allocator) throws IOException { - try (MessageChannelReader schemaReader = - new MessageChannelReader( - new ReadChannel(new ByteArrayReadableSeekableByteChannel(bytes)), allocator)) { - - MessageResult result = schemaReader.readNext(); - if (result == null) { - throw new IOException("Unexpected end of input. Missing schema."); - } - return MessageSerializer.deserializeSchema(result.getMessage()); - } - } - - /** Serialize Arrow schema into byte array. */ - public static byte[] serialize(Schema schema) throws IOException { - final ByteArrayOutputStream out = new ByteArrayOutputStream(); - MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), schema); - return out.toByteArray(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java b/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java deleted file mode 100644 index 35d810abbb633..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java +++ /dev/null @@ -1,876 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import com.fasterxml.jackson.core.JsonGenerationException; -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.annotation.JsonSerialize; -import com.fasterxml.jackson.databind.ser.std.StdSerializer; -import java.io.DataInput; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.CharacterCodingException; -import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.CodingErrorAction; -import java.nio.charset.MalformedInputException; -import java.text.CharacterIterator; -import java.text.StringCharacterIterator; -import java.util.Optional; - -/** - * A simplified byte wrapper similar to Hadoop's Text class without all the dependencies. 
Lifted - * from Hadoop 2.7.1 - */ -@JsonSerialize(using = Text.TextSerializer.class) -public class Text extends ReusableByteArray { - - private static ThreadLocal ENCODER_FACTORY = - new ThreadLocal() { - @Override - protected CharsetEncoder initialValue() { - return Charset.forName("UTF-8") - .newEncoder() - .onMalformedInput(CodingErrorAction.REPORT) - .onUnmappableCharacter(CodingErrorAction.REPORT); - } - }; - - private static ThreadLocal DECODER_FACTORY = - new ThreadLocal() { - @Override - protected CharsetDecoder initialValue() { - return Charset.forName("UTF-8") - .newDecoder() - .onMalformedInput(CodingErrorAction.REPORT) - .onUnmappableCharacter(CodingErrorAction.REPORT); - } - }; - - public Text() { - super(); - } - - /** - * Construct from a string. - * - * @param string initialize from that string - */ - public Text(String string) { - set(string); - } - - /** - * Construct from another text. - * - * @param utf8 initialize from that Text - */ - public Text(Text utf8) { - set(utf8); - } - - /** - * Construct from a byte array. - * - * @param utf8 initialize from that byte array - */ - public Text(byte[] utf8) { - set(utf8); - } - - /** - * Get a copy of the bytes that is exactly the length of the data. See {@link #getBytes()} for - * faster access to the underlying array. - * - * @return a copy of the underlying array - */ - public byte[] copyBytes() { - byte[] result = new byte[length]; - System.arraycopy(bytes, 0, result, 0, length); - return result; - } - - /** - * Returns the raw bytes; however, only data up to {@link #getLength()} is valid. Please use - * {@link #copyBytes()} if you need the returned array to be precisely the length of the data. - * - * @return the underlying array - */ - public byte[] getBytes() { - return bytes; - } - - /** - * Returns the Unicode Scalar Value (32-bit integer value) for the character at position - * . Note that this method avoids using the converter or doing String instantiation. 
- * - * @param position the index of the char we want to retrieve - * @return the Unicode scalar value at position or -1 if the position is invalid or points to a - * trailing byte - */ - public int charAt(int position) { - if (position > this.length) { - return -1; // too long - } - if (position < 0) { - return -1; // duh. - } - - ByteBuffer bb = (ByteBuffer) ByteBuffer.wrap(bytes).position(position); - return bytesToCodePoint(bb.slice()); - } - - public int find(String what) { - return find(what, 0); - } - - /** - * Finds any occurrence of what in the backing buffer, starting as position - * start. The starting position is measured in bytes and the return value is in terms of - * byte position in the buffer. The backing buffer is not converted to a string for this - * operation. - * - * @param what the string to search for - * @param start where to start from - * @return byte position of the first occurrence of the search string in the UTF-8 buffer or -1 if - * not found - */ - public int find(String what, int start) { - try { - ByteBuffer src = ByteBuffer.wrap(this.bytes, 0, this.length); - ByteBuffer tgt = encode(what); - byte b = tgt.get(); - src.position(start); - - while (src.hasRemaining()) { - if (b == src.get()) { // matching first byte - src.mark(); // save position in loop - tgt.mark(); // save position in target - boolean found = true; - int pos = src.position() - 1; - while (tgt.hasRemaining()) { - if (!src.hasRemaining()) { // src expired first - tgt.reset(); - src.reset(); - found = false; - break; - } - if (!(tgt.get() == src.get())) { - tgt.reset(); - src.reset(); - found = false; - break; // no match - } - } - if (found) { - return pos; - } - } - } - return -1; // not found - } catch (CharacterCodingException e) { - // can't get here - e.printStackTrace(); - return -1; - } - } - - /** - * Set to contain the contents of a string. 
- * - * @param string the string to initialize from - */ - public void set(String string) { - try { - ByteBuffer bb = encode(string, true); - bytes = bb.array(); - length = bb.limit(); - } catch (CharacterCodingException e) { - throw new RuntimeException("Should not have happened ", e); - } - } - - /** - * Set to an utf8 byte array. - * - * @param utf8 the byte array to initialize from - */ - public void set(byte[] utf8) { - set(utf8, 0, utf8.length); - } - - /** - * copy a text. - * - * @param other the text to initialize from - */ - public void set(Text other) { - set(other.getBytes(), 0, (int) other.getLength()); - } - - /** - * Set the Text to range of bytes. - * - * @param utf8 the data to copy from - * @param start the first position of the new string - * @param len the number of bytes of the new string - */ - public void set(byte[] utf8, int start, int len) { - super.set(utf8, start, len); - } - - /** - * Append a range of bytes to the end of the given text. - * - * @param utf8 the data to copy from - * @param start the first position to append from utf8 - * @param len the number of bytes to append - */ - public void append(byte[] utf8, int start, int len) { - setCapacity(length + len, true); - System.arraycopy(utf8, start, bytes, length, len); - length += len; - } - - /** - * Clear the string to empty. - * - *

    Note: For performance reasons, this call does not clear the underlying byte array - * that is retrievable via {@link #getBytes()}. In order to free the byte-array memory, call - * {@link #set(byte[])} with an empty byte array (For example, new byte[0]). - */ - public void clear() { - length = 0; - } - - @Override - public String toString() { - try { - return decode(bytes, 0, length); - } catch (CharacterCodingException e) { - throw new RuntimeException("Should not have happened ", e); - } - } - - /** - * Read a Text object whose length is already known. This allows creating Text from a stream which - * uses a different serialization format. - * - * @param in the input to initialize from - * @param len how many bytes to read from in - * @throws IOException if something bad happens - */ - public void readWithKnownLength(DataInput in, int len) throws IOException { - setCapacity(len, false); - in.readFully(bytes, 0, len); - length = len; - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof Text)) { - return false; - } - return super.equals(o); - } - - // / STATIC UTILITIES FROM HERE DOWN - - /** - * Converts the provided byte array to a String using the UTF-8 encoding. If the input is - * malformed, replace by a default value. - * - * @param utf8 bytes to decode - * @return the decoded string - * @throws CharacterCodingException if this is not valid UTF-8 - */ - public static String decode(byte[] utf8) throws CharacterCodingException { - return decode(ByteBuffer.wrap(utf8), true); - } - - public static String decode(byte[] utf8, int start, int length) throws CharacterCodingException { - return decode(ByteBuffer.wrap(utf8, start, length), true); - } - - /** - * Converts the provided byte array to a String using the UTF-8 encoding. If replace - * is true, then malformed input is replaced with the substitution character, which is U+FFFD. - * Otherwise the method throws a MalformedInputException. 
- * - * @param utf8 the bytes to decode - * @param start where to start from - * @param length length of the bytes to decode - * @param replace whether to replace malformed characters with U+FFFD - * @return the decoded string - * @throws CharacterCodingException if the input could not be decoded - */ - public static String decode(byte[] utf8, int start, int length, boolean replace) - throws CharacterCodingException { - return decode(ByteBuffer.wrap(utf8, start, length), replace); - } - - private static String decode(ByteBuffer utf8, boolean replace) throws CharacterCodingException { - CharsetDecoder decoder = DECODER_FACTORY.get(); - if (replace) { - decoder.onMalformedInput(java.nio.charset.CodingErrorAction.REPLACE); - decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); - } - String str = decoder.decode(utf8).toString(); - // set decoder back to its default value: REPORT - if (replace) { - decoder.onMalformedInput(CodingErrorAction.REPORT); - decoder.onUnmappableCharacter(CodingErrorAction.REPORT); - } - return str; - } - - /** - * Converts the provided String to bytes using the UTF-8 encoding. If the input is malformed, - * invalid chars are replaced by a default value. - * - * @param string the string to encode - * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is ByteBuffer.limit() - * @throws CharacterCodingException if the string could not be encoded - */ - public static ByteBuffer encode(String string) throws CharacterCodingException { - return encode(string, true); - } - - /** - * Converts the provided String to bytes using the UTF-8 encoding. If replace is - * true, then malformed input is replaced with the substitution character, which is U+FFFD. - * Otherwise the method throws a MalformedInputException. 
- * - * @param string the string to encode - * @param replace whether to replace malformed characters with U+FFFD - * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is ByteBuffer.limit() - * @throws CharacterCodingException if the string could not be encoded - */ - public static ByteBuffer encode(String string, boolean replace) throws CharacterCodingException { - CharsetEncoder encoder = ENCODER_FACTORY.get(); - if (replace) { - encoder.onMalformedInput(CodingErrorAction.REPLACE); - encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); - } - ByteBuffer bytes = encoder.encode(CharBuffer.wrap(string.toCharArray())); - if (replace) { - encoder.onMalformedInput(CodingErrorAction.REPORT); - encoder.onUnmappableCharacter(CodingErrorAction.REPORT); - } - return bytes; - } - - public static final int DEFAULT_MAX_LEN = 1024 * 1024; - - // //// states for validateUTF8 - - private static final int LEAD_BYTE = 0; - - private static final int TRAIL_BYTE_1 = 1; - - private static final int TRAIL_BYTE = 2; - - /** - * Check if a byte array contains valid utf-8. - * - * @param utf8 byte array - * @return true if the input is valid UTF-8. False otherwise. - */ - public static boolean validateUTF8NoThrow(byte[] utf8) { - return !validateUTF8Internal(utf8, 0, utf8.length).isPresent(); - } - - /** - * Check if a byte array contains valid utf-8. - * - * @param utf8 byte array - * @throws MalformedInputException if the byte array contains invalid utf-8 - */ - public static void validateUTF8(byte[] utf8) throws MalformedInputException { - validateUTF8(utf8, 0, utf8.length); - } - - /** - * Check to see if a byte array is valid utf-8. 
- * - * @param utf8 the array of bytes - * @param start the offset of the first byte in the array - * @param len the length of the byte sequence - * @throws MalformedInputException if the byte array contains invalid bytes - */ - public static void validateUTF8(byte[] utf8, int start, int len) throws MalformedInputException { - Optional result = validateUTF8Internal(utf8, start, len); - if (result.isPresent()) { - throw new MalformedInputException(result.get()); - } - } - - /** - * Check to see if a byte array is valid utf-8. - * - * @param utf8 the array of bytes - * @param start the offset of the first byte in the array - * @param len the length of the byte sequence - * @return the position where a malformed byte occurred or Optional.empty() if the byte array was - * valid UTF-8. - */ - private static Optional validateUTF8Internal(byte[] utf8, int start, int len) { - int count = start; - int leadByte = 0; - int length = 0; - int state = LEAD_BYTE; - while (count < start + len) { - int aByte = utf8[count] & 0xFF; - - switch (state) { - case LEAD_BYTE: - leadByte = aByte; - length = bytesFromUTF8[aByte]; - - switch (length) { - case 0: // check for ASCII - if (leadByte > 0x7F) { - return Optional.of(count); - } - break; - case 1: - if (leadByte < 0xC2 || leadByte > 0xDF) { - return Optional.of(count); - } - state = TRAIL_BYTE_1; - break; - case 2: - if (leadByte < 0xE0 || leadByte > 0xEF) { - return Optional.of(count); - } - state = TRAIL_BYTE_1; - break; - case 3: - if (leadByte < 0xF0 || leadByte > 0xF4) { - return Optional.of(count); - } - state = TRAIL_BYTE_1; - break; - default: - // too long! 
Longest valid UTF-8 is 4 bytes (lead + three) - // or if < 0 we got a trail byte in the lead byte position - return Optional.of(count); - } // switch (length) - break; - - case TRAIL_BYTE_1: - if (leadByte == 0xF0 && aByte < 0x90) { - return Optional.of(count); - } - if (leadByte == 0xF4 && aByte > 0x8F) { - return Optional.of(count); - } - if (leadByte == 0xE0 && aByte < 0xA0) { - return Optional.of(count); - } - if (leadByte == 0xED && aByte > 0x9F) { - return Optional.of(count); - } - // falls through to regular trail-byte test!! - case TRAIL_BYTE: - if (aByte < 0x80 || aByte > 0xBF) { - return Optional.of(count); - } - if (--length == 0) { - state = LEAD_BYTE; - } else { - state = TRAIL_BYTE; - } - break; - default: - break; - } // switch (state) - count++; - } - return Optional.empty(); - } - - /** - * Magic numbers for UTF-8. These are the number of bytes that follow a given lead byte. - * Trailing bytes have the value -1. The values 4 and 5 are presented in this table, even though - * valid UTF-8 cannot include the five and six byte sequences. 
- */ - static final int[] bytesFromUTF8 = { - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - // trail bytes - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 2, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 4, - 4, - 4, - 4, - 5, - 5, - 5, - 5 - }; - - /** - * Returns the next code point at the current position in the buffer. The buffer's position will - * be incremented. Any mark set on this buffer will be changed by this method! - * - * @param bytes the incoming bytes - * @return the corresponding unicode codepoint - */ - public static int bytesToCodePoint(ByteBuffer bytes) { - bytes.mark(); - byte b = bytes.get(); - bytes.reset(); - int extraBytesToRead = bytesFromUTF8[(b & 0xFF)]; - if (extraBytesToRead < 0) { - return -1; // trailing byte! 
- } - int ch = 0; - - switch (extraBytesToRead) { - case 5: - ch += (bytes.get() & 0xFF); - ch <<= 6; /* remember, illegal UTF-8 */ - // fall through - case 4: - ch += (bytes.get() & 0xFF); - ch <<= 6; /* remember, illegal UTF-8 */ - // fall through - case 3: - ch += (bytes.get() & 0xFF); - ch <<= 6; - // fall through - case 2: - ch += (bytes.get() & 0xFF); - ch <<= 6; - // fall through - case 1: - ch += (bytes.get() & 0xFF); - ch <<= 6; - // fall through - case 0: - ch += (bytes.get() & 0xFF); - break; - default: // do nothing - } - ch -= offsetsFromUTF8[extraBytesToRead]; - - return ch; - } - - static final int[] offsetsFromUTF8 = { - 0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 - }; - - /** - * For the given string, returns the number of UTF-8 bytes required to encode the string. - * - * @param string text to encode - * @return number of UTF-8 bytes required to encode - */ - public static int utf8Length(String string) { - CharacterIterator iter = new StringCharacterIterator(string); - char ch = iter.first(); - int size = 0; - while (ch != CharacterIterator.DONE) { - if ((ch >= 0xD800) && (ch < 0xDC00)) { - // surrogate pair? - char trail = iter.next(); - if ((trail > 0xDBFF) && (trail < 0xE000)) { - // valid pair - size += 4; - } else { - // invalid pair - size += 3; - iter.previous(); // rewind one - } - } else if (ch < 0x80) { - size++; - } else if (ch < 0x800) { - size += 2; - } else { - // ch < 0x10000, that is, the largest char value - size += 3; - } - ch = iter.next(); - } - return size; - } - - /** JSON serializer for {@link Text}. 
*/ - public static class TextSerializer extends StdSerializer { - - public TextSerializer() { - super(Text.class); - } - - @Override - public void serialize( - Text text, JsonGenerator jsonGenerator, SerializerProvider serializerProvider) - throws IOException, JsonGenerationException { - jsonGenerator.writeString(text.toString()); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java b/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java deleted file mode 100644 index aca685baccbfb..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import org.apache.arrow.vector.ValueVector; - -/** Interface for copying values between a pair of two vectors of the same type. 
*/ -public interface TransferPair { - void transfer(); - - void splitAndTransfer(int startIndex, int length); - - ValueVector getTo(); - - void copyValueSafe(int from, int to); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java b/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java deleted file mode 100644 index e1b4c93e6eeb6..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Objects; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; - -/** Utility class for validating arrow data structures. 
*/ -public class Validator { - - /** - * Validate two arrow schemas are equal. - * - * @param schema1 the 1st schema to compare - * @param schema2 the 2nd schema to compare - * @throws IllegalArgumentException if they are different. - */ - public static void compareSchemas(Schema schema1, Schema schema2) { - if (!schema2.equals(schema1)) { - throw new IllegalArgumentException("Different schemas:\n" + schema2 + "\n" + schema1); - } - } - - /** Validate two Dictionary encodings and dictionaries with id's from the encodings. */ - public static void compareDictionaries( - List encodings1, - List encodings2, - DictionaryProvider provider1, - DictionaryProvider provider2) { - - if (encodings1.size() != encodings2.size()) { - throw new IllegalArgumentException( - "Different dictionary encoding count:\n" + encodings1.size() + "\n" + encodings2.size()); - } - - for (int i = 0; i < encodings1.size(); i++) { - if (!encodings1.get(i).equals(encodings2.get(i))) { - throw new IllegalArgumentException( - "Different dictionary encodings:\n" + encodings1.get(i) + "\n" + encodings2.get(i)); - } - - long id = encodings1.get(i).getId(); - Dictionary dict1 = provider1.lookup(id); - Dictionary dict2 = provider2.lookup(id); - - if (dict1 == null || dict2 == null) { - throw new IllegalArgumentException( - "The DictionaryProvider did not contain the required " - + "dictionary with id: " - + id - + "\n" - + dict1 - + "\n" - + dict2); - } - - try { - compareFieldVectors(dict1.getVector(), dict2.getVector()); - } catch (IllegalArgumentException e) { - throw new IllegalArgumentException("Different dictionaries:\n" + dict1 + "\n" + dict2, e); - } - } - } - - /** Validate two dictionary providers are equal in structure and contents. 
*/ - public static void compareDictionaryProviders( - DictionaryProvider provider1, DictionaryProvider provider2) { - List ids1 = new ArrayList(provider1.getDictionaryIds()); - List ids2 = new ArrayList(provider2.getDictionaryIds()); - java.util.Collections.sort(ids1); - java.util.Collections.sort(ids2); - if (!ids1.equals(ids2)) { - throw new IllegalArgumentException( - "Different ids in dictionary providers:\n" + ids1 + "\n" + ids2); - } - for (long id : ids1) { - Dictionary dict1 = provider1.lookup(id); - Dictionary dict2 = provider2.lookup(id); - try { - compareFieldVectors(dict1.getVector(), dict2.getVector()); - } catch (IllegalArgumentException e) { - throw new IllegalArgumentException("Different dictionaries:\n" + dict1 + "\n" + dict2, e); - } - } - } - - /** - * Validate two arrow vectorSchemaRoot are equal. - * - * @param root1 the 1st schema to compare - * @param root2 the 2nd schema to compare - * @throws IllegalArgumentException if they are different. - */ - public static void compareVectorSchemaRoot(VectorSchemaRoot root1, VectorSchemaRoot root2) { - compareSchemas(root2.getSchema(), root1.getSchema()); - if (root1.getRowCount() != root2.getRowCount()) { - throw new IllegalArgumentException( - "Different row count:\n" + root1.getRowCount() + " != " + root2.getRowCount()); - } - List vectors1 = root1.getFieldVectors(); - List vectors2 = root2.getFieldVectors(); - if (vectors1.size() != vectors2.size()) { - throw new IllegalArgumentException( - "Different column count:\n" + vectors1.toString() + "\n!=\n" + vectors2.toString()); - } - for (int i = 0; i < vectors1.size(); i++) { - compareFieldVectors(vectors1.get(i), vectors2.get(i)); - } - } - - /** - * Validate two arrow FieldVectors are equal. 
- * - * @param vector1 the 1st VectorField to compare - * @param vector2 the 2nd VectorField to compare - * @throws IllegalArgumentException if they are different - */ - public static void compareFieldVectors(FieldVector vector1, FieldVector vector2) { - Field field1 = vector1.getField(); - if (!field1.equals(vector2.getField())) { - throw new IllegalArgumentException( - "Different Fields:\n" + field1 + "\n!=\n" + vector2.getField()); - } - int valueCount = vector1.getValueCount(); - if (valueCount != vector2.getValueCount()) { - throw new IllegalArgumentException( - "Different value count for field " - + field1 - + " : " - + valueCount - + " != " - + vector2.getValueCount()); - } - for (int j = 0; j < valueCount; j++) { - Object obj1 = vector1.getObject(j); - Object obj2 = vector2.getObject(j); - if (!equals(field1.getType(), obj1, obj2)) { - throw new IllegalArgumentException( - "Different values in column:\n" - + field1 - + " at index " - + j - + ": " - + obj1 - + " != " - + obj2); - } - } - } - - static boolean equals(ArrowType type, final Object o1, final Object o2) { - if (type instanceof ArrowType.FloatingPoint) { - ArrowType.FloatingPoint fpType = (ArrowType.FloatingPoint) type; - switch (fpType.getPrecision()) { - case DOUBLE: - return equalEnough((Double) o1, (Double) o2); - case SINGLE: - return equalEnough((Float) o1, (Float) o2); - case HALF: - default: - throw new UnsupportedOperationException("unsupported precision: " + fpType); - } - } else if (type instanceof ArrowType.Binary - || type instanceof ArrowType.LargeBinary - || type instanceof ArrowType.FixedSizeBinary) { - return Arrays.equals((byte[]) o1, (byte[]) o2); - } else if (o1 instanceof byte[] && o2 instanceof byte[]) { - return Arrays.equals((byte[]) o1, (byte[]) o2); - } - - return Objects.equals(o1, o2); - } - - static boolean equalEnough(Float f1, Float f2) { - if (f1 == null || f2 == null) { - return f1 == null && f2 == null; - } - if (f1.isNaN()) { - return f2.isNaN(); - } - if 
(f1.isInfinite()) { - return f2.isInfinite() && Math.signum(f1) == Math.signum(f2); - } - float average = Math.abs((f1 + f2) / 2); - float differenceScaled = Math.abs(f1 - f2) / (average == 0.0f ? 1f : average); - return differenceScaled < 1.0E-6f; - } - - static boolean equalEnough(Double f1, Double f2) { - if (f1 == null || f2 == null) { - return f1 == null && f2 == null; - } - if (f1.isNaN()) { - return f2.isNaN(); - } - if (f1.isInfinite()) { - return f2.isInfinite() && Math.signum(f1) == Math.signum(f2); - } - double average = Math.abs((f1 + f2) / 2); - double differenceScaled = Math.abs(f1 - f2) / (average == 0.0d ? 1d : average); - return differenceScaled < 1.0E-12d; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java deleted file mode 100644 index d40afed2aaa81..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.util; - -import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow; - -import java.util.function.BiFunction; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.validate.ValidateVectorBufferVisitor; -import org.apache.arrow.vector.validate.ValidateVectorDataVisitor; -import org.apache.arrow.vector.validate.ValidateVectorTypeVisitor; - -/** Utility methods for {@link ValueVector}. */ -public class ValueVectorUtility { - - private ValueVectorUtility() {} - - /** - * Get the toString() representation of vector suitable for debugging. Note since vectors may have - * millions of values, this method only shows max 20 values. Examples as below (v represents - * value): - *

  • vector with 0 value: [] - *
  • vector with 5 values (no more than 20 values): [v0, v1, v2, v3, v4] - *
  • vector with 100 values (more than 20 values): [v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, ..., - * v90, v91, v92, v93, v94, v95, v96, v97, v98, v99] - */ - public static String getToString(V vector, int start, int end) { - return getToString(vector, start, end, (v, i) -> v.getObject(i)); - } - - /** - * Get the toString() representation of vector suitable for debugging. Note since vectors may have - * millions of values, this method only shows at most 20 values. - * - * @param vector the vector for which to get toString representation. - * @param start the starting index, inclusive. - * @param end the end index, exclusive. - * @param valueToString the function to transform individual elements to strings. - */ - public static String getToString( - V vector, int start, int end, BiFunction valueToString) { - Preconditions.checkNotNull(vector); - final int length = end - start; - Preconditions.checkArgument(length >= 0); - Preconditions.checkArgument(start >= 0); - Preconditions.checkArgument(end <= vector.getValueCount()); - - if (length == 0) { - return "[]"; - } - - final int window = 10; - boolean skipComma = false; - - StringBuilder sb = new StringBuilder(); - sb.append('['); - for (int i = start; i < end; i++) { - if (skipComma) { - skipComma = false; - } - if (i - start >= window && i < end - window) { - sb.append("..."); - i = end - window - 1; - skipComma = true; - } else { - sb.append(valueToString.apply(vector, i)); - } - - if (i == end - 1) { - sb.append(']'); - } else { - if (!skipComma) { - sb.append(','); - } - sb.append(' '); - } - } - - return sb.toString(); - } - - /** Utility to validate vector in O(1) time. 
*/ - public static void validate(ValueVector vector) { - Preconditions.checkNotNull(vector); - - ValidateVectorTypeVisitor typeVisitor = new ValidateVectorTypeVisitor(); - vector.accept(typeVisitor, null); - - ValidateVectorBufferVisitor bufferVisitor = new ValidateVectorBufferVisitor(); - vector.accept(bufferVisitor, null); - } - - /** Utility to validate vector in O(n) time, where n is the value count. */ - public static void validateFull(ValueVector vector) { - validate(vector); - - ValidateVectorDataVisitor dataVisitor = new ValidateVectorDataVisitor(); - vector.accept(dataVisitor, null); - } - - /** Utility to validate vector schema root in O(1) time. */ - public static void validate(VectorSchemaRoot root) { - Preconditions.checkNotNull(root); - int valueCount = root.getRowCount(); - validateOrThrow( - valueCount >= 0, "The row count of vector schema root %s is negative.", valueCount); - for (ValueVector childVec : root.getFieldVectors()) { - validateOrThrow( - valueCount == childVec.getValueCount(), - "Child vector and vector schema root have different value counts. " - + "Child vector value count %s, vector schema root value count %s", - childVec.getValueCount(), - valueCount); - validate(childVec); - } - } - - /** Utility to validate vector in O(n) time, where n is the value count. */ - public static void validateFull(VectorSchemaRoot root) { - Preconditions.checkNotNull(root); - int valueCount = root.getRowCount(); - validateOrThrow( - valueCount >= 0, "The row count of vector schema root %s is negative.", valueCount); - for (ValueVector childVec : root.getFieldVectors()) { - validateOrThrow( - valueCount == childVec.getValueCount(), - "Child vector and vector schema root have different value counts. " - + "Child vector value count %s, vector schema root value count %s", - childVec.getValueCount(), - valueCount); - validateFull(childVec); - } - } - - /** Pre allocate memory for BaseFixedWidthVector. 
*/ - public static void preAllocate(VectorSchemaRoot root, int targetSize) { - for (ValueVector vector : root.getFieldVectors()) { - if (vector instanceof BaseFixedWidthVector) { - ((BaseFixedWidthVector) vector).allocateNew(targetSize); - } - } - } - - /** Ensure capacity for BaseFixedWidthVector. */ - public static void ensureCapacity(VectorSchemaRoot root, int targetCapacity) { - for (ValueVector vector : root.getFieldVectors()) { - if (vector instanceof BaseFixedWidthVector) { - while (vector.getValueCapacity() < targetCapacity) { - vector.reAlloc(); - } - } - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java deleted file mode 100644 index e703571b374eb..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java +++ /dev/null @@ -1,642 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.util; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; - -import java.util.HashSet; -import org.apache.arrow.memory.util.MemoryUtil; -import org.apache.arrow.util.Preconditions; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseLargeVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthViewVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.compare.TypeEqualsVisitor; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.UnionVector; - -/** Utility to append two vectors together. */ -public class VectorAppender implements VectorVisitor { - - /** The targetVector to be appended. */ - private final ValueVector targetVector; - - private final TypeEqualsVisitor typeVisitor; - - /** - * Constructs a new targetVector appender, with the given targetVector. - * - * @param targetVector the targetVector to be appended. 
- */ - public VectorAppender(ValueVector targetVector) { - this.targetVector = targetVector; - typeVisitor = new TypeEqualsVisitor(targetVector, false, true); - } - - @Override - public ValueVector visit(BaseFixedWidthVector deltaVector, Void value) { - Preconditions.checkArgument( - targetVector.getField().getType().equals(deltaVector.getField().getType()), - "The targetVector to append must have the same type as the targetVector being appended"); - - if (deltaVector.getValueCount() == 0) { - return targetVector; // optimization, nothing to append, return - } - - int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); - - // make sure there is enough capacity - while (targetVector.getValueCapacity() < newValueCount) { - targetVector.reAlloc(); - } - - // append validity buffer - BitVectorHelper.concatBits( - targetVector.getValidityBuffer(), - targetVector.getValueCount(), - deltaVector.getValidityBuffer(), - deltaVector.getValueCount(), - targetVector.getValidityBuffer()); - - // append data buffer - if (targetVector instanceof BitVector) { - // special processing for bit vector, as its type width is 0 - BitVectorHelper.concatBits( - targetVector.getDataBuffer(), - targetVector.getValueCount(), - deltaVector.getDataBuffer(), - deltaVector.getValueCount(), - targetVector.getDataBuffer()); - - } else { - MemoryUtil.copyMemory( - deltaVector.getDataBuffer().memoryAddress(), - targetVector.getDataBuffer().memoryAddress() - + deltaVector.getTypeWidth() * targetVector.getValueCount(), - deltaVector.getTypeWidth() * deltaVector.getValueCount()); - } - targetVector.setValueCount(newValueCount); - return targetVector; - } - - @Override - public ValueVector visit(BaseVariableWidthVector deltaVector, Void value) { - Preconditions.checkArgument( - targetVector.getField().getType().equals(deltaVector.getField().getType()), - "The targetVector to append must have the same type as the targetVector being appended"); - - if (deltaVector.getValueCount() == 
0) { - return targetVector; // nothing to append, return - } - - int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); - - int targetDataSize = - targetVector - .getOffsetBuffer() - .getInt((long) targetVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); - int deltaDataSize = - deltaVector - .getOffsetBuffer() - .getInt((long) deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); - int newValueCapacity = targetDataSize + deltaDataSize; - - // make sure there is enough capacity - while (targetVector.getValueCapacity() < newValueCount) { - ((BaseVariableWidthVector) targetVector).reallocValidityAndOffsetBuffers(); - } - while (targetVector.getDataBuffer().capacity() < newValueCapacity) { - ((BaseVariableWidthVector) targetVector).reallocDataBuffer(); - } - - // append validity buffer - BitVectorHelper.concatBits( - targetVector.getValidityBuffer(), - targetVector.getValueCount(), - deltaVector.getValidityBuffer(), - deltaVector.getValueCount(), - targetVector.getValidityBuffer()); - - // append data buffer - MemoryUtil.copyMemory( - deltaVector.getDataBuffer().memoryAddress(), - targetVector.getDataBuffer().memoryAddress() + targetDataSize, - deltaDataSize); - - // copy offset buffer - MemoryUtil.copyMemory( - deltaVector.getOffsetBuffer().memoryAddress() + BaseVariableWidthVector.OFFSET_WIDTH, - targetVector.getOffsetBuffer().memoryAddress() - + (targetVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, - deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); - - // increase each offset from the second buffer - for (int i = 0; i < deltaVector.getValueCount(); i++) { - int oldOffset = - targetVector - .getOffsetBuffer() - .getInt( - (long) (targetVector.getValueCount() + 1 + i) - * BaseVariableWidthVector.OFFSET_WIDTH); - targetVector - .getOffsetBuffer() - .setInt( - (long) (targetVector.getValueCount() + 1 + i) * BaseVariableWidthVector.OFFSET_WIDTH, - oldOffset + targetDataSize); - 
} - ((BaseVariableWidthVector) targetVector).setLastSet(newValueCount - 1); - targetVector.setValueCount(newValueCount); - return targetVector; - } - - @Override - public ValueVector visit(BaseLargeVariableWidthVector deltaVector, Void value) { - Preconditions.checkArgument( - targetVector.getField().getType().equals(deltaVector.getField().getType()), - "The targetVector to append must have the same type as the targetVector being appended"); - - if (deltaVector.getValueCount() == 0) { - return targetVector; // nothing to append, return - } - - int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); - - long targetDataSize = - targetVector - .getOffsetBuffer() - .getLong( - (long) targetVector.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH); - long deltaDataSize = - deltaVector - .getOffsetBuffer() - .getLong( - (long) deltaVector.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH); - long newValueCapacity = targetDataSize + deltaDataSize; - - // make sure there is enough capacity - while (targetVector.getValueCapacity() < newValueCount) { - ((BaseLargeVariableWidthVector) targetVector).reallocValidityAndOffsetBuffers(); - } - while (targetVector.getDataBuffer().capacity() < newValueCapacity) { - ((BaseLargeVariableWidthVector) targetVector).reallocDataBuffer(); - } - - // append validity buffer - BitVectorHelper.concatBits( - targetVector.getValidityBuffer(), - targetVector.getValueCount(), - deltaVector.getValidityBuffer(), - deltaVector.getValueCount(), - targetVector.getValidityBuffer()); - - // append data buffer - MemoryUtil.copyMemory( - deltaVector.getDataBuffer().memoryAddress(), - targetVector.getDataBuffer().memoryAddress() + targetDataSize, - deltaDataSize); - - // copy offset buffer - MemoryUtil.copyMemory( - deltaVector.getOffsetBuffer().memoryAddress() + BaseLargeVariableWidthVector.OFFSET_WIDTH, - targetVector.getOffsetBuffer().memoryAddress() - + (targetVector.getValueCount() + 1) * 
BaseLargeVariableWidthVector.OFFSET_WIDTH, - deltaVector.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH); - - // increase each offset from the second buffer - for (int i = 0; i < deltaVector.getValueCount(); i++) { - long oldOffset = - targetVector - .getOffsetBuffer() - .getLong( - (long) (targetVector.getValueCount() + 1 + i) - * BaseLargeVariableWidthVector.OFFSET_WIDTH); - targetVector - .getOffsetBuffer() - .setLong( - (long) (targetVector.getValueCount() + 1 + i) - * BaseLargeVariableWidthVector.OFFSET_WIDTH, - oldOffset + targetDataSize); - } - ((BaseLargeVariableWidthVector) targetVector).setLastSet(newValueCount - 1); - targetVector.setValueCount(newValueCount); - return targetVector; - } - - @Override - public ValueVector visit(BaseVariableWidthViewVector left, Void value) { - throw new UnsupportedOperationException("View vectors are not supported."); - } - - @Override - public ValueVector visit(ListVector deltaVector, Void value) { - Preconditions.checkArgument( - typeVisitor.equals(deltaVector), - "The targetVector to append must have the same type as the targetVector being appended"); - - if (deltaVector.getValueCount() == 0) { - return targetVector; // nothing to append, return - } - - int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); - - int targetListSize = - targetVector - .getOffsetBuffer() - .getInt((long) targetVector.getValueCount() * ListVector.OFFSET_WIDTH); - int deltaListSize = - deltaVector - .getOffsetBuffer() - .getInt((long) deltaVector.getValueCount() * ListVector.OFFSET_WIDTH); - - ListVector targetListVector = (ListVector) targetVector; - - // make sure the underlying vector has value count set - targetListVector.getDataVector().setValueCount(targetListSize); - deltaVector.getDataVector().setValueCount(deltaListSize); - - // make sure there is enough capacity - while (targetVector.getValueCapacity() < newValueCount) { - targetVector.reAlloc(); - } - - // append validity buffer - 
BitVectorHelper.concatBits( - targetVector.getValidityBuffer(), - targetVector.getValueCount(), - deltaVector.getValidityBuffer(), - deltaVector.getValueCount(), - targetVector.getValidityBuffer()); - - // append offset buffer - MemoryUtil.copyMemory( - deltaVector.getOffsetBuffer().memoryAddress() + ListVector.OFFSET_WIDTH, - targetVector.getOffsetBuffer().memoryAddress() - + (targetVector.getValueCount() + 1) * ListVector.OFFSET_WIDTH, - (long) deltaVector.getValueCount() * ListVector.OFFSET_WIDTH); - - // increase each offset from the second buffer - for (int i = 0; i < deltaVector.getValueCount(); i++) { - int oldOffset = - targetVector - .getOffsetBuffer() - .getInt((long) (targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH); - targetVector - .getOffsetBuffer() - .setInt( - (long) (targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH, - oldOffset + targetListSize); - } - targetListVector.setLastSet(newValueCount - 1); - - // append underlying vectors - VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector()); - deltaVector.getDataVector().accept(innerAppender, null); - - targetVector.setValueCount(newValueCount); - return targetVector; - } - - @Override - public ValueVector visit(LargeListVector deltaVector, Void value) { - Preconditions.checkArgument( - typeVisitor.equals(deltaVector), - "The targetVector to append must have the same type as the targetVector being appended"); - - if (deltaVector.getValueCount() == 0) { - return targetVector; // nothing to append, return - } - - int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); - - long targetListSize = - targetVector - .getOffsetBuffer() - .getLong((long) targetVector.getValueCount() * LargeListVector.OFFSET_WIDTH); - long deltaListSize = - deltaVector - .getOffsetBuffer() - .getLong((long) deltaVector.getValueCount() * LargeListVector.OFFSET_WIDTH); - - ListVector targetListVector = (ListVector) targetVector; - - // make sure 
the underlying vector has value count set - // todo recheck these casts when int64 vectors are supported - targetListVector.getDataVector().setValueCount(checkedCastToInt(targetListSize)); - deltaVector.getDataVector().setValueCount(checkedCastToInt(deltaListSize)); - - // make sure there is enough capacity - while (targetVector.getValueCapacity() < newValueCount) { - targetVector.reAlloc(); - } - - // append validity buffer - BitVectorHelper.concatBits( - targetVector.getValidityBuffer(), - targetVector.getValueCount(), - deltaVector.getValidityBuffer(), - deltaVector.getValueCount(), - targetVector.getValidityBuffer()); - - // append offset buffer - MemoryUtil.copyMemory( - deltaVector.getOffsetBuffer().memoryAddress() + ListVector.OFFSET_WIDTH, - targetVector.getOffsetBuffer().memoryAddress() - + (targetVector.getValueCount() + 1) * LargeListVector.OFFSET_WIDTH, - (long) deltaVector.getValueCount() * ListVector.OFFSET_WIDTH); - - // increase each offset from the second buffer - for (int i = 0; i < deltaVector.getValueCount(); i++) { - long oldOffset = - targetVector - .getOffsetBuffer() - .getLong( - (long) (targetVector.getValueCount() + 1 + i) * LargeListVector.OFFSET_WIDTH); - targetVector - .getOffsetBuffer() - .setLong( - (long) (targetVector.getValueCount() + 1 + i) * LargeListVector.OFFSET_WIDTH, - oldOffset + targetListSize); - } - targetListVector.setLastSet(newValueCount - 1); - - // append underlying vectors - VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector()); - deltaVector.getDataVector().accept(innerAppender, null); - - targetVector.setValueCount(newValueCount); - return targetVector; - } - - @Override - public ValueVector visit(FixedSizeListVector deltaVector, Void value) { - Preconditions.checkArgument( - typeVisitor.equals(deltaVector), - "The vector to append must have the same type as the targetVector being appended"); - - if (deltaVector.getValueCount() == 0) { - return targetVector; // optimization, nothing 
to append, return - } - - FixedSizeListVector targetListVector = (FixedSizeListVector) targetVector; - - Preconditions.checkArgument( - targetListVector.getListSize() == deltaVector.getListSize(), - "FixedSizeListVector must have the same list size to append"); - - int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); - - int targetListSize = targetListVector.getValueCount() * targetListVector.getListSize(); - int deltaListSize = deltaVector.getValueCount() * deltaVector.getListSize(); - - // make sure the underlying vector has value count set - targetListVector.getDataVector().setValueCount(targetListSize); - deltaVector.getDataVector().setValueCount(deltaListSize); - - // make sure there is enough capacity - while (targetVector.getValueCapacity() < newValueCount) { - targetVector.reAlloc(); - } - - // append validity buffer - BitVectorHelper.concatBits( - targetVector.getValidityBuffer(), - targetVector.getValueCount(), - deltaVector.getValidityBuffer(), - deltaVector.getValueCount(), - targetVector.getValidityBuffer()); - - // append underlying vectors - VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector()); - deltaVector.getDataVector().accept(innerAppender, null); - - targetVector.setValueCount(newValueCount); - return targetVector; - } - - @Override - public ValueVector visit(NonNullableStructVector deltaVector, Void value) { - Preconditions.checkArgument( - typeVisitor.equals(deltaVector), - "The vector to append must have the same type as the targetVector being appended"); - - if (deltaVector.getValueCount() == 0) { - return targetVector; // optimization, nothing to append, return - } - - NonNullableStructVector targetStructVector = (NonNullableStructVector) targetVector; - int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); - - // make sure there is enough capacity - while (targetVector.getValueCapacity() < newValueCount) { - targetVector.reAlloc(); - } - - // append 
validity buffer - BitVectorHelper.concatBits( - targetVector.getValidityBuffer(), - targetVector.getValueCount(), - deltaVector.getValidityBuffer(), - deltaVector.getValueCount(), - targetVector.getValidityBuffer()); - - // append child vectors - for (int i = 0; i < targetStructVector.getChildrenFromFields().size(); i++) { - ValueVector targetChild = targetStructVector.getVectorById(i); - ValueVector deltaChild = deltaVector.getVectorById(i); - - targetChild.setValueCount(targetStructVector.getValueCount()); - deltaChild.setValueCount(deltaVector.getValueCount()); - - VectorAppender innerAppender = new VectorAppender(targetChild); - deltaChild.accept(innerAppender, null); - } - - targetVector.setValueCount(newValueCount); - return targetVector; - } - - @Override - public ValueVector visit(UnionVector deltaVector, Void value) { - // we only make sure that both vectors are union vectors. - Preconditions.checkArgument( - targetVector.getMinorType() == deltaVector.getMinorType(), - "The vector to append must have the same type as the targetVector being appended"); - - if (deltaVector.getValueCount() == 0) { - return targetVector; // optimization, nothing to append, return - } - - UnionVector targetUnionVector = (UnionVector) targetVector; - int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); - - // make sure there is enough capacity - while (targetUnionVector.getValueCapacity() < newValueCount) { - targetUnionVector.reAlloc(); - } - - // append type buffers - MemoryUtil.copyMemory( - deltaVector.getTypeBufferAddress(), - targetUnionVector.getTypeBufferAddress() + targetVector.getValueCount(), - deltaVector.getValueCount()); - - // build the hash set for all types - HashSet targetTypes = new HashSet<>(); - for (int i = 0; i < targetUnionVector.getValueCount(); i++) { - targetTypes.add(targetUnionVector.getTypeValue(i)); - } - HashSet deltaTypes = new HashSet<>(); - for (int i = 0; i < deltaVector.getValueCount(); i++) { - 
deltaTypes.add(deltaVector.getTypeValue(i)); - } - - // append child vectors - for (int i = 0; i < Byte.MAX_VALUE; i++) { - if (targetTypes.contains(i) || deltaTypes.contains(i)) { - ValueVector targetChild = targetUnionVector.getVectorByType(i); - if (!targetTypes.contains(i)) { - // if the vector type does not exist in the target, it must be newly created - // and we must make sure it has enough capacity. - while (targetChild.getValueCapacity() < newValueCount) { - targetChild.reAlloc(); - } - } - - if (deltaTypes.contains(i)) { - // append child vectors - ValueVector deltaChild = deltaVector.getVectorByType(i); - - targetChild.setValueCount(targetUnionVector.getValueCount()); - deltaChild.setValueCount(deltaVector.getValueCount()); - - VectorAppender innerAppender = new VectorAppender(targetChild); - deltaChild.accept(innerAppender, null); - } - targetChild.setValueCount(newValueCount); - } - } - - targetVector.setValueCount(newValueCount); - return targetVector; - } - - @Override - public ValueVector visit(DenseUnionVector deltaVector, Void value) { - // we only make sure that both vectors are union vectors. 
- Preconditions.checkArgument( - targetVector.getMinorType() == deltaVector.getMinorType(), - "The vector to append must have the same type as the targetVector being appended"); - - if (deltaVector.getValueCount() == 0) { - return targetVector; // optimization, nothing to append, return - } - - DenseUnionVector targetDenseUnionVector = (DenseUnionVector) targetVector; - int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); - - // make sure there is enough capacity - while (targetDenseUnionVector.getValueCapacity() < newValueCount) { - targetDenseUnionVector.reAlloc(); - } - - // append type buffers - MemoryUtil.copyMemory( - deltaVector.getTypeBuffer().memoryAddress(), - targetDenseUnionVector.getTypeBuffer().memoryAddress() + targetVector.getValueCount(), - deltaVector.getValueCount()); - - // append offset buffers - for (int i = 0; i < deltaVector.getValueCount(); i++) { - byte typeId = deltaVector.getTypeId(i); - ValueVector targetChildVector = targetDenseUnionVector.getVectorByType(typeId); - int offsetBase = targetChildVector == null ? 0 : targetChildVector.getValueCount(); - int deltaOffset = deltaVector.getOffset(i); - long index = (long) (targetVector.getValueCount() + i) * DenseUnionVector.OFFSET_WIDTH; - - targetVector.getOffsetBuffer().setInt(index, offsetBase + deltaOffset); - } - - // append child vectors - for (int i = 0; i <= Byte.MAX_VALUE; i++) { - ValueVector targetChildVector = targetDenseUnionVector.getVectorByType((byte) i); - ValueVector deltaChildVector = deltaVector.getVectorByType((byte) i); - - if (targetChildVector == null && deltaChildVector == null) { - // the type id is not registered in either vector, we are done. 
- continue; - } else if (targetChildVector == null && deltaChildVector != null) { - // first register a new child in the target vector - targetDenseUnionVector.registerNewTypeId(deltaChildVector.getField()); - targetChildVector = - targetDenseUnionVector.addVector( - (byte) i, - deltaChildVector.getField().createVector(targetDenseUnionVector.getAllocator())); - - // now we have both child vectors not null, we can append them. - VectorAppender childAppender = new VectorAppender(targetChildVector); - deltaChildVector.accept(childAppender, null); - } else if (targetChildVector != null && deltaChildVector == null) { - // the value only exists in the target vector, so we are done - continue; - } else { - // both child vectors are non-null - - // first check vector types - TypeEqualsVisitor childTypeVisitor = - new TypeEqualsVisitor( - targetChildVector, /* check name */ false, /* check meta data*/ false); - if (!childTypeVisitor.equals(deltaChildVector)) { - throw new IllegalArgumentException( - "dense union vectors have different child vector types with type id " + i); - } - - // append child vectors - VectorAppender childAppender = new VectorAppender(targetChildVector); - deltaChildVector.accept(childAppender, null); - } - } - - targetVector.setValueCount(newValueCount); - return targetVector; - } - - @Override - public ValueVector visit(NullVector deltaVector, Void value) { - Preconditions.checkArgument( - targetVector.getField().getType().equals(deltaVector.getField().getType()), - "The targetVector to append must have the same type as the targetVector being appended"); - return targetVector; - } - - @Override - public ValueVector visit(ExtensionTypeVector deltaVector, Void value) { - ValueVector targetUnderlying = ((ExtensionTypeVector) targetVector).getUnderlyingVector(); - VectorAppender underlyingAppender = new VectorAppender(targetUnderlying); - deltaVector.getUnderlyingVector().accept(underlyingAppender, null); - return targetVector; - } -} diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java deleted file mode 100644 index a5d289ffc7774..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import org.apache.arrow.vector.ValueVector; - -/** Utility to add vector values in batch. */ -public class VectorBatchAppender { - - /** - * Add value vectors in batch. - * - * @param targetVector the target vector. - * @param vectorsToAppend the vectors to append. - * @param the vector type. - */ - public static void batchAppend(V targetVector, V... 
vectorsToAppend) { - VectorAppender appender = new VectorAppender(targetVector); - for (V delta : vectorsToAppend) { - delta.accept(appender, null); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorSchemaRootAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorSchemaRootAppender.java deleted file mode 100644 index 96674b4002c95..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorSchemaRootAppender.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.compare.TypeEqualsVisitor; - -/** Utility to append {@link org.apache.arrow.vector.VectorSchemaRoot}s with the same schema. */ -public class VectorSchemaRootAppender { - - /** - * Appends a number of {@link VectorSchemaRoot}s. - * - * @param checkSchema if we need to check schema for the vector schema roots. - * @param targetRoot the vector schema root to be appended. - * @param rootsToAppend the vector schema roots to append. - * @throws IllegalArgumentException throws if we need to check schema, and checking schema fails. 
- */ - public static void append( - boolean checkSchema, VectorSchemaRoot targetRoot, VectorSchemaRoot... rootsToAppend) { - // create appenders - VectorAppender[] appenders = new VectorAppender[targetRoot.getFieldVectors().size()]; - for (int i = 0; i < appenders.length; i++) { - appenders[i] = new VectorAppender(targetRoot.getVector(i)); - } - - // create type checkers, if necessary - TypeEqualsVisitor[] typeCheckers = null; - if (checkSchema) { - typeCheckers = new TypeEqualsVisitor[targetRoot.getFieldVectors().size()]; - for (int i = 0; i < typeCheckers.length; i++) { - typeCheckers[i] = - new TypeEqualsVisitor( - targetRoot.getVector(i), /* check name */ false, /* check meta data */ false); - } - } - - for (VectorSchemaRoot delta : rootsToAppend) { - // check schema, if necessary - if (checkSchema) { - if (delta.getFieldVectors().size() != targetRoot.getFieldVectors().size()) { - throw new IllegalArgumentException( - "Vector schema roots have different numbers of child vectors."); - } - for (int i = 0; i < typeCheckers.length; i++) { - if (!typeCheckers[i].equals(delta.getVector(i))) { - throw new IllegalArgumentException("Vector schema roots have different schemas."); - } - } - } - - // append child vectors. - for (int i = 0; i < appenders.length; i++) { - delta.getVector(i).accept(appenders[i], null); - } - targetRoot.setRowCount(targetRoot.getRowCount() + delta.getRowCount()); - } - } - - /** - * Appends a number of {@link VectorSchemaRoot}s. This method performs schema checking before - * appending data. - * - * @param targetRoot the vector schema root to be appended. - * @param rootsToAppend the vector schema roots to append. - * @throws IllegalArgumentException throws if we need to check schema, and checking schema fails. - */ - public static void append(VectorSchemaRoot targetRoot, VectorSchemaRoot... 
rootsToAppend) { - append(true, targetRoot, rootsToAppend); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/MetadataV4UnionChecker.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/MetadataV4UnionChecker.java deleted file mode 100644 index f127fd6a5805f..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/MetadataV4UnionChecker.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.validate; - -import java.io.IOException; -import java.util.Iterator; -import org.apache.arrow.vector.types.MetadataVersion; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; - -/** - * Given a field, checks that no Union fields are present. - * - *

    This is intended to be used to prevent unions from being read/written with V4 metadata. - */ -public final class MetadataV4UnionChecker { - static boolean isUnion(Field field) { - return field.getType().getTypeID() == ArrowType.ArrowTypeID.Union; - } - - static Field check(Field field) { - if (isUnion(field)) { - return field; - } - // Naive recursive DFS - for (final Field child : field.getChildren()) { - final Field result = check(child); - if (result != null) { - return result; - } - } - return null; - } - - /** - * Check the schema, raising an error if an unsupported feature is used (e.g. unions with < V5 - * metadata). - */ - public static void checkForUnion(Iterator fields, MetadataVersion metadataVersion) { - if (metadataVersion.toFlatbufID() >= MetadataVersion.V5.toFlatbufID()) { - return; - } - while (fields.hasNext()) { - Field union = check(fields.next()); - if (union != null) { - throw new IllegalArgumentException( - "Cannot write union with V4 metadata version, use V5 instead. Found field: " + union); - } - } - } - - /** - * Check the schema, raising an error if an unsupported feature is used (e.g. unions with < V5 - * metadata). - */ - public static void checkRead(Schema schema, MetadataVersion metadataVersion) throws IOException { - if (metadataVersion.toFlatbufID() >= MetadataVersion.V5.toFlatbufID()) { - return; - } - for (final Field field : schema.getFields()) { - Field union = check(field); - if (union != null) { - throw new IOException("Cannot read union with V4 metadata version. 
Found field: " + union); - } - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateUtil.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateUtil.java deleted file mode 100644 index 015ee265d1423..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateUtil.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.validate; - -/** Utilities for vector validation. */ -public class ValidateUtil { - - private ValidateUtil() {} - - /** - * Validate the expression. - * - * @param expression the expression to validate. - * @param errorMessage the error message. - * @throws ValidateException if the expression evaluates to false. - */ - public static void validateOrThrow(boolean expression, String errorMessage) { - if (!expression) { - throw new ValidateException(errorMessage); - } - } - - /** - * Validate the expression. - * - * @param expression the expression to validate. - * @param errorMessage the error message template. - * @param args the error message arguments. - * @throws ValidateException if the expression evaluates to false. 
- */ - public static void validateOrThrow(boolean expression, String errorMessage, Object... args) { - if (!expression) { - throw new ValidateException(String.format(errorMessage, args)); - } - } - - /** A exception that is thrown when the vector validation fails. */ - public static class ValidateException extends RuntimeException { - public ValidateException(String message) { - super(message); - } - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java deleted file mode 100644 index 5c7215437f8ec..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.validate; - -import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseIntVector; -import org.apache.arrow.vector.BaseLargeVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthViewVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.TypeLayout; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.RunEndEncodedVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.types.pojo.ArrowType; - -/** Visitor to validate vector buffers. 
*/ -public class ValidateVectorBufferVisitor implements VectorVisitor { - - private void validateVectorCommon(ValueVector vector) { - ArrowType arrowType = vector.getField().getType(); - validateOrThrow( - vector.getValueCount() >= 0, - "Vector valueCount %s is negative.", - vector.getValueCapacity()); - - if (vector instanceof FieldVector) { - FieldVector fieldVector = (FieldVector) vector; - // TODO: https://github.com/apache/arrow/issues/41734 - int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType); - validateOrThrow( - fieldVector.getFieldBuffers().size() == typeBufferCount, - "Expected %s buffers in vector of type %s, got %s.", - typeBufferCount, - vector.getField().getType().toString(), - fieldVector.getFieldBuffers().size()); - } - } - - private void validateValidityBuffer(ValueVector vector, int valueCount) { - ArrowBuf validityBuffer = vector.getValidityBuffer(); - validateOrThrow(validityBuffer != null, "The validity buffer is null."); - validateOrThrow( - validityBuffer.capacity() * 8 >= valueCount, - "Not enough capacity for the validity buffer. Minimum capacity %s, actual capacity %s.", - (valueCount + 7) / 8, - validityBuffer.capacity()); - } - - private void validateOffsetBuffer(ValueVector vector, long minCapacity) { - ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - validateOrThrow(offsetBuffer != null, "The offset buffer is null."); - validateOrThrow( - offsetBuffer.capacity() >= minCapacity, - "Not enough capacity for the offset buffer. Minimum capacity %s, actual capacity %s.", - minCapacity, - offsetBuffer.capacity()); - } - - private void validateFixedWidthDataBuffer(ValueVector vector, int valueCount, int bitWidth) { - ArrowBuf dataBuffer = vector.getDataBuffer(); - validateOrThrow(dataBuffer != null, "The fixed width data buffer is null."); - validateOrThrow( - (long) bitWidth * valueCount <= dataBuffer.capacity() * 8L, - "Not enough capacity for fixed width data buffer. 
Minimum capacity %s, actual capacity %s.", - ((long) bitWidth * valueCount + 7L) / 8L, - dataBuffer.capacity()); - } - - private void validateDataBuffer(ValueVector vector, long minCapacity) { - ArrowBuf dataBuffer = vector.getDataBuffer(); - validateOrThrow(dataBuffer != null, "The data buffer is null."); - validateOrThrow( - dataBuffer.capacity() >= minCapacity, - "Not enough capacity for data buffer. Minimum capacity %s, actual capacity %s.", - minCapacity, - dataBuffer.capacity()); - } - - private void validateTypeBuffer(ArrowBuf typeBuf, long minCapacity) { - validateOrThrow(typeBuf != null, "The type buffer is null."); - validateOrThrow( - typeBuf.capacity() >= minCapacity, - "Not enough capacity for type buffer. Minimum capacity %s, actual capacity %s.", - minCapacity, - typeBuf.capacity()); - } - - @Override - public Void visit(BaseFixedWidthVector vector, Void value) { - int bitWidth = (vector instanceof BitVector) ? 1 : vector.getTypeWidth() * 8; - int valueCount = vector.getValueCount(); - validateVectorCommon(vector); - validateValidityBuffer(vector, valueCount); - validateFixedWidthDataBuffer(vector, valueCount, bitWidth); - return null; - } - - @Override - public Void visit(BaseVariableWidthVector vector, Void value) { - int valueCount = vector.getValueCount(); - validateVectorCommon(vector); - validateValidityBuffer(vector, valueCount); - long minOffsetCapacity = - valueCount == 0 ? 0L : (long) (valueCount + 1) * BaseVariableWidthVector.OFFSET_WIDTH; - validateOffsetBuffer(vector, minOffsetCapacity); - int lastOffset = - valueCount == 0 - ? 0 - : vector.getOffsetBuffer().getInt(valueCount * BaseVariableWidthVector.OFFSET_WIDTH); - validateDataBuffer(vector, lastOffset); - return null; - } - - @Override - public Void visit(BaseLargeVariableWidthVector vector, Void value) { - int valueCount = vector.getValueCount(); - validateVectorCommon(vector); - validateValidityBuffer(vector, valueCount); - long minOffsetCapacity = - valueCount == 0 ? 
0L : (long) (valueCount + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH; - validateOffsetBuffer(vector, minOffsetCapacity); - long lastOffset = - valueCount == 0 - ? 0L - : vector - .getOffsetBuffer() - .getLong((long) valueCount * BaseLargeVariableWidthVector.OFFSET_WIDTH); - validateDataBuffer(vector, lastOffset); - return null; - } - - @Override - public Void visit(BaseVariableWidthViewVector vector, Void value) { - throw new UnsupportedOperationException("View vectors are not supported."); - } - - @Override - public Void visit(ListVector vector, Void value) { - int valueCount = vector.getValueCount(); - validateVectorCommon(vector); - validateValidityBuffer(vector, valueCount); - long minOffsetCapacity = - valueCount == 0 ? 0L : (long) (valueCount + 1) * ListVector.OFFSET_WIDTH; - validateOffsetBuffer(vector, minOffsetCapacity); - - FieldVector dataVector = vector.getDataVector(); - int lastOffset = - valueCount == 0 - ? 0 - : vector.getOffsetBuffer().getInt(valueCount * BaseVariableWidthVector.OFFSET_WIDTH); - int dataVectorLength = dataVector == null ? 0 : dataVector.getValueCount(); - validateOrThrow( - dataVectorLength >= lastOffset, - "Inner vector does not contain enough elements. Minimum element count %s, actual element count %s", - lastOffset + 1, - dataVectorLength); - - if (dataVector != null) { - dataVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(FixedSizeListVector vector, Void value) { - int valueCount = vector.getValueCount(); - validateVectorCommon(vector); - validateValidityBuffer(vector, valueCount); - FieldVector dataVector = vector.getDataVector(); - int dataVectorLength = dataVector == null ? 0 : dataVector.getValueCount(); - validateOrThrow( - dataVectorLength >= valueCount * vector.getListSize(), - "Inner vector does not contain enough elements. 
Minimum element count %s, actual element count %s.", - valueCount * vector.getListSize(), - dataVectorLength); - if (dataVector != null) { - dataVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(LargeListVector vector, Void value) { - int valueCount = vector.getValueCount(); - validateVectorCommon(vector); - validateValidityBuffer(vector, valueCount); - long minOffsetCapacity = - valueCount == 0 ? 0L : (long) (valueCount + 1) * LargeListVector.OFFSET_WIDTH; - validateOffsetBuffer(vector, minOffsetCapacity); - - FieldVector dataVector = vector.getDataVector(); - long lastOffset = - valueCount == 0 - ? 0 - : vector - .getOffsetBuffer() - .getLong(valueCount * BaseLargeVariableWidthVector.OFFSET_WIDTH); - int dataVectorLength = dataVector == null ? 0 : dataVector.getValueCount(); - validateOrThrow( - dataVectorLength >= lastOffset, - "Inner vector does not contain enough elements. Minimum element count %s, actual element count %s", - lastOffset + 1, - dataVectorLength); - - if (dataVector != null) { - dataVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(NonNullableStructVector vector, Void value) { - int valueCount = vector.getValueCount(); - validateVectorCommon(vector); - validateValidityBuffer(vector, valueCount); - for (ValueVector subVector : vector.getChildrenFromFields()) { - validateOrThrow( - valueCount == subVector.getValueCount(), - "Struct vector length not equal to child vector length. 
Struct vector length %s, child vector length %s", - valueCount, - subVector.getValueCount()); - subVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(UnionVector vector, Void value) { - int valueCount = vector.getValueCount(); - validateVectorCommon(vector); - validateTypeBuffer(vector.getTypeBuffer(), valueCount * UnionVector.TYPE_WIDTH); - for (ValueVector subVector : vector.getChildrenFromFields()) { - validateOrThrow( - valueCount == subVector.getValueCount(), - "Union vector length not equal to child vector length. Union vector length %s, child vector length %s", - valueCount, - subVector.getValueCount()); - subVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(DenseUnionVector vector, Void value) { - int valueCount = vector.getValueCount(); - validateVectorCommon(vector); - validateOffsetBuffer(vector, (long) valueCount * DenseUnionVector.OFFSET_WIDTH); - validateTypeBuffer(vector.getTypeBuffer(), valueCount * DenseUnionVector.TYPE_WIDTH); - for (ValueVector subVector : vector.getChildrenFromFields()) { - subVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(NullVector vector, Void value) { - return null; - } - - @Override - public Void visit(ExtensionTypeVector vector, Void value) { - vector.getUnderlyingVector().accept(this, value); - return null; - } - - @Override - public Void visit(RunEndEncodedVector vector, Void value) { - validateVectorCommon(vector); - int valueCount = vector.getValueCount(); - FieldVector runEndsVector = vector.getRunEndsVector(); - - if (runEndsVector != null) { - validateOrThrow( - runEndsVector.getNullCount() == 0, "Run ends vector cannot contain null values"); - runEndsVector.accept(this, null); - - int runCount = runEndsVector.getValueCount(); - if (runCount == 0) { - validateOrThrow(valueCount == 0, "Run end vector does not contain enough elements"); - } else if (runCount > 0) { - long lastEnd = ((BaseIntVector) 
runEndsVector).getValueAsLong(runCount - 1); - validateOrThrow( - valueCount == lastEnd, - "Vector logic length not equal to the last end in run ends vector. Logical length %s, last end %s", - valueCount, - lastEnd); - } - } - - FieldVector valuesVector = vector.getValuesVector(); - if (valuesVector != null) { - valuesVector.accept(this, null); - } - - return null; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java deleted file mode 100644 index c62bff79f7710..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.validate; - -import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseLargeVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthViewVector; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.UnionVector; - -/** Utility for validating vector data. */ -public class ValidateVectorDataVisitor implements VectorVisitor { - - private void validateOffsetBuffer(ValueVector vector, int valueCount) { - if (valueCount == 0) { - return; - } - ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - - // verify that the values in the offset buffer is non-decreasing - int prevValue = offsetBuffer.getInt(0); - for (int i = 1; i <= valueCount; i++) { - int curValue = offsetBuffer.getInt(i * 4); - validateOrThrow( - curValue >= 0, - "The value at position %s of the offset buffer is negative: %s.", - i, - curValue); - validateOrThrow( - curValue >= prevValue, - "The values in positions %s and %s of the offset buffer are decreasing: %s, %s.", - i - 1, - i, - prevValue, - curValue); - prevValue = curValue; - } - } - - private void validateLargeOffsetBuffer(ValueVector vector, int valueCount) { - if (valueCount == 0) { - return; - } - ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - - // verify that the values in the large offset buffer is non-decreasing - 
long prevValue = offsetBuffer.getLong(0); - for (int i = 1; i <= valueCount; i++) { - long curValue = offsetBuffer.getLong((long) i * 8); - validateOrThrow( - curValue >= 0L, - "The value at position %s of the large offset buffer is negative: %s.", - i, - curValue); - validateOrThrow( - curValue >= prevValue, - "The values in positions %s and %s of the large offset buffer are decreasing: %s, %s.", - i - 1, - i, - prevValue, - curValue); - prevValue = curValue; - } - } - - private void validateTypeBuffer(ArrowBuf typeBuf, int valueCount) { - for (int i = 0; i < valueCount; i++) { - validateOrThrow( - typeBuf.getByte(i) >= 0, - "The type id at position %s is negative: %s.", - i, - typeBuf.getByte(i)); - } - } - - @Override - public Void visit(BaseFixedWidthVector vector, Void value) { - vector.validateScalars(); - return null; - } - - @Override - public Void visit(BaseVariableWidthVector vector, Void value) { - validateOffsetBuffer(vector, vector.getValueCount()); - vector.validateScalars(); - return null; - } - - @Override - public Void visit(BaseLargeVariableWidthVector vector, Void value) { - validateLargeOffsetBuffer(vector, vector.getValueCount()); - vector.validateScalars(); - return null; - } - - @Override - public Void visit(BaseVariableWidthViewVector vector, Void value) { - throw new UnsupportedOperationException("View vectors are not supported."); - } - - @Override - public Void visit(ListVector vector, Void value) { - validateOffsetBuffer(vector, vector.getValueCount()); - ValueVector innerVector = vector.getDataVector(); - if (innerVector != null) { - innerVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(FixedSizeListVector vector, Void value) { - validateOffsetBuffer(vector, vector.getValueCount()); - ValueVector innerVector = vector.getDataVector(); - if (innerVector != null) { - innerVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(LargeListVector vector, Void value) { - 
validateLargeOffsetBuffer(vector, vector.getValueCount()); - ValueVector innerVector = vector.getDataVector(); - if (innerVector != null) { - innerVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(NonNullableStructVector vector, Void value) { - for (ValueVector subVector : vector.getChildrenFromFields()) { - subVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(UnionVector vector, Void value) { - validateTypeBuffer(vector.getTypeBuffer(), vector.getValueCount()); - for (ValueVector subVector : vector.getChildrenFromFields()) { - subVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(DenseUnionVector vector, Void value) { - validateTypeBuffer(vector.getTypeBuffer(), vector.getValueCount()); - - // validate offset buffer - for (int i = 0; i < vector.getValueCount(); i++) { - int offset = vector.getOffset(i); - byte typeId = vector.getTypeId(i); - ValueVector subVector = vector.getVectorByType(typeId); - validateOrThrow( - offset < subVector.getValueCount(), - "Dense union vector offset exceeds sub-vector boundary. 
Vector offset %s, sub vector size %s", - offset, - subVector.getValueCount()); - } - - for (ValueVector subVector : vector.getChildrenFromFields()) { - subVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(NullVector vector, Void value) { - ValidateUtil.validateOrThrow( - vector.getNullCount() == vector.getValueCount(), - "NullVector should have only null entries."); - return null; - } - - @Override - public Void visit(ExtensionTypeVector vector, Void value) { - vector.getUnderlyingVector().accept(this, value); - return null; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java deleted file mode 100644 index daad41dbdc2ce..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java +++ /dev/null @@ -1,493 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.validate; - -import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow; - -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseLargeVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthViewVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TinyIntVector; -import 
org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.RunEndEncodedVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.IntervalUnit; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; - -/** Utility to validate vector type information. 
*/ -public class ValidateVectorTypeVisitor implements VectorVisitor { - - private void validateVectorCommon( - ValueVector vector, Class expectedArrowType) { - validateOrThrow(vector.getField() != null, "Vector field is empty."); - validateOrThrow(vector.getField().getFieldType() != null, "Vector field type is empty."); - ArrowType arrowType = vector.getField().getFieldType().getType(); - validateOrThrow(arrowType != null, "Vector arrow type is empty."); - validateOrThrow( - expectedArrowType == arrowType.getClass(), - "Incorrect arrow type for " + vector.getClass() + " : " + arrowType.toString()); - } - - private void validateIntVector(ValueVector vector, int expectedWidth, boolean expectedSigned) { - validateOrThrow( - vector.getField().getFieldType().getType() instanceof ArrowType.Int, - "Vector %s is not an integer vector.", - vector.getClass()); - ArrowType.Int intType = (ArrowType.Int) vector.getField().getFieldType().getType(); - validateOrThrow( - intType.getIsSigned() == expectedSigned, - "Expecting bit width %s, actual width %s.", - expectedWidth, - intType.getBitWidth()); - validateOrThrow( - intType.getBitWidth() == expectedWidth, - "Expecting bit width %s, actual bit width %s.", - expectedWidth, - intType.getBitWidth()); - } - - private void validateFloatingPointVector( - ValueVector vector, FloatingPointPrecision expectedPrecision) { - validateOrThrow( - vector.getField().getFieldType().getType() instanceof ArrowType.FloatingPoint, - "Vector %s is not a floating point vector.", - vector.getClass()); - ArrowType.FloatingPoint floatType = - (ArrowType.FloatingPoint) vector.getField().getFieldType().getType(); - validateOrThrow( - floatType.getPrecision() == expectedPrecision, - "Expecting precision %s, actual precision %s.", - expectedPrecision, - floatType.getPrecision()); - } - - private void validateDateVector(ValueVector vector, DateUnit expectedDateUnit) { - validateOrThrow( - vector.getField().getFieldType().getType() instanceof ArrowType.Date, - 
"Vector %s is not a date vector", - vector.getClass()); - ArrowType.Date dateType = (ArrowType.Date) vector.getField().getFieldType().getType(); - validateOrThrow( - dateType.getUnit() == expectedDateUnit, - "Expecting date unit %s, actual date unit %s.", - expectedDateUnit, - dateType.getUnit()); - } - - private void validateDecimalVector(ValueVector vector) { - validateOrThrow( - vector.getField().getFieldType().getType() instanceof ArrowType.Decimal, - "Vector %s is not a decimal vector", - vector.getClass()); - ArrowType.Decimal decimalType = (ArrowType.Decimal) vector.getField().getFieldType().getType(); - validateOrThrow( - decimalType.getScale() >= 0, - "The scale of decimal %s is negative.", - decimalType.getScale()); - validateOrThrow( - decimalType.getScale() <= decimalType.getPrecision(), - "The scale of decimal %s is greater than the precision %s.", - decimalType.getScale(), - decimalType.getPrecision()); - switch (decimalType.getBitWidth()) { - case DecimalVector.TYPE_WIDTH * 8: - validateOrThrow( - decimalType.getPrecision() >= 1 - && decimalType.getPrecision() <= DecimalVector.MAX_PRECISION, - "Invalid precision %s for decimal 128.", - decimalType.getPrecision()); - break; - case Decimal256Vector.TYPE_WIDTH * 8: - validateOrThrow( - decimalType.getPrecision() >= 1 - && decimalType.getPrecision() <= Decimal256Vector.MAX_PRECISION, - "Invalid precision %s for decimal 256.", - decimalType.getPrecision()); - break; - default: - throw new ValidateUtil.ValidateException( - "Only decimal 128 or decimal 256 are supported for decimal types"); - } - } - - private void validateTimeVector( - ValueVector vector, TimeUnit expectedTimeUnit, int expectedBitWidth) { - validateOrThrow( - vector.getField().getFieldType().getType() instanceof ArrowType.Time, - "Vector %s is not a time vector.", - vector.getClass()); - ArrowType.Time timeType = (ArrowType.Time) vector.getField().getFieldType().getType(); - validateOrThrow( - timeType.getUnit() == expectedTimeUnit, - 
"Expecting time unit %s, actual time unit %s.", - expectedTimeUnit, - timeType.getUnit()); - validateOrThrow( - timeType.getBitWidth() == expectedBitWidth, - "Expecting bit width %s, actual bit width %s.", - expectedBitWidth, - timeType.getBitWidth()); - } - - private void validateIntervalVector(ValueVector vector, IntervalUnit expectedIntervalUnit) { - validateOrThrow( - vector.getField().getFieldType().getType() instanceof ArrowType.Interval, - "Vector %s is not an interval vector.", - vector.getClass()); - ArrowType.Interval intervalType = - (ArrowType.Interval) vector.getField().getFieldType().getType(); - validateOrThrow( - intervalType.getUnit() == expectedIntervalUnit, - "Expecting interval unit %s, actual date unit %s.", - expectedIntervalUnit, - intervalType.getUnit()); - } - - private void validateTimeStampVector( - ValueVector vector, TimeUnit expectedTimeUnit, boolean expectTZ) { - validateOrThrow( - vector.getField().getFieldType().getType() instanceof ArrowType.Timestamp, - "Vector %s is not a time stamp vector.", - vector.getClass()); - ArrowType.Timestamp timestampType = - (ArrowType.Timestamp) vector.getField().getFieldType().getType(); - validateOrThrow( - timestampType.getUnit() == expectedTimeUnit, - "Expecting time stamp unit %s, actual time stamp unit %s.", - expectedTimeUnit, - timestampType.getUnit()); - if (expectTZ) { - validateOrThrow(timestampType.getTimezone() != null, "The time zone should not be null"); - } else { - validateOrThrow(timestampType.getTimezone() == null, "The time zone should be null"); - } - } - - private void validateExtensionTypeVector(ExtensionTypeVector vector) { - validateOrThrow( - vector.getField().getFieldType().getType() instanceof ArrowType.ExtensionType, - "Vector %s is not an extension type vector.", - vector.getClass()); - validateOrThrow( - vector - .getField() - .getMetadata() - .containsKey(ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME), - "Field %s does not have proper extension type metadata: 
%s", - vector.getField().getName(), - vector.getField().getMetadata()); - // Validate the storage vector type - vector.getUnderlyingVector().accept(this, null); - } - - @Override - public Void visit(BaseFixedWidthVector vector, Void value) { - if (vector instanceof TinyIntVector) { - validateVectorCommon(vector, ArrowType.Int.class); - validateIntVector(vector, 8, true); - } else if (vector instanceof SmallIntVector) { - validateVectorCommon(vector, ArrowType.Int.class); - validateIntVector(vector, 16, true); - } else if (vector instanceof IntVector) { - validateVectorCommon(vector, ArrowType.Int.class); - validateIntVector(vector, 32, true); - } else if (vector instanceof BigIntVector) { - validateVectorCommon(vector, ArrowType.Int.class); - validateIntVector(vector, 64, true); - } else if (vector instanceof UInt1Vector) { - validateVectorCommon(vector, ArrowType.Int.class); - validateIntVector(vector, 8, false); - } else if (vector instanceof UInt2Vector) { - validateVectorCommon(vector, ArrowType.Int.class); - validateIntVector(vector, 16, false); - } else if (vector instanceof UInt4Vector) { - validateVectorCommon(vector, ArrowType.Int.class); - validateIntVector(vector, 32, false); - } else if (vector instanceof UInt8Vector) { - validateVectorCommon(vector, ArrowType.Int.class); - validateIntVector(vector, 64, false); - } else if (vector instanceof BitVector) { - validateVectorCommon(vector, ArrowType.Bool.class); - } else if (vector instanceof DecimalVector || vector instanceof Decimal256Vector) { - validateVectorCommon(vector, ArrowType.Decimal.class); - validateDecimalVector(vector); - } else if (vector instanceof DateDayVector) { - validateVectorCommon(vector, ArrowType.Date.class); - validateDateVector(vector, DateUnit.DAY); - } else if (vector instanceof DateMilliVector) { - validateVectorCommon(vector, ArrowType.Date.class); - validateDateVector(vector, DateUnit.MILLISECOND); - } else if (vector instanceof DurationVector) { - 
validateVectorCommon(vector, ArrowType.Duration.class); - ArrowType.Duration arrowType = (ArrowType.Duration) vector.getField().getType(); - validateOrThrow( - ((DurationVector) vector).getUnit() == arrowType.getUnit(), - "Different duration time unit for vector and arrow type. Vector time unit %s, type time unit %s.", - ((DurationVector) vector).getUnit(), - arrowType.getUnit()); - } else if (vector instanceof Float4Vector) { - validateVectorCommon(vector, ArrowType.FloatingPoint.class); - validateFloatingPointVector(vector, FloatingPointPrecision.SINGLE); - } else if (vector instanceof Float8Vector) { - validateVectorCommon(vector, ArrowType.FloatingPoint.class); - validateFloatingPointVector(vector, FloatingPointPrecision.DOUBLE); - } else if (vector instanceof IntervalDayVector) { - validateVectorCommon(vector, ArrowType.Interval.class); - validateIntervalVector(vector, IntervalUnit.DAY_TIME); - } else if (vector instanceof IntervalMonthDayNanoVector) { - validateVectorCommon(vector, ArrowType.Interval.class); - validateIntervalVector(vector, IntervalUnit.MONTH_DAY_NANO); - } else if (vector instanceof IntervalYearVector) { - validateVectorCommon(vector, ArrowType.Interval.class); - validateIntervalVector(vector, IntervalUnit.YEAR_MONTH); - } else if (vector instanceof TimeMicroVector) { - validateVectorCommon(vector, ArrowType.Time.class); - validateTimeVector(vector, TimeUnit.MICROSECOND, 64); - } else if (vector instanceof TimeMilliVector) { - validateVectorCommon(vector, ArrowType.Time.class); - validateTimeVector(vector, TimeUnit.MILLISECOND, 32); - } else if (vector instanceof TimeNanoVector) { - validateVectorCommon(vector, ArrowType.Time.class); - validateTimeVector(vector, TimeUnit.NANOSECOND, 64); - } else if (vector instanceof TimeSecVector) { - validateVectorCommon(vector, ArrowType.Time.class); - validateTimeVector(vector, TimeUnit.SECOND, 32); - } else if (vector instanceof TimeStampMicroTZVector) { - validateVectorCommon(vector, 
ArrowType.Timestamp.class); - validateTimeStampVector(vector, TimeUnit.MICROSECOND, true); - } else if (vector instanceof TimeStampMicroVector) { - validateVectorCommon(vector, ArrowType.Timestamp.class); - validateTimeStampVector(vector, TimeUnit.MICROSECOND, false); - } else if (vector instanceof TimeStampMilliTZVector) { - validateVectorCommon(vector, ArrowType.Timestamp.class); - validateTimeStampVector(vector, TimeUnit.MILLISECOND, true); - } else if (vector instanceof TimeStampMilliVector) { - validateVectorCommon(vector, ArrowType.Timestamp.class); - validateTimeStampVector(vector, TimeUnit.MILLISECOND, false); - } else if (vector instanceof TimeStampNanoTZVector) { - validateVectorCommon(vector, ArrowType.Timestamp.class); - validateTimeStampVector(vector, TimeUnit.NANOSECOND, true); - } else if (vector instanceof TimeStampNanoVector) { - validateVectorCommon(vector, ArrowType.Timestamp.class); - validateTimeStampVector(vector, TimeUnit.NANOSECOND, false); - } else if (vector instanceof TimeStampSecTZVector) { - validateVectorCommon(vector, ArrowType.Timestamp.class); - validateTimeStampVector(vector, TimeUnit.SECOND, true); - } else if (vector instanceof TimeStampSecVector) { - validateVectorCommon(vector, ArrowType.Timestamp.class); - validateTimeStampVector(vector, TimeUnit.SECOND, false); - } else if (vector instanceof FixedSizeBinaryVector) { - validateVectorCommon(vector, ArrowType.FixedSizeBinary.class); - ArrowType.FixedSizeBinary arrowType = (ArrowType.FixedSizeBinary) vector.getField().getType(); - validateOrThrow( - arrowType.getByteWidth() > 0, - "The byte width of a FixedSizeBinaryVector %s is not positive.", - arrowType.getByteWidth()); - validateOrThrow( - arrowType.getByteWidth() == vector.getTypeWidth(), - "Type width mismatch for FixedSizeBinaryVector. 
Vector type width %s, arrow type width %s.", - vector.getTypeWidth(), - arrowType.getByteWidth()); - } else { - throw new IllegalArgumentException( - "Unknown type for fixed width vector " + vector.getClass()); - } - return null; - } - - @Override - public Void visit(BaseVariableWidthVector vector, Void value) { - if (vector instanceof VarCharVector) { - validateVectorCommon(vector, ArrowType.Utf8.class); - } else if (vector instanceof VarBinaryVector) { - validateVectorCommon(vector, ArrowType.Binary.class); - } - return null; - } - - @Override - public Void visit(BaseLargeVariableWidthVector vector, Void value) { - if (vector instanceof LargeVarCharVector) { - validateVectorCommon(vector, ArrowType.LargeUtf8.class); - } else if (vector instanceof LargeVarBinaryVector) { - validateVectorCommon(vector, ArrowType.LargeBinary.class); - } - return null; - } - - @Override - public Void visit(BaseVariableWidthViewVector vector, Void value) { - throw new UnsupportedOperationException("View vectors are not supported."); - } - - @Override - public Void visit(ListVector vector, Void value) { - validateVectorCommon(vector, ArrowType.List.class); - ValueVector innerVector = vector.getDataVector(); - if (innerVector != null) { - innerVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(FixedSizeListVector vector, Void value) { - validateVectorCommon(vector, ArrowType.FixedSizeList.class); - ArrowType.FixedSizeList arrowType = (ArrowType.FixedSizeList) vector.getField().getType(); - validateOrThrow( - arrowType.getListSize() == vector.getListSize(), - "Inconsistent list size for FixedSizeListVector. 
Vector list size %s, arrow type list size %s.", - vector.getListSize(), - arrowType.getListSize()); - validateOrThrow( - arrowType.getListSize() > 0, "The list size %s is not positive.", arrowType.getListSize()); - ValueVector innerVector = vector.getDataVector(); - if (innerVector != null) { - innerVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(LargeListVector vector, Void value) { - validateVectorCommon(vector, ArrowType.LargeList.class); - ValueVector innerVector = vector.getDataVector(); - if (innerVector != null) { - innerVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(NonNullableStructVector vector, Void value) { - validateVectorCommon(vector, ArrowType.Struct.class); - validateOrThrow( - vector.getField().getChildren().size() == vector.getChildrenFromFields().size(), - "Child field count and child vector count mismatch. Vector child count %s, field child count %s", - vector.getChildrenFromFields().size(), - vector.getField().getChildren().size()); - for (int i = 0; i < vector.getChildrenFromFields().size(); i++) { - ValueVector subVector = vector.getChildByOrdinal(i); - FieldType subType = vector.getField().getChildren().get(i).getFieldType(); - - validateOrThrow( - subType.equals(subVector.getField().getFieldType()), - "Struct vector's field type not equal to the child vector's field type. 
" - + "Struct field type %s, sub-vector field type %s", - subType, - subVector.getField().getFieldType()); - subVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(UnionVector vector, Void value) { - validateVectorCommon(vector, ArrowType.Union.class); - ArrowType.Union arrowType = (ArrowType.Union) vector.getField().getType(); - validateOrThrow( - arrowType.getMode() == UnionMode.Sparse, "The union mode of UnionVector must be sparse"); - for (ValueVector subVector : vector.getChildrenFromFields()) { - subVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(DenseUnionVector vector, Void value) { - validateVectorCommon(vector, ArrowType.Union.class); - ArrowType.Union arrowType = (ArrowType.Union) vector.getField().getType(); - validateOrThrow( - arrowType.getMode() == UnionMode.Dense, "The union mode of DenseUnionVector must be dense"); - for (ValueVector subVector : vector.getChildrenFromFields()) { - subVector.accept(this, null); - } - return null; - } - - @Override - public Void visit(NullVector vector, Void value) { - validateVectorCommon(vector, ArrowType.Null.class); - return null; - } - - @Override - public Void visit(ExtensionTypeVector vector, Void value) { - validateExtensionTypeVector(vector); - return null; - } - - @Override - public Void visit(RunEndEncodedVector vector, Void value) { - validateVectorCommon(vector, ArrowType.RunEndEncoded.class); - for (ValueVector subVector : vector.getChildrenFromFields()) { - if (subVector != null) { - subVector.accept(this, null); - } - } - return null; - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java deleted file mode 100644 index 5004ba488cacd..0000000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Licensed to the 
Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.validate; - -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.vector.BaseFixedWidthVector; -import org.apache.arrow.vector.BaseLargeVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.BaseVariableWidthViewVector; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.compare.VectorVisitor; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.util.ValueVectorUtility; - -/** - * Visitor to validate vector (without validating data). 
This visitor could be used for {@link - * ValueVector#accept(VectorVisitor, Object)} API, and also users could simply use {@link - * ValueVectorUtility#validate(ValueVector)}. - */ -public class ValidateVectorVisitor implements VectorVisitor { - - @Override - public Void visit(BaseFixedWidthVector vector, Void value) { - if (vector.getValueCount() > 0) { - if (vector.getDataBuffer() == null || vector.getDataBuffer().capacity() == 0) { - throw new IllegalArgumentException("valueBuffer is null or capacity is 0"); - } - } - return null; - } - - @Override - public Void visit(BaseVariableWidthVector vector, Void value) { - - if (vector.getValueCount() > 0) { - if (vector.getDataBuffer() == null || vector.getDataBuffer().capacity() == 0) { - throw new IllegalArgumentException("valueBuffer is null or capacity is 0"); - } - - ArrowBuf offsetBuf = vector.getOffsetBuffer(); - int minBufferSize = (vector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH; - - if (offsetBuf.capacity() < minBufferSize) { - throw new IllegalArgumentException( - String.format( - "offsetBuffer too small in vector of type %s" - + " and valueCount %s : expected at least %s byte(s), got %s", - vector.getField().getType().toString(), - vector.getValueCount(), - minBufferSize, - offsetBuf.capacity())); - } - - int firstOffset = vector.getOffsetBuffer().getInt(0); - int lastOffset = - vector - .getOffsetBuffer() - .getInt(vector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); - - if (firstOffset < 0 || lastOffset < 0) { - throw new IllegalArgumentException("Negative offsets in vector"); - } - - int dataExtent = lastOffset - firstOffset; - - if (dataExtent > 0 && (vector.getDataBuffer().capacity() == 0)) { - throw new IllegalArgumentException("dataBuffer capacity is 0"); - } - - if (dataExtent > vector.getDataBuffer().capacity()) { - throw new IllegalArgumentException( - String.format( - "Length spanned by offsets %s larger than" + " dataBuffer capacity %s", - dataExtent, 
vector.getValueCount())); - } - } - return null; - } - - @Override - public Void visit(BaseLargeVariableWidthVector left, Void value) { - return null; - } - - @Override - public Void visit(BaseVariableWidthViewVector left, Void value) { - throw new UnsupportedOperationException("View vectors are not supported."); - } - - @Override - public Void visit(ListVector vector, Void value) { - - FieldVector dataVector = vector.getDataVector(); - - if (vector.getValueCount() > 0) { - - ArrowBuf offsetBuf = vector.getOffsetBuffer(); - int minBufferSize = (vector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH; - - if (offsetBuf.capacity() < minBufferSize) { - throw new IllegalArgumentException( - String.format( - "offsetBuffer too small in vector of type %s" - + " and valueCount %s : expected at least %s byte(s), got %s", - vector.getField().getType().toString(), - vector.getValueCount(), - minBufferSize, - offsetBuf.capacity())); - } - - int firstOffset = vector.getOffsetBuffer().getInt(0); - int lastOffset = - vector - .getOffsetBuffer() - .getInt(vector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH); - - if (firstOffset < 0 || lastOffset < 0) { - throw new IllegalArgumentException("Negative offsets in list vector"); - } - - int dataExtent = lastOffset - firstOffset; - - if (dataExtent > 0 - && (dataVector.getDataBuffer() == null || dataVector.getDataBuffer().capacity() == 0)) { - throw new IllegalArgumentException("valueBuffer is null or capacity is 0"); - } - - if (dataExtent > dataVector.getValueCount()) { - throw new IllegalArgumentException( - String.format( - "Length spanned by list offsets (%s) larger than" - + " data vector valueCount (length %s)", - dataExtent, dataVector.getValueCount())); - } - } - - return dataVector.accept(this, null); - } - - @Override - public Void visit(LargeListVector vector, Void value) { - - FieldVector dataVector = vector.getDataVector(); - - if (vector.getValueCount() > 0) { - - ArrowBuf offsetBuf = 
vector.getOffsetBuffer(); - long minBufferSize = (vector.getValueCount() + 1) * LargeListVector.OFFSET_WIDTH; - - if (offsetBuf.capacity() < minBufferSize) { - throw new IllegalArgumentException( - String.format( - "offsetBuffer too small in vector of type %s" - + " and valueCount %s : expected at least %s byte(s), got %s", - vector.getField().getType().toString(), - vector.getValueCount(), - minBufferSize, - offsetBuf.capacity())); - } - - long firstOffset = vector.getOffsetBuffer().getLong(0); - long lastOffset = - vector.getOffsetBuffer().getLong(vector.getValueCount() * LargeListVector.OFFSET_WIDTH); - - if (firstOffset < 0 || lastOffset < 0) { - throw new IllegalArgumentException("Negative offsets in list vector"); - } - - long dataExtent = lastOffset - firstOffset; - - if (dataExtent > 0 - && (dataVector.getDataBuffer() == null || dataVector.getDataBuffer().capacity() == 0)) { - throw new IllegalArgumentException("valueBuffer is null or capacity is 0"); - } - - if (dataExtent > dataVector.getValueCount()) { - throw new IllegalArgumentException( - String.format( - "Length spanned by list offsets (%s) larger than" - + " data vector valueCount (length %s)", - dataExtent, dataVector.getValueCount())); - } - } - - return dataVector.accept(this, null); - } - - @Override - public Void visit(FixedSizeListVector vector, Void value) { - - FieldVector dataVector = vector.getDataVector(); - int valueCount = vector.getValueCount(); - int listSize = vector.getListSize(); - - if (valueCount > 0 - && (dataVector.getDataBuffer() == null || dataVector.getDataBuffer().capacity() == 0)) { - throw new IllegalArgumentException("valueBuffer is null or capacity is 0"); - } - - if (valueCount * listSize != dataVector.getValueCount()) { - throw new IllegalArgumentException( - String.format( - "data vector valueCount invalid, expect %s, " + "actual is: %s", - valueCount * listSize, dataVector.getValueCount())); - } - - return null; - } - - @Override - public Void 
visit(NonNullableStructVector vector, Void value) { - - List childFields = vector.getField().getChildren(); - final int valueCount = vector.getValueCount(); - - for (int i = 0; i < childFields.size(); i++) { - FieldVector child = vector.getChildrenFromFields().get(i); - - if (child.getValueCount() != valueCount) { - throw new IllegalArgumentException( - String.format( - "struct child vector #%s valueCount is not equals with " - + "struct vector, expect %s, actual %s", - i, vector.getValueCount(), child.getValueCount())); - } - - if (!childFields.get(i).getType().equals(child.getField().getType())) { - throw new IllegalArgumentException( - String.format( - "struct child vector #%s does not match type: %s vs %s", - i, childFields.get(i).getType().toString(), child.getField().getType().toString())); - } - - child.accept(this, null); - } - return null; - } - - @Override - public Void visit(UnionVector vector, Void value) { - - List childFields = vector.getField().getChildren(); - final int valueCount = vector.getValueCount(); - - for (int i = 0; i < childFields.size(); i++) { - FieldVector child = vector.getChildrenFromFields().get(i); - - if (child.getValueCount() != valueCount) { - throw new IllegalArgumentException( - String.format( - "union child vector #%s valueCount is not equals with union" - + " vector, expect %s, actual %s", - i, vector.getValueCount(), child.getValueCount())); - } - - if (!childFields.get(i).getType().equals(child.getField().getType())) { - throw new IllegalArgumentException( - String.format( - "union child vector #%s does not match type: %s vs %s", - i, childFields.get(i).getType().toString(), child.getField().getType().toString())); - } - - child.accept(this, null); - } - return null; - } - - @Override - public Void visit(DenseUnionVector vector, Void value) { - - List childFields = vector.getField().getChildren(); - for (int i = 0; i < childFields.size(); i++) { - FieldVector child = vector.getChildrenFromFields().get(i); - - if 
(!childFields.get(i).getType().equals(child.getField().getType())) { - throw new IllegalArgumentException( - String.format( - "union child vector #%s does not match type: %s vs %s", - i, childFields.get(i).getType().toString(), child.getField().getType().toString())); - } - - child.accept(this, null); - } - return null; - } - - @Override - public Void visit(NullVector vector, Void value) { - return null; - } - - @Override - public Void visit(ExtensionTypeVector vector, Void value) { - vector.getUnderlyingVector().accept(this, value); - return null; - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/DirtyRootAllocator.java b/java/vector/src/test/java/org/apache/arrow/vector/DirtyRootAllocator.java deleted file mode 100644 index e650d01bcc53f..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/DirtyRootAllocator.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferManager; -import org.apache.arrow.memory.RootAllocator; - -/** - * Root allocator that returns buffers pre-filled with a given value.
    - * Useful for testing if value vectors are properly zeroing their buffers. - */ -public class DirtyRootAllocator extends RootAllocator { - - private final byte fillValue; - - public DirtyRootAllocator(final long limit, final byte fillValue) { - super(limit); - this.fillValue = fillValue; - } - - @Override - public ArrowBuf buffer(long size) { - return buffer(size, null); - } - - @Override - public ArrowBuf buffer(long size, BufferManager manager) { - ArrowBuf buffer = super.buffer(size, manager); - // contaminate the buffer - for (int i = 0; i < buffer.capacity(); i++) { - buffer.setByte(i, fillValue); - } - - return buffer; - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java b/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java deleted file mode 100644 index 00e7368d40440..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.holders.NullableDecimalHolder; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Integration test for a vector with a large (more than 2GB) {@link - * org.apache.arrow.memory.ArrowBuf} as the data buffer. To run this test, please make sure there is - * at least 4GB free memory in the system. - */ -public class ITTestLargeVector { - private static final Logger logger = LoggerFactory.getLogger(ITTestLargeVector.class); - - @Test - public void testLargeLongVector() { - logger.trace("Testing large big int vector."); - - final long bufSize = 4 * 1024 * 1024 * 1024L; - final int vecLength = (int) (bufSize / BigIntVector.TYPE_WIDTH); - - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - BigIntVector largeVec = new BigIntVector("vec", allocator)) { - largeVec.allocateNew(vecLength); - - logger.trace("Successfully allocated a vector with capacity {}", vecLength); - - for (int i = 0; i < vecLength; i++) { - largeVec.set(i, i * 10L); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully written {} values", i + 1); - } - } - logger.trace("Successfully written {} values", vecLength); - - for (int i = 0; i < vecLength; i++) { - long val = largeVec.get(i); - assertEquals(i * 10L, val); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully read {} values", i + 1); - } - } - logger.trace("Successfully read {} values", vecLength); - } - logger.trace("Successfully released the large vector."); - } - - @Test - public void 
testLargeIntVector() { - logger.trace("Testing large int vector."); - - final long bufSize = 4 * 1024 * 1024 * 1024L; - final int vecLength = (int) (bufSize / IntVector.TYPE_WIDTH); - - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - IntVector largeVec = new IntVector("vec", allocator)) { - largeVec.allocateNew(vecLength); - - logger.trace("Successfully allocated a vector with capacity {}", vecLength); - - for (int i = 0; i < vecLength; i++) { - largeVec.set(i, i); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully written {} values", i + 1); - } - } - logger.trace("Successfully written {} values", vecLength); - - for (int i = 0; i < vecLength; i++) { - long val = largeVec.get(i); - assertEquals(i, val); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully read {} values", i + 1); - } - } - logger.trace("Successfully read {} values", vecLength); - } - logger.trace("Successfully released the large vector."); - } - - @Test - public void testLargeDecimalVector() { - logger.trace("Testing large decimal vector."); - - final long bufSize = 4 * 1024 * 1024 * 1024L; - final int vecLength = (int) (bufSize / DecimalVector.TYPE_WIDTH); - - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - DecimalVector largeVec = new DecimalVector("vec", allocator, 38, 0)) { - largeVec.allocateNew(vecLength); - - logger.trace("Successfully allocated a vector with capacity {}", vecLength); - - for (int i = 0; i < vecLength; i++) { - largeVec.set(i, 0); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully written {} values", i + 1); - } - } - logger.trace("Successfully written {} values", vecLength); - - for (int i = 0; i < vecLength; i++) { - ArrowBuf buf = largeVec.get(i); - assertEquals(DecimalVector.TYPE_WIDTH, buf.capacity()); - assertEquals(0, buf.getLong(0)); - assertEquals(0, buf.getLong(8)); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully read {} values", i + 1); - } - } - logger.trace("Successfully read 
{} values", vecLength); - - // try setting values with a large offset in the buffer - largeVec.set(vecLength - 1, 12345L); - assertEquals(12345L, largeVec.getObject(vecLength - 1).longValue()); - - NullableDecimalHolder holder = new NullableDecimalHolder(); - holder.buffer = largeVec.valueBuffer; - holder.isSet = 1; - holder.start = (long) (vecLength - 1) * largeVec.getTypeWidth(); - assertTrue(holder.start > Integer.MAX_VALUE); - largeVec.set(0, holder); - - BigDecimal decimal = largeVec.getObject(0); - assertEquals(12345L, decimal.longValue()); - - logger.trace("Successfully setting values from large offsets"); - } - logger.trace("Successfully released the large vector."); - } - - @Test - public void testLargeFixedSizeBinaryVector() { - logger.trace("Testing large fixed size binary vector."); - - final long bufSize = 4 * 1024 * 1024 * 1024L; - final int typeWidth = 8; - final int vecLength = (int) (bufSize / typeWidth); - - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - FixedSizeBinaryVector largeVec = new FixedSizeBinaryVector("vec", allocator, typeWidth)) { - largeVec.allocateNew(vecLength); - - logger.trace("Successfully allocated a vector with capacity {}", vecLength); - - byte[] value = new byte[] {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'}; - for (int i = 0; i < vecLength; i++) { - largeVec.set(i, value); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully written {} values", i + 1); - } - } - logger.trace("Successfully written {} values", vecLength); - - for (int i = 0; i < vecLength; i++) { - byte[] buf = largeVec.get(i); - assertEquals(typeWidth, buf.length); - assertArrayEquals(buf, value); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully read {} values", i + 1); - } - } - logger.trace("Successfully read {} values", vecLength); - } - logger.trace("Successfully released the large vector."); - } - - @Test - public void testLargeVarCharVector() { - logger.trace("Testing large var char vector."); - - final long 
bufSize = 4 * 1024 * 1024 * 1024L; - final int vecLength = (int) (bufSize / BaseVariableWidthVector.OFFSET_WIDTH); - final String strElement = "a"; - - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - VarCharVector largeVec = new VarCharVector("vec", allocator)) { - largeVec.allocateNew(vecLength); - - logger.trace("Successfully allocated a vector with capacity " + vecLength); - - for (int i = 0; i < vecLength; i++) { - largeVec.setSafe(i, strElement.getBytes(StandardCharsets.UTF_8)); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully written " + (i + 1) + " values"); - } - } - largeVec.setValueCount(vecLength); - assertTrue(largeVec.getOffsetBuffer().readableBytes() > Integer.MAX_VALUE); - assertTrue(largeVec.getDataBuffer().readableBytes() < Integer.MAX_VALUE); - logger.trace("Successfully written " + vecLength + " values"); - - for (int i = 0; i < vecLength; i++) { - byte[] val = largeVec.get(i); - assertEquals(strElement, new String(val, StandardCharsets.UTF_8)); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully read " + (i + 1) + " values"); - } - } - logger.trace("Successfully read " + vecLength + " values"); - } - logger.trace("Successfully released the large vector."); - } - - @Test - public void testLargeLargeVarCharVector() { - logger.trace("Testing large large var char vector."); - - final long bufSize = 4 * 1024 * 1024 * 1024L; - final int vecLength = (int) (bufSize / BaseLargeVariableWidthVector.OFFSET_WIDTH); - final String strElement = "9876543210"; - - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - LargeVarCharVector largeVec = new LargeVarCharVector("vec", allocator)) { - largeVec.allocateNew(vecLength); - - logger.trace("Successfully allocated a vector with capacity " + vecLength); - - for (int i = 0; i < vecLength; i++) { - largeVec.setSafe(i, strElement.getBytes(StandardCharsets.UTF_8)); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully written " + (i + 1) + " values"); 
- } - } - largeVec.setValueCount(vecLength); - assertTrue(largeVec.getOffsetBuffer().readableBytes() > Integer.MAX_VALUE); - assertTrue(largeVec.getDataBuffer().readableBytes() > Integer.MAX_VALUE); - logger.trace("Successfully written " + vecLength + " values"); - - for (int i = 0; i < vecLength; i++) { - byte[] val = largeVec.get(i); - assertEquals(strElement, new String(val, StandardCharsets.UTF_8)); - - if ((i + 1) % 10000 == 0) { - logger.trace("Successfully read " + (i + 1) + " values"); - } - } - logger.trace("Successfully read " + vecLength + " values"); - } - logger.trace("Successfully released the large vector."); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java deleted file mode 100644 index b5a993db5f21d..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java +++ /dev/null @@ -1,572 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.stream.IntStream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.memory.util.hash.MurmurHasher; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestBitVector { - private static final String EMPTY_SCHEMA_PATH = ""; - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testBitVectorCopyFromSafe() { - final int size = 20; - try (final BitVector src = new BitVector(EMPTY_SCHEMA_PATH, allocator); - final BitVector dst = new BitVector(EMPTY_SCHEMA_PATH, allocator)) { - src.allocateNew(size); - dst.allocateNew(10); - - for (int i = 0; i < size; i++) { - src.set(i, i % 2); - } - src.setValueCount(size); - - for (int i = 0; i < size; i++) { - dst.copyFromSafe(i, i, src); - } - dst.setValueCount(size); - - for (int i = 0; i < size; i++) { - assertEquals(src.getObject(i), dst.getObject(i)); - } - } - } - - @Test - public void testSplitAndTransfer() throws Exception { - - try (final BitVector sourceVector = new BitVector("bitvector", allocator)) { - - sourceVector.allocateNew(40); - - /* populate the bitvector -- 010101010101010101010101..... 
*/ - for (int i = 0; i < 40; i++) { - if ((i & 1) == 1) { - sourceVector.set(i, 1); - } else { - sourceVector.set(i, 0); - } - } - - sourceVector.setValueCount(40); - - /* check the vector output */ - for (int i = 0; i < 40; i++) { - int result = sourceVector.get(i); - if ((i & 1) == 1) { - assertEquals(Integer.toString(1), Integer.toString(result)); - } else { - assertEquals(Integer.toString(0), Integer.toString(result)); - } - } - - try (final BitVector toVector = new BitVector("toVector", allocator)) { - final TransferPair transferPair = sourceVector.makeTransferPair(toVector); - - /* - * form test cases such that we cover: - * - * (1) the start index is exactly where a particular byte starts in the source bit vector - * (2) the start index is randomly positioned within a byte in the source bit vector - * (2.1) the length is a multiple of 8 - * (2.2) the length is not a multiple of 8 - */ - final int[][] transferLengths = {{0, 8}, {8, 10}, {18, 0}, {18, 8}, {26, 0}, {26, 14}}; - - for (final int[] transferLength : transferLengths) { - final int start = transferLength[0]; - final int length = transferLength[1]; - - transferPair.splitAndTransfer(start, length); - - /* check the toVector output after doing splitAndTransfer */ - for (int i = 0; i < length; i++) { - int actual = toVector.get(i); - int expected = sourceVector.get(start + i); - assertEquals( - expected, - actual, - "different data values not expected --> sourceVector index: " - + (start + i) - + " toVector index: " - + i); - } - } - } - } - } - - @Test - public void testSplitAndTransfer1() throws Exception { - - try (final BitVector sourceVector = new BitVector("bitvector", allocator)) { - - sourceVector.allocateNew(8190); - - /* populate the bitvector */ - for (int i = 0; i < 8190; i++) { - sourceVector.set(i, 1); - } - - sourceVector.setValueCount(8190); - - /* check the vector output */ - for (int i = 0; i < 8190; i++) { - int result = sourceVector.get(i); - assertEquals(Integer.toString(1), 
Integer.toString(result)); - } - - try (final BitVector toVector = new BitVector("toVector", allocator)) { - final TransferPair transferPair = sourceVector.makeTransferPair(toVector); - - final int[][] transferLengths = {{0, 4095}, {4095, 4095}}; - - for (final int[] transferLength : transferLengths) { - final int start = transferLength[0]; - final int length = transferLength[1]; - - transferPair.splitAndTransfer(start, length); - - /* check the toVector output after doing splitAndTransfer */ - for (int i = 0; i < length; i++) { - int actual = toVector.get(i); - int expected = sourceVector.get(start + i); - assertEquals( - expected, - actual, - "different data values not expected --> sourceVector index: " - + (start + i) - + " toVector index: " - + i); - } - } - } - } - } - - @Test - public void testSplitAndTransfer2() throws Exception { - - try (final BitVector sourceVector = new BitVector("bitvector", allocator)) { - - sourceVector.allocateNew(32); - - /* populate the bitvector */ - for (int i = 0; i < 32; i++) { - if ((i & 1) == 1) { - sourceVector.set(i, 1); - } else { - sourceVector.set(i, 0); - } - } - - sourceVector.setValueCount(32); - - /* check the vector output */ - for (int i = 0; i < 32; i++) { - int result = sourceVector.get(i); - if ((i & 1) == 1) { - assertEquals(Integer.toString(1), Integer.toString(result)); - } else { - assertEquals(Integer.toString(0), Integer.toString(result)); - } - } - - try (final BitVector toVector = new BitVector("toVector", allocator)) { - final TransferPair transferPair = sourceVector.makeTransferPair(toVector); - - final int[][] transferLengths = { - {5, 22}, {5, 24}, {5, 25}, {5, 27}, {0, 31}, {5, 7}, {2, 3} - }; - - for (final int[] transferLength : transferLengths) { - final int start = transferLength[0]; - final int length = transferLength[1]; - - transferPair.splitAndTransfer(start, length); - - /* check the toVector output after doing splitAndTransfer */ - for (int i = 0; i < length; i++) { - int actual = 
toVector.get(i); - int expected = sourceVector.get(start + i); - assertEquals( - expected, - actual, - "different data values not expected --> sourceVector index: " - + (start + i) - + " toVector index: " - + i); - } - } - } - } - } - - @Test - public void testReallocAfterVectorTransfer1() { - try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(4096); - int valueCapacity = vector.getValueCapacity(); - assertEquals(4096, valueCapacity); - - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - vector.setToOne(i); - } - } - - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i); - } else { - assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); - } - } - - /* trigger first realloc */ - vector.setSafeToOne(valueCapacity); - assertEquals(valueCapacity * 2, vector.getValueCapacity()); - - for (int i = valueCapacity; i < valueCapacity * 2; i++) { - if ((i & 1) == 1) { - vector.setToOne(i); - } - } - - for (int i = 0; i < valueCapacity * 2; i++) { - if (((i & 1) == 1) || (i == valueCapacity)) { - assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i); - } else { - assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); - } - } - - /* trigger second realloc */ - vector.setSafeToOne(valueCapacity * 2); - assertEquals(valueCapacity * 4, vector.getValueCapacity()); - - for (int i = valueCapacity * 2; i < valueCapacity * 4; i++) { - if ((i & 1) == 1) { - vector.setToOne(i); - } - } - - for (int i = 0; i < valueCapacity * 4; i++) { - if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) { - assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i); - } else { - assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); - } - } - - /* now transfer the vector */ - TransferPair transferPair = vector.getTransferPair(allocator); - transferPair.transfer(); - final 
BitVector toVector = (BitVector) transferPair.getTo(); - - assertEquals(valueCapacity * 4, toVector.getValueCapacity()); - - /* realloc the toVector */ - toVector.setSafeToOne(valueCapacity * 4); - - for (int i = 0; i < toVector.getValueCapacity(); i++) { - if (i <= valueCapacity * 4) { - if (((i & 1) == 1) - || (i == valueCapacity) - || (i == valueCapacity * 2) - || (i == valueCapacity * 4)) { - assertEquals(1, toVector.get(i), "unexpected cleared bit at index: " + i); - } else { - assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); - } - } else { - assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); - } - } - - toVector.close(); - } - } - - @Test - public void testReallocAfterVectorTransfer2() { - try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(4096); - int valueCapacity = vector.getValueCapacity(); - assertEquals(4096, valueCapacity); - - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - vector.set(i, 1); - } - } - - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i); - } else { - assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); - } - } - - /* trigger first realloc */ - vector.setSafe(valueCapacity, 1, 1); - assertEquals(valueCapacity * 2, vector.getValueCapacity()); - - for (int i = valueCapacity; i < valueCapacity * 2; i++) { - if ((i & 1) == 1) { - vector.set(i, 1); - } - } - - for (int i = 0; i < valueCapacity * 2; i++) { - if (((i & 1) == 1) || (i == valueCapacity)) { - assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i); - } else { - assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); - } - } - - /* trigger second realloc */ - vector.setSafe(valueCapacity * 2, 1, 1); - assertEquals(valueCapacity * 4, vector.getValueCapacity()); - - for (int i = valueCapacity * 2; i < valueCapacity * 4; i++) { - if ((i & 1) == 1) { - 
vector.set(i, 1); - } - } - - for (int i = 0; i < valueCapacity * 4; i++) { - if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) { - assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i); - } else { - assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); - } - } - - /* now transfer the vector */ - TransferPair transferPair = vector.getTransferPair(allocator); - transferPair.transfer(); - final BitVector toVector = (BitVector) transferPair.getTo(); - - assertEquals(valueCapacity * 4, toVector.getValueCapacity()); - - /* realloc the toVector */ - toVector.setSafe(valueCapacity * 4, 1, 1); - - for (int i = 0; i < toVector.getValueCapacity(); i++) { - if (i <= valueCapacity * 4) { - if (((i & 1) == 1) - || (i == valueCapacity) - || (i == valueCapacity * 2) - || (i == valueCapacity * 4)) { - assertFalse(toVector.isNull(i), "unexpected cleared bit at index: " + i); - } else { - assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); - } - } else { - assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); - } - } - - toVector.close(); - } - } - - @Test - public void testBitVector() { - // Create a new value vector for 1024 integers - try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(1024); - vector.setValueCount(1024); - - // Put and set a few values - vector.set(0, 1); - vector.set(1, 0); - vector.set(100, 0); - vector.set(1022, 1); - - vector.setValueCount(1024); - - assertEquals(1, vector.get(0)); - assertEquals(0, vector.get(1)); - assertEquals(0, vector.get(100)); - assertEquals(1, vector.get(1022)); - - assertEquals(1020, vector.getNullCount()); - - // test setting the same value twice - vector.set(0, 1); - vector.set(0, 1); - vector.set(1, 0); - vector.set(1, 0); - assertEquals(1, vector.get(0)); - assertEquals(0, vector.get(1)); - - // test toggling the values - vector.set(0, 0); - vector.set(1, 1); - assertEquals(0, vector.get(0)); - 
assertEquals(1, vector.get(1)); - - // should not change - assertEquals(1020, vector.getNullCount()); - - // Ensure null value - assertTrue(vector.isNull(3)); - - // unset the previously set bits - vector.setNull(0); - vector.setNull(1); - vector.setNull(100); - vector.setNull(1022); - // this should set all the array to 0 - assertEquals(1024, vector.getNullCount()); - - // set all the array to 1 - for (int i = 0; i < 1024; ++i) { - assertEquals(1024 - i, vector.getNullCount()); - vector.set(i, 1); - } - - assertEquals(0, vector.getNullCount()); - - vector.allocateNew(1015); - vector.setValueCount(1015); - - // ensure it has been zeroed - assertEquals(1015, vector.getNullCount()); - - vector.set(0, 1); - vector.set(1014, 1); // ensure that the last item of the last byte is allocated - - assertEquals(1013, vector.getNullCount()); - - vector.zeroVector(); - assertEquals(1015, vector.getNullCount()); - - // set all the array to 1 - for (int i = 0; i < 1015; ++i) { - assertEquals(1015 - i, vector.getNullCount()); - vector.set(i, 1); - } - - assertEquals(0, vector.getNullCount()); - } - } - - @Test - public void testBitVectorRangeSetAllOnes() { - validateRange(1000, 0, 1000); - validateRange(1000, 0, 1); - validateRange(1000, 1, 2); - validateRange(1000, 5, 6); - validateRange(1000, 5, 10); - validateRange(1000, 5, 150); - validateRange(1000, 5, 27); - for (int i = 0; i < 8; i++) { - for (int j = 0; j < 8; j++) { - validateRange(1000, 10 + i, 27 + j); - validateRange(1000, i, j); - } - } - } - - private void validateRange(int length, int start, int count) { - String desc = "[" + start + ", " + (start + count) + ") "; - try (BitVector bitVector = new BitVector("bits", allocator)) { - bitVector.reset(); - bitVector.allocateNew(length); - bitVector.setRangeToOne(start, count); - for (int i = 0; i < start; i++) { - assertTrue(bitVector.isNull(i), desc + i); - } - for (int i = start; i < start + count; i++) { - assertEquals(1, bitVector.get(i), desc + i); - } - for (int i = 
start + count; i < length; i++) { - assertTrue(bitVector.isNull(i), desc + i); - } - } - } - - @Test - public void testBitVectorHashCode() { - final int size = 6; - try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) { - ValueVectorDataPopulator.setVector(vector, 0, 1, null, 0, 1, null); - - int[] hashCodes = new int[size]; - IntStream.range(0, size).forEach(i -> hashCodes[i] = vector.hashCode(i)); - - assertTrue(hashCodes[0] == hashCodes[3]); - assertTrue(hashCodes[1] == hashCodes[4]); - assertTrue(hashCodes[2] == hashCodes[5]); - - assertFalse(hashCodes[0] == hashCodes[1]); - assertFalse(hashCodes[0] == hashCodes[2]); - assertFalse(hashCodes[1] == hashCodes[2]); - - MurmurHasher hasher = new MurmurHasher(); - - IntStream.range(0, size).forEach(i -> hashCodes[i] = vector.hashCode(i, hasher)); - - assertTrue(hashCodes[0] == hashCodes[3]); - assertTrue(hashCodes[1] == hashCodes[4]); - assertTrue(hashCodes[2] == hashCodes[5]); - - assertFalse(hashCodes[0] == hashCodes[1]); - assertFalse(hashCodes[0] == hashCodes[2]); - assertFalse(hashCodes[1] == hashCodes[2]); - } - } - - @Test - public void testGetTransferPairWithField() { - final BitVector fromVector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - final TransferPair transferPair = fromVector.getTransferPair(fromVector.getField(), allocator); - final BitVector toVector = (BitVector) transferPair.getTo(); - // Field inside a new vector created by reusing a field should be the same in memory as the - // original field. 
- assertSame(fromVector.getField(), toVector.getField()); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java deleted file mode 100644 index 68aa61962ba3f..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.memory.util.MemoryUtil; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.junit.jupiter.api.Test; - -public class TestBitVectorHelper { - @Test - public void testGetNullCount() throws Exception { - try (BufferAllocator root = new RootAllocator()) { - // test case 1, 1 null value for 0b110 - ArrowBuf validityBuffer = root.buffer(3); - // we set validity buffer to be 0b10110, but only have 3 items with 1st item is null - validityBuffer.setByte(0, 0b10110); - - // we will only consider 0b110 here, since we only 3 items and only one is null - int count = BitVectorHelper.getNullCount(validityBuffer, 3); - assertEquals(count, 1); - validityBuffer.close(); - - // test case 2, no null value for 0xFF - validityBuffer = root.buffer(8); - validityBuffer.setByte(0, 0xFF); - - count = BitVectorHelper.getNullCount(validityBuffer, 8); - assertEquals(0, count); - validityBuffer.close(); - - // test case 3, 1 null value for 0x7F - validityBuffer = root.buffer(8); - validityBuffer.setByte(0, 0x7F); - - count = BitVectorHelper.getNullCount(validityBuffer, 8); - assertEquals(1, count); - validityBuffer.close(); - - // test case 4, validity buffer has multiple bytes, 11 items - validityBuffer = root.buffer(11); - validityBuffer.setByte(0, 0b10101010); - validityBuffer.setByte(1, 0b01010101); - - count = BitVectorHelper.getNullCount(validityBuffer, 11); - assertEquals(5, count); - validityBuffer.close(); - } - } - - @Test - public void testAllBitsNull() { - final int bufferLength = 32 * 1024; - try (RootAllocator allocator = new RootAllocator(bufferLength); - ArrowBuf validityBuffer 
= allocator.buffer(bufferLength)) { - - validityBuffer.setZero(0, bufferLength); - int bitLength = 1024; - assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); - - bitLength = 1027; - assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); - - validityBuffer.setZero(0, bufferLength); - bitLength = 1025; - BitVectorHelper.setBit(validityBuffer, 12); - assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); - - validityBuffer.setZero(0, bufferLength); - bitLength = 1025; - BitVectorHelper.setBit(validityBuffer, 1024); - assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); - - validityBuffer.setZero(0, bufferLength); - bitLength = 1026; - BitVectorHelper.setBit(validityBuffer, 1024); - assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); - - validityBuffer.setZero(0, bufferLength); - bitLength = 1027; - BitVectorHelper.setBit(validityBuffer, 1025); - assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); - - validityBuffer.setZero(0, bufferLength); - bitLength = 1031; - BitVectorHelper.setBit(validityBuffer, 1029); - BitVectorHelper.setBit(validityBuffer, 1030); - assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); - } - } - - @Test - public void testAllBitsSet() { - final int bufferLength = 32 * 1024; - try (RootAllocator allocator = new RootAllocator(bufferLength); - ArrowBuf validityBuffer = allocator.buffer(bufferLength)) { - - MemoryUtil.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); - int bitLength = 1024; - assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); - - bitLength = 1028; - assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); - - MemoryUtil.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); - bitLength = 1025; - BitVectorHelper.unsetBit(validityBuffer, 
12); - assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); - - MemoryUtil.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); - bitLength = 1025; - BitVectorHelper.unsetBit(validityBuffer, 1024); - assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); - - MemoryUtil.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); - bitLength = 1026; - BitVectorHelper.unsetBit(validityBuffer, 1024); - assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); - - MemoryUtil.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); - bitLength = 1027; - BitVectorHelper.unsetBit(validityBuffer, 1025); - assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); - - MemoryUtil.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); - bitLength = 1031; - BitVectorHelper.unsetBit(validityBuffer, 1029); - BitVectorHelper.unsetBit(validityBuffer, 1030); - assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); - } - } - - @Test - public void testConcatBits() { - try (RootAllocator allocator = new RootAllocator(1024 * 1024)) { - try (ArrowBuf buf1 = allocator.buffer(1024); - ArrowBuf buf2 = allocator.buffer(1024); - ArrowBuf output = allocator.buffer(1024)) { - - buf1.setZero(0, buf1.capacity()); - buf2.setZero(0, buf2.capacity()); - - final int maxCount = 100; - for (int i = 0; i < maxCount; i++) { - if (i % 3 == 0) { - BitVectorHelper.setBit(buf1, i); - BitVectorHelper.setBit(buf2, i); - } - } - - // test the case where the number of bits for both sets are multiples of 8. 
- concatAndVerify(buf1, 40, buf2, 48, output); - - // only the number of bits in the first set is a multiple of 8 - concatAndVerify(buf1, 32, buf2, 47, output); - - // only the number of bits in the second set is a multiple of 8 - concatAndVerify(buf1, 31, buf2, 48, output); - - // neither set has a size that is a multiple of 8 - concatAndVerify(buf1, 27, buf2, 52, output); - - // the remaining bits in the second set is spread in two bytes - concatAndVerify(buf1, 31, buf2, 55, output); - } - } - } - - @Test - public void testConcatBitsInPlace() { - try (RootAllocator allocator = new RootAllocator(1024 * 1024)) { - try (ArrowBuf buf1 = allocator.buffer(1024); - ArrowBuf buf2 = allocator.buffer(1024)) { - - buf1.setZero(0, buf1.capacity()); - buf2.setZero(0, buf2.capacity()); - - final int maxCount = 100; - for (int i = 0; i < maxCount; i++) { - if (i % 3 == 0) { - BitVectorHelper.setBit(buf1, i); - BitVectorHelper.setBit(buf2, i); - } - } - - // test the case where the number of bits for both sets are multiples of 8. 
- concatAndVerify(buf1, 40, buf2, 48, buf1); - - // only the number of bits in the first set is a multiple of 8 - concatAndVerify(buf1, 32, buf2, 47, buf1); - - // only the number of bits in the second set is a multiple of 8 - concatAndVerify(buf1, 31, buf2, 48, buf1); - - // neither set has a size that is a multiple of 8 - concatAndVerify(buf1, 27, buf2, 52, buf1); - - // the remaining bits in the second set is spread in two bytes - concatAndVerify(buf1, 31, buf2, 55, buf1); - } - } - } - - @Test - public void testLoadValidityBuffer() { - try (RootAllocator allocator = new RootAllocator(1024)) { - // if the input validity buffer is all null, we should allocate new memory - ArrowFieldNode fieldNode = new ArrowFieldNode(1024, 1024); - try (ArrowBuf buf = BitVectorHelper.loadValidityBuffer(fieldNode, null, allocator)) { - assertEquals(128, allocator.getAllocatedMemory()); - for (int i = 0; i < 128; i++) { - assertEquals(0, buf.getByte(i)); - } - } - - // should also allocate memory if input validity buffer is all not-null - fieldNode = new ArrowFieldNode(1024, 0); - try (ArrowBuf buf = BitVectorHelper.loadValidityBuffer(fieldNode, null, allocator)) { - assertEquals(128, allocator.getAllocatedMemory()); - for (int i = 0; i < 128; i++) { - assertEquals((byte) 0xff, buf.getByte(i)); - } - } - - // should not allocate memory if input validity buffer is not null, even if all values are - // null - fieldNode = new ArrowFieldNode(1024, 1024); - try (ArrowBuf src = allocator.buffer(128); - ArrowBuf dst = BitVectorHelper.loadValidityBuffer(fieldNode, src, allocator)) { - assertEquals(128, allocator.getAllocatedMemory()); - } - - // ... 
similarly if all values are not null - fieldNode = new ArrowFieldNode(1024, 0); - try (ArrowBuf src = allocator.buffer(128); - ArrowBuf dst = BitVectorHelper.loadValidityBuffer(fieldNode, src, allocator)) { - assertEquals(128, allocator.getAllocatedMemory()); - } - - // mixed case, input should match output - int numNulls = 100; - fieldNode = new ArrowFieldNode(1024, numNulls); - try (ArrowBuf src = allocator.buffer(128)) { - src.setZero(0, src.capacity()); - for (int i = 0; i < numNulls; i++) { - BitVectorHelper.setBit(src, i); - } - try (ArrowBuf dst = BitVectorHelper.loadValidityBuffer(fieldNode, src, allocator)) { - assertEquals(src.memoryAddress(), dst.memoryAddress()); - assertEquals(128, allocator.getAllocatedMemory()); - for (int i = 0; i < 1024; i++) { - assertEquals(BitVectorHelper.get(src, i), BitVectorHelper.get(dst, i)); - } - } - } - } - } - - private void concatAndVerify( - ArrowBuf buf1, int count1, ArrowBuf buf2, int count2, ArrowBuf output) { - BitVectorHelper.concatBits(buf1, count1, buf2, count2, output); - int outputIdx = 0; - for (int i = 0; i < count1; i++, outputIdx++) { - assertEquals(BitVectorHelper.get(output, outputIdx), BitVectorHelper.get(buf1, i)); - } - for (int i = 0; i < count2; i++, outputIdx++) { - assertEquals(BitVectorHelper.get(output, outputIdx), BitVectorHelper.get(buf2, i)); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java deleted file mode 100644 index 3a0bd1c18646a..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.ReferenceManager; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.junit.jupiter.api.Test; - -public class TestBufferOwnershipTransfer { - - @Test - public void testTransferFixedWidth() { - BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 100000, 100000); - BufferAllocator childAllocator2 = allocator.newChildAllocator("child2", 100000, 100000); - - IntVector v1 = new IntVector("v1", childAllocator1); - v1.allocateNew(); - v1.setValueCount(4095); - long totalAllocatedMemory = childAllocator1.getAllocatedMemory(); - - IntVector v2 = new IntVector("v2", childAllocator2); - - v1.makeTransferPair(v2).transfer(); - - assertEquals(0, childAllocator1.getAllocatedMemory()); - assertEquals(totalAllocatedMemory, childAllocator2.getAllocatedMemory()); - - 
v1.close(); - v2.close(); - childAllocator1.close(); - childAllocator2.close(); - allocator.close(); - } - - @Test - public void testTransferVariableWidth() { - BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 100000, 100000); - BufferAllocator childAllocator2 = allocator.newChildAllocator("child2", 100000, 100000); - - VarCharVector v1 = new VarCharVector("v1", childAllocator1); - v1.allocateNew(); - v1.setSafe(4094, "hello world".getBytes(StandardCharsets.UTF_8), 0, 11); - v1.setValueCount(4001); - - VarCharVector v2 = new VarCharVector("v2", childAllocator2); - long memoryBeforeTransfer = childAllocator1.getAllocatedMemory(); - - v1.makeTransferPair(v2).transfer(); - - assertEquals(0, childAllocator1.getAllocatedMemory()); - assertEquals(memoryBeforeTransfer, childAllocator2.getAllocatedMemory()); - - v1.close(); - v2.close(); - childAllocator1.close(); - childAllocator2.close(); - allocator.close(); - } - - private static class Pointer { - T value; - } - - private static CallBack newTriggerCallback(final Pointer trigger) { - trigger.value = false; - return new CallBack() { - @Override - public void doWork() { - trigger.value = true; - } - }; - } - - @Test - public void emptyListTransferShouldNotTriggerSchemaChange() { - final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - - final Pointer trigger1 = new Pointer<>(); - final Pointer trigger2 = new Pointer<>(); - final ListVector v1 = - new ListVector( - "v1", - allocator, - FieldType.nullable(ArrowType.Null.INSTANCE), - newTriggerCallback(trigger1)); - final ListVector v2 = - new ListVector( - "v2", - allocator, - FieldType.nullable(ArrowType.Null.INSTANCE), - newTriggerCallback(trigger2)); - - try { - // since we are working with empty vectors, their internal - // buffers will be allocator.EMPTY which use - // ReferenceManager.NO_OP instance and transfer() is not - // supported - 
v1.makeTransferPair(v2).transfer(); - } catch (Exception e) { - assertTrue(e instanceof UnsupportedOperationException); - assertTrue(e.getMessage().contains(ReferenceManager.NO_OP_ERROR_MESSAGE)); - } - - assertFalse(trigger1.value); - assertFalse(trigger2.value); - - v1.close(); - v2.close(); - allocator.close(); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java deleted file mode 100644 index bd0b44a14452a..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java +++ /dev/null @@ -1,1101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.TestUtils.newVector; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; -import java.time.Duration; -import java.time.Period; -import java.util.Objects; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.types.Types.MinorType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/* - * Tested field types: - * - * NullableInt - * NullableBigInt - * NullableFloat4 - * NullableFloat8 - * NullableBit - * NullableDecimal - * NullableIntervalDay - * NullableIntervalYear - * NullableSmallInt - * NullableTinyInt - * NullableVarChar - * NullableTimeMicro - * NullableTimeMilli - * NullableTimeStamp* - */ - -public class TestCopyFrom { - - private static final String EMPTY_SCHEMA_PATH = ""; - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test /* NullableVarChar */ - public void testCopyFromWithNulls() { - try (final VarCharVector vector = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator); - final VarCharVector vector2 = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { - vector.allocateNew(); - assertTrue(vector.getValueCapacity() >= 1); - assertEquals(0, vector.getValueCount()); - int initialCapacity = vector.getValueCapacity(); - - for (int i = 0; i < initialCapacity; i++) { - if (i % 3 == 0) { - continue; - } - byte[] b = 
Integer.toString(i).getBytes(StandardCharsets.UTF_8); - vector.setSafe(i, b, 0, b.length); - } - - /* NO reAlloc() should have happened in setSafe() */ - int capacity = vector.getValueCapacity(); - assertEquals(initialCapacity, capacity); - - vector.setValueCount(initialCapacity); - - for (int i = 0; i < initialCapacity; i++) { - if (i % 3 == 0) { - assertNull(vector.getObject(i)); - } else { - assertEquals( - Integer.toString(i), - vector.getObject(i).toString(), - "unexpected value at index: " + i); - } - } - - vector2.setInitialCapacity(initialCapacity); - vector2.allocateNew(); - capacity = vector2.getValueCapacity(); - assertEquals(initialCapacity, capacity); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector); - if (i % 3 == 0) { - assertNull(vector2.getObject(i)); - } else { - assertEquals( - Integer.toString(i), - vector2.getObject(i).toString(), - "unexpected value at index: " + i); - } - } - - /* NO reAlloc() should have happened in copyFrom */ - capacity = vector2.getValueCapacity(); - assertEquals(initialCapacity, capacity); - - vector2.setValueCount(initialCapacity); - - for (int i = 0; i < initialCapacity; i++) { - if (i % 3 == 0) { - assertNull(vector2.getObject(i)); - } else { - assertEquals( - Integer.toString(i), - vector2.getObject(i).toString(), - "unexpected value at index: " + i); - } - } - } - } - - @Test /* NullableVarChar */ - public void testCopyFromWithNulls1() { - try (final VarCharVector vector = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator); - final VarCharVector vector2 = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { - vector.allocateNew(); - assertTrue(vector.getValueCapacity() >= 1); - assertEquals(0, vector.getValueCount()); - int initialCapacity = vector.getValueCapacity(); - - for (int i = 0; i < initialCapacity; i++) { - if (i % 3 == 0) { - continue; - } - byte[] b = Integer.toString(i).getBytes(StandardCharsets.UTF_8); - 
vector.setSafe(i, b, 0, b.length); - } - - /* NO reAlloc() should have happened in setSafe() */ - int capacity = vector.getValueCapacity(); - assertEquals(initialCapacity, capacity); - - vector.setValueCount(initialCapacity); - - for (int i = 0; i < initialCapacity; i++) { - if (i % 3 == 0) { - assertNull(vector.getObject(i)); - } else { - assertEquals( - Integer.toString(i), - vector.getObject(i).toString(), - "unexpected value at index: " + i); - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew((initialCapacity / 4) * 10, initialCapacity / 4); - - capacity = vector2.getValueCapacity(); - assertTrue(capacity >= initialCapacity / 4); - assertTrue(capacity < initialCapacity / 2); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector); - if (i % 3 == 0) { - assertNull(vector2.getObject(i)); - } else { - assertEquals( - Integer.toString(i), - vector2.getObject(i).toString(), - "unexpected value at index: " + i); - } - } - - /* 2 reAllocs should have happened in copyFromSafe() */ - capacity = vector2.getValueCapacity(); - assertTrue(capacity >= initialCapacity); - - vector2.setValueCount(initialCapacity); - - for (int i = 0; i < initialCapacity; i++) { - if (i % 3 == 0) { - assertNull(vector2.getObject(i)); - } else { - assertEquals( - Integer.toString(i), - vector2.getObject(i).toString(), - "unexpected value at index: " + i); - } - } - } - } - - @Test /* IntVector */ - public void testCopyFromWithNulls2() { - try (final IntVector vector1 = new IntVector(EMPTY_SCHEMA_PATH, allocator); - final IntVector vector2 = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { - - vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); - assertEquals(0, vector1.getValueCount()); - int initialCapacity = vector1.getValueCapacity(); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - continue; - } - 
vector1.setSafe(i, 1000 + i); - } - - vector1.setValueCount(initialCapacity); - - /* No realloc should have happened in setSafe or - * setValueCount - */ - assertEquals(initialCapacity, vector1.getValueCapacity()); - assertEquals(initialCapacity, vector1.getValueCount()); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - assertNull(vector1.getObject(i)); - } else { - assertEquals(1000 + i, vector1.get(i), "unexpected value at index: " + i); - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(initialCapacity / 4); - assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector1); - } - - /* 2 realloc should have happened in copyFromSafe() */ - assertTrue(vector2.getValueCapacity() >= initialCapacity); - vector2.setValueCount(initialCapacity * 2); - /* setValueCount() should have done another realloc */ - assertEquals(initialCapacity * 2, vector2.getValueCount()); - assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); - - /* check vector data after copy and realloc */ - for (int i = 0; i < initialCapacity * 2; i++) { - if (((i & 1) == 0) || (i >= initialCapacity)) { - assertNull(vector2.getObject(i)); - } else { - assertEquals(1000 + i, vector2.get(i), "unexpected value at index: " + i); - } - } - } - } - - @Test /* BigIntVector */ - public void testCopyFromWithNulls3() { - try (final BigIntVector vector1 = new BigIntVector(EMPTY_SCHEMA_PATH, allocator); - final BigIntVector vector2 = new BigIntVector(EMPTY_SCHEMA_PATH, allocator)) { - - vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); - assertEquals(0, vector1.getValueCount()); - int initialCapacity = vector1.getValueCapacity(); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - 
continue; - } - vector1.setSafe(i, 10000000000L + (long) i); - } - - vector1.setValueCount(initialCapacity); - - /* No realloc should have happened in setSafe or - * setValueCount - */ - assertEquals(initialCapacity, vector1.getValueCapacity()); - assertEquals(initialCapacity, vector1.getValueCount()); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - assertNull(vector1.getObject(i)); - } else { - assertEquals(10000000000L + (long) i, vector1.get(i), "unexpected value at index: " + i); - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(initialCapacity / 4); - assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector1); - } - - /* 2 realloc should have happened in copyFromSafe() */ - assertTrue(vector2.getValueCapacity() >= initialCapacity); - vector2.setValueCount(initialCapacity * 2); - /* setValueCount() should have done another realloc */ - assertEquals(initialCapacity * 2, vector2.getValueCount()); - assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); - - /* check vector data after copy and realloc */ - for (int i = 0; i < initialCapacity * 2; i++) { - if (((i & 1) == 0) || (i >= initialCapacity)) { - assertNull(vector2.getObject(i)); - } else { - assertEquals(10000000000L + (long) i, vector2.get(i), "unexpected value at index: " + i); - } - } - } - } - - @Test /* BitVector */ - public void testCopyFromWithNulls4() { - try (final BitVector vector1 = new BitVector(EMPTY_SCHEMA_PATH, allocator); - final BitVector vector2 = new BitVector(EMPTY_SCHEMA_PATH, allocator)) { - - vector1.setInitialCapacity(4096); - vector1.allocateNew(); - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(0, vector1.getValueCount()); - - int counter = 0; - for (int i = 0; i < 4096; i++) { - if ((i & 1) == 0) { 
- continue; - } - if ((counter & 1) == 0) { - vector1.setSafe(i, 1); - } else { - vector1.setSafe(i, 0); - } - counter++; - } - - vector1.setValueCount(4096); - - /* No realloc should have happened in setSafe or - * setValueCount - */ - assertEquals(4096, vector1.getValueCapacity()); - assertEquals(4096, vector1.getValueCount()); - - counter = 0; - for (int i = 0; i < 4096; i++) { - if ((i & 1) == 0) { - assertNull(vector1.getObject(i)); - } else { - if ((counter & 1) == 0) { - assertTrue(vector1.getObject(i)); - } else { - assertFalse(vector1.getObject(i)); - } - counter++; - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(1024); - assertEquals(1024, vector2.getValueCapacity()); - - for (int i = 0; i < 4096; i++) { - vector2.copyFromSafe(i, i, vector1); - } - - /* 2 realloc should have happened in copyFromSafe() */ - assertEquals(4096, vector2.getValueCapacity()); - vector2.setValueCount(8192); - /* setValueCount() should have done another realloc */ - assertEquals(8192, vector2.getValueCount()); - assertEquals(8192, vector2.getValueCapacity()); - - /* check vector data after copy and realloc */ - counter = 0; - for (int i = 0; i < 8192; i++) { - if (((i & 1) == 0) || (i >= 4096)) { - assertNull(vector2.getObject(i)); - } else { - if ((counter & 1) == 0) { - assertTrue(vector2.getObject(i)); - } else { - assertFalse(vector2.getObject(i)); - } - counter++; - } - } - } - } - - @Test /* Float4Vector */ - public void testCopyFromWithNulls5() { - try (final Float4Vector vector1 = new Float4Vector(EMPTY_SCHEMA_PATH, allocator); - final Float4Vector vector2 = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) { - - vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); - assertEquals(0, vector1.getValueCount()); - int initialCapacity = vector1.getValueCapacity(); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - continue; - } 
- vector1.setSafe(i, 100.25f + (float) i); - } - - vector1.setValueCount(initialCapacity); - - /* No realloc should have happened in setSafe or - * setValueCount - */ - assertEquals(initialCapacity, vector1.getValueCapacity()); - assertEquals(initialCapacity, vector1.getValueCount()); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - assertNull(vector1.getObject(i)); - } else { - assertEquals(100.25f + (float) i, vector1.get(i), 0, "unexpected value at index: " + i); - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(initialCapacity / 4); - assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector1); - } - - /* 2 realloc should have happened in copyFromSafe() */ - assertTrue(vector2.getValueCapacity() >= initialCapacity); - vector2.setValueCount(initialCapacity * 2); - /* setValueCount() should have done another realloc */ - assertEquals(initialCapacity * 2, vector2.getValueCount()); - assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); - - /* check vector data after copy and realloc */ - for (int i = 0; i < initialCapacity * 2; i++) { - if (((i & 1) == 0) || (i >= initialCapacity)) { - assertNull(vector2.getObject(i)); - } else { - assertEquals(100.25f + i * 1.0f, vector2.get(i), 0, "unexpected value at index: " + i); - } - } - } - } - - @Test /* Float8Vector */ - public void testCopyFromWithNulls6() { - try (final Float8Vector vector1 = new Float8Vector(EMPTY_SCHEMA_PATH, allocator); - final Float8Vector vector2 = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { - - vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); - assertEquals(0, vector1.getValueCount()); - int initialCapacity = vector1.getValueCapacity(); - - for (int i = 0; i < initialCapacity; 
i++) { - if ((i & 1) == 0) { - continue; - } - vector1.setSafe(i, 123456.7865 + (double) i); - } - - vector1.setValueCount(initialCapacity); - - /* No realloc should have happened in setSafe or - * setValueCount - */ - assertEquals(initialCapacity, vector1.getValueCapacity()); - assertEquals(initialCapacity, vector1.getValueCount()); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - assertNull(vector1.getObject(i)); - } else { - assertEquals( - 123456.7865 + (double) i, vector1.get(i), 0, "unexpected value at index: " + i); - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(initialCapacity / 4); - assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector1); - } - - /* 2 realloc should have happened in copyFromSafe() */ - assertTrue(vector2.getValueCapacity() >= initialCapacity); - vector2.setValueCount(initialCapacity * 2); - /* setValueCount() should have done another realloc */ - assertEquals(initialCapacity * 2, vector2.getValueCount()); - assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); - - /* check vector data after copy and realloc */ - for (int i = 0; i < initialCapacity * 2; i++) { - if (((i & 1) == 0) || (i >= initialCapacity)) { - assertNull(vector2.getObject(i)); - } else { - assertEquals( - 123456.7865 + (double) i, vector2.get(i), 0, "unexpected value at index: " + i); - } - } - } - } - - @Test /* IntervalDayVector */ - public void testCopyFromWithNulls7() { - try (final IntervalDayVector vector1 = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator); - final IntervalDayVector vector2 = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator)) { - - vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); - assertEquals(0, vector1.getValueCount()); 
- int initialCapacity = vector1.getValueCapacity(); - - final int days = 10; - final int milliseconds = 10000; - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - continue; - } - vector1.setSafe(i, days + i, milliseconds + i); - } - - vector1.setValueCount(initialCapacity); - - /* No realloc should have happened in setSafe or - * setValueCount - */ - assertEquals(initialCapacity, vector1.getValueCapacity()); - assertEquals(initialCapacity, vector1.getValueCount()); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - assertNull(vector1.getObject(i)); - } else { - final Duration d = vector1.getObject(i); - assertEquals(days + i, d.toDays()); - assertEquals(milliseconds + i, d.minusDays(days + i).toMillis()); - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(initialCapacity / 4); - assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector1); - } - - /* 2 realloc should have happened in copyFromSafe() */ - assertTrue(vector2.getValueCapacity() >= initialCapacity); - vector2.setValueCount(initialCapacity * 2); - /* setValueCount() should have done another realloc */ - assertEquals(initialCapacity * 2, vector2.getValueCount()); - assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); - - /* check vector data after copy and realloc */ - for (int i = 0; i < initialCapacity * 2; i++) { - if (((i & 1) == 0) || (i >= initialCapacity)) { - assertNull(vector2.getObject(i)); - } else { - final Duration d = vector2.getObject(i); - assertEquals(days + i, d.toDays()); - assertEquals(milliseconds + i, d.minusDays(days + i).toMillis()); - } - } - } - } - - @Test /* IntervalYearVector */ - public void testCopyFromWithNulls8() { - try (final IntervalYearVector vector1 = new 
IntervalYearVector(EMPTY_SCHEMA_PATH, allocator); - final IntervalYearVector vector2 = new IntervalYearVector(EMPTY_SCHEMA_PATH, allocator)) { - - vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); - assertEquals(0, vector1.getValueCount()); - int initialCapacity = vector1.getValueCapacity(); - - final int interval = 30; /* 2 years 6 months */ - final Period[] periods = new Period[4096]; - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - continue; - } - vector1.setSafe(i, interval + i); - final int years = (interval + i) / org.apache.arrow.vector.util.DateUtility.yearsToMonths; - final int months = (interval + i) % org.apache.arrow.vector.util.DateUtility.yearsToMonths; - periods[i] = Period.ofYears(years).plusMonths(months).normalized(); - } - - vector1.setValueCount(initialCapacity); - - /* No realloc should have happened in setSafe or - * setValueCount - */ - assertEquals(initialCapacity, vector1.getValueCapacity()); - assertEquals(initialCapacity, vector1.getValueCount()); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - assertNull(vector1.getObject(i)); - } else { - final Period p = vector1.getObject(i).normalized(); - assertEquals(interval + i, vector1.get(i)); - assertEquals(periods[i], p); - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(initialCapacity / 4); - assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector1); - } - - /* 2 realloc should have happened in copyFromSafe() */ - assertTrue(vector2.getValueCapacity() >= initialCapacity); - vector2.setValueCount(initialCapacity * 2); - /* setValueCount() should have done another realloc */ - assertEquals(initialCapacity * 2, vector2.getValueCount()); - 
assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); - - /* check vector data after copy and realloc */ - for (int i = 0; i < initialCapacity * 2; i++) { - if (((i & 1) == 0) || (i >= initialCapacity)) { - assertNull(vector2.getObject(i)); - } else { - final Period p = vector2.getObject(i).normalized(); - assertEquals(periods[i], p); - } - } - } - } - - @Test /* SmallIntVector */ - public void testCopyFromWithNulls9() { - try (final SmallIntVector vector1 = new SmallIntVector(EMPTY_SCHEMA_PATH, allocator); - final SmallIntVector vector2 = new SmallIntVector(EMPTY_SCHEMA_PATH, allocator)) { - - vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); - assertEquals(0, vector1.getValueCount()); - int initialCapacity = vector1.getValueCapacity(); - - final short val = 1000; - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - continue; - } - vector1.setSafe(i, val + (short) i); - } - - vector1.setValueCount(initialCapacity); - - /* No realloc should have happened in setSafe or - * setValueCount - */ - assertEquals(initialCapacity, vector1.getValueCapacity()); - assertEquals(initialCapacity, vector1.getValueCount()); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - assertNull(vector1.getObject(i)); - } else { - assertEquals(val + (short) i, vector1.get(i), "unexpected value at index: " + i); - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(initialCapacity / 4); - assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector1); - } - - /* 2 realloc should have happened in copyFromSafe() */ - assertTrue(vector2.getValueCapacity() >= initialCapacity); - vector2.setValueCount(initialCapacity * 2); - /* setValueCount() should have done another realloc 
*/ - assertEquals(initialCapacity * 2, vector2.getValueCount()); - assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); - - /* check vector data after copy and realloc */ - for (int i = 0; i < initialCapacity * 2; i++) { - if (((i & 1) == 0) || (i >= initialCapacity)) { - assertNull(vector2.getObject(i)); - } else { - assertEquals(val + (short) i, vector2.get(i), "unexpected value at index: " + i); - } - } - } - } - - @Test /* TimeMicroVector */ - public void testCopyFromWithNulls10() { - try (final TimeMicroVector vector1 = new TimeMicroVector(EMPTY_SCHEMA_PATH, allocator); - final TimeMicroVector vector2 = new TimeMicroVector(EMPTY_SCHEMA_PATH, allocator)) { - - vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); - assertEquals(0, vector1.getValueCount()); - int initialCapacity = vector1.getValueCapacity(); - - final long val = 100485765432L; - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - continue; - } - vector1.setSafe(i, val + (long) i); - } - - vector1.setValueCount(initialCapacity); - - /* No realloc should have happened in setSafe or - * setValueCount - */ - assertEquals(initialCapacity, vector1.getValueCapacity()); - assertEquals(initialCapacity, vector1.getValueCount()); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - assertNull(vector1.getObject(i)); - } else { - assertEquals(val + (long) i, vector1.get(i), "unexpected value at index: " + i); - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(initialCapacity / 4); - assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector1); - } - - /* 2 realloc should have happened in copyFromSafe() */ - assertTrue(vector2.getValueCapacity() >= initialCapacity); - 
vector2.setValueCount(initialCapacity * 2); - /* setValueCount() should have done another realloc */ - assertEquals(initialCapacity * 2, vector2.getValueCount()); - assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); - - /* check vector data after copy and realloc */ - for (int i = 0; i < initialCapacity * 2; i++) { - if (((i & 1) == 0) || (i >= initialCapacity)) { - assertNull(vector2.getObject(i)); - } else { - assertEquals(val + (long) i, vector2.get(i), "unexpected value at index: " + i); - } - } - } - } - - @Test /* TimeMilliVector */ - public void testCopyFromWithNulls11() { - try (final TimeMilliVector vector1 = new TimeMilliVector(EMPTY_SCHEMA_PATH, allocator); - final TimeMilliVector vector2 = new TimeMilliVector(EMPTY_SCHEMA_PATH, allocator)) { - - vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); - assertEquals(0, vector1.getValueCount()); - int initialCapacity = vector1.getValueCapacity(); - - final int val = 1000; - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - continue; - } - vector1.setSafe(i, val + i); - } - - vector1.setValueCount(initialCapacity); - - /* No realloc should have happened in setSafe or - * setValueCount - */ - assertEquals(initialCapacity, vector1.getValueCapacity()); - assertEquals(initialCapacity, vector1.getValueCount()); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - assertNull(vector1.getObject(i)); - } else { - assertEquals(val + i, vector1.get(i), "unexpected value at index: " + i); - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(initialCapacity / 4); - assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector1); - } - - /* 2 realloc should have happened in copyFromSafe() */ - 
assertTrue(vector2.getValueCapacity() >= initialCapacity); - vector2.setValueCount(initialCapacity * 2); - /* setValueCount() should have done another realloc */ - assertEquals(initialCapacity * 2, vector2.getValueCount()); - assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); - - /* check vector data after copy and realloc */ - for (int i = 0; i < initialCapacity * 2; i++) { - if (((i & 1) == 0) || (i >= initialCapacity)) { - assertNull(vector2.getObject(i)); - } else { - assertEquals(val + i, vector2.get(i), "unexpected value at index: " + i); - } - } - } - } - - @Test /* TinyIntVector */ - public void testCopyFromWithNulls12() { - try (final TinyIntVector vector1 = new TinyIntVector(EMPTY_SCHEMA_PATH, allocator); - final TinyIntVector vector2 = new TinyIntVector(EMPTY_SCHEMA_PATH, allocator)) { - - vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); - assertEquals(0, vector1.getValueCount()); - int initialCapacity = vector1.getValueCapacity(); - - byte val = -128; - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - continue; - } - vector1.setSafe(i, val); - val++; - } - - vector1.setValueCount(initialCapacity); - - /* No realloc should have happened in setSafe or - * setValueCount - */ - assertEquals(initialCapacity, vector1.getValueCapacity()); - assertEquals(initialCapacity, vector1.getValueCount()); - - val = -128; - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - assertNull(vector1.getObject(i)); - } else { - assertEquals(val, vector1.get(i), "unexpected value at index: " + i); - val++; - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(initialCapacity / 4); - assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector1); - } - - /* 2 realloc should have happened in copyFromSafe() */ - 
assertTrue(vector2.getValueCapacity() >= initialCapacity); - vector2.setValueCount(initialCapacity * 2); - /* setValueCount() should have done another realloc */ - assertEquals(initialCapacity * 2, vector2.getValueCount()); - assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); - - /* check vector data after copy and realloc */ - val = -128; - for (int i = 0; i < initialCapacity * 2; i++) { - if (((i & 1) == 0) || (i >= initialCapacity)) { - assertNull(vector2.getObject(i)); - } else { - assertEquals(val, vector2.get(i), "unexpected value at index: " + i); - val++; - } - } - } - } - - @Test /* DecimalVector */ - public void testCopyFromWithNulls13() { - try (final DecimalVector vector1 = new DecimalVector(EMPTY_SCHEMA_PATH, allocator, 30, 16); - final DecimalVector vector2 = new DecimalVector(EMPTY_SCHEMA_PATH, allocator, 30, 16)) { - - vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); - assertEquals(0, vector1.getValueCount()); - int initialCapacity = vector1.getValueCapacity(); - - final double baseValue = 104567897654.87654; - final BigDecimal[] decimals = new BigDecimal[4096]; - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - continue; - } - BigDecimal decimal = new BigDecimal(baseValue + (double) i); - vector1.setSafe(i, decimal); - decimals[i] = decimal; - } - - vector1.setValueCount(initialCapacity); - - /* No realloc should have happened in setSafe or - * setValueCount - */ - assertEquals(initialCapacity, vector1.getValueCapacity()); - assertEquals(initialCapacity, vector1.getValueCount()); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - assertNull(vector1.getObject(i)); - } else { - final BigDecimal decimal = vector1.getObject(i); - assertEquals(decimals[i], decimal); - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(initialCapacity / 4); - 
assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector1); - } - - /* 2 realloc should have happened in copyFromSafe() */ - assertTrue(vector2.getValueCapacity() >= initialCapacity); - vector2.setValueCount(initialCapacity * 2); - /* setValueCount() should have done another realloc */ - assertEquals(initialCapacity * 2, vector2.getValueCount()); - assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); - - /* check vector data after copy and realloc */ - for (int i = 0; i < initialCapacity * 2; i++) { - if (((i & 1) == 0) || (i >= initialCapacity)) { - assertNull(vector2.getObject(i)); - } else { - final BigDecimal decimal = vector2.getObject(i); - assertEquals(decimals[i], decimal); - } - } - } - } - - @Test /* TimeStampVector */ - public void testCopyFromWithNulls14() { - try (final TimeStampVector vector1 = new TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator); - final TimeStampVector vector2 = new TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator)) { - - vector1.allocateNew(); - assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION); - assertEquals(0, vector1.getValueCount()); - int initialCapacity = vector1.getValueCapacity(); - - final long val = 20145678912L; - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - continue; - } - vector1.setSafe(i, val + (long) i); - } - - vector1.setValueCount(initialCapacity); - - /* No realloc should have happened in setSafe or - * setValueCount - */ - assertEquals(initialCapacity, vector1.getValueCapacity()); - assertEquals(initialCapacity, vector1.getValueCount()); - - for (int i = 0; i < initialCapacity; i++) { - if ((i & 1) == 0) { - assertNull(vector1.getObject(i)); - } else { - assertEquals(val + (long) i, vector1.get(i), "unexpected value at index: " + i); - } - } - - /* set lesser initial capacity than actually needed - 
* to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(initialCapacity / 4); - assertTrue(vector2.getValueCapacity() >= initialCapacity / 4); - assertTrue(vector2.getValueCapacity() < initialCapacity / 2); - - for (int i = 0; i < initialCapacity; i++) { - vector2.copyFromSafe(i, i, vector1); - } - - /* 2 realloc should have happened in copyFromSafe() */ - assertTrue(vector2.getValueCapacity() >= initialCapacity); - vector2.setValueCount(initialCapacity * 2); - /* setValueCount() should have done another realloc */ - assertEquals(initialCapacity * 2, vector2.getValueCount()); - assertTrue(vector2.getValueCapacity() >= initialCapacity * 2); - - /* check vector data after copy and realloc */ - for (int i = 0; i < initialCapacity * 2; i++) { - if (((i & 1) == 0) || (i >= initialCapacity)) { - assertNull(vector2.getObject(i)); - } else { - assertEquals(val + (long) i, vector2.get(i), "unexpected value at index: " + i); - } - } - } - } - - @Test // https://issues.apache.org/jira/browse/ARROW-7837 - public void testCopySafeArrow7837() { - // this test exposes a bug in `handleSafe` where - // it reads a stale index and as a result missed a required resize of the value vector. - try (VarCharVector vc1 = new VarCharVector("vc1", allocator); - VarCharVector vc2 = new VarCharVector("vc2", allocator); ) { - // initial size is carefully set in order to force the second 'copyFromSafe' operation - // to trigger a reallocation of the vector. 
- vc2.setInitialCapacity(/*valueCount*/ 20, /*density*/ 0.5); - - vc1.setSafe(0, "1234567890".getBytes(StandardCharsets.UTF_8)); - assertFalse(vc1.isNull(0)); - assertEquals("1234567890", Objects.requireNonNull(vc1.getObject(0)).toString()); - - vc2.copyFromSafe(0, 0, vc1); - assertFalse(vc2.isNull(0)); - assertEquals("1234567890", Objects.requireNonNull(vc2.getObject(0)).toString()); - - vc2.copyFromSafe(0, 5, vc1); - assertTrue(vc2.isNull(1)); - assertTrue(vc2.isNull(2)); - assertTrue(vc2.isNull(3)); - assertTrue(vc2.isNull(4)); - assertFalse(vc2.isNull(5)); - assertEquals("1234567890", Objects.requireNonNull(vc2.getObject(5)).toString()); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java deleted file mode 100644 index c155ab98fa2e9..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.math.BigDecimal; -import java.math.BigInteger; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestDecimal256Vector { - - private static long[] intValues; - - static { - intValues = new long[60]; - for (int i = 0; i < intValues.length / 2; i++) { - intValues[i] = 1L << (i + 1); - intValues[2 * i] = -1L * (1 << (i + 1)); - } - } - - private int scale = 3; - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testValuesWriteRead() { - try (Decimal256Vector decimalVector = - TestUtils.newVector( - Decimal256Vector.class, - "decimal", - new ArrowType.Decimal(10, scale, 256), - allocator); ) { - - try (Decimal256Vector oldConstructor = - new Decimal256Vector("decimal", allocator, 10, scale); ) { - assertEquals(decimalVector.getField().getType(), oldConstructor.getField().getType()); - } - - decimalVector.allocateNew(); - BigDecimal[] values = new BigDecimal[intValues.length]; - for (int i = 0; i < intValues.length; i++) { - BigDecimal decimal = new BigDecimal(BigInteger.valueOf(intValues[i]), scale); - values[i] = decimal; - decimalVector.setSafe(i, decimal); - } - - decimalVector.setValueCount(intValues.length); - - for (int i = 0; i < intValues.length; i++) { - BigDecimal value = 
decimalVector.getObject(i); - assertEquals(values[i], value, "unexpected data at index: " + i); - } - } - } - - @Test - public void testDecimal256DifferentScaleAndPrecision() { - try (Decimal256Vector decimalVector = - TestUtils.newVector( - Decimal256Vector.class, "decimal", new ArrowType.Decimal(4, 2, 256), allocator)) { - decimalVector.allocateNew(); - - // test Decimal256 with different scale - { - BigDecimal decimal = new BigDecimal(BigInteger.valueOf(0), 3); - UnsupportedOperationException ue = - assertThrows( - UnsupportedOperationException.class, () -> decimalVector.setSafe(0, decimal)); - assertEquals( - "BigDecimal scale must equal that in the Arrow vector: 3 != 2", ue.getMessage()); - } - - // test BigDecimal with larger precision than initialized - { - BigDecimal decimal = new BigDecimal(BigInteger.valueOf(12345), 2); - UnsupportedOperationException ue = - assertThrows( - UnsupportedOperationException.class, () -> decimalVector.setSafe(0, decimal)); - assertEquals( - "BigDecimal precision cannot be greater than that in the Arrow vector: 5 > 4", - ue.getMessage()); - } - } - } - - @Test - public void testWriteBigEndian() { - try (Decimal256Vector decimalVector = - TestUtils.newVector( - Decimal256Vector.class, "decimal", new ArrowType.Decimal(38, 18, 256), allocator); ) { - decimalVector.allocateNew(); - BigDecimal decimal1 = new BigDecimal("123456789.000000000000000000"); - BigDecimal decimal2 = new BigDecimal("11.123456789123456789"); - BigDecimal decimal3 = new BigDecimal("1.000000000000000000"); - BigDecimal decimal4 = new BigDecimal("0.111111111000000000"); - BigDecimal decimal5 = new BigDecimal("987654321.123456789000000000"); - BigDecimal decimal6 = new BigDecimal("222222222222.222222222000000000"); - BigDecimal decimal7 = new BigDecimal("7777777777777.666666667000000000"); - BigDecimal decimal8 = new BigDecimal("1212121212.343434343000000000"); - - byte[] decimalValue1 = decimal1.unscaledValue().toByteArray(); - byte[] decimalValue2 = 
decimal2.unscaledValue().toByteArray(); - byte[] decimalValue3 = decimal3.unscaledValue().toByteArray(); - byte[] decimalValue4 = decimal4.unscaledValue().toByteArray(); - byte[] decimalValue5 = decimal5.unscaledValue().toByteArray(); - byte[] decimalValue6 = decimal6.unscaledValue().toByteArray(); - byte[] decimalValue7 = decimal7.unscaledValue().toByteArray(); - byte[] decimalValue8 = decimal8.unscaledValue().toByteArray(); - - decimalVector.setBigEndian(0, decimalValue1); - decimalVector.setBigEndian(1, decimalValue2); - decimalVector.setBigEndian(2, decimalValue3); - decimalVector.setBigEndian(3, decimalValue4); - decimalVector.setBigEndian(4, decimalValue5); - decimalVector.setBigEndian(5, decimalValue6); - decimalVector.setBigEndian(6, decimalValue7); - decimalVector.setBigEndian(7, decimalValue8); - - decimalVector.setValueCount(8); - assertEquals(8, decimalVector.getValueCount()); - assertEquals(decimal1, decimalVector.getObject(0)); - assertEquals(decimal2, decimalVector.getObject(1)); - assertEquals(decimal3, decimalVector.getObject(2)); - assertEquals(decimal4, decimalVector.getObject(3)); - assertEquals(decimal5, decimalVector.getObject(4)); - assertEquals(decimal6, decimalVector.getObject(5)); - assertEquals(decimal7, decimalVector.getObject(6)); - assertEquals(decimal8, decimalVector.getObject(7)); - } - } - - @Test - public void testLongReadWrite() { - try (Decimal256Vector decimalVector = - TestUtils.newVector( - Decimal256Vector.class, "decimal", new ArrowType.Decimal(38, 0, 256), allocator)) { - decimalVector.allocateNew(); - - long[] longValues = {0L, -2L, Long.MAX_VALUE, Long.MIN_VALUE, 187L}; - - for (int i = 0; i < longValues.length; ++i) { - decimalVector.set(i, longValues[i]); - } - - decimalVector.setValueCount(longValues.length); - - for (int i = 0; i < longValues.length; ++i) { - assertEquals(new BigDecimal(longValues[i]), decimalVector.getObject(i)); - } - } - } - - @Test - public void testBigDecimalReadWrite() { - try (Decimal256Vector 
decimalVector = - TestUtils.newVector( - Decimal256Vector.class, "decimal", new ArrowType.Decimal(38, 9, 256), allocator); ) { - decimalVector.allocateNew(); - BigDecimal decimal1 = new BigDecimal("123456789.000000000"); - BigDecimal decimal2 = new BigDecimal("11.123456789"); - BigDecimal decimal3 = new BigDecimal("1.000000000"); - BigDecimal decimal4 = new BigDecimal("-0.111111111"); - BigDecimal decimal5 = new BigDecimal("-987654321.123456789"); - BigDecimal decimal6 = new BigDecimal("-222222222222.222222222"); - BigDecimal decimal7 = new BigDecimal("7777777777777.666666667"); - BigDecimal decimal8 = new BigDecimal("1212121212.343434343"); - - decimalVector.set(0, decimal1); - decimalVector.set(1, decimal2); - decimalVector.set(2, decimal3); - decimalVector.set(3, decimal4); - decimalVector.set(4, decimal5); - decimalVector.set(5, decimal6); - decimalVector.set(6, decimal7); - decimalVector.set(7, decimal8); - - decimalVector.setValueCount(8); - assertEquals(8, decimalVector.getValueCount()); - assertEquals(decimal1, decimalVector.getObject(0)); - assertEquals(decimal2, decimalVector.getObject(1)); - assertEquals(decimal3, decimalVector.getObject(2)); - assertEquals(decimal4, decimalVector.getObject(3)); - assertEquals(decimal5, decimalVector.getObject(4)); - assertEquals(decimal6, decimalVector.getObject(5)); - assertEquals(decimal7, decimalVector.getObject(6)); - assertEquals(decimal8, decimalVector.getObject(7)); - } - } - - /** - * Test {@link Decimal256Vector#setBigEndian(int, byte[])} which takes BE layout input and stores - * in native-endian (NE) layout. Cases to cover: input byte array in different lengths in range - * [1-16] and negative values. 
- */ - @Test - public void decimalBE2NE() { - try (Decimal256Vector decimalVector = - TestUtils.newVector( - Decimal256Vector.class, "decimal", new ArrowType.Decimal(23, 2, 256), allocator)) { - decimalVector.allocateNew(); - - BigInteger[] testBigInts = - new BigInteger[] { - new BigInteger("0"), - new BigInteger("-1"), - new BigInteger("23"), - new BigInteger("234234"), - new BigInteger("-234234234"), - new BigInteger("234234234234"), - new BigInteger("-56345345345345"), - new BigInteger( - "2982346298346289346293467923465345634500"), // converts to 16+ byte array - new BigInteger( - "-389457298347598237459832459823434653600"), // converts to 16+ byte array - new BigInteger("-345345"), - new BigInteger("754533") - }; - - int insertionIdx = 0; - insertionIdx++; // insert a null - for (BigInteger val : testBigInts) { - decimalVector.setBigEndian(insertionIdx++, val.toByteArray()); - } - insertionIdx++; // insert a null - // insert a zero length buffer - decimalVector.setBigEndian(insertionIdx++, new byte[0]); - - // Try inserting a buffer larger than 33 bytes and expect a failure - final int insertionIdxCapture = insertionIdx; - IllegalArgumentException ex = - assertThrows( - IllegalArgumentException.class, - () -> decimalVector.setBigEndian(insertionIdxCapture, new byte[33])); - assertTrue( - ex.getMessage().equals("Invalid decimal value length. 
Valid length in [1 - 32], got 33")); - decimalVector.setValueCount(insertionIdx); - - // retrieve values and check if they are correct - int outputIdx = 0; - assertTrue(decimalVector.isNull(outputIdx++)); - for (BigInteger expected : testBigInts) { - final BigDecimal actual = decimalVector.getObject(outputIdx++); - assertEquals(expected, actual.unscaledValue()); - } - assertTrue(decimalVector.isNull(outputIdx++)); - assertEquals(BigInteger.valueOf(0), decimalVector.getObject(outputIdx).unscaledValue()); - } - } - - @Test - public void setUsingArrowBufOfLEInts() { - try (Decimal256Vector decimalVector = - TestUtils.newVector( - Decimal256Vector.class, "decimal", new ArrowType.Decimal(5, 2, 256), allocator); - ArrowBuf buf = allocator.buffer(8); ) { - decimalVector.allocateNew(); - - // add a positive value equivalent to 705.32 - int val = 70532; - buf.setInt(0, val); - decimalVector.setSafe(0, 0, buf, 4); - - // add a -ve value equivalent to -705.32 - val = -70532; - buf.setInt(4, val); - decimalVector.setSafe(1, 4, buf, 4); - - decimalVector.setValueCount(2); - - BigDecimal[] expectedValues = - new BigDecimal[] {BigDecimal.valueOf(705.32), BigDecimal.valueOf(-705.32)}; - for (int i = 0; i < 2; i++) { - BigDecimal value = decimalVector.getObject(i); - assertEquals(expectedValues[i], value); - } - } - } - - @Test - public void setUsingArrowLongLEBytes() { - try (Decimal256Vector decimalVector = - TestUtils.newVector( - Decimal256Vector.class, "decimal", new ArrowType.Decimal(18, 0, 256), allocator); - ArrowBuf buf = allocator.buffer(16); ) { - decimalVector.allocateNew(); - - long val = Long.MAX_VALUE; - buf.setLong(0, val); - decimalVector.setSafe(0, 0, buf, 8); - - val = Long.MIN_VALUE; - buf.setLong(8, val); - decimalVector.setSafe(1, 8, buf, 8); - - decimalVector.setValueCount(2); - - BigDecimal[] expectedValues = - new BigDecimal[] {BigDecimal.valueOf(Long.MAX_VALUE), BigDecimal.valueOf(Long.MIN_VALUE)}; - for (int i = 0; i < 2; i++) { - BigDecimal value = 
decimalVector.getObject(i); - assertEquals(expectedValues[i], value); - } - } - } - - @Test - public void setUsingArrowBufOfBEBytes() { - try (Decimal256Vector decimalVector = - TestUtils.newVector( - Decimal256Vector.class, "decimal", new ArrowType.Decimal(5, 2, 256), allocator); - ArrowBuf buf = allocator.buffer(9); ) { - BigDecimal[] expectedValues = - new BigDecimal[] { - BigDecimal.valueOf(705.32), BigDecimal.valueOf(-705.32), BigDecimal.valueOf(705.32) - }; - verifyWritingArrowBufWithBigEndianBytes(decimalVector, buf, expectedValues, 3); - } - - try (Decimal256Vector decimalVector = - TestUtils.newVector( - Decimal256Vector.class, "decimal", new ArrowType.Decimal(43, 2, 256), allocator); - ArrowBuf buf = allocator.buffer(45); ) { - BigDecimal[] expectedValues = - new BigDecimal[] { - new BigDecimal("29823462983462893462934679234653450000000.63"), - new BigDecimal("-2982346298346289346293467923465345.63"), - new BigDecimal("2982346298346289346293467923465345.63") - }; - verifyWritingArrowBufWithBigEndianBytes(decimalVector, buf, expectedValues, 15); - } - } - - @Test - public void testGetTransferPairWithField() { - final Decimal256Vector fromVector = new Decimal256Vector("decimal", allocator, 10, scale); - final TransferPair transferPair = fromVector.getTransferPair(fromVector.getField(), allocator); - final Decimal256Vector toVector = (Decimal256Vector) transferPair.getTo(); - // Field inside a new vector created by reusing a field should be the same in memory as the - // original field. 
- assertSame(fromVector.getField(), toVector.getField()); - } - - private void verifyWritingArrowBufWithBigEndianBytes( - Decimal256Vector decimalVector, ArrowBuf buf, BigDecimal[] expectedValues, int length) { - decimalVector.allocateNew(); - for (int i = 0; i < expectedValues.length; i++) { - byte[] bigEndianBytes = expectedValues[i].unscaledValue().toByteArray(); - buf.setBytes(length * i, bigEndianBytes, 0, bigEndianBytes.length); - decimalVector.setBigEndianSafe(i, length * i, buf, bigEndianBytes.length); - } - - decimalVector.setValueCount(3); - - for (int i = 0; i < expectedValues.length; i++) { - BigDecimal value = decimalVector.getObject(i); - assertEquals(expectedValues[i], value); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java deleted file mode 100644 index d5310bad0ea0e..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.math.BigDecimal; -import java.math.BigInteger; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestDecimalVector { - - private static long[] intValues; - - static { - intValues = new long[60]; - for (int i = 0; i < intValues.length / 2; i++) { - intValues[i] = 1L << (i + 1); - intValues[2 * i] = -1L * (1 << (i + 1)); - } - } - - private int scale = 3; - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testValuesWriteRead() { - try (DecimalVector decimalVector = - TestUtils.newVector( - DecimalVector.class, "decimal", new ArrowType.Decimal(10, scale, 128), allocator); ) { - - try (DecimalVector oldConstructor = new DecimalVector("decimal", allocator, 10, scale); ) { - assertEquals(decimalVector.getField().getType(), oldConstructor.getField().getType()); - } - - decimalVector.allocateNew(); - BigDecimal[] values = new BigDecimal[intValues.length]; - for (int i = 0; i < intValues.length; i++) { - BigDecimal decimal = new BigDecimal(BigInteger.valueOf(intValues[i]), scale); - values[i] = decimal; - decimalVector.setSafe(i, decimal); - } - - decimalVector.setValueCount(intValues.length); - - for (int i = 0; i < intValues.length; i++) { - BigDecimal value = decimalVector.getObject(i); - 
assertEquals(values[i], value, "unexpected data at index: " + i); - } - } - } - - @Test - public void testBigDecimalDifferentScaleAndPrecision() { - try (DecimalVector decimalVector = - TestUtils.newVector( - DecimalVector.class, "decimal", new ArrowType.Decimal(4, 2, 128), allocator); ) { - decimalVector.allocateNew(); - - // test BigDecimal with different scale - boolean hasError = false; - try { - BigDecimal decimal = new BigDecimal(BigInteger.valueOf(0), 3); - decimalVector.setSafe(0, decimal); - } catch (UnsupportedOperationException ue) { - hasError = true; - } finally { - assertTrue(hasError); - } - - // test BigDecimal with larger precision than initialized - hasError = false; - try { - BigDecimal decimal = new BigDecimal(BigInteger.valueOf(12345), 2); - decimalVector.setSafe(0, decimal); - } catch (UnsupportedOperationException ue) { - hasError = true; - } finally { - assertTrue(hasError); - } - } - } - - @Test - public void testWriteBigEndian() { - try (DecimalVector decimalVector = - TestUtils.newVector( - DecimalVector.class, "decimal", new ArrowType.Decimal(38, 9, 128), allocator); ) { - decimalVector.allocateNew(); - BigDecimal decimal1 = new BigDecimal("123456789.000000000"); - BigDecimal decimal2 = new BigDecimal("11.123456789"); - BigDecimal decimal3 = new BigDecimal("1.000000000"); - BigDecimal decimal4 = new BigDecimal("0.111111111"); - BigDecimal decimal5 = new BigDecimal("987654321.123456789"); - BigDecimal decimal6 = new BigDecimal("222222222222.222222222"); - BigDecimal decimal7 = new BigDecimal("7777777777777.666666667"); - BigDecimal decimal8 = new BigDecimal("1212121212.343434343"); - - byte[] decimalValue1 = decimal1.unscaledValue().toByteArray(); - byte[] decimalValue2 = decimal2.unscaledValue().toByteArray(); - byte[] decimalValue3 = decimal3.unscaledValue().toByteArray(); - byte[] decimalValue4 = decimal4.unscaledValue().toByteArray(); - byte[] decimalValue5 = decimal5.unscaledValue().toByteArray(); - byte[] decimalValue6 = 
decimal6.unscaledValue().toByteArray(); - byte[] decimalValue7 = decimal7.unscaledValue().toByteArray(); - byte[] decimalValue8 = decimal8.unscaledValue().toByteArray(); - - decimalVector.setBigEndian(0, decimalValue1); - decimalVector.setBigEndian(1, decimalValue2); - decimalVector.setBigEndian(2, decimalValue3); - decimalVector.setBigEndian(3, decimalValue4); - decimalVector.setBigEndian(4, decimalValue5); - decimalVector.setBigEndian(5, decimalValue6); - decimalVector.setBigEndian(6, decimalValue7); - decimalVector.setBigEndian(7, decimalValue8); - - decimalVector.setValueCount(8); - assertEquals(8, decimalVector.getValueCount()); - assertEquals(decimal1, decimalVector.getObject(0)); - assertEquals(decimal2, decimalVector.getObject(1)); - assertEquals(decimal3, decimalVector.getObject(2)); - assertEquals(decimal4, decimalVector.getObject(3)); - assertEquals(decimal5, decimalVector.getObject(4)); - assertEquals(decimal6, decimalVector.getObject(5)); - assertEquals(decimal7, decimalVector.getObject(6)); - assertEquals(decimal8, decimalVector.getObject(7)); - } - } - - @Test - public void testLongReadWrite() { - try (DecimalVector decimalVector = - TestUtils.newVector( - DecimalVector.class, "decimal", new ArrowType.Decimal(38, 0, 128), allocator)) { - decimalVector.allocateNew(); - - long[] longValues = {0L, -2L, Long.MAX_VALUE, Long.MIN_VALUE, 187L}; - - for (int i = 0; i < longValues.length; ++i) { - decimalVector.set(i, longValues[i]); - } - - decimalVector.setValueCount(longValues.length); - - for (int i = 0; i < longValues.length; ++i) { - assertEquals(new BigDecimal(longValues[i]), decimalVector.getObject(i)); - } - } - } - - @Test - public void testBigDecimalReadWrite() { - try (DecimalVector decimalVector = - TestUtils.newVector( - DecimalVector.class, "decimal", new ArrowType.Decimal(38, 9, 128), allocator); ) { - decimalVector.allocateNew(); - BigDecimal decimal1 = new BigDecimal("123456789.000000000"); - BigDecimal decimal2 = new 
BigDecimal("11.123456789"); - BigDecimal decimal3 = new BigDecimal("1.000000000"); - BigDecimal decimal4 = new BigDecimal("-0.111111111"); - BigDecimal decimal5 = new BigDecimal("-987654321.123456789"); - BigDecimal decimal6 = new BigDecimal("-222222222222.222222222"); - BigDecimal decimal7 = new BigDecimal("7777777777777.666666667"); - BigDecimal decimal8 = new BigDecimal("1212121212.343434343"); - - decimalVector.set(0, decimal1); - decimalVector.set(1, decimal2); - decimalVector.set(2, decimal3); - decimalVector.set(3, decimal4); - decimalVector.set(4, decimal5); - decimalVector.set(5, decimal6); - decimalVector.set(6, decimal7); - decimalVector.set(7, decimal8); - - decimalVector.setValueCount(8); - assertEquals(8, decimalVector.getValueCount()); - assertEquals(decimal1, decimalVector.getObject(0)); - assertEquals(decimal2, decimalVector.getObject(1)); - assertEquals(decimal3, decimalVector.getObject(2)); - assertEquals(decimal4, decimalVector.getObject(3)); - assertEquals(decimal5, decimalVector.getObject(4)); - assertEquals(decimal6, decimalVector.getObject(5)); - assertEquals(decimal7, decimalVector.getObject(6)); - assertEquals(decimal8, decimalVector.getObject(7)); - } - } - - /** - * Test {@link DecimalVector#setBigEndian(int, byte[])} which takes BE layout input and stores in - * native-endian (NE) layout. Cases to cover: input byte array in different lengths in range - * [1-16] and negative values. 
- */ - @Test - public void decimalBE2NE() { - try (DecimalVector decimalVector = - TestUtils.newVector( - DecimalVector.class, "decimal", new ArrowType.Decimal(21, 2, 128), allocator)) { - decimalVector.allocateNew(); - - BigInteger[] testBigInts = - new BigInteger[] { - new BigInteger("0"), - new BigInteger("-1"), - new BigInteger("23"), - new BigInteger("234234"), - new BigInteger("-234234234"), - new BigInteger("234234234234"), - new BigInteger("-56345345345345"), - new BigInteger("29823462983462893462934679234653456345"), // converts to 16 byte array - new BigInteger("-3894572983475982374598324598234346536"), // converts to 16 byte array - new BigInteger("-345345"), - new BigInteger("754533") - }; - - int insertionIdx = 0; - insertionIdx++; // insert a null - for (BigInteger val : testBigInts) { - decimalVector.setBigEndian(insertionIdx++, val.toByteArray()); - } - insertionIdx++; // insert a null - // insert a zero length buffer - decimalVector.setBigEndian(insertionIdx++, new byte[0]); - - // Try inserting a buffer larger than 16bytes and expect a failure - try { - decimalVector.setBigEndian(insertionIdx, new byte[17]); - fail("above statement should have failed"); - } catch (IllegalArgumentException ex) { - assertTrue( - ex.getMessage() - .equals("Invalid decimal value length. 
Valid length in [1 - 16], got 17")); - } - decimalVector.setValueCount(insertionIdx); - - // retrieve values and check if they are correct - int outputIdx = 0; - assertTrue(decimalVector.isNull(outputIdx++)); - for (BigInteger expected : testBigInts) { - final BigDecimal actual = decimalVector.getObject(outputIdx++); - assertEquals(expected, actual.unscaledValue()); - } - assertTrue(decimalVector.isNull(outputIdx++)); - assertEquals(BigInteger.valueOf(0), decimalVector.getObject(outputIdx).unscaledValue()); - } - } - - @Test - public void setUsingArrowBufOfInts() { - try (DecimalVector decimalVector = - TestUtils.newVector( - DecimalVector.class, "decimal", new ArrowType.Decimal(5, 2, 128), allocator); - ArrowBuf buf = allocator.buffer(8); ) { - decimalVector.allocateNew(); - - // add a positive value equivalent to 705.32 - int val = 70532; - buf.setInt(0, val); - decimalVector.setSafe(0, 0, buf, 4); - - // add a -ve value equivalent to -705.32 - val = -70532; - buf.setInt(4, val); - decimalVector.setSafe(1, 4, buf, 4); - - decimalVector.setValueCount(2); - - BigDecimal[] expectedValues = - new BigDecimal[] {BigDecimal.valueOf(705.32), BigDecimal.valueOf(-705.32)}; - for (int i = 0; i < 2; i++) { - BigDecimal value = decimalVector.getObject(i); - assertEquals(expectedValues[i], value); - } - } - } - - @Test - public void setUsingArrowLongBytes() { - try (DecimalVector decimalVector = - TestUtils.newVector( - DecimalVector.class, "decimal", new ArrowType.Decimal(18, 0, 128), allocator); - ArrowBuf buf = allocator.buffer(16); ) { - decimalVector.allocateNew(); - - long val = Long.MAX_VALUE; - buf.setLong(0, val); - decimalVector.setSafe(0, 0, buf, 8); - - val = Long.MIN_VALUE; - buf.setLong(8, val); - decimalVector.setSafe(1, 8, buf, 8); - - decimalVector.setValueCount(2); - - BigDecimal[] expectedValues = - new BigDecimal[] {BigDecimal.valueOf(Long.MAX_VALUE), BigDecimal.valueOf(Long.MIN_VALUE)}; - for (int i = 0; i < 2; i++) { - BigDecimal value = 
decimalVector.getObject(i); - assertEquals(expectedValues[i], value); - } - } - } - - @Test - public void setUsingArrowBufOfBEBytes() { - try (DecimalVector decimalVector = - TestUtils.newVector( - DecimalVector.class, "decimal", new ArrowType.Decimal(5, 2, 128), allocator); - ArrowBuf buf = allocator.buffer(9); ) { - BigDecimal[] expectedValues = - new BigDecimal[] { - BigDecimal.valueOf(705.32), BigDecimal.valueOf(-705.32), BigDecimal.valueOf(705.32) - }; - verifyWritingArrowBufWithBigEndianBytes(decimalVector, buf, expectedValues, 3); - } - - try (DecimalVector decimalVector = - TestUtils.newVector( - DecimalVector.class, "decimal", new ArrowType.Decimal(36, 2, 128), allocator); - ArrowBuf buf = allocator.buffer(45); ) { - BigDecimal[] expectedValues = - new BigDecimal[] { - new BigDecimal("2982346298346289346293467923465345.63"), - new BigDecimal("-2982346298346289346293467923465345.63"), - new BigDecimal("2982346298346289346293467923465345.63") - }; - verifyWritingArrowBufWithBigEndianBytes(decimalVector, buf, expectedValues, 15); - } - } - - @Test - public void testGetTransferPairWithField() { - final DecimalVector fromVector = new DecimalVector("decimal", allocator, 10, scale); - final TransferPair transferPair = fromVector.getTransferPair(fromVector.getField(), allocator); - final DecimalVector toVector = (DecimalVector) transferPair.getTo(); - // Field inside a new vector created by reusing a field should be the same in memory as the - // original field. 
- assertSame(fromVector.getField(), toVector.getField()); - } - - private void verifyWritingArrowBufWithBigEndianBytes( - DecimalVector decimalVector, ArrowBuf buf, BigDecimal[] expectedValues, int length) { - decimalVector.allocateNew(); - for (int i = 0; i < expectedValues.length; i++) { - byte[] bigEndianBytes = expectedValues[i].unscaledValue().toByteArray(); - buf.setBytes(length * i, bigEndianBytes, 0, bigEndianBytes.length); - decimalVector.setBigEndianSafe(i, length * i, buf, bigEndianBytes.length); - } - - decimalVector.setValueCount(3); - - for (int i = 0; i < expectedValues.length; i++) { - BigDecimal value = decimalVector.getObject(i); - assertEquals(expectedValues[i], value); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java deleted file mode 100644 index 9cd89d57ff074..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java +++ /dev/null @@ -1,709 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.VectorWithOrdinal; -import org.apache.arrow.vector.holders.NullableBigIntHolder; -import org.apache.arrow.vector.holders.NullableBitHolder; -import org.apache.arrow.vector.holders.NullableFloat4Holder; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.holders.NullableUInt4Holder; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.JsonStringHashMap; -import org.apache.arrow.vector.util.Text; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestDenseUnionVector { - private static final String EMPTY_SCHEMA_PATH = ""; - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testDenseUnionVector() throws Exception { - - final 
NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); - uInt4Holder.value = 100; - uInt4Holder.isSet = 1; - - try (DenseUnionVector unionVector = - new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) { - unionVector.allocateNew(); - - // write some data - byte uint4TypeId = - unionVector.registerNewTypeId(Field.nullable("", MinorType.UINT4.getType())); - unionVector.setTypeId(0, uint4TypeId); - unionVector.setSafe(0, uInt4Holder); - unionVector.setTypeId(2, uint4TypeId); - unionVector.setSafe(2, uInt4Holder); - unionVector.setValueCount(4); - - // check that what we wrote is correct - assertEquals(4, unionVector.getValueCount()); - - assertEquals(false, unionVector.isNull(0)); - assertEquals(100, unionVector.getObject(0)); - - assertNull(unionVector.getObject(1)); - - assertEquals(false, unionVector.isNull(2)); - assertEquals(100, unionVector.getObject(2)); - - assertNull(unionVector.getObject(3)); - } - } - - @Test - public void testSetOffset() { - try (DenseUnionVector duv = DenseUnionVector.empty("foo", allocator)) { - duv.allocateNew(); - byte i32TypeId = duv.registerNewTypeId(Field.notNullable("i32", MinorType.INT.getType())); - byte f64TypeId = duv.registerNewTypeId(Field.notNullable("f64", MinorType.FLOAT8.getType())); - - IntVector i32Vector = ((IntVector) duv.addVector(i32TypeId, new IntVector("i32", allocator))); - Float8Vector f64Vector = - ((Float8Vector) duv.addVector(f64TypeId, new Float8Vector("f64", allocator))); - - i32Vector.allocateNew(3); - f64Vector.allocateNew(1); - - duv.setTypeId(0, i32TypeId); - duv.setOffset(0, 0); - i32Vector.set(0, 42); - - duv.setTypeId(1, i32TypeId); - duv.setOffset(1, 1); - i32Vector.set(1, 43); - - duv.setTypeId(2, f64TypeId); - duv.setOffset(2, 0); - f64Vector.set(0, 3.14); - - duv.setTypeId(3, i32TypeId); - duv.setOffset(3, 2); - i32Vector.set(2, 44); - - duv.setValueCount(4); - - assertEquals(42, duv.getObject(0)); - assertEquals(43, duv.getObject(1)); - assertEquals(3.14, duv.getObject(2)); - 
assertEquals(44, duv.getObject(3)); - } - } - - @Test - public void testTransfer() throws Exception { - try (DenseUnionVector srcVector = - new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) { - srcVector.allocateNew(); - - // write some data - byte intTypeId = srcVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType())); - srcVector.setTypeId(0, intTypeId); - srcVector.setSafe(0, newIntHolder(5)); - byte bitTypeId = srcVector.registerNewTypeId(Field.nullable("", MinorType.BIT.getType())); - srcVector.setTypeId(1, bitTypeId); - srcVector.setSafe(1, newBitHolder(false)); - srcVector.setTypeId(3, intTypeId); - srcVector.setSafe(3, newIntHolder(10)); - srcVector.setTypeId(5, bitTypeId); - srcVector.setSafe(5, newBitHolder(false)); - srcVector.setValueCount(6); - - try (DenseUnionVector destVector = - new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) { - TransferPair pair = srcVector.makeTransferPair(destVector); - - // Creating the transfer should transfer the type of the field at least. 
- assertEquals(srcVector.getField(), destVector.getField()); - - // transfer - pair.transfer(); - - assertEquals(srcVector.getField(), destVector.getField()); - - // now check the values are transferred - assertEquals(6, destVector.getValueCount()); - - assertFalse(destVector.isNull(0)); - assertEquals(5, destVector.getObject(0)); - - assertFalse(destVector.isNull(1)); - assertEquals(false, destVector.getObject(1)); - - assertNull(destVector.getObject(2)); - - assertFalse(destVector.isNull(3)); - assertEquals(10, destVector.getObject(3)); - - assertNull(destVector.getObject(4)); - - assertFalse(destVector.isNull(5)); - assertEquals(false, destVector.getObject(5)); - } - } - } - - @Test - public void testSplitAndTransfer() throws Exception { - try (DenseUnionVector sourceVector = - new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) { - - sourceVector.allocateNew(); - - /* populate the UnionVector */ - byte intTypeId = sourceVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType())); - sourceVector.setTypeId(0, intTypeId); - sourceVector.setSafe(0, newIntHolder(5)); - sourceVector.setTypeId(1, intTypeId); - sourceVector.setSafe(1, newIntHolder(10)); - sourceVector.setTypeId(2, intTypeId); - sourceVector.setSafe(2, newIntHolder(15)); - sourceVector.setTypeId(3, intTypeId); - sourceVector.setSafe(3, newIntHolder(20)); - sourceVector.setTypeId(4, intTypeId); - sourceVector.setSafe(4, newIntHolder(25)); - sourceVector.setTypeId(5, intTypeId); - sourceVector.setSafe(5, newIntHolder(30)); - sourceVector.setTypeId(6, intTypeId); - sourceVector.setSafe(6, newIntHolder(35)); - sourceVector.setTypeId(7, intTypeId); - sourceVector.setSafe(7, newIntHolder(40)); - sourceVector.setTypeId(8, intTypeId); - sourceVector.setSafe(8, newIntHolder(45)); - sourceVector.setTypeId(9, intTypeId); - sourceVector.setSafe(9, newIntHolder(50)); - sourceVector.setValueCount(10); - - /* check the vector output */ - assertEquals(10, sourceVector.getValueCount()); - 
assertEquals(false, sourceVector.isNull(0)); - assertEquals(5, sourceVector.getObject(0)); - assertEquals(false, sourceVector.isNull(1)); - assertEquals(10, sourceVector.getObject(1)); - assertEquals(false, sourceVector.isNull(2)); - assertEquals(15, sourceVector.getObject(2)); - assertEquals(false, sourceVector.isNull(3)); - assertEquals(20, sourceVector.getObject(3)); - assertEquals(false, sourceVector.isNull(4)); - assertEquals(25, sourceVector.getObject(4)); - assertEquals(false, sourceVector.isNull(5)); - assertEquals(30, sourceVector.getObject(5)); - assertEquals(false, sourceVector.isNull(6)); - assertEquals(35, sourceVector.getObject(6)); - assertEquals(false, sourceVector.isNull(7)); - assertEquals(40, sourceVector.getObject(7)); - assertEquals(false, sourceVector.isNull(8)); - assertEquals(45, sourceVector.getObject(8)); - assertEquals(false, sourceVector.isNull(9)); - assertEquals(50, sourceVector.getObject(9)); - - try (DenseUnionVector toVector = - new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) { - toVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType())); - - final TransferPair transferPair = sourceVector.makeTransferPair(toVector); - - final int[][] transferLengths = {{0, 3}, {3, 1}, {4, 2}, {6, 1}, {7, 1}, {8, 2}}; - - for (final int[] transferLength : transferLengths) { - final int start = transferLength[0]; - final int length = transferLength[1]; - - transferPair.splitAndTransfer(start, length); - - /* check the toVector output after doing the splitAndTransfer */ - for (int i = 0; i < length; i++) { - assertEquals( - sourceVector.getObject(start + i), - toVector.getObject(i), - "Different data at indexes: " + (start + i) + "and " + i); - } - } - } - } - } - - @Test - public void testSplitAndTransferWithMixedVectors() throws Exception { - try (DenseUnionVector sourceVector = - new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) { - - sourceVector.allocateNew(); - - /* populate the UnionVector */ - byte 
intTypeId = sourceVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType())); - - sourceVector.setTypeId(0, intTypeId); - sourceVector.setSafe(0, newIntHolder(5)); - - byte float4TypeId = - sourceVector.registerNewTypeId(Field.nullable("", MinorType.FLOAT4.getType())); - - sourceVector.setTypeId(1, float4TypeId); - sourceVector.setSafe(1, newFloat4Holder(5.5f)); - - sourceVector.setTypeId(2, intTypeId); - sourceVector.setSafe(2, newIntHolder(10)); - - sourceVector.setTypeId(3, float4TypeId); - sourceVector.setSafe(3, newFloat4Holder(10.5f)); - - sourceVector.setTypeId(4, intTypeId); - sourceVector.setSafe(4, newIntHolder(15)); - - sourceVector.setTypeId(5, float4TypeId); - sourceVector.setSafe(5, newFloat4Holder(15.5f)); - - sourceVector.setTypeId(6, intTypeId); - sourceVector.setSafe(6, newIntHolder(20)); - - sourceVector.setTypeId(7, float4TypeId); - sourceVector.setSafe(7, newFloat4Holder(20.5f)); - - sourceVector.setTypeId(8, intTypeId); - sourceVector.setSafe(8, newIntHolder(30)); - - sourceVector.setTypeId(9, float4TypeId); - sourceVector.setSafe(9, newFloat4Holder(30.5f)); - sourceVector.setValueCount(10); - - /* check the vector output */ - assertEquals(10, sourceVector.getValueCount()); - assertEquals(false, sourceVector.isNull(0)); - assertEquals(5, sourceVector.getObject(0)); - assertEquals(false, sourceVector.isNull(1)); - assertEquals(5.5f, sourceVector.getObject(1)); - assertEquals(false, sourceVector.isNull(2)); - assertEquals(10, sourceVector.getObject(2)); - assertEquals(false, sourceVector.isNull(3)); - assertEquals(10.5f, sourceVector.getObject(3)); - assertEquals(false, sourceVector.isNull(4)); - assertEquals(15, sourceVector.getObject(4)); - assertEquals(false, sourceVector.isNull(5)); - assertEquals(15.5f, sourceVector.getObject(5)); - assertEquals(false, sourceVector.isNull(6)); - assertEquals(20, sourceVector.getObject(6)); - assertEquals(false, sourceVector.isNull(7)); - assertEquals(20.5f, sourceVector.getObject(7)); - 
assertEquals(false, sourceVector.isNull(8)); - assertEquals(30, sourceVector.getObject(8)); - assertEquals(false, sourceVector.isNull(9)); - assertEquals(30.5f, sourceVector.getObject(9)); - - try (DenseUnionVector toVector = - new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) { - toVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType())); - toVector.registerNewTypeId(Field.nullable("", MinorType.FLOAT4.getType())); - - final TransferPair transferPair = sourceVector.makeTransferPair(toVector); - - final int[][] transferLengths = {{0, 2}, {2, 1}, {3, 2}, {5, 3}, {8, 2}}; - - for (final int[] transferLength : transferLengths) { - final int start = transferLength[0]; - final int length = transferLength[1]; - - transferPair.splitAndTransfer(start, length); - - /* check the toVector output after doing the splitAndTransfer */ - for (int i = 0; i < length; i++) { - assertEquals( - sourceVector.getObject(start + i), - toVector.getObject(i), - "Different values at index: " + i); - } - } - } - } - } - - @Test - public void testSplitAndTransferDuvInStruct() { - try (StructVector struct = StructVector.empty("struct", allocator)) { - DenseUnionVector duv = - struct.addOrGet( - "duv", FieldType.notNullable(MinorType.DENSEUNION.getType()), DenseUnionVector.class); - byte i32TypeId = duv.registerNewTypeId(Field.notNullable("i32", MinorType.INT.getType())); - duv.addVector(i32TypeId, new IntVector("i32", allocator)); - - struct.setIndexDefined(0); - duv.setTypeId(0, i32TypeId); - duv.setSafe(0, newIntHolder(42)); - - struct.setNull(1); - struct.setValueCount(2); - - try (StructVector dest = StructVector.empty("dest", allocator)) { - TransferPair pair = struct.makeTransferPair(dest); - pair.splitAndTransfer(0, 2); - - assertEquals(2, dest.getValueCount()); - assertFalse(dest.isNull(0)); - assertEquals(42, dest.getObject(0).get("duv")); - assertTrue(dest.isNull(1)); - } - } - } - - @Test - public void testGetFieldTypeInfo() throws Exception { - Map metadata 
= new HashMap<>(); - metadata.put("key1", "value1"); - - int[] typeIds = new int[2]; - typeIds[0] = 0; - typeIds[1] = 1; - - List children = new ArrayList<>(); - children.add(new Field("int", FieldType.nullable(MinorType.INT.getType()), null)); - children.add(new Field("varchar", FieldType.nullable(MinorType.VARCHAR.getType()), null)); - - final FieldType fieldType = - new FieldType( - false, new ArrowType.Union(UnionMode.Dense, typeIds), /*dictionary=*/ null, metadata); - final Field field = new Field("union", fieldType, children); - - MinorType minorType = MinorType.DENSEUNION; - DenseUnionVector vector = (DenseUnionVector) minorType.getNewVector(field, allocator, null); - vector.initializeChildrenFromFields(children); - - assertEquals(vector.getField(), field); - - // Union has 2 child vectors - assertEquals(2, vector.size()); - - // Check child field 0 - VectorWithOrdinal intChild = vector.getChildVectorWithOrdinal("int"); - assertEquals(0, intChild.ordinal); - assertEquals(intChild.vector.getField(), children.get(0)); - - // Check child field 1 - VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar"); - assertEquals(1, varcharChild.ordinal); - assertEquals(varcharChild.vector.getField(), children.get(1)); - } - - @Test - public void testGetBufferAddress() throws Exception { - try (DenseUnionVector vector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) { - boolean error = false; - - vector.allocateNew(); - - /* populate the UnionVector */ - byte intTypeId = vector.registerNewTypeId(Field.nullable("", MinorType.INT.getType())); - vector.setTypeId(0, intTypeId); - vector.setSafe(0, newIntHolder(5)); - - byte float4TypeId = vector.registerNewTypeId(Field.nullable("", MinorType.INT.getType())); - vector.setTypeId(1, float4TypeId); - vector.setSafe(1, newFloat4Holder(5.5f)); - - vector.setTypeId(2, intTypeId); - vector.setSafe(2, newIntHolder(10)); - - vector.setTypeId(3, float4TypeId); - vector.setSafe(3, 
newFloat4Holder(10.5f)); - - vector.setValueCount(10); - - /* check the vector output */ - assertEquals(10, vector.getValueCount()); - assertEquals(false, vector.isNull(0)); - assertEquals(5, vector.getObject(0)); - assertEquals(false, vector.isNull(1)); - assertEquals(5.5f, vector.getObject(1)); - assertEquals(false, vector.isNull(2)); - assertEquals(10, vector.getObject(2)); - assertEquals(false, vector.isNull(3)); - assertEquals(10.5f, vector.getObject(3)); - - List buffers = vector.getFieldBuffers(); - - long offsetAddress = vector.getOffsetBufferAddress(); - - try { - vector.getDataBufferAddress(); - } catch (UnsupportedOperationException ue) { - error = true; - } finally { - assertTrue(error); - } - - assertEquals(2, buffers.size()); - assertEquals(offsetAddress, buffers.get(1).memoryAddress()); - } - } - - /** Test adding two struct vectors to the dense union vector. */ - @Test - public void testMultipleStructs() { - FieldType type = new FieldType(true, ArrowType.Struct.INSTANCE, null, null); - try (StructVector structVector1 = new StructVector("struct1", allocator, type, null); - StructVector structVector2 = new StructVector("struct2", allocator, type, null); - DenseUnionVector unionVector = DenseUnionVector.empty("union", allocator)) { - - // prepare sub vectors - - // first struct vector: (int, int) - IntVector subVector11 = - structVector1.addOrGet( - "sub11", FieldType.nullable(MinorType.INT.getType()), IntVector.class); - subVector11.allocateNew(); - ValueVectorDataPopulator.setVector(subVector11, 0, 1); - - IntVector subVector12 = - structVector1.addOrGet( - "sub12", FieldType.nullable(MinorType.INT.getType()), IntVector.class); - subVector12.allocateNew(); - ValueVectorDataPopulator.setVector(subVector12, 0, 10); - - structVector1.setIndexDefined(0); - structVector1.setIndexDefined(1); - structVector1.setValueCount(2); - - // second struct vector: (string, string) - VarCharVector subVector21 = - structVector2.addOrGet( - "sub21", 
FieldType.nullable(MinorType.VARCHAR.getType()), VarCharVector.class); - subVector21.allocateNew(); - ValueVectorDataPopulator.setVector(subVector21, "a0"); - - VarCharVector subVector22 = - structVector2.addOrGet( - "sub22", FieldType.nullable(MinorType.VARCHAR.getType()), VarCharVector.class); - subVector22.allocateNew(); - ValueVectorDataPopulator.setVector(subVector22, "b0"); - - structVector2.setIndexDefined(0); - structVector2.setValueCount(1); - - // register relative types - byte typeId1 = unionVector.registerNewTypeId(structVector1.getField()); - byte typeId2 = unionVector.registerNewTypeId(structVector2.getField()); - assertEquals(0, typeId1); - assertEquals(1, typeId2); - - // add two struct vectors to union vector - unionVector.addVector(typeId1, structVector1); - unionVector.addVector(typeId2, structVector2); - - while (unionVector.getValueCapacity() < 3) { - unionVector.reAlloc(); - } - - ArrowBuf offsetBuf = unionVector.getOffsetBuffer(); - - unionVector.setTypeId(0, typeId1); - offsetBuf.setInt(0, 0); - - unionVector.setTypeId(1, typeId2); - offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH, 0); - - unionVector.setTypeId(2, typeId1); - offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH * 2, 1); - - unionVector.setValueCount(3); - - Map value0 = new JsonStringHashMap<>(); - value0.put("sub11", 0); - value0.put("sub12", 0); - - assertEquals(value0, unionVector.getObject(0)); - - Map value1 = new JsonStringHashMap<>(); - value1.put("sub21", new Text("a0")); - value1.put("sub22", new Text("b0")); - - assertEquals(value1, unionVector.getObject(1)); - - Map value2 = new JsonStringHashMap<>(); - value2.put("sub11", 1); - value2.put("sub12", 10); - - assertEquals(value2, unionVector.getObject(2)); - } - } - - /** Test adding two varchar vectors to the dense union vector. 
*/ - @Test - public void testMultipleVarChars() { - try (VarCharVector childVector1 = new VarCharVector("child1", allocator); - VarCharVector childVector2 = new VarCharVector("child2", allocator); - DenseUnionVector unionVector = DenseUnionVector.empty("union", allocator)) { - - // prepare sub vectors - ValueVectorDataPopulator.setVector(childVector1, "a0", "a4"); - ValueVectorDataPopulator.setVector(childVector2, "b1", "b2"); - - // register relative types - byte typeId1 = unionVector.registerNewTypeId(childVector1.getField()); - byte typeId2 = unionVector.registerNewTypeId(childVector2.getField()); - - assertEquals(0, typeId1); - assertEquals(1, typeId2); - - while (unionVector.getValueCapacity() < 5) { - unionVector.reAlloc(); - } - - // add two struct vectors to union vector - unionVector.addVector(typeId1, childVector1); - unionVector.addVector(typeId2, childVector2); - - ArrowBuf offsetBuf = unionVector.getOffsetBuffer(); - - // slot 0 points to child1 - unionVector.setTypeId(0, typeId1); - offsetBuf.setInt(0, 0); - - // slot 1 points to child2 - unionVector.setTypeId(1, typeId2); - offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH, 0); - - // slot 2 points to child2 - unionVector.setTypeId(2, typeId2); - offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH * 2, 1); - - // slot 4 points to child1 - unionVector.setTypeId(4, typeId1); - offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH * 4, 1); - - unionVector.setValueCount(5); - - assertEquals(new Text("a0"), unionVector.getObject(0)); - assertEquals(new Text("b1"), unionVector.getObject(1)); - assertEquals(new Text("b2"), unionVector.getObject(2)); - assertNull(unionVector.getObject(3)); - assertEquals(new Text("a4"), unionVector.getObject(4)); - } - } - - @Test - public void testChildVectorValueCounts() { - final NullableIntHolder intHolder = new NullableIntHolder(); - intHolder.isSet = 1; - - final NullableBigIntHolder longHolder = new NullableBigIntHolder(); - longHolder.isSet = 1; - - final NullableFloat4Holder 
floatHolder = new NullableFloat4Holder(); - floatHolder.isSet = 1; - - try (DenseUnionVector vector = new DenseUnionVector("vector", allocator, null, null)) { - vector.allocateNew(); - - // populate the delta vector with values {7, null, 8L, 9.0f, 10, 12L} - while (vector.getValueCapacity() < 6) { - vector.reAlloc(); - } - byte intTypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType())); - vector.setTypeId(0, intTypeId); - intHolder.value = 7; - vector.setSafe(0, intHolder); - byte longTypeId = - vector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType())); - vector.setTypeId(2, longTypeId); - longHolder.value = 8L; - vector.setSafe(2, longHolder); - byte floatTypeId = - vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType())); - vector.setTypeId(3, floatTypeId); - floatHolder.value = 9.0f; - vector.setSafe(3, floatHolder); - - vector.setTypeId(4, intTypeId); - intHolder.value = 10; - vector.setSafe(4, intHolder); - vector.setTypeId(5, longTypeId); - longHolder.value = 12L; - vector.setSafe(5, longHolder); - - vector.setValueCount(6); - - // verify results - IntVector intVector = (IntVector) vector.getVectorByType(intTypeId); - assertEquals(2, intVector.getValueCount()); - assertEquals(7, intVector.get(0)); - assertEquals(10, intVector.get(1)); - - BigIntVector longVector = (BigIntVector) vector.getVectorByType(longTypeId); - assertEquals(2, longVector.getValueCount()); - assertEquals(8L, longVector.get(0)); - assertEquals(12L, longVector.get(1)); - - Float4Vector floatVector = (Float4Vector) vector.getVectorByType(floatTypeId); - assertEquals(1, floatVector.getValueCount()); - assertEquals(9.0f, floatVector.get(0), 0); - } - } - - private static NullableIntHolder newIntHolder(int value) { - final NullableIntHolder holder = new NullableIntHolder(); - holder.isSet = 1; - holder.value = value; - return holder; - } - - private static NullableBitHolder newBitHolder(boolean value) { - final 
NullableBitHolder holder = new NullableBitHolder(); - holder.isSet = 1; - holder.value = value ? 1 : 0; - return holder; - } - - private static NullableFloat4Holder newFloat4Holder(float value) { - final NullableFloat4Holder holder = new NullableFloat4Holder(); - holder.isSet = 1; - holder.value = value; - return holder; - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java deleted file mode 100644 index d65047efb192b..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java +++ /dev/null @@ -1,1246 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.TestUtils.*; -import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.function.ToIntBiFunction; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.impl.NullableStructWriter; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryEncoder; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.dictionary.ListSubfieldEncoder; -import org.apache.arrow.vector.dictionary.StructSubfieldEncoder; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.holders.NullableUInt4Holder; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.Text; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestDictionaryVector { - - private 
BufferAllocator allocator; - - byte[] zero = "foo".getBytes(StandardCharsets.UTF_8); - byte[] one = "bar".getBytes(StandardCharsets.UTF_8); - byte[] two = "baz".getBytes(StandardCharsets.UTF_8); - - byte[][] data = new byte[][] {zero, one, two}; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testEncodeStrings() { - // Create a new value vector - try (final VarCharVector vector = newVarCharVector("foo", allocator); - final VarCharVector dictionaryVector = newVarCharVector("dict", allocator); ) { - - setVector(vector, zero, one, one, two, zero); - setVector(dictionaryVector, zero, one, two); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - - try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) { - // verify indices - assertEquals(IntVector.class, encoded.getClass()); - - IntVector index = ((IntVector) encoded); - assertEquals(5, index.getValueCount()); - assertEquals(0, index.get(0)); - assertEquals(1, index.get(1)); - assertEquals(1, index.get(2)); - assertEquals(2, index.get(3)); - assertEquals(0, index.get(4)); - - // now run through the decoder and verify we get the original back - try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), ((VarCharVector) decoded).getValueCount()); - for (int i = 0; i < 5; i++) { - assertEquals(vector.getObject(i), ((VarCharVector) decoded).getObject(i)); - } - } - } - } - } - - @Test - public void testEncodeLargeVector() { - // Create a new value vector - try (final VarCharVector vector = newVarCharVector("foo", allocator); - final VarCharVector dictionaryVector = newVarCharVector("dict", allocator); ) { - vector.allocateNew(); - - int count = 10000; - - for (int i = 
0; i < 10000; ++i) { - vector.setSafe(i, data[i % 3], 0, data[i % 3].length); - } - vector.setValueCount(count); - - setVector(dictionaryVector, zero, one, two); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - - try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) { - // verify indices - assertEquals(IntVector.class, encoded.getClass()); - - IntVector index = ((IntVector) encoded); - assertEquals(count, index.getValueCount()); - for (int i = 0; i < count; ++i) { - assertEquals(i % 3, index.get(i)); - } - - // now run through the decoder and verify we get the original back - try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < count; ++i) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - } - - @Test - public void testEncodeList() { - // Create a new value vector - try (final ListVector vector = ListVector.empty("vector", allocator); - final ListVector dictionaryVector = ListVector.empty("dict", allocator); ) { - - UnionListWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writeListVector(writer, new int[] {10, 20}); - writeListVector(writer, new int[] {10, 20}); - writeListVector(writer, new int[] {10, 20}); - writeListVector(writer, new int[] {30, 40, 50}); - writeListVector(writer, new int[] {30, 40, 50}); - writeListVector(writer, new int[] {10, 20}); - - writer.setValueCount(6); - - UnionListWriter dictWriter = dictionaryVector.getWriter(); - dictWriter.allocate(); - - writeListVector(dictWriter, new int[] {10, 20}); - writeListVector(dictWriter, new int[] {30, 40, 50}); - - dictWriter.setValueCount(2); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - - try (final ValueVector encoded = 
DictionaryEncoder.encode(vector, dictionary)) { - // verify indices - assertEquals(IntVector.class, encoded.getClass()); - - IntVector index = ((IntVector) encoded); - assertEquals(6, index.getValueCount()); - assertEquals(0, index.get(0)); - assertEquals(0, index.get(1)); - assertEquals(0, index.get(2)); - assertEquals(1, index.get(3)); - assertEquals(1, index.get(4)); - assertEquals(0, index.get(5)); - - // now run through the decoder and verify we get the original back - try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - } - - @Test - public void testEncodeStruct() { - // Create a new value vector - try (final StructVector vector = StructVector.empty("vector", allocator); - final StructVector dictionaryVector = StructVector.empty("dict", allocator); ) { - vector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - dictionaryVector.addOrGet( - "f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - dictionaryVector.addOrGet( - "f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - - NullableStructWriter writer = vector.getWriter(); - writer.allocate(); - - writeStructVector(writer, 1, 10L); - writeStructVector(writer, 1, 10L); - writeStructVector(writer, 1, 10L); - writeStructVector(writer, 2, 20L); - writeStructVector(writer, 2, 20L); - writeStructVector(writer, 2, 20L); - writeStructVector(writer, 1, 10L); - - writer.setValueCount(7); - - NullableStructWriter dictWriter = dictionaryVector.getWriter(); - dictWriter.allocate(); - - writeStructVector(dictWriter, 1, 10L); - writeStructVector(dictWriter, 2, 20L); - - dictionaryVector.setValueCount(2); - 
- Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - - try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) { - // verify indices - assertEquals(IntVector.class, encoded.getClass()); - - IntVector index = ((IntVector) encoded); - assertEquals(7, index.getValueCount()); - assertEquals(0, index.get(0)); - assertEquals(0, index.get(1)); - assertEquals(0, index.get(2)); - assertEquals(1, index.get(3)); - assertEquals(1, index.get(4)); - assertEquals(1, index.get(5)); - assertEquals(0, index.get(6)); - - // now run through the decoder and verify we get the original back - try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - } - - @Test - public void testEncodeBinaryVector() { - // Create a new value vector - try (final VarBinaryVector vector = newVarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = newVarBinaryVector("dict", allocator)) { - - setVector(vector, zero, one, one, two, zero); - setVector(dictionaryVector, zero, one, two); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - - try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) { - // verify indices - assertEquals(IntVector.class, encoded.getClass()); - - IntVector index = ((IntVector) encoded); - assertEquals(5, index.getValueCount()); - assertEquals(0, index.get(0)); - assertEquals(1, index.get(1)); - assertEquals(1, index.get(2)); - assertEquals(2, index.get(3)); - assertEquals(0, index.get(4)); - - // now run through the decoder and verify we get the original back - try (VarBinaryVector decoded = - (VarBinaryVector) DictionaryEncoder.decode(encoded, dictionary)) { - 
assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertTrue(Arrays.equals(vector.getObject(i), decoded.getObject(i))); - } - } - } - } - } - - @Test - public void testEncodeUnion() { - // Create a new value vector - try (final UnionVector vector = - new UnionVector("vector", allocator, /* field type */ null, /* call-back */ null); - final UnionVector dictionaryVector = - new UnionVector("dict", allocator, /* field type */ null, /* call-back */ null); ) { - - final NullableUInt4Holder uintHolder1 = new NullableUInt4Holder(); - uintHolder1.value = 10; - uintHolder1.isSet = 1; - - final NullableIntHolder intHolder1 = new NullableIntHolder(); - intHolder1.value = 10; - intHolder1.isSet = 1; - - final NullableIntHolder intHolder2 = new NullableIntHolder(); - intHolder2.value = 20; - intHolder2.isSet = 1; - - // write data - vector.setType(0, Types.MinorType.UINT4); - vector.setSafe(0, uintHolder1); - - vector.setType(1, Types.MinorType.INT); - vector.setSafe(1, intHolder1); - - vector.setType(2, Types.MinorType.INT); - vector.setSafe(2, intHolder1); - - vector.setType(3, Types.MinorType.INT); - vector.setSafe(3, intHolder2); - - vector.setType(4, Types.MinorType.INT); - vector.setSafe(4, intHolder2); - - vector.setValueCount(5); - - // write dictionary - dictionaryVector.setType(0, Types.MinorType.UINT4); - dictionaryVector.setSafe(0, uintHolder1); - - dictionaryVector.setType(1, Types.MinorType.INT); - dictionaryVector.setSafe(1, intHolder1); - - dictionaryVector.setType(2, Types.MinorType.INT); - dictionaryVector.setSafe(2, intHolder2); - - dictionaryVector.setValueCount(3); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - - try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) { - // verify indices - assertEquals(IntVector.class, encoded.getClass()); - - IntVector index = ((IntVector) 
encoded); - assertEquals(5, index.getValueCount()); - assertEquals(0, index.get(0)); - assertEquals(1, index.get(1)); - assertEquals(1, index.get(2)); - assertEquals(2, index.get(3)); - assertEquals(2, index.get(4)); - - // now run through the decoder and verify we get the original back - try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - } - - @Test - public void testIntEquals() { - // test Int - try (final IntVector vector1 = new IntVector("int", allocator); - final IntVector vector2 = new IntVector("int", allocator)) { - - Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); - Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); - - setVector(vector1, 1, 2, 3); - setVector(vector2, 1, 2, 0); - - assertFalse(dict1.equals(dict2)); - - vector2.setSafe(2, 3); - assertTrue(dict1.equals(dict2)); - } - } - - @Test - public void testVarcharEquals() { - try (final VarCharVector vector1 = new VarCharVector("varchar", allocator); - final VarCharVector vector2 = new VarCharVector("varchar", allocator)) { - - Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); - Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); - - setVector(vector1, zero, one, two); - setVector(vector2, zero, one, one); - - assertFalse(dict1.equals(dict2)); - - vector2.setSafe(2, two, 0, two.length); - assertTrue(dict1.equals(dict2)); - } - } - - @Test - public void testVarBinaryEquals() { - try (final VarBinaryVector vector1 = new VarBinaryVector("binary", allocator); - final VarBinaryVector vector2 = new VarBinaryVector("binary", allocator)) { - - Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, 
null)); - Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); - - setVector(vector1, zero, one, two); - setVector(vector2, zero, one, one); - - assertFalse(dict1.equals(dict2)); - - vector2.setSafe(2, two, 0, two.length); - assertTrue(dict1.equals(dict2)); - } - } - - @Test - public void testListEquals() { - try (final ListVector vector1 = ListVector.empty("list", allocator); - final ListVector vector2 = ListVector.empty("list", allocator); ) { - - Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); - Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); - - UnionListWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - // set some values - writeListVector(writer1, new int[] {1, 2}); - writeListVector(writer1, new int[] {3, 4}); - writeListVector(writer1, new int[] {5, 6}); - writer1.setValueCount(3); - - UnionListWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - // set some values - writeListVector(writer2, new int[] {1, 2}); - writeListVector(writer2, new int[] {3, 4}); - writeListVector(writer2, new int[] {5, 6}); - writer2.setValueCount(3); - - assertTrue(dict1.equals(dict2)); - } - } - - @Test - public void testStructEquals() { - try (final StructVector vector1 = StructVector.empty("struct", allocator); - final StructVector vector2 = StructVector.empty("struct", allocator); ) { - vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - - Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); - Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); - - NullableStructWriter 
writer1 = vector1.getWriter(); - writer1.allocate(); - - writeStructVector(writer1, 1, 10L); - writeStructVector(writer1, 2, 20L); - writer1.setValueCount(2); - - NullableStructWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - writeStructVector(writer2, 1, 10L); - writeStructVector(writer2, 2, 20L); - writer2.setValueCount(2); - - assertTrue(dict1.equals(dict2)); - } - } - - @Test - public void testUnionEquals() { - try (final UnionVector vector1 = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null); - final UnionVector vector2 = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null); ) { - - final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); - uInt4Holder.value = 10; - uInt4Holder.isSet = 1; - - final NullableIntHolder intHolder = new NullableIntHolder(); - uInt4Holder.value = 20; - uInt4Holder.isSet = 1; - - vector1.setType(0, Types.MinorType.UINT4); - vector1.setSafe(0, uInt4Holder); - - vector1.setType(2, Types.MinorType.INT); - vector1.setSafe(2, intHolder); - vector1.setValueCount(3); - - vector2.setType(0, Types.MinorType.UINT4); - vector2.setSafe(0, uInt4Holder); - - vector2.setType(2, Types.MinorType.INT); - vector2.setSafe(2, intHolder); - vector2.setValueCount(3); - - Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); - Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); - - assertTrue(dict1.equals(dict2)); - } - } - - @Test - public void testEncodeWithEncoderInstance() { - // Create a new value vector - try (final VarCharVector vector = newVarCharVector("vector", allocator); - final VarCharVector dictionaryVector = newVarCharVector("dict", allocator); ) { - - setVector(vector, zero, one, one, two, zero); - setVector(dictionaryVector, zero, one, two); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - DictionaryEncoder encoder = new 
DictionaryEncoder(dictionary, allocator); - - try (final ValueVector encoded = encoder.encode(vector)) { - // verify indices - assertEquals(IntVector.class, encoded.getClass()); - - IntVector index = ((IntVector) encoded); - assertEquals(5, index.getValueCount()); - assertEquals(0, index.get(0)); - assertEquals(1, index.get(1)); - assertEquals(1, index.get(2)); - assertEquals(2, index.get(3)); - assertEquals(0, index.get(4)); - - // now run through the decoder and verify we get the original back - try (ValueVector decoded = encoder.decode(encoded)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertEquals(vector.getObject(i), ((VarCharVector) decoded).getObject(i)); - } - } - } - } - } - - @Test - public void testEncodeMultiVectors() { - // Create a new value vector - try (final VarCharVector vector1 = newVarCharVector("vector1", allocator); - final VarCharVector vector2 = newVarCharVector("vector2", allocator); - final VarCharVector dictionaryVector = newVarCharVector("dict", allocator); ) { - - setVector(vector1, zero, one, one, two, zero); - setVector(vector2, zero, one, one); - setVector(dictionaryVector, zero, one, two); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - DictionaryEncoder encoder = new DictionaryEncoder(dictionary, allocator); - - try (final ValueVector encoded = encoder.encode(vector1)) { - // verify indices - assertEquals(IntVector.class, encoded.getClass()); - - IntVector index = ((IntVector) encoded); - assertEquals(5, index.getValueCount()); - assertEquals(0, index.get(0)); - assertEquals(1, index.get(1)); - assertEquals(1, index.get(2)); - assertEquals(2, index.get(3)); - assertEquals(0, index.get(4)); - - // now run through the decoder and verify we get the original back - try (ValueVector decoded = encoder.decode(encoded)) { - assertEquals(vector1.getClass(), 
decoded.getClass()); - assertEquals(vector1.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertEquals(vector1.getObject(i), ((VarCharVector) decoded).getObject(i)); - } - } - } - - try (final ValueVector encoded = encoder.encode(vector2)) { - // verify indices - assertEquals(IntVector.class, encoded.getClass()); - - IntVector index = ((IntVector) encoded); - assertEquals(3, index.getValueCount()); - assertEquals(0, index.get(0)); - assertEquals(1, index.get(1)); - assertEquals(1, index.get(2)); - - // now run through the decoder and verify we get the original back - try (ValueVector decoded = encoder.decode(encoded)) { - assertEquals(vector2.getClass(), decoded.getClass()); - assertEquals(vector2.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 3; i++) { - assertEquals(vector2.getObject(i), ((VarCharVector) decoded).getObject(i)); - } - } - } - } - } - - @Test - public void testEncodeListSubField() { - // Create a new value vector - try (final ListVector vector = ListVector.empty("vector", allocator); - final ListVector dictionaryVector = ListVector.empty("dict", allocator); ) { - - UnionListWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writeListVector(writer, new int[] {10, 20}); - writeListVector(writer, new int[] {10, 20}); - writeListVector(writer, new int[] {10, 20}); - writeListVector(writer, new int[] {30, 40, 50}); - writeListVector(writer, new int[] {30, 40, 50}); - writeListVector(writer, new int[] {10, 20}); - writer.setValueCount(6); - - UnionListWriter dictWriter = dictionaryVector.getWriter(); - dictWriter.allocate(); - writeListVector(dictWriter, new int[] {10, 20, 30, 40, 50}); - dictionaryVector.setValueCount(1); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - ListSubfieldEncoder encoder = new ListSubfieldEncoder(dictionary, allocator); - - try (final ListVector encoded = (ListVector) 
encoder.encodeListSubField(vector)) { - // verify indices - assertEquals(ListVector.class, encoded.getClass()); - - assertEquals(6, encoded.getValueCount()); - int[] realValue1 = convertListToIntArray(encoded.getObject(0)); - assertTrue(Arrays.equals(new int[] {0, 1}, realValue1)); - int[] realValue2 = convertListToIntArray(encoded.getObject(1)); - assertTrue(Arrays.equals(new int[] {0, 1}, realValue2)); - int[] realValue3 = convertListToIntArray(encoded.getObject(2)); - assertTrue(Arrays.equals(new int[] {0, 1}, realValue3)); - int[] realValue4 = convertListToIntArray(encoded.getObject(3)); - assertTrue(Arrays.equals(new int[] {2, 3, 4}, realValue4)); - int[] realValue5 = convertListToIntArray(encoded.getObject(4)); - assertTrue(Arrays.equals(new int[] {2, 3, 4}, realValue5)); - int[] realValue6 = convertListToIntArray(encoded.getObject(5)); - assertTrue(Arrays.equals(new int[] {0, 1}, realValue6)); - - // now run through the decoder and verify we get the original back - try (ValueVector decoded = encoder.decodeListSubField(encoded)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - } - - @Test - public void testEncodeFixedSizeListSubField() { - // Create a new value vector - try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", 2, allocator); - final FixedSizeListVector dictionaryVector = - FixedSizeListVector.empty("dict", 2, allocator)) { - - vector.allocateNew(); - vector.setValueCount(4); - - IntVector dataVector = - (IntVector) - vector.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())).getVector(); - dataVector.allocateNew(8); - dataVector.setValueCount(8); - // set value at index 0 - vector.setNotNull(0); - dataVector.set(0, 10); - dataVector.set(1, 20); - // set value at index 1 - vector.setNotNull(1); - dataVector.set(2, 10); - 
dataVector.set(3, 20); - // set value at index 2 - vector.setNotNull(2); - dataVector.set(4, 30); - dataVector.set(5, 40); - // set value at index 3 - vector.setNotNull(3); - dataVector.set(6, 10); - dataVector.set(7, 20); - - dictionaryVector.allocateNew(); - dictionaryVector.setValueCount(2); - IntVector dictDataVector = - (IntVector) - dictionaryVector - .addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())) - .getVector(); - dictDataVector.allocateNew(4); - dictDataVector.setValueCount(4); - - dictionaryVector.setNotNull(0); - dictDataVector.set(0, 10); - dictDataVector.set(1, 20); - dictionaryVector.setNotNull(1); - dictDataVector.set(2, 30); - dictDataVector.set(3, 40); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - ListSubfieldEncoder encoder = new ListSubfieldEncoder(dictionary, allocator); - - try (final FixedSizeListVector encoded = - (FixedSizeListVector) encoder.encodeListSubField(vector)) { - // verify indices - assertEquals(FixedSizeListVector.class, encoded.getClass()); - - assertEquals(4, encoded.getValueCount()); - int[] realValue1 = convertListToIntArray(encoded.getObject(0)); - assertTrue(Arrays.equals(new int[] {0, 1}, realValue1)); - int[] realValue2 = convertListToIntArray(encoded.getObject(1)); - assertTrue(Arrays.equals(new int[] {0, 1}, realValue2)); - int[] realValue3 = convertListToIntArray(encoded.getObject(2)); - assertTrue(Arrays.equals(new int[] {2, 3}, realValue3)); - int[] realValue4 = convertListToIntArray(encoded.getObject(3)); - assertTrue(Arrays.equals(new int[] {0, 1}, realValue4)); - - // now run through the decoder and verify we get the original back - try (ValueVector decoded = encoder.decodeListSubField(encoded)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - } - - @Test - 
public void testEncodeStructSubField() { - try (final StructVector vector = StructVector.empty("vector", allocator); - final VarCharVector dictVector1 = new VarCharVector("f0", allocator); - final VarCharVector dictVector2 = new VarCharVector("f1", allocator)) { - - vector.addOrGet("f0", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class); - vector.addOrGet("f1", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class); - - NullableStructWriter writer = vector.getWriter(); - writer.allocate(); - // set some values - writeStructVector(writer, "aa", "baz"); - writeStructVector(writer, "bb", "bar"); - writeStructVector(writer, "cc", "foo"); - writeStructVector(writer, "aa", "foo"); - writeStructVector(writer, "dd", "foo"); - writer.setValueCount(5); - - // initialize dictionaries - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - - setVector( - dictVector1, - "aa".getBytes(StandardCharsets.UTF_8), - "bb".getBytes(StandardCharsets.UTF_8), - "cc".getBytes(StandardCharsets.UTF_8), - "dd".getBytes(StandardCharsets.UTF_8)); - setVector( - dictVector2, - "foo".getBytes(StandardCharsets.UTF_8), - "baz".getBytes(StandardCharsets.UTF_8), - "bar".getBytes(StandardCharsets.UTF_8)); - - provider.put(new Dictionary(dictVector1, new DictionaryEncoding(1L, false, null))); - provider.put(new Dictionary(dictVector2, new DictionaryEncoding(2L, false, null))); - - StructSubfieldEncoder encoder = new StructSubfieldEncoder(allocator, provider); - Map columnToDictionaryId = new HashMap<>(); - columnToDictionaryId.put(0, 1L); - columnToDictionaryId.put(1, 2L); - - try (final StructVector encoded = - (StructVector) encoder.encode(vector, columnToDictionaryId)) { - // verify indices - assertEquals(StructVector.class, encoded.getClass()); - - assertEquals(5, encoded.getValueCount()); - Object[] realValue1 = convertMapValuesToArray(encoded.getObject(0)); - assertTrue(Arrays.equals(new Object[] {0, 1}, realValue1)); - 
Object[] realValue2 = convertMapValuesToArray(encoded.getObject(1)); - assertTrue(Arrays.equals(new Object[] {1, 2}, realValue2)); - Object[] realValue3 = convertMapValuesToArray(encoded.getObject(2)); - assertTrue(Arrays.equals(new Object[] {2, 0}, realValue3)); - Object[] realValue4 = convertMapValuesToArray(encoded.getObject(3)); - assertTrue(Arrays.equals(new Object[] {0, 0}, realValue4)); - Object[] realValue5 = convertMapValuesToArray(encoded.getObject(4)); - assertTrue(Arrays.equals(new Object[] {3, 0}, realValue5)); - - // now run through the decoder and verify we get the original back - try (ValueVector decoded = encoder.decode(encoded)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - } - - @Test - public void testEncodeStructSubFieldWithCertainColumns() { - // in this case, some child vector is encoded and others are not - try (final StructVector vector = StructVector.empty("vector", allocator); - final VarCharVector dictVector1 = new VarCharVector("f0", allocator)) { - - vector.addOrGet("f0", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class); - vector.addOrGet("f1", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class); - - NullableStructWriter writer = vector.getWriter(); - writer.allocate(); - // set some values - writeStructVector(writer, "aa", "baz"); - writeStructVector(writer, "bb", "bar"); - writeStructVector(writer, "cc", "foo"); - writeStructVector(writer, "aa", "foo"); - writeStructVector(writer, "dd", "foo"); - writer.setValueCount(5); - - // initialize dictionaries - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - - setVector( - dictVector1, - "aa".getBytes(StandardCharsets.UTF_8), - "bb".getBytes(StandardCharsets.UTF_8), - "cc".getBytes(StandardCharsets.UTF_8), - 
"dd".getBytes(StandardCharsets.UTF_8)); - - provider.put(new Dictionary(dictVector1, new DictionaryEncoding(1L, false, null))); - StructSubfieldEncoder encoder = new StructSubfieldEncoder(allocator, provider); - Map columnToDictionaryId = new HashMap<>(); - columnToDictionaryId.put(0, 1L); - - try (final StructVector encoded = - (StructVector) encoder.encode(vector, columnToDictionaryId)) { - // verify indices - assertEquals(StructVector.class, encoded.getClass()); - - assertEquals(5, encoded.getValueCount()); - Object[] realValue1 = convertMapValuesToArray(encoded.getObject(0)); - assertTrue(Arrays.equals(new Object[] {0, new Text("baz")}, realValue1)); - Object[] realValue2 = convertMapValuesToArray(encoded.getObject(1)); - assertTrue(Arrays.equals(new Object[] {1, new Text("bar")}, realValue2)); - Object[] realValue3 = convertMapValuesToArray(encoded.getObject(2)); - assertTrue(Arrays.equals(new Object[] {2, new Text("foo")}, realValue3)); - Object[] realValue4 = convertMapValuesToArray(encoded.getObject(3)); - assertTrue(Arrays.equals(new Object[] {0, new Text("foo")}, realValue4)); - Object[] realValue5 = convertMapValuesToArray(encoded.getObject(4)); - assertTrue(Arrays.equals(new Object[] {3, new Text("foo")}, realValue5)); - - // now run through the decoder and verify we get the original back - try (ValueVector decoded = encoder.decode(encoded)) { - assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), decoded.getValueCount()); - for (int i = 0; i < 5; i++) { - assertEquals(vector.getObject(i), decoded.getObject(i)); - } - } - } - } - } - - @Test - public void testNoMemoryLeak() { - // test no memory leak when encode - try (final VarCharVector vector = newVarCharVector("foo", allocator); - final VarCharVector dictionaryVector = newVarCharVector("dict", allocator)) { - - setVector(vector, zero, one, two); - setVector(dictionaryVector, zero, one); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new 
DictionaryEncoding(1L, false, null)); - - try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) { - fail("There should be an exception when encoding"); - } catch (Exception e) { - assertEquals("Dictionary encoding not defined for value:" + new Text(two), e.getMessage()); - } - } - assertEquals(0, allocator.getAllocatedMemory(), "encode memory leak"); - - // test no memory leak when decode - try (final IntVector indices = newVector(IntVector.class, "", Types.MinorType.INT, allocator); - final VarCharVector dictionaryVector = newVarCharVector("dict", allocator)) { - - setVector(indices, 3); - setVector(dictionaryVector, zero, one); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - - try (final ValueVector decoded = DictionaryEncoder.decode(indices, dictionary, allocator)) { - fail("There should be an exception when decoding"); - } catch (Exception e) { - assertEquals("Provided dictionary does not contain value for index 3", e.getMessage()); - } - } - assertEquals(0, allocator.getAllocatedMemory(), "decode memory leak"); - } - - @Test - public void testListNoMemoryLeak() { - // Create a new value vector - try (final ListVector vector = ListVector.empty("vector", allocator); - final ListVector dictionaryVector = ListVector.empty("dict", allocator)) { - - UnionListWriter writer = vector.getWriter(); - writer.allocate(); - writeListVector(writer, new int[] {10, 20}); - writer.setValueCount(1); - - UnionListWriter dictWriter = dictionaryVector.getWriter(); - dictWriter.allocate(); - writeListVector(dictWriter, new int[] {10}); - dictionaryVector.setValueCount(1); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - ListSubfieldEncoder encoder = new ListSubfieldEncoder(dictionary, allocator); - - try (final ListVector encoded = (ListVector) encoder.encodeListSubField(vector)) { - fail("There should be an exception when encoding"); - } 
catch (Exception e) { - assertEquals("Dictionary encoding not defined for value:20", e.getMessage()); - } - } - assertEquals(0, allocator.getAllocatedMemory(), "list encode memory leak"); - - try (final ListVector indices = ListVector.empty("indices", allocator); - final ListVector dictionaryVector = ListVector.empty("dict", allocator)) { - - UnionListWriter writer = indices.getWriter(); - writer.allocate(); - writeListVector(writer, new int[] {3}); - writer.setValueCount(1); - - UnionListWriter dictWriter = dictionaryVector.getWriter(); - dictWriter.allocate(); - writeListVector(dictWriter, new int[] {10, 20}); - dictionaryVector.setValueCount(1); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - - try (final ValueVector decoded = - ListSubfieldEncoder.decodeListSubField(indices, dictionary, allocator)) { - fail("There should be an exception when decoding"); - } catch (Exception e) { - assertEquals("Provided dictionary does not contain value for index 3", e.getMessage()); - } - } - assertEquals(0, allocator.getAllocatedMemory(), "list decode memory leak"); - } - - @Test - public void testStructNoMemoryLeak() { - try (final StructVector vector = StructVector.empty("vector", allocator); - final VarCharVector dictVector1 = new VarCharVector("f0", allocator); - final VarCharVector dictVector2 = new VarCharVector("f1", allocator)) { - - vector.addOrGet("f0", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class); - vector.addOrGet("f1", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class); - - NullableStructWriter writer = vector.getWriter(); - writer.allocate(); - writeStructVector(writer, "aa", "baz"); - writer.setValueCount(1); - - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - setVector(dictVector1, "aa".getBytes(StandardCharsets.UTF_8)); - setVector(dictVector2, "foo".getBytes(StandardCharsets.UTF_8)); - - provider.put(new 
Dictionary(dictVector1, new DictionaryEncoding(1L, false, null))); - provider.put(new Dictionary(dictVector2, new DictionaryEncoding(2L, false, null))); - - StructSubfieldEncoder encoder = new StructSubfieldEncoder(allocator, provider); - Map columnToDictionaryId = new HashMap<>(); - columnToDictionaryId.put(0, 1L); - columnToDictionaryId.put(1, 2L); - - try (final StructVector encoded = - (StructVector) encoder.encode(vector, columnToDictionaryId)) { - fail("There should be an exception when encoding"); - } catch (Exception e) { - assertEquals("Dictionary encoding not defined for value:baz", e.getMessage()); - } - } - assertEquals(0, allocator.getAllocatedMemory(), "struct encode memory leak"); - - try (final StructVector indices = StructVector.empty("indices", allocator); - final VarCharVector dictVector1 = new VarCharVector("f0", allocator); - final VarCharVector dictVector2 = new VarCharVector("f1", allocator)) { - - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - setVector(dictVector1, "aa".getBytes(StandardCharsets.UTF_8)); - setVector(dictVector2, "foo".getBytes(StandardCharsets.UTF_8)); - - provider.put(new Dictionary(dictVector1, new DictionaryEncoding(1L, false, null))); - provider.put(new Dictionary(dictVector2, new DictionaryEncoding(2L, false, null))); - - ArrowType int32 = new ArrowType.Int(32, true); - indices.addOrGet( - "f0", new FieldType(true, int32, provider.lookup(1L).getEncoding()), IntVector.class); - indices.addOrGet( - "f1", new FieldType(true, int32, provider.lookup(2L).getEncoding()), IntVector.class); - - NullableStructWriter writer = indices.getWriter(); - writer.allocate(); - writer.start(); - writer.integer("f0").writeInt(1); - writer.integer("f1").writeInt(3); - writer.end(); - writer.setValueCount(1); - - try (final StructVector decode = StructSubfieldEncoder.decode(indices, provider, allocator)) { - fail("There should be an exception when decoding"); - } catch (Exception e) { 
- assertEquals("Provided dictionary does not contain value for index 3", e.getMessage()); - } - } - assertEquals(0, allocator.getAllocatedMemory(), "struct decode memory leak"); - } - - private void testDictionary( - Dictionary dictionary, ToIntBiFunction valGetter) { - try (VarCharVector vector = new VarCharVector("vector", allocator)) { - setVector(vector, "1", "3", "5", "7", "9"); - try (ValueVector encodedVector = DictionaryEncoder.encode(vector, dictionary)) { - - // verify encoded result - assertEquals(vector.getValueCount(), encodedVector.getValueCount()); - assertEquals(1, valGetter.applyAsInt(encodedVector, 0)); - assertEquals(3, valGetter.applyAsInt(encodedVector, 1)); - assertEquals(5, valGetter.applyAsInt(encodedVector, 2)); - assertEquals(7, valGetter.applyAsInt(encodedVector, 3)); - assertEquals(9, valGetter.applyAsInt(encodedVector, 4)); - - try (ValueVector decodedVector = DictionaryEncoder.decode(encodedVector, dictionary)) { - assertTrue(decodedVector instanceof VarCharVector); - assertEquals(vector.getValueCount(), decodedVector.getValueCount()); - assertArrayEquals( - "1".getBytes(StandardCharsets.UTF_8), ((VarCharVector) decodedVector).get(0)); - assertArrayEquals( - "3".getBytes(StandardCharsets.UTF_8), ((VarCharVector) decodedVector).get(1)); - assertArrayEquals( - "5".getBytes(StandardCharsets.UTF_8), ((VarCharVector) decodedVector).get(2)); - assertArrayEquals( - "7".getBytes(StandardCharsets.UTF_8), ((VarCharVector) decodedVector).get(3)); - assertArrayEquals( - "9".getBytes(StandardCharsets.UTF_8), ((VarCharVector) decodedVector).get(4)); - } - } - } - } - - @Test - public void testDictionaryUInt1() { - try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) { - setVector(dictionaryVector, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"); - Dictionary dictionary1 = - new Dictionary( - dictionaryVector, - new DictionaryEncoding( - /*id=*/ 10L, - /*ordered=*/ false, - /*indexType=*/ new ArrowType.Int(/*bitWidth*/ 
8, /*isSigned*/ false))); - testDictionary(dictionary1, (vector, index) -> ((UInt1Vector) vector).get(index)); - } - } - - @Test - public void testDictionaryUInt2() { - try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) { - setVector(dictionaryVector, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"); - Dictionary dictionary2 = - new Dictionary( - dictionaryVector, - new DictionaryEncoding( - /*id=*/ 20L, - /*ordered=*/ false, - /*indexType=*/ new ArrowType.Int(/*bitWidth=*/ 16, /*isSigned*/ false))); - testDictionary(dictionary2, (vector, index) -> ((UInt2Vector) vector).get(index)); - } - } - - @Test - public void testDictionaryUInt4() { - try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) { - setVector(dictionaryVector, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"); - Dictionary dictionary4 = - new Dictionary( - dictionaryVector, - new DictionaryEncoding( - /*id=*/ 30L, - /*ordered=*/ false, - /*indexType=*/ new ArrowType.Int(/*bitWidth=*/ 32, /*isSigned*/ false))); - testDictionary(dictionary4, (vector, index) -> ((UInt4Vector) vector).get(index)); - } - } - - @Test - public void testDictionaryUInt8() { - try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) { - setVector(dictionaryVector, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"); - Dictionary dictionary8 = - new Dictionary( - dictionaryVector, - new DictionaryEncoding( - /*id=*/ 40L, - /*ordered=*/ false, - /*indexType=*/ new ArrowType.Int(/*bitWidth=*/ 64, /*isSigned*/ false))); - testDictionary(dictionary8, (vector, index) -> (int) ((UInt8Vector) vector).get(index)); - } - } - - @Test - public void testDictionaryUIntOverflow() { - // the size is within the range of UInt1, but outside the range of TinyInt. 
- final int vecLength = 256; - try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) { - dictionaryVector.allocateNew(vecLength * 3, vecLength); - for (int i = 0; i < vecLength; i++) { - dictionaryVector.set(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); - } - dictionaryVector.setValueCount(vecLength); - - Dictionary dictionary = - new Dictionary( - dictionaryVector, - new DictionaryEncoding( - /*id=*/ 10L, - /*ordered=*/ false, - /*indexType=*/ new ArrowType.Int(/*bitWidth=*/ 8, /*isSigned*/ false))); - - try (VarCharVector vector = new VarCharVector("vector", allocator)) { - setVector(vector, "255"); - try (UInt1Vector encodedVector = - (UInt1Vector) DictionaryEncoder.encode(vector, dictionary)) { - - // verify encoded result - assertEquals(1, encodedVector.getValueCount()); - assertEquals(255, encodedVector.getValueAsLong(0)); - - try (VarCharVector decodedVector = - (VarCharVector) DictionaryEncoder.decode(encodedVector, dictionary)) { - assertEquals(1, decodedVector.getValueCount()); - assertArrayEquals("255".getBytes(StandardCharsets.UTF_8), decodedVector.get(0)); - } - } - } - } - } - - private int[] convertListToIntArray(List list) { - int[] values = new int[list.size()]; - for (int i = 0; i < list.size(); i++) { - values[i] = (int) list.get(i); - } - return values; - } - - private Object[] convertMapValuesToArray(Map map) { - Object[] values = new Object[map.size()]; - Iterator valueIterator = map.values().iterator(); - for (int i = 0; i < map.size(); i++) { - values[i] = valueIterator.next(); - } - return values; - } - - private void writeStructVector(NullableStructWriter writer, String value1, String value2) { - - byte[] bytes1 = value1.getBytes(StandardCharsets.UTF_8); - byte[] bytes2 = value2.getBytes(StandardCharsets.UTF_8); - ArrowBuf temp = allocator.buffer(bytes1.length > bytes2.length ? 
bytes1.length : bytes2.length); - - writer.start(); - temp.setBytes(0, bytes1); - writer.varChar("f0").writeVarChar(0, bytes1.length, temp); - temp.setBytes(0, bytes2); - writer.varChar("f1").writeVarChar(0, bytes2.length, temp); - writer.end(); - temp.close(); - } - - private void writeStructVector(NullableStructWriter writer, int value1, long value2) { - writer.start(); - writer.integer("f0").writeInt(value1); - writer.bigInt("f1").writeBigInt(value2); - writer.end(); - } - - private void writeListVector(UnionListWriter writer, int[] values) { - writer.startList(); - for (int v : values) { - writer.integer().writeInt(v); - } - writer.endList(); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java deleted file mode 100644 index 5879a3f104608..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; - -import java.time.Duration; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.holders.NullableDurationHolder; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestDurationVector { - RootAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() { - allocator.close(); - } - - @Test - public void testSecBasics() { - try (DurationVector secVector = - TestUtils.newVector( - DurationVector.class, "second", new ArrowType.Duration(TimeUnit.SECOND), allocator)) { - - secVector.allocateNew(); - secVector.setNull(0); - secVector.setSafe(1, 1000); - secVector.setValueCount(2); - assertNull(secVector.getObject(0)); - assertEquals(Duration.ofSeconds(1000), secVector.getObject(1)); - assertNull(secVector.getAsStringBuilder(0)); - assertEquals("PT16M40S", secVector.getAsStringBuilder(1).toString()); - // Holder - NullableDurationHolder holder = new NullableDurationHolder(); - secVector.get(0, holder); - assertEquals(0, holder.isSet); - secVector.get(1, holder); - assertEquals(1, holder.isSet); - assertEquals(1000, holder.value); - } - } - - @Test - public void testMilliBasics() { - try (DurationVector milliVector = - TestUtils.newVector( - DurationVector.class, - "nanos", - new ArrowType.Duration(TimeUnit.MILLISECOND), - allocator)) { - - milliVector.allocateNew(); - milliVector.setNull(0); - milliVector.setSafe(1, 1000); - milliVector.setValueCount(2); - 
assertNull(milliVector.getObject(0)); - assertEquals(Duration.ofSeconds(1), milliVector.getObject(1)); - assertNull(milliVector.getAsStringBuilder(0)); - assertEquals("PT1S", milliVector.getAsStringBuilder(1).toString()); - // Holder - NullableDurationHolder holder = new NullableDurationHolder(); - milliVector.get(0, holder); - assertEquals(0, holder.isSet); - milliVector.get(1, holder); - assertEquals(1, holder.isSet); - assertEquals(1000, holder.value); - } - } - - @Test - public void testMicroBasics() { - try (DurationVector microVector = - TestUtils.newVector( - DurationVector.class, - "micro", - new ArrowType.Duration(TimeUnit.MICROSECOND), - allocator)) { - - microVector.allocateNew(); - microVector.setNull(0); - microVector.setSafe(1, 1000); - microVector.setValueCount(2); - assertNull(microVector.getObject(0)); - assertEquals(Duration.ofMillis(1), microVector.getObject(1)); - assertNull(microVector.getAsStringBuilder(0)); - assertEquals("PT0.001S", microVector.getAsStringBuilder(1).toString()); - // Holder - NullableDurationHolder holder = new NullableDurationHolder(); - microVector.get(0, holder); - assertEquals(0, holder.isSet); - microVector.get(1, holder); - assertEquals(1, holder.isSet); - assertEquals(1000, holder.value); - } - } - - @Test - public void testNanosBasics() { - try (DurationVector nanoVector = - TestUtils.newVector( - DurationVector.class, - "nanos", - new ArrowType.Duration(TimeUnit.NANOSECOND), - allocator)) { - - nanoVector.allocateNew(); - nanoVector.setNull(0); - nanoVector.setSafe(1, 1000000); - nanoVector.setValueCount(2); - assertNull(nanoVector.getObject(0)); - assertEquals(Duration.ofMillis(1), nanoVector.getObject(1)); - assertNull(nanoVector.getAsStringBuilder(0)); - assertEquals("PT0.001S", nanoVector.getAsStringBuilder(1).toString()); - // Holder - NullableDurationHolder holder = new NullableDurationHolder(); - nanoVector.get(0, holder); - assertEquals(0, holder.isSet); - nanoVector.get(1, holder); - assertEquals(1, 
holder.isSet); - assertEquals(1000000, holder.value); - } - } - - @Test - public void testGetTransferPairWithField() { - final DurationVector fromVector = - TestUtils.newVector( - DurationVector.class, "nanos", new ArrowType.Duration(TimeUnit.NANOSECOND), allocator); - final TransferPair transferPair = fromVector.getTransferPair(fromVector.getField(), allocator); - final DurationVector toVector = (DurationVector) transferPair.getTo(); - // Field inside a new vector created by reusing a field should be the same in memory as the - // original field. - assertSame(fromVector.getField(), toVector.getField()); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java deleted file mode 100644 index 040bd9f7dc791..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java +++ /dev/null @@ -1,313 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; -import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder; -import org.apache.arrow.vector.util.ReusableByteArray; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestFixedSizeBinaryVector { - private static final int numValues = 123; - private static final int typeWidth = 9; - private static final int smallDataSize = 6; - private static final int largeDataSize = 12; - - private static byte[][] values; - - static { - values = new byte[numValues][typeWidth]; - for (int i = 0; i < numValues; i++) { - for (int j = 0; j < typeWidth; j++) { - values[i][j] = ((byte) i); - } - } - } - - private ArrowBuf[] bufs = new ArrowBuf[numValues]; - private FixedSizeBinaryHolder[] holders = new FixedSizeBinaryHolder[numValues]; - private NullableFixedSizeBinaryHolder[] nullableHolders = - new NullableFixedSizeBinaryHolder[numValues]; - - private static byte[] smallValue; - - static { - smallValue = new byte[smallDataSize]; - for (int i = 0; i < smallDataSize; i++) { - smallValue[i] = ((byte) i); - } - } - - private ArrowBuf smallBuf; - private FixedSizeBinaryHolder smallHolder; - private NullableFixedSizeBinaryHolder smallNullableHolder; - - private static byte[] largeValue; - - static { - largeValue = new byte[largeDataSize]; - for (int i = 0; i < largeDataSize; i++) { - 
largeValue[i] = ((byte) i); - } - } - - private ArrowBuf largeBuf; - private FixedSizeBinaryHolder largeHolder; - private NullableFixedSizeBinaryHolder largeNullableHolder; - - private BufferAllocator allocator; - private FixedSizeBinaryVector vector; - - private static void failWithException(String message) throws Exception { - throw new Exception(message); - } - - @BeforeEach - public void init() throws Exception { - allocator = new DirtyRootAllocator(Integer.MAX_VALUE, (byte) 100); - vector = new FixedSizeBinaryVector("fixedSizeBinary", allocator, typeWidth); - vector.allocateNew(); - - for (int i = 0; i < numValues; i++) { - bufs[i] = allocator.buffer(typeWidth); - bufs[i].setBytes(0, values[i]); - - holders[i] = new FixedSizeBinaryHolder(); - holders[i].byteWidth = typeWidth; - holders[i].buffer = bufs[i]; - - nullableHolders[i] = new NullableFixedSizeBinaryHolder(); - nullableHolders[i].byteWidth = typeWidth; - nullableHolders[i].buffer = bufs[i]; - nullableHolders[i].isSet = 1; - } - - smallBuf = allocator.buffer(smallDataSize); - smallBuf.setBytes(0, smallValue); - - smallHolder = new FixedSizeBinaryHolder(); - smallHolder.byteWidth = smallDataSize; - smallHolder.buffer = smallBuf; - - smallNullableHolder = new NullableFixedSizeBinaryHolder(); - smallNullableHolder.byteWidth = smallDataSize; - smallNullableHolder.buffer = smallBuf; - - largeBuf = allocator.buffer(largeDataSize); - largeBuf.setBytes(0, largeValue); - - largeHolder = new FixedSizeBinaryHolder(); - largeHolder.byteWidth = typeWidth; - largeHolder.buffer = largeBuf; - - largeNullableHolder = new NullableFixedSizeBinaryHolder(); - largeNullableHolder.byteWidth = typeWidth; - largeNullableHolder.buffer = largeBuf; - } - - @AfterEach - public void terminate() throws Exception { - for (int i = 0; i < numValues; i++) { - bufs[i].close(); - } - smallBuf.close(); - largeBuf.close(); - - vector.close(); - allocator.close(); - } - - @Test - public void testSetUsingByteArray() { - for (int i = 0; i < 
numValues; i++) { - vector.set(i, values[i]); - } - vector.setValueCount(numValues); - for (int i = 0; i < numValues; i++) { - assertArrayEquals(values[i], vector.getObject(i)); - } - } - - @Test - public void testSetUsingNull() { - final byte[] value = null; - for (int i = 0; i < numValues; i++) { - final int index = i; - Exception e = - assertThrows( - NullPointerException.class, - () -> { - vector.set(index, value); - }); - assertEquals("expecting a valid byte array", e.getMessage()); - } - } - - @Test - public void testSetUsingHolder() { - for (int i = 0; i < numValues; i++) { - vector.set(i, holders[i]); - } - vector.setValueCount(numValues); - for (int i = 0; i < numValues; i++) { - assertArrayEquals(values[i], vector.getObject(i)); - } - } - - @Test - public void testSetUsingNullableHolder() { - for (int i = 0; i < numValues; i++) { - vector.set(i, nullableHolders[i]); - } - vector.setValueCount(numValues); - for (int i = 0; i < numValues; i++) { - assertArrayEquals(values[i], vector.getObject(i)); - } - } - - @Test - public void testGetUsingNullableHolder() { - for (int i = 0; i < numValues; i++) { - vector.set(i, holders[i]); - } - vector.setValueCount(numValues); - for (int i = 0; i < numValues; i++) { - vector.get(i, nullableHolders[i]); - assertEquals(typeWidth, nullableHolders[i].byteWidth); - assertTrue(nullableHolders[i].isSet > 0); - byte[] actual = new byte[typeWidth]; - nullableHolders[i].buffer.getBytes(0, actual, 0, typeWidth); - assertArrayEquals(values[i], actual); - } - } - - @Test - public void testSetWithInvalidInput() throws Exception { - String errorMsg = "input data needs to be at least " + typeWidth + " bytes"; - - // test small inputs, byteWidth matches but value or buffer is too small - try { - vector.set(0, smallValue); - failWithException(errorMsg); - } catch (AssertionError | IllegalArgumentException ignore) { - } - - try { - vector.set(0, smallHolder); - failWithException(errorMsg); - } catch (AssertionError | 
IllegalArgumentException ignore) { - } - - try { - vector.set(0, smallNullableHolder); - failWithException(errorMsg); - } catch (AssertionError | IllegalArgumentException ignore) { - } - - try { - vector.set(0, smallBuf); - failWithException(errorMsg); - } catch (AssertionError | IllegalArgumentException ignore) { - } - - // test large inputs, byteWidth matches but value or buffer is bigger than byteWidth - vector.set(0, largeValue); - vector.set(0, largeHolder); - vector.set(0, largeNullableHolder); - vector.set(0, largeBuf); - } - - @Test - public void setSetSafeWithInvalidInput() throws Exception { - String errorMsg = "input data needs to be at least " + typeWidth + " bytes"; - - // test small inputs, byteWidth matches but value or buffer is too small - try { - vector.setSafe(0, smallValue); - failWithException(errorMsg); - } catch (AssertionError | IllegalArgumentException ignore) { - } - - try { - vector.setSafe(0, smallHolder); - failWithException(errorMsg); - } catch (AssertionError | IllegalArgumentException ignore) { - } - - try { - vector.setSafe(0, smallNullableHolder); - failWithException(errorMsg); - } catch (AssertionError | IllegalArgumentException ignore) { - } - - try { - vector.setSafe(0, smallBuf); - failWithException(errorMsg); - } catch (AssertionError | IllegalArgumentException ignore) { - } - - // test large inputs, byteWidth matches but value or buffer is bigger than byteWidth - vector.setSafe(0, largeValue); - vector.setSafe(0, largeHolder); - vector.setSafe(0, largeNullableHolder); - vector.setSafe(0, largeBuf); - } - - @Test - public void testGetNull() { - vector.setNull(0); - assertNull(vector.get(0)); - } - - @Test - public void testGetTransferPairWithField() { - final FixedSizeBinaryVector fromVector = - new FixedSizeBinaryVector("fixedSizeBinary", allocator, typeWidth); - final TransferPair transferPair = fromVector.getTransferPair(fromVector.getField(), allocator); - final FixedSizeBinaryVector toVector = (FixedSizeBinaryVector) 
transferPair.getTo(); - // Field inside a new vector created by reusing a field should be the same in memory as the - // original field. - assertSame(fromVector.getField(), toVector.getField()); - } - - @Test - public void testGetBytesRepeatedly() { - for (int i = 0; i < numValues; i++) { - vector.set(i, values[i]); - } - vector.setValueCount(numValues); - - ReusableByteArray reusableByteArray = new ReusableByteArray(); - for (int i = 0; i < numValues; i++) { - // verify results - vector.read(i, reusableByteArray); - assertArrayEquals(values[i], reusableByteArray.getBuffer()); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java deleted file mode 100644 index f582406de6808..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java +++ /dev/null @@ -1,668 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.math.BigDecimal; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.impl.UnionFixedSizeListReader; -import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter; -import org.apache.arrow.vector.complex.impl.UnionListReader; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.Text; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestFixedSizeListVector { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testIntType() { - try (FixedSizeListVector vector = FixedSizeListVector.empty("list", /*size=*/ 2, allocator)) { - IntVector nested = - (IntVector) - vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())).getVector(); - vector.allocateNew(); - - for (int i = 0; i < 10; i++) { - 
vector.setNotNull(i); - nested.set(i * 2, i); - nested.set(i * 2 + 1, i + 10); - } - vector.setValueCount(10); - - UnionFixedSizeListReader reader = vector.getReader(); - for (int i = 0; i < 10; i++) { - reader.setPosition(i); - assertTrue(reader.isSet()); - assertTrue(reader.next()); - assertEquals(i, reader.reader().readInteger().intValue()); - assertTrue(reader.next()); - assertEquals(i + 10, reader.reader().readInteger().intValue()); - assertFalse(reader.next()); - assertEquals(Arrays.asList(i, i + 10), reader.readObject()); - } - } - } - - @Test - public void testFloatTypeNullable() { - try (FixedSizeListVector vector = FixedSizeListVector.empty("list", /*size=*/ 2, allocator)) { - Float4Vector nested = - (Float4Vector) - vector.addOrGetVector(FieldType.nullable(MinorType.FLOAT4.getType())).getVector(); - vector.allocateNew(); - - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector.setNotNull(i); - nested.set(i * 2, i + 0.1f); - nested.set(i * 2 + 1, i + 10.1f); - } - } - vector.setValueCount(10); - - UnionFixedSizeListReader reader = vector.getReader(); - for (int i = 0; i < 10; i++) { - reader.setPosition(i); - if (i % 2 == 0) { - assertTrue(reader.isSet()); - assertTrue(reader.next()); - assertEquals(i + 0.1f, reader.reader().readFloat(), 0.00001); - assertTrue(reader.next()); - assertEquals(i + 10.1f, reader.reader().readFloat(), 0.00001); - assertFalse(reader.next()); - assertEquals(Arrays.asList(i + 0.1f, i + 10.1f), reader.readObject()); - } else { - assertFalse(reader.isSet()); - assertNull(reader.readObject()); - } - } - } - } - - @Test - public void testNestedInList() { - try (ListVector vector = ListVector.empty("list", allocator)) { - FixedSizeListVector tuples = - (FixedSizeListVector) - vector.addOrGetVector(FieldType.nullable(new ArrowType.FixedSizeList(2))).getVector(); - IntVector innerVector = - (IntVector) - tuples.addOrGetVector(FieldType.nullable(MinorType.INT.getType())).getVector(); - vector.allocateNew(); - - for (int i = 0; i < 
10; i++) { - if (i % 2 == 0) { - int position = vector.startNewValue(i); - for (int j = 0; j < i % 7; j++) { - tuples.setNotNull(position + j); - innerVector.set((position + j) * 2, j); - innerVector.set((position + j) * 2 + 1, j + 1); - } - vector.endValue(i, i % 7); - } - } - vector.setValueCount(10); - - UnionListReader reader = vector.getReader(); - for (int i = 0; i < 10; i++) { - reader.setPosition(i); - if (i % 2 == 0) { - for (int j = 0; j < i % 7; j++) { - assertTrue(reader.next()); - FieldReader innerListReader = reader.reader(); - for (int k = 0; k < 2; k++) { - assertTrue(innerListReader.next()); - assertEquals(k + j, innerListReader.reader().readInteger().intValue()); - } - assertFalse(innerListReader.next()); - } - assertFalse(reader.next()); - } else { - assertFalse(reader.isSet()); - assertNull(reader.readObject()); - } - } - } - } - - @Test - public void testTransferPair() { - try (FixedSizeListVector from = - new FixedSizeListVector( - "from", - allocator, - new FieldType(true, new ArrowType.FixedSizeList(2), null), - null); - FixedSizeListVector to = - new FixedSizeListVector( - "to", allocator, new FieldType(true, new ArrowType.FixedSizeList(2), null), null)) { - Float4Vector nested = - (Float4Vector) - from.addOrGetVector(FieldType.nullable(MinorType.FLOAT4.getType())).getVector(); - from.allocateNew(); - - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - from.setNotNull(i); - nested.set(i * 2, i + 0.1f); - nested.set(i * 2 + 1, i + 10.1f); - } - } - from.setValueCount(10); - - TransferPair pair = from.makeTransferPair(to); - - pair.copyValueSafe(0, 1); - pair.copyValueSafe(2, 2); - to.copyFromSafe(4, 3, from); - - to.setValueCount(10); - - UnionFixedSizeListReader reader = to.getReader(); - - reader.setPosition(0); - assertFalse(reader.isSet()); - assertNull(reader.readObject()); - - reader.setPosition(1); - assertTrue(reader.isSet()); - assertTrue(reader.next()); - assertEquals(0.1f, reader.reader().readFloat(), 0.00001); - 
assertTrue(reader.next()); - assertEquals(10.1f, reader.reader().readFloat(), 0.00001); - assertFalse(reader.next()); - assertEquals(Arrays.asList(0.1f, 10.1f), reader.readObject()); - - reader.setPosition(2); - assertTrue(reader.isSet()); - assertTrue(reader.next()); - assertEquals(2.1f, reader.reader().readFloat(), 0.00001); - assertTrue(reader.next()); - assertEquals(12.1f, reader.reader().readFloat(), 0.00001); - assertFalse(reader.next()); - assertEquals(Arrays.asList(2.1f, 12.1f), reader.readObject()); - - reader.setPosition(3); - assertTrue(reader.isSet()); - assertTrue(reader.next()); - assertEquals(4.1f, reader.reader().readFloat(), 0.00001); - assertTrue(reader.next()); - assertEquals(14.1f, reader.reader().readFloat(), 0.00001); - assertFalse(reader.next()); - assertEquals(Arrays.asList(4.1f, 14.1f), reader.readObject()); - - for (int i = 4; i < 10; i++) { - reader.setPosition(i); - assertFalse(reader.isSet()); - assertNull(reader.readObject()); - } - } - } - - @Test - public void testTransferEmptyVector() throws Exception { - // #43320 - try (FixedSizeListVector src = - new FixedSizeListVector( - "src", allocator, FieldType.nullable(new ArrowType.FixedSizeList(2)), null); - FixedSizeListVector dest = - new FixedSizeListVector( - "dest", allocator, FieldType.nullable(new ArrowType.FixedSizeList(2)), null)) { - src.makeTransferPair(dest).transfer(); - - IntVector els = - (IntVector) dest.addOrGetVector(FieldType.nullable(MinorType.INT.getType())).getVector(); - - dest.allocateNew(); - dest.startNewValue(0); - els.setSafe(0, 1); - els.setSafe(1, 2); - dest.setValueCount(1); - - List expected = new ArrayList<>(2); - expected.add(1); - expected.add(2); - - assertEquals(expected, dest.getObject(0)); - } - } - - @Test - public void testConsistentChildName() throws Exception { - try (FixedSizeListVector listVector = - FixedSizeListVector.empty("sourceVector", /*size=*/ 2, allocator)) { - String emptyListStr = listVector.getField().toString(); - 
assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME)); - - listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - String emptyVectorStr = listVector.getField().toString(); - assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME)); - } - } - - @Test - public void testUnionFixedSizeListWriterWithNulls() throws Exception { - /* Write to a decimal list vector - * each list of size 3 and having its data values alternating between null and a non-null. - * Read and verify - */ - try (final FixedSizeListVector vector = - FixedSizeListVector.empty("vector", /*size=*/ 3, allocator)) { - - UnionFixedSizeListWriter writer = vector.getWriter(); - writer.allocate(); - - final int valueCount = 100; - - for (int i = 0; i < valueCount; i++) { - writer.startList(); - writer.decimal().writeDecimal(new BigDecimal(i)); - writer.writeNull(); - writer.decimal().writeDecimal(new BigDecimal(i * 3)); - writer.endList(); - } - vector.setValueCount(valueCount); - - for (int i = 0; i < valueCount; i++) { - List values = (List) vector.getObject(i); - assertEquals(3, values.size()); - assertEquals(new BigDecimal(i), values.get(0)); - assertEquals(null, values.get(1)); - assertEquals(new BigDecimal(i * 3), values.get(2)); - } - } - } - - @Test - public void testUnionFixedSizeListWriter() throws Exception { - try (final FixedSizeListVector vector1 = - FixedSizeListVector.empty("vector", /*size=*/ 3, allocator)) { - - UnionFixedSizeListWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - int[] values1 = new int[] {1, 2, 3}; - int[] values2 = new int[] {4, 5, 6}; - int[] values3 = new int[] {7, 8, 9}; - - // set some values - writeListVector(vector1, writer1, values1); - writeListVector(vector1, writer1, values2); - writeListVector(vector1, writer1, values3); - writer1.setValueCount(3); - - assertEquals(3, vector1.getValueCount()); - - int[] realValue1 = convertListToIntArray(vector1.getObject(0)); - assertTrue(Arrays.equals(values1, realValue1)); - 
int[] realValue2 = convertListToIntArray(vector1.getObject(1)); - assertTrue(Arrays.equals(values2, realValue2)); - int[] realValue3 = convertListToIntArray(vector1.getObject(2)); - assertTrue(Arrays.equals(values3, realValue3)); - } - } - - @Test - public void testWriteDecimal() throws Exception { - try (final FixedSizeListVector vector = - FixedSizeListVector.empty("vector", /*size=*/ 3, allocator)) { - - UnionFixedSizeListWriter writer = vector.getWriter(); - writer.allocate(); - - final int valueCount = 100; - - for (int i = 0; i < valueCount; i++) { - writer.startList(); - writer.decimal().writeDecimal(new BigDecimal(i)); - writer.decimal().writeDecimal(new BigDecimal(i * 2)); - writer.decimal().writeDecimal(new BigDecimal(i * 3)); - writer.endList(); - } - vector.setValueCount(valueCount); - - for (int i = 0; i < valueCount; i++) { - List values = (List) vector.getObject(i); - assertEquals(3, values.size()); - assertEquals(new BigDecimal(i), values.get(0)); - assertEquals(new BigDecimal(i * 2), values.get(1)); - assertEquals(new BigDecimal(i * 3), values.get(2)); - } - } - } - - @Test - public void testDecimalIndexCheck() throws Exception { - try (final FixedSizeListVector vector = - FixedSizeListVector.empty("vector", /*size=*/ 3, allocator)) { - - UnionFixedSizeListWriter writer = vector.getWriter(); - writer.allocate(); - - IllegalStateException e = - assertThrows( - IllegalStateException.class, - () -> { - writer.startList(); - writer.decimal().writeDecimal(new BigDecimal(1)); - writer.decimal().writeDecimal(new BigDecimal(2)); - writer.decimal().writeDecimal(new BigDecimal(3)); - writer.decimal().writeDecimal(new BigDecimal(4)); - writer.endList(); - }); - assertEquals("values at index 0 is greater than listSize 3", e.getMessage()); - } - } - - @Test - public void testWriteIllegalData() throws Exception { - assertThrows( - IllegalStateException.class, - () -> { - try (final FixedSizeListVector vector1 = - FixedSizeListVector.empty("vector", /*size=*/ 3, 
allocator)) { - - UnionFixedSizeListWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - int[] values1 = new int[] {1, 2, 3}; - int[] values2 = new int[] {4, 5, 6, 7, 8}; - - // set some values - writeListVector(vector1, writer1, values1); - writeListVector(vector1, writer1, values2); - writer1.setValueCount(3); - - assertEquals(3, vector1.getValueCount()); - int[] realValue1 = convertListToIntArray(vector1.getObject(0)); - assertTrue(Arrays.equals(values1, realValue1)); - int[] realValue2 = convertListToIntArray(vector1.getObject(1)); - assertTrue(Arrays.equals(values2, realValue2)); - } - }); - } - - @Test - public void testSplitAndTransfer() throws Exception { - try (final FixedSizeListVector vector1 = - FixedSizeListVector.empty("vector", /*size=*/ 3, allocator)) { - - UnionFixedSizeListWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - int[] values1 = new int[] {1, 2, 3}; - int[] values2 = new int[] {4, 5, 6}; - int[] values3 = new int[] {7, 8, 9}; - - // set some values - writeListVector(vector1, writer1, values1); - writeListVector(vector1, writer1, values2); - writeListVector(vector1, writer1, values3); - writer1.setValueCount(3); - - TransferPair transferPair = vector1.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 2); - FixedSizeListVector targetVector = (FixedSizeListVector) transferPair.getTo(); - - assertEquals(2, targetVector.getValueCount()); - int[] realValue1 = convertListToIntArray(targetVector.getObject(0)); - assertArrayEquals(values1, realValue1); - int[] realValue2 = convertListToIntArray(targetVector.getObject(1)); - assertArrayEquals(values2, realValue2); - - targetVector.clear(); - } - } - - @Test - public void testZeroWidthVector() { - try (final FixedSizeListVector vector1 = - FixedSizeListVector.empty("vector", /*size=*/ 0, allocator)) { - - UnionFixedSizeListWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - int[] values1 = new int[] {}; - int[] values2 = new int[] {}; - int[] values3 
= null; - int[] values4 = new int[] {}; - - // set some values - writeListVector(vector1, writer1, values1); - writeListVector(vector1, writer1, values2); - writeListVector(vector1, writer1, values3); - writeListVector(vector1, writer1, values4); - writer1.setValueCount(4); - - assertEquals(4, vector1.getValueCount()); - - int[] realValue1 = convertListToIntArray(vector1.getObject(0)); - assertArrayEquals(values1, realValue1); - int[] realValue2 = convertListToIntArray(vector1.getObject(1)); - assertArrayEquals(values2, realValue2); - assertNull(vector1.getObject(2)); - int[] realValue4 = convertListToIntArray(vector1.getObject(3)); - assertArrayEquals(values4, realValue4); - } - } - - @Test - public void testVectorWithNulls() { - try (final FixedSizeListVector vector1 = - FixedSizeListVector.empty("vector", /*size=*/ 4, allocator)) { - - UnionFixedSizeListWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - List values1 = Arrays.asList(null, 1, 2, 3); - List values2 = Arrays.asList(4, null, 5, 6); - List values3 = null; - List values4 = Arrays.asList(7, 8, null, 9); - - // set some values - writeListVector(vector1, writer1, values1); - writeListVector(vector1, writer1, values2); - writeListVector(vector1, writer1, values3); - writeListVector(vector1, writer1, values4); - writer1.setValueCount(4); - - assertEquals(4, vector1.getValueCount()); - - List realValue1 = vector1.getObject(0); - assertEquals(values1, realValue1); - List realValue2 = vector1.getObject(1); - assertEquals(values2, realValue2); - List realValue3 = vector1.getObject(2); - assertEquals(values3, realValue3); - List realValue4 = vector1.getObject(3); - assertEquals(values4, realValue4); - } - } - - @Test - public void testWriteVarCharHelpers() throws Exception { - try (final FixedSizeListVector vector = - FixedSizeListVector.empty("vector", /*size=*/ 4, allocator)) { - - UnionFixedSizeListWriter writer = vector.getWriter(); - writer.allocate(); - - writer.startList(); - 
writer.writeVarChar("row1,1"); - writer.writeVarChar(new Text("row1,2")); - writer.writeNull(); - writer.writeNull(); - writer.endList(); - - assertEquals("row1,1", vector.getObject(0).get(0).toString()); - assertEquals("row1,2", vector.getObject(0).get(1).toString()); - } - } - - @Test - public void testWriteLargeVarCharHelpers() throws Exception { - try (final FixedSizeListVector vector = - FixedSizeListVector.empty("vector", /*size=*/ 4, allocator)) { - - UnionFixedSizeListWriter writer = vector.getWriter(); - writer.allocate(); - - writer.startList(); - writer.writeLargeVarChar("row1,1"); - writer.writeLargeVarChar(new Text("row1,2")); - writer.writeNull(); - writer.writeNull(); - writer.endList(); - - assertEquals("row1,1", vector.getObject(0).get(0).toString()); - assertEquals("row1,2", vector.getObject(0).get(1).toString()); - } - } - - @Test - public void testWriteVarBinaryHelpers() throws Exception { - try (final FixedSizeListVector vector = - FixedSizeListVector.empty("vector", /*size=*/ 4, allocator)) { - - UnionFixedSizeListWriter writer = vector.getWriter(); - writer.allocate(); - - writer.startList(); - writer.writeVarBinary("row1,1".getBytes(StandardCharsets.UTF_8)); - writer.writeVarBinary( - "row1,2".getBytes(StandardCharsets.UTF_8), - 0, - "row1,2".getBytes(StandardCharsets.UTF_8).length); - writer.writeVarBinary(ByteBuffer.wrap("row1,3".getBytes(StandardCharsets.UTF_8))); - writer.writeVarBinary( - ByteBuffer.wrap("row1,4".getBytes(StandardCharsets.UTF_8)), - 0, - "row1,4".getBytes(StandardCharsets.UTF_8).length); - writer.endList(); - - assertEquals( - "row1,1", new String((byte[]) vector.getObject(0).get(0), StandardCharsets.UTF_8)); - assertEquals( - "row1,2", new String((byte[]) vector.getObject(0).get(1), StandardCharsets.UTF_8)); - assertEquals( - "row1,3", new String((byte[]) vector.getObject(0).get(2), StandardCharsets.UTF_8)); - assertEquals( - "row1,4", new String((byte[]) vector.getObject(0).get(3), StandardCharsets.UTF_8)); - } - } - 
- @Test - public void testWriteLargeVarBinaryHelpers() throws Exception { - try (final FixedSizeListVector vector = - FixedSizeListVector.empty("vector", /*size=*/ 4, allocator)) { - - UnionFixedSizeListWriter writer = vector.getWriter(); - writer.allocate(); - - writer.startList(); - writer.writeLargeVarBinary("row1,1".getBytes(StandardCharsets.UTF_8)); - writer.writeLargeVarBinary( - "row1,2".getBytes(StandardCharsets.UTF_8), - 0, - "row1,2".getBytes(StandardCharsets.UTF_8).length); - writer.writeLargeVarBinary(ByteBuffer.wrap("row1,3".getBytes(StandardCharsets.UTF_8))); - writer.writeLargeVarBinary( - ByteBuffer.wrap("row1,4".getBytes(StandardCharsets.UTF_8)), - 0, - "row1,4".getBytes(StandardCharsets.UTF_8).length); - writer.endList(); - - assertEquals( - "row1,1", new String((byte[]) vector.getObject(0).get(0), StandardCharsets.UTF_8)); - assertEquals( - "row1,2", new String((byte[]) vector.getObject(0).get(1), StandardCharsets.UTF_8)); - assertEquals( - "row1,3", new String((byte[]) vector.getObject(0).get(2), StandardCharsets.UTF_8)); - assertEquals( - "row1,4", new String((byte[]) vector.getObject(0).get(3), StandardCharsets.UTF_8)); - } - } - - private int[] convertListToIntArray(List list) { - int[] values = new int[list.size()]; - for (int i = 0; i < list.size(); i++) { - values[i] = (int) list.get(i); - } - return values; - } - - private void writeListVector( - FixedSizeListVector vector, UnionFixedSizeListWriter writer, int[] values) { - writer.startList(); - if (values != null) { - for (int v : values) { - writer.integer().writeInt(v); - } - } else { - vector.setNull(writer.getPosition()); - } - writer.endList(); - } - - private void writeListVector( - FixedSizeListVector vector, UnionFixedSizeListWriter writer, List values) { - writer.startList(); - if (values != null) { - for (Integer v : values) { - if (v == null) { - writer.writeNull(); - } else { - writer.integer().writeInt(v); - } - } - } else { - vector.setNull(writer.getPosition()); - } - 
writer.endList(); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java deleted file mode 100644 index 2b39db3cd4d68..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.time.Duration; -import java.time.Period; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.holders.IntervalMonthDayNanoHolder; -import org.apache.arrow.vector.holders.NullableIntervalMonthDayNanoHolder; -import org.apache.arrow.vector.types.IntervalUnit; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestIntervalMonthDayNanoVector { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testBasics() { - try (final IntervalMonthDayNanoVector vector = - new IntervalMonthDayNanoVector(/*name=*/ "", allocator)) { - int valueCount = 100; - vector.setInitialCapacity(valueCount); - vector.allocateNew(); - NullableIntervalMonthDayNanoHolder nullableHolder = new NullableIntervalMonthDayNanoHolder(); - nullableHolder.isSet = 1; - nullableHolder.months = 2; - nullableHolder.days = 20; - nullableHolder.nanoseconds = 123; - IntervalMonthDayNanoHolder holder = new IntervalMonthDayNanoHolder(); - holder.months = Integer.MIN_VALUE; - holder.days = Integer.MIN_VALUE; - holder.nanoseconds = Long.MIN_VALUE; - - vector.set(0, /*months=*/ 1, /*days=*/ 2, /*nanoseconds=*/ -2); - vector.setSafe(2, /*months=*/ 1, /*days=*/ 2, /*nanoseconds=*/ -3); - vector.setSafe(/*index=*/ 4, nullableHolder); - vector.set(3, holder); - nullableHolder.isSet = 0; - vector.setSafe(/*index=*/ 5, nullableHolder); - vector.setValueCount(5); - - assertEquals("P1M2D PT-0.000000002S ", vector.getAsStringBuilder(0).toString()); - assertEquals(null, vector.getAsStringBuilder(1)); - 
assertEquals("P1M2D PT-0.000000003S ", vector.getAsStringBuilder(2).toString()); - assertEquals( - new PeriodDuration( - Period.of(0, Integer.MIN_VALUE, Integer.MIN_VALUE), Duration.ofNanos(Long.MIN_VALUE)), - vector.getObject(3)); - assertEquals("P2M20D PT0.000000123S ", vector.getAsStringBuilder(4).toString()); - - assertEquals(null, vector.getObject(5)); - - vector.get(1, nullableHolder); - assertEquals(0, nullableHolder.isSet); - - vector.get(2, nullableHolder); - assertEquals(1, nullableHolder.isSet); - assertEquals(1, nullableHolder.months); - assertEquals(2, nullableHolder.days); - assertEquals(-3, nullableHolder.nanoseconds); - - IntervalMonthDayNanoVector.getDays(vector.valueBuffer, 2); - assertEquals(1, IntervalMonthDayNanoVector.getMonths(vector.valueBuffer, 2)); - assertEquals(2, IntervalMonthDayNanoVector.getDays(vector.valueBuffer, 2)); - assertEquals(-3, IntervalMonthDayNanoVector.getNanoseconds(vector.valueBuffer, 2)); - - assertEquals(0, vector.isSet(1)); - assertEquals(1, vector.isSet(2)); - - assertEquals(Types.MinorType.INTERVALMONTHDAYNANO, vector.getMinorType()); - ArrowType fieldType = vector.getField().getType(); - assertEquals(ArrowType.ArrowTypeID.Interval, fieldType.getTypeID()); - ArrowType.Interval intervalType = (ArrowType.Interval) fieldType; - assertEquals(IntervalUnit.MONTH_DAY_NANO, intervalType.getUnit()); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java deleted file mode 100644 index 0ba2bf9f406e9..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertSame; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.IntervalUnit; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestIntervalYearVector { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testGetAsStringBuilder() { - try (final IntervalYearVector vector = new IntervalYearVector("", allocator)) { - int valueCount = 100; - vector.setInitialCapacity(valueCount); - vector.allocateNew(); - for (int i = 0; i < valueCount; i++) { - vector.set(i, i); - } - - assertEquals("0 years 1 month ", vector.getAsStringBuilder(1).toString()); - assertEquals("0 years 10 months ", vector.getAsStringBuilder(10).toString()); - assertEquals("1 year 8 months ", vector.getAsStringBuilder(20).toString()); - assertEquals("2 years 6 months ", vector.getAsStringBuilder(30).toString()); 
- - assertEquals(Types.MinorType.INTERVALYEAR, vector.getMinorType()); - ArrowType fieldType = vector.getField().getType(); - assertEquals(ArrowType.ArrowTypeID.Interval, fieldType.getTypeID()); - ArrowType.Interval intervalType = (ArrowType.Interval) fieldType; - assertEquals(IntervalUnit.YEAR_MONTH, intervalType.getUnit()); - } - } - - @Test - public void testGetTransferPairWithField() { - final IntervalYearVector fromVector = new IntervalYearVector("", allocator); - final TransferPair transferPair = fromVector.getTransferPair(fromVector.getField(), allocator); - final IntervalYearVector toVector = (IntervalYearVector) transferPair.getTo(); - // Field inside a new vector created by reusing a field should be the same in memory as the - // original field. - assertSame(fromVector.getField(), toVector.getField()); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java deleted file mode 100644 index 101d942d2a4c5..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ /dev/null @@ -1,1030 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.BaseRepeatedValueVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestLargeListVector { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testCopyFrom() throws Exception { - try (LargeListVector inVector = LargeListVector.empty("input", allocator); - LargeListVector outVector = LargeListVector.empty("output", allocator)) { - UnionLargeListWriter writer = inVector.getWriter(); - writer.allocate(); - - // populate input vector with the following records - // [1, 2, 3] - // null - // [] - writer.setPosition(0); // optional - writer.startList(); - writer.bigInt().writeBigInt(1); - writer.bigInt().writeBigInt(2); - 
writer.bigInt().writeBigInt(3); - writer.endList(); - - writer.setPosition(2); - writer.startList(); - writer.endList(); - - writer.setValueCount(3); - - // copy values from input to output - outVector.allocateNew(); - for (int i = 0; i < 3; i++) { - outVector.copyFrom(i, i, inVector); - } - outVector.setValueCount(3); - - // assert the output vector is correct - FieldReader reader = outVector.getReader(); - assertTrue(reader.isSet(), "shouldn't be null"); - reader.setPosition(1); - assertFalse(reader.isSet(), "should be null"); - reader.setPosition(2); - assertTrue(reader.isSet(), "shouldn't be null"); - - /* index 0 */ - Object result = outVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(1), resultSet.get(0)); - assertEquals(Long.valueOf(2), resultSet.get(1)); - assertEquals(Long.valueOf(3), resultSet.get(2)); - - /* index 1 */ - result = outVector.getObject(1); - assertNull(result); - - /* index 2 */ - result = outVector.getObject(2); - resultSet = (ArrayList) result; - assertEquals(0, resultSet.size()); - - /* 3+0+0/3 */ - assertEquals(1.0D, inVector.getDensity(), 0); - } - } - - @Test - public void testSetLastSetUsage() throws Exception { - try (LargeListVector listVector = LargeListVector.empty("input", allocator)) { - - /* Explicitly add the dataVector */ - MinorType type = MinorType.BIGINT; - listVector.addOrGetVector(FieldType.nullable(type.getType())); - - /* allocate memory */ - listVector.allocateNew(); - - /* get inner buffers; validityBuffer and offsetBuffer */ - - ArrowBuf validityBuffer = listVector.getValidityBuffer(); - ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); - - /* get the underlying data vector -- BigIntVector */ - BigIntVector dataVector = (BigIntVector) listVector.getDataVector(); - - /* check current lastSet */ - assertEquals(-1L, listVector.getLastSet()); - - int index = 0; - int offset; - - /* write [10, 11, 12] to the list vector at index 0 */ - 
BitVectorHelper.setBit(validityBuffer, index); - dataVector.setSafe(0, 1, 10); - dataVector.setSafe(1, 1, 11); - dataVector.setSafe(2, 1, 12); - offsetBuffer.setLong((index + 1) * LargeListVector.OFFSET_WIDTH, 3); - - index += 1; - - /* write [13, 14] to the list vector at index 1 */ - BitVectorHelper.setBit(validityBuffer, index); - dataVector.setSafe(3, 1, 13); - dataVector.setSafe(4, 1, 14); - offsetBuffer.setLong((index + 1) * LargeListVector.OFFSET_WIDTH, 5); - - index += 1; - - /* write [15, 16, 17] to the list vector at index 2 */ - BitVectorHelper.setBit(validityBuffer, index); - dataVector.setSafe(5, 1, 15); - dataVector.setSafe(6, 1, 16); - dataVector.setSafe(7, 1, 17); - offsetBuffer.setLong((index + 1) * LargeListVector.OFFSET_WIDTH, 8); - - /* check current lastSet */ - assertEquals(-1L, listVector.getLastSet()); - - /* set lastset and arbitrary valuecount for list vector. - * - * NOTE: if we don't execute setLastSet() before setLastValueCount(), then - * the latter will corrupt the offsetBuffer and thus the accessor will not - * retrieve the correct values from underlying dataBuffer. Run the test - * by commenting out next line and we should see failures from 5th assert - * onwards. This is why doing setLastSet() is important before setValueCount() - * once the vector has been loaded. - * - * Another important thing to remember is the value of lastSet itself. - * Even though the listVector has elements till index 2 only, the lastSet should - * be set as 3. This is because the offsetBuffer has valid offsets filled till index 3. - * If we do setLastSet(2), the offsetBuffer at index 3 will contain incorrect value - * after execution of setValueCount(). - * - * correct state of the listVector - * bitvector {1, 1, 1, 0, 0.... } - * offsetvector {0, 3, 5, 8, 8, 8.....} - * datavector { [10, 11, 12], - * [13, 14], - * [15, 16, 17] - * } - * - * if we don't do setLastSet() before setValueCount --> incorrect state - * bitvector {1, 1, 1, 0, 0.... 
} - * offsetvector {0, 0, 0, 0, 0, 0.....} - * datavector { [10, 11, 12], - * [13, 14], - * [15, 16, 17] - * } - * - * if we do setLastSet(2) before setValueCount --> incorrect state - * bitvector {1, 1, 1, 0, 0.... } - * offsetvector {0, 3, 5, 5, 5, 5.....} - * datavector { [10, 11, 12], - * [13, 14], - * [15, 16, 17] - * } - */ - listVector.setLastSet(2); - listVector.setValueCount(10); - - /* (3+2+3)/10 */ - assertEquals(0.8D, listVector.getDensity(), 0); - - index = 0; - offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(0), Integer.toString(offset)); - - Long actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(10), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(11), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(12), actual); - - index++; - offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(3), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(13), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(14), actual); - - index++; - offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(5), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(15), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(16), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(17), actual); - - index++; - offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(8), Integer.toString(offset)); - actual = dataVector.getObject(offset); - assertNull(actual); - } - } - - @Test - public void testSplitAndTransfer() throws Exception { - try (LargeListVector listVector = 
LargeListVector.empty("sourceVector", allocator)) { - - /* Explicitly add the dataVector */ - MinorType type = MinorType.BIGINT; - listVector.addOrGetVector(FieldType.nullable(type.getType())); - - UnionLargeListWriter listWriter = listVector.getWriter(); - - /* allocate memory */ - listWriter.allocate(); - - /* populate data */ - listWriter.setPosition(0); - listWriter.startList(); - listWriter.bigInt().writeBigInt(10); - listWriter.bigInt().writeBigInt(11); - listWriter.bigInt().writeBigInt(12); - listWriter.endList(); - - listWriter.setPosition(1); - listWriter.startList(); - listWriter.bigInt().writeBigInt(13); - listWriter.bigInt().writeBigInt(14); - listWriter.endList(); - - listWriter.setPosition(2); - listWriter.startList(); - listWriter.bigInt().writeBigInt(15); - listWriter.bigInt().writeBigInt(16); - listWriter.bigInt().writeBigInt(17); - listWriter.bigInt().writeBigInt(18); - listWriter.endList(); - - listWriter.setPosition(3); - listWriter.startList(); - listWriter.bigInt().writeBigInt(19); - listWriter.endList(); - - listWriter.setPosition(4); - listWriter.startList(); - listWriter.bigInt().writeBigInt(20); - listWriter.bigInt().writeBigInt(21); - listWriter.bigInt().writeBigInt(22); - listWriter.bigInt().writeBigInt(23); - listWriter.endList(); - - listVector.setValueCount(5); - - assertEquals(4, listVector.getLastSet()); - - /* get offset buffer */ - final ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); - - /* get dataVector */ - BigIntVector dataVector = (BigIntVector) listVector.getDataVector(); - - /* check the vector output */ - - int index = 0; - int offset; - Long actual; - - /* index 0 */ - assertFalse(listVector.isNull(index)); - offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(0), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(10), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(11), 
actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(12), actual); - - /* index 1 */ - index++; - assertFalse(listVector.isNull(index)); - offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(3), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(13), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(14), actual); - - /* index 2 */ - index++; - assertFalse(listVector.isNull(index)); - offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(5), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(15), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(16), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(17), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(18), actual); - - /* index 3 */ - index++; - assertFalse(listVector.isNull(index)); - offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(9), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(19), actual); - - /* index 4 */ - index++; - assertFalse(listVector.isNull(index)); - offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(10), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(20), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(21), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(22), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(23), actual); - - /* index 5 */ - index++; - 
assertTrue(listVector.isNull(index)); - offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(14), Integer.toString(offset)); - - /* do split and transfer */ - try (LargeListVector toVector = LargeListVector.empty("toVector", allocator)) { - - TransferPair transferPair = listVector.makeTransferPair(toVector); - - int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}}; - - for (final int[] transferLength : transferLengths) { - int start = transferLength[0]; - int splitLength = transferLength[1]; - - int dataLength1 = 0; - int dataLength2 = 0; - - int offset1 = 0; - int offset2 = 0; - - transferPair.splitAndTransfer(start, splitLength); - - /* get offsetBuffer of toVector */ - final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer(); - - /* get dataVector of toVector */ - BigIntVector dataVector1 = (BigIntVector) toVector.getDataVector(); - - for (int i = 0; i < splitLength; i++) { - dataLength1 = - (int) offsetBuffer.getLong((start + i + 1) * LargeListVector.OFFSET_WIDTH) - - (int) offsetBuffer.getLong((start + i) * LargeListVector.OFFSET_WIDTH); - dataLength2 = - (int) toOffsetBuffer.getLong((i + 1) * LargeListVector.OFFSET_WIDTH) - - (int) toOffsetBuffer.getLong(i * LargeListVector.OFFSET_WIDTH); - - assertEquals( - dataLength1, - dataLength2, - "Different data lengths at index: " + i + " and start: " + start); - - offset1 = (int) offsetBuffer.getLong((start + i) * LargeListVector.OFFSET_WIDTH); - offset2 = (int) toOffsetBuffer.getLong(i * LargeListVector.OFFSET_WIDTH); - - for (int j = 0; j < dataLength1; j++) { - assertEquals( - dataVector.getObject(offset1), - dataVector1.getObject(offset2), - "Different data at indexes: " + offset1 + " and " + offset2); - - offset1++; - offset2++; - } - } - } - } - } - } - - @Test - public void testNestedLargeListVector() throws Exception { - try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) { - - UnionLargeListWriter listWriter = 
listVector.getWriter(); - - /* allocate memory */ - listWriter.allocate(); - - /* the dataVector that backs a listVector will also be a - * listVector for this test. - */ - - /* write one or more inner lists at index 0 */ - listWriter.setPosition(0); - listWriter.startList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(50); - listWriter.list().bigInt().writeBigInt(100); - listWriter.list().bigInt().writeBigInt(200); - listWriter.list().endList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(75); - listWriter.list().bigInt().writeBigInt(125); - listWriter.list().bigInt().writeBigInt(150); - listWriter.list().bigInt().writeBigInt(175); - listWriter.list().endList(); - - listWriter.endList(); - - /* write one or more inner lists at index 1 */ - listWriter.setPosition(1); - listWriter.startList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(10); - listWriter.list().endList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(15); - listWriter.list().bigInt().writeBigInt(20); - listWriter.list().endList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(25); - listWriter.list().bigInt().writeBigInt(30); - listWriter.list().bigInt().writeBigInt(35); - listWriter.list().endList(); - - listWriter.endList(); - - assertEquals(1, listVector.getLastSet()); - - listVector.setValueCount(2); - - assertEquals(2, listVector.getValueCount()); - - /* get listVector value at index 0 -- the value itself is a listvector */ - Object result = listVector.getObject(0); - ArrayList> resultSet = (ArrayList>) result; - ArrayList list; - - assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ - assertEquals(3, resultSet.get(0).size()); /* size of first inner list */ - assertEquals(4, resultSet.get(1).size()); /* size of second inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(50), list.get(0)); - 
assertEquals(Long.valueOf(100), list.get(1)); - assertEquals(Long.valueOf(200), list.get(2)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(75), list.get(0)); - assertEquals(Long.valueOf(125), list.get(1)); - assertEquals(Long.valueOf(150), list.get(2)); - assertEquals(Long.valueOf(175), list.get(3)); - - /* get listVector value at index 1 -- the value itself is a listvector */ - result = listVector.getObject(1); - resultSet = (ArrayList>) result; - - assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ - assertEquals(1, resultSet.get(0).size()); /* size of first inner list */ - assertEquals(2, resultSet.get(1).size()); /* size of second inner list */ - assertEquals(3, resultSet.get(2).size()); /* size of third inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(10), list.get(0)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(15), list.get(0)); - assertEquals(Long.valueOf(20), list.get(1)); - - list = resultSet.get(2); - assertEquals(Long.valueOf(25), list.get(0)); - assertEquals(Long.valueOf(30), list.get(1)); - assertEquals(Long.valueOf(35), list.get(2)); - - /* check underlying bitVector */ - assertFalse(listVector.isNull(0)); - assertFalse(listVector.isNull(1)); - - /* check underlying offsets */ - final ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); - - /* listVector has 2 lists at index 0 and 3 lists at index 1 */ - assertEquals(0, offsetBuffer.getLong(0 * LargeListVector.OFFSET_WIDTH)); - assertEquals(2, offsetBuffer.getLong(1 * LargeListVector.OFFSET_WIDTH)); - assertEquals(5, offsetBuffer.getLong(2 * LargeListVector.OFFSET_WIDTH)); - } - } - - @Test - public void testNestedLargeListVector1() throws Exception { - try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) { - - MinorType listType = MinorType.LIST; - MinorType scalarType = MinorType.BIGINT; - - listVector.addOrGetVector(FieldType.nullable(listType.getType())); - - ListVector innerList1 = (ListVector) 
listVector.getDataVector(); - innerList1.addOrGetVector(FieldType.nullable(listType.getType())); - - ListVector innerList2 = (ListVector) innerList1.getDataVector(); - innerList2.addOrGetVector(FieldType.nullable(listType.getType())); - - ListVector innerList3 = (ListVector) innerList2.getDataVector(); - innerList3.addOrGetVector(FieldType.nullable(listType.getType())); - - ListVector innerList4 = (ListVector) innerList3.getDataVector(); - innerList4.addOrGetVector(FieldType.nullable(listType.getType())); - - ListVector innerList5 = (ListVector) innerList4.getDataVector(); - innerList5.addOrGetVector(FieldType.nullable(listType.getType())); - - ListVector innerList6 = (ListVector) innerList5.getDataVector(); - innerList6.addOrGetVector(FieldType.nullable(scalarType.getType())); - - listVector.setInitialCapacity(128); - } - } - - @Test - public void testNestedLargeListVector2() throws Exception { - try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) { - listVector.setInitialCapacity(1); - UnionLargeListWriter listWriter = listVector.getWriter(); - /* allocate memory */ - listWriter.allocate(); - - /* write one or more inner lists at index 0 */ - listWriter.setPosition(0); - listWriter.startList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(50); - listWriter.list().bigInt().writeBigInt(100); - listWriter.list().bigInt().writeBigInt(200); - listWriter.list().endList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(75); - listWriter.list().bigInt().writeBigInt(125); - listWriter.list().endList(); - - listWriter.endList(); - - /* write one or more inner lists at index 1 */ - listWriter.setPosition(1); - listWriter.startList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(15); - listWriter.list().bigInt().writeBigInt(20); - listWriter.list().endList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(25); - 
listWriter.list().bigInt().writeBigInt(30); - listWriter.list().bigInt().writeBigInt(35); - listWriter.list().endList(); - - listWriter.endList(); - - assertEquals(1, listVector.getLastSet()); - - listVector.setValueCount(2); - - assertEquals(2, listVector.getValueCount()); - - /* get listVector value at index 0 -- the value itself is a listvector */ - Object result = listVector.getObject(0); - ArrayList> resultSet = (ArrayList>) result; - ArrayList list; - - assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ - assertEquals(3, resultSet.get(0).size()); /* size of first inner list */ - assertEquals(2, resultSet.get(1).size()); /* size of second inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(50), list.get(0)); - assertEquals(Long.valueOf(100), list.get(1)); - assertEquals(Long.valueOf(200), list.get(2)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(75), list.get(0)); - assertEquals(Long.valueOf(125), list.get(1)); - - /* get listVector value at index 1 -- the value itself is a listvector */ - result = listVector.getObject(1); - resultSet = (ArrayList>) result; - - assertEquals(2, resultSet.size()); /* 3 inner lists at index 1 */ - assertEquals(2, resultSet.get(0).size()); /* size of first inner list */ - assertEquals(3, resultSet.get(1).size()); /* size of second inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(15), list.get(0)); - assertEquals(Long.valueOf(20), list.get(1)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(25), list.get(0)); - assertEquals(Long.valueOf(30), list.get(1)); - assertEquals(Long.valueOf(35), list.get(2)); - - /* check underlying bitVector */ - assertFalse(listVector.isNull(0)); - assertFalse(listVector.isNull(1)); - - /* check underlying offsets */ - final ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); - - /* listVector has 2 lists at index 0 and 3 lists at index 1 */ - assertEquals(0, offsetBuffer.getLong(0 * LargeListVector.OFFSET_WIDTH)); - 
assertEquals(2, offsetBuffer.getLong(1 * LargeListVector.OFFSET_WIDTH)); - assertEquals(4, offsetBuffer.getLong(2 * LargeListVector.OFFSET_WIDTH)); - } - } - - @Test - public void testGetBufferAddress() throws Exception { - try (LargeListVector listVector = LargeListVector.empty("vector", allocator)) { - - UnionLargeListWriter listWriter = listVector.getWriter(); - boolean error = false; - - listWriter.allocate(); - - listWriter.setPosition(0); - listWriter.startList(); - listWriter.bigInt().writeBigInt(50); - listWriter.bigInt().writeBigInt(100); - listWriter.bigInt().writeBigInt(200); - listWriter.endList(); - - listWriter.setPosition(1); - listWriter.startList(); - listWriter.bigInt().writeBigInt(250); - listWriter.bigInt().writeBigInt(300); - listWriter.endList(); - - listVector.setValueCount(2); - - /* check listVector contents */ - Object result = listVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(50), resultSet.get(0)); - assertEquals(Long.valueOf(100), resultSet.get(1)); - assertEquals(Long.valueOf(200), resultSet.get(2)); - - result = listVector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(250), resultSet.get(0)); - assertEquals(Long.valueOf(300), resultSet.get(1)); - - List buffers = listVector.getFieldBuffers(); - - long bitAddress = listVector.getValidityBufferAddress(); - long offsetAddress = listVector.getOffsetBufferAddress(); - - try { - listVector.getDataBufferAddress(); - } catch (UnsupportedOperationException ue) { - error = true; - } finally { - assertTrue(error); - } - - assertEquals(2, buffers.size()); - assertEquals(bitAddress, buffers.get(0).memoryAddress()); - assertEquals(offsetAddress, buffers.get(1).memoryAddress()); - - /* (3+2)/2 */ - assertEquals(2.5, listVector.getDensity(), 0); - } - } - - @Test - public void testConsistentChildName() throws Exception { - try (LargeListVector 
listVector = LargeListVector.empty("sourceVector", allocator)) { - String emptyListStr = listVector.getField().toString(); - assertTrue(emptyListStr.contains(LargeListVector.DATA_VECTOR_NAME)); - - listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - String emptyVectorStr = listVector.getField().toString(); - assertTrue(emptyVectorStr.contains(LargeListVector.DATA_VECTOR_NAME)); - } - } - - @Test - public void testSetInitialCapacity() { - try (final LargeListVector vector = LargeListVector.empty("", allocator)) { - vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - - /** - * use the default multiplier of 5, 512 * 5 => 2560 * 4 => 10240 bytes => 16KB => 4096 value - * capacity. - */ - vector.setInitialCapacity(512); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 5); - - /* use density as 4 */ - vector.setInitialCapacity(512, 4); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4); - - /** - * inner value capacity we pass to data vector is 512 * 0.1 => 51 For an int vector this is - * 204 bytes of memory for data buffer and 7 bytes for validity buffer. and with power of 2 - * allocation, we allocate 256 bytes and 8 bytes for the data buffer and validity buffer of - * the inner vector. Thus value capacity of inner vector is 64 - */ - vector.setInitialCapacity(512, 0.1); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 51); - - /** - * inner value capacity we pass to data vector is 512 * 0.01 => 5 For an int vector this is 20 - * bytes of memory for data buffer and 1 byte for validity buffer. and with power of 2 - * allocation, we allocate 32 bytes and 1 bytes for the data buffer and validity buffer of the - * inner vector. 
Thus value capacity of inner vector is 8 - */ - vector.setInitialCapacity(512, 0.01); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 5); - - /** - * inner value capacity we pass to data vector is 5 * 0.1 => 0 which is then rounded off to 1. - * So we pass value count as 1 to the inner int vector. the offset buffer of the list vector - * is allocated for 6 values which is 24 bytes and then rounded off to 32 bytes (8 values) the - * validity buffer of the list vector is allocated for 5 values which is 1 byte. This is why - * value capacity of the list vector is 7 as we take the min of validity buffer value capacity - * and offset buffer value capacity. - */ - vector.setInitialCapacity(5, 0.1); - vector.allocateNew(); - assertEquals(7, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 1); - } - } - - @Test - public void testClearAndReuse() { - try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { - BigIntVector bigIntVector = - (BigIntVector) - vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector(); - vector.setInitialCapacity(10); - vector.allocateNew(); - - vector.startNewValue(0); - bigIntVector.setSafe(0, 7); - vector.endValue(0, 1); - vector.startNewValue(1); - bigIntVector.setSafe(1, 8); - vector.endValue(1, 1); - vector.setValueCount(2); - - Object result = vector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(Long.valueOf(7), resultSet.get(0)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(Long.valueOf(8), resultSet.get(0)); - - // Clear and release the buffers to trigger a realloc when adding next value - vector.clear(); - - // The list vector should reuse a buffer when reallocating the offset buffer - vector.startNewValue(0); - bigIntVector.setSafe(0, 7); - vector.endValue(0, 1); - vector.startNewValue(1); - 
bigIntVector.setSafe(1, 8); - vector.endValue(1, 1); - vector.setValueCount(2); - - result = vector.getObject(0); - resultSet = (ArrayList) result; - assertEquals(Long.valueOf(7), resultSet.get(0)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(Long.valueOf(8), resultSet.get(0)); - } - } - - @Test - public void testWriterGetField() { - try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { - - UnionLargeListWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writer.startList(); - writer.integer().writeInt(1); - writer.integer().writeInt(2); - writer.endList(); - vector.setValueCount(2); - - Field expectedDataField = - new Field( - BaseRepeatedValueVector.DATA_VECTOR_NAME, - FieldType.nullable(new ArrowType.Int(32, true)), - null); - Field expectedField = - new Field( - vector.getName(), - FieldType.nullable(ArrowType.LargeList.INSTANCE), - Arrays.asList(expectedDataField)); - - assertEquals(expectedField, writer.getField()); - } - } - - @Test - public void testClose() throws Exception { - try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { - - UnionLargeListWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writer.startList(); - writer.integer().writeInt(1); - writer.integer().writeInt(2); - writer.endList(); - vector.setValueCount(2); - - assertTrue(vector.getBufferSize() > 0); - assertTrue(vector.getDataVector().getBufferSize() > 0); - - writer.close(); - assertEquals(0, vector.getBufferSize()); - assertEquals(0, vector.getDataVector().getBufferSize()); - } - } - - @Test - public void testGetBufferSizeFor() { - try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { - - UnionLargeListWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writeIntValues(writer, new int[] {1, 2}); - writeIntValues(writer, new int[] {3, 4}); - writeIntValues(writer, new int[] {5, 6}); 
- writeIntValues(writer, new int[] {7, 8, 9, 10}); - writeIntValues(writer, new int[] {11, 12, 13, 14}); - writer.setValueCount(5); - - IntVector dataVector = (IntVector) vector.getDataVector(); - int[] indices = new int[] {0, 2, 4, 6, 10, 14}; - - for (int valueCount = 1; valueCount <= 5; valueCount++) { - int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount); - int offsetBufferSize = (valueCount + 1) * LargeListVector.OFFSET_WIDTH; - - int expectedSize = - validityBufferSize - + offsetBufferSize - + dataVector.getBufferSizeFor(indices[valueCount]); - assertEquals(expectedSize, vector.getBufferSizeFor(valueCount)); - } - } - } - - @Test - public void testIsEmpty() { - try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { - UnionLargeListWriter writer = vector.getWriter(); - writer.allocate(); - - // set values [1,2], null, [], [5,6] - writeIntValues(writer, new int[] {1, 2}); - writer.setPosition(2); - writeIntValues(writer, new int[] {}); - writeIntValues(writer, new int[] {5, 6}); - writer.setValueCount(4); - - assertFalse(vector.isEmpty(0)); - assertTrue(vector.isNull(1)); - assertTrue(vector.isEmpty(1)); - assertFalse(vector.isNull(2)); - assertTrue(vector.isEmpty(2)); - assertFalse(vector.isEmpty(3)); - } - } - - @Test - public void testTotalCapacity() { - final FieldType type = FieldType.nullable(MinorType.INT.getType()); - try (final LargeListVector vector = new LargeListVector("list", allocator, type, null)) { - // Force the child vector to be allocated based on the type - // (this is a bad API: we have to track and repeat the type twice) - vector.addOrGetVector(type); - - // Specify the allocation size but do not actually allocate - vector.setInitialTotalCapacity(10, 100); - - // Finally actually do the allocation - vector.allocateNewSafe(); - - // Note: allocator rounds up and can be greater than the requested allocation. 
- assertTrue(vector.getValueCapacity() >= 10); - assertTrue(vector.getDataVector().getValueCapacity() >= 100); - } - } - - @Test - public void testGetTransferPairWithField() throws Exception { - try (final LargeListVector fromVector = LargeListVector.empty("list", allocator)) { - - UnionLargeListWriter writer = fromVector.getWriter(); - writer.allocate(); - - // set some values - writer.startList(); - writer.integer().writeInt(1); - writer.integer().writeInt(2); - writer.endList(); - fromVector.setValueCount(2); - - final TransferPair transferPair = - fromVector.getTransferPair(fromVector.getField(), allocator); - final LargeListVector toVector = (LargeListVector) transferPair.getTo(); - // Field inside a new vector created by reusing a field should be the same in memory as the - // original field. - assertSame(toVector.getField(), fromVector.getField()); - } - } - - private void writeIntValues(UnionLargeListWriter writer, int[] values) { - writer.startList(); - for (int v : values) { - writer.integer().writeInt(v); - } - writer.endList(); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java deleted file mode 100644 index 26e7bb4a0d3b2..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java +++ /dev/null @@ -1,2239 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector; -import org.apache.arrow.vector.complex.LargeListViewVector; -import org.apache.arrow.vector.complex.impl.UnionLargeListViewWriter; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestLargeListViewVector { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testBasicLargeListViewVector() { - try (LargeListViewVector largeListViewVector = - LargeListViewVector.empty("sourceVector", allocator)) { - UnionLargeListViewWriter largeListViewWriter = largeListViewVector.getWriter(); - - /* 
allocate memory */ - largeListViewWriter.allocate(); - - /* write the first list at index 0 */ - largeListViewWriter.setPosition(0); - largeListViewWriter.startListView(); - - largeListViewWriter.bigInt().writeBigInt(12); - largeListViewWriter.bigInt().writeBigInt(-7); - largeListViewWriter.bigInt().writeBigInt(25); - largeListViewWriter.endListView(); - - /* the second list at index 1 is null (we are not setting any)*/ - - /* write the third list at index 2 */ - largeListViewWriter.setPosition(2); - largeListViewWriter.startListView(); - - largeListViewWriter.bigInt().writeBigInt(0); - largeListViewWriter.bigInt().writeBigInt(-127); - largeListViewWriter.bigInt().writeBigInt(127); - largeListViewWriter.bigInt().writeBigInt(50); - largeListViewWriter.endListView(); - - /* write the fourth list at index 3 (empty list) */ - largeListViewWriter.setPosition(3); - largeListViewWriter.startListView(); - largeListViewWriter.endListView(); - - /* write the fifth list at index 4 */ - largeListViewWriter.setPosition(4); - largeListViewWriter.startListView(); - largeListViewWriter.bigInt().writeBigInt(1); - largeListViewWriter.bigInt().writeBigInt(2); - largeListViewWriter.bigInt().writeBigInt(3); - largeListViewWriter.bigInt().writeBigInt(4); - largeListViewWriter.endListView(); - - largeListViewWriter.setValueCount(5); - // check value count - assertEquals(5, largeListViewVector.getValueCount()); - - /* get vector at index 0 -- the value is a BigIntVector*/ - final ArrowBuf offSetBuffer = largeListViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = largeListViewVector.getSizeBuffer(); - final FieldVector dataVec = largeListViewVector.getDataVector(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offSetBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, 
offSetBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - // check data vector - assertEquals(12, ((BigIntVector) dataVec).get(0)); - assertEquals(-7, ((BigIntVector) dataVec).get(1)); - assertEquals(25, ((BigIntVector) dataVec).get(2)); - assertEquals(0, ((BigIntVector) dataVec).get(3)); - assertEquals(-127, ((BigIntVector) dataVec).get(4)); - assertEquals(127, ((BigIntVector) dataVec).get(5)); - assertEquals(50, ((BigIntVector) dataVec).get(6)); - assertEquals(1, ((BigIntVector) dataVec).get(7)); - assertEquals(2, ((BigIntVector) dataVec).get(8)); - assertEquals(3, ((BigIntVector) dataVec).get(9)); - assertEquals(4, ((BigIntVector) dataVec).get(10)); - - largeListViewVector.validate(); - } - } - - @Test - public void testImplicitNullVectors() { - try (LargeListViewVector largeListViewVector = - LargeListViewVector.empty("sourceVector", allocator)) { - UnionLargeListViewWriter largeListViewWriter = largeListViewVector.getWriter(); - /* allocate memory */ - largeListViewWriter.allocate(); - - final ArrowBuf offSetBuffer = largeListViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = largeListViewVector.getSizeBuffer(); - - /* write the first list at index 0 */ - largeListViewWriter.setPosition(0); - largeListViewWriter.startListView(); - - largeListViewWriter.bigInt().writeBigInt(12); - largeListViewWriter.bigInt().writeBigInt(-7); - largeListViewWriter.bigInt().writeBigInt(25); - 
largeListViewWriter.endListView(); - - int offSet0 = offSetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH); - int size0 = sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH); - - // after the first list is written, - // the initial offset must be 0, - // the size must be 3 (as there are 3 elements in the array), - // the lastSet must be 0 since, the first list is written at index 0. - - assertEquals(0, offSet0); - assertEquals(3, size0); - - largeListViewWriter.setPosition(5); - largeListViewWriter.startListView(); - - // writing the 6th list at index 5, - // and the list items from index 1 through 4 are not populated. - // but since there is a gap between the 0th and 5th list, in terms - // of buffer allocation, the offset and size buffers must be updated - // to reflect the implicit null vectors. - - for (int i = 1; i < 5; i++) { - int offSet = offSetBuffer.getInt(i * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH); - int size = sizeBuffer.getInt(i * BaseLargeRepeatedValueViewVector.SIZE_WIDTH); - // Since the list is not written, the offset and size must equal to child vector's size - // i.e., 3, and size should be 0 as the list is not written. - // And the last set value is the value currently being written, which is 5. - assertEquals(0, offSet); - assertEquals(0, size); - } - - largeListViewWriter.bigInt().writeBigInt(12); - largeListViewWriter.bigInt().writeBigInt(25); - largeListViewWriter.endListView(); - - int offSet5 = offSetBuffer.getInt(5 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH); - int size5 = sizeBuffer.getInt(5 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH); - - assertEquals(3, offSet5); - assertEquals(2, size5); - - largeListViewWriter.setPosition(10); - largeListViewWriter.startListView(); - - // writing the 11th list at index 10, - // and the list items from index 6 through 10 are not populated. 
- // but since there is a gap between the 5th and 11th list, in terms - // of buffer allocation, the offset and size buffers must be updated - // to reflect the implicit null vectors. - for (int i = 6; i < 10; i++) { - int offSet = offSetBuffer.getInt(i * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH); - int size = sizeBuffer.getInt(i * BaseLargeRepeatedValueViewVector.SIZE_WIDTH); - // Since the list is not written, the offset and size must equal to 0 - // and size should be 0 as the list is not written. - // And the last set value is the value currently being written, which is 10. - assertEquals(0, offSet); - assertEquals(0, size); - } - - largeListViewWriter.bigInt().writeBigInt(12); - largeListViewWriter.endListView(); - - int offSet11 = offSetBuffer.getInt(10 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH); - int size11 = sizeBuffer.getInt(10 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH); - - assertEquals(5, offSet11); - assertEquals(1, size11); - - largeListViewVector.setValueCount(11); - - largeListViewVector.validate(); - } - } - - @Test - public void testNestedLargeListViewVector() throws Exception { - try (LargeListViewVector largeListViewVector = - LargeListViewVector.empty("sourceVector", allocator)) { - - UnionLargeListViewWriter largeListViewWriter = largeListViewVector.getWriter(); - - /* allocate memory */ - largeListViewWriter.allocate(); - - /* the dataVector that backs a largeListViewVector will also be a - * largeListViewVector for this test. 
- */ - - /* write one or more inner lists at index 0 */ - largeListViewWriter.setPosition(0); - largeListViewWriter.startListView(); - - largeListViewWriter.listView().startListView(); - largeListViewWriter.listView().bigInt().writeBigInt(50); - largeListViewWriter.listView().bigInt().writeBigInt(100); - largeListViewWriter.listView().bigInt().writeBigInt(200); - largeListViewWriter.listView().endListView(); - - largeListViewWriter.listView().startListView(); - largeListViewWriter.listView().bigInt().writeBigInt(75); - largeListViewWriter.listView().bigInt().writeBigInt(125); - largeListViewWriter.listView().bigInt().writeBigInt(150); - largeListViewWriter.listView().bigInt().writeBigInt(175); - largeListViewWriter.listView().endListView(); - - largeListViewWriter.endListView(); - - /* write one or more inner lists at index 1 */ - largeListViewWriter.setPosition(1); - largeListViewWriter.startListView(); - - largeListViewWriter.listView().startListView(); - largeListViewWriter.listView().bigInt().writeBigInt(10); - largeListViewWriter.listView().endListView(); - - largeListViewWriter.listView().startListView(); - largeListViewWriter.listView().bigInt().writeBigInt(15); - largeListViewWriter.listView().bigInt().writeBigInt(20); - largeListViewWriter.listView().endListView(); - - largeListViewWriter.listView().startListView(); - largeListViewWriter.listView().bigInt().writeBigInt(25); - largeListViewWriter.listView().bigInt().writeBigInt(30); - largeListViewWriter.listView().bigInt().writeBigInt(35); - largeListViewWriter.listView().endListView(); - - largeListViewWriter.endListView(); - - largeListViewVector.setValueCount(2); - - assertEquals(2, largeListViewVector.getValueCount()); - - /* get largeListViewVector value at index 0 -- the value itself is a largeListViewVector */ - Object result = largeListViewVector.getObject(0); - ArrayList> resultSet = (ArrayList>) result; - ArrayList list; - - assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ - 
assertEquals(3, resultSet.get(0).size()); /* size of first inner list */ - assertEquals(4, resultSet.get(1).size()); /* size of second inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(50), list.get(0)); - assertEquals(Long.valueOf(100), list.get(1)); - assertEquals(Long.valueOf(200), list.get(2)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(75), list.get(0)); - assertEquals(Long.valueOf(125), list.get(1)); - assertEquals(Long.valueOf(150), list.get(2)); - assertEquals(Long.valueOf(175), list.get(3)); - - /* get largeListViewVector value at index 1 -- the value itself is a largeListViewVector */ - result = largeListViewVector.getObject(1); - resultSet = (ArrayList>) result; - - assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ - assertEquals(1, resultSet.get(0).size()); /* size of first inner list */ - assertEquals(2, resultSet.get(1).size()); /* size of second inner list */ - assertEquals(3, resultSet.get(2).size()); /* size of third inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(10), list.get(0)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(15), list.get(0)); - assertEquals(Long.valueOf(20), list.get(1)); - - list = resultSet.get(2); - assertEquals(Long.valueOf(25), list.get(0)); - assertEquals(Long.valueOf(30), list.get(1)); - assertEquals(Long.valueOf(35), list.get(2)); - - /* check underlying bitVector */ - assertFalse(largeListViewVector.isNull(0)); - assertFalse(largeListViewVector.isNull(1)); - - /* check underlying offsets */ - final ArrowBuf offsetBuffer = largeListViewVector.getOffsetBuffer(); - - /* largeListViewVector has 2 lists at index 0 and 3 lists at index 1 */ - assertEquals(0, offsetBuffer.getLong(0 * LargeListViewVector.OFFSET_WIDTH)); - assertEquals(2, offsetBuffer.getLong(1 * LargeListViewVector.OFFSET_WIDTH)); - } - } - - @Test - public void testNestedLargeListViewVector1() { - try (LargeListViewVector largeListViewVector = - 
LargeListViewVector.empty("sourceVector", allocator)) { - - MinorType listViewType = MinorType.LARGELISTVIEW; - MinorType scalarType = MinorType.BIGINT; - - largeListViewVector.addOrGetVector(FieldType.nullable(listViewType.getType())); - - LargeListViewVector innerList1 = (LargeListViewVector) largeListViewVector.getDataVector(); - innerList1.addOrGetVector(FieldType.nullable(listViewType.getType())); - - LargeListViewVector innerList2 = (LargeListViewVector) innerList1.getDataVector(); - innerList2.addOrGetVector(FieldType.nullable(listViewType.getType())); - - LargeListViewVector innerList3 = (LargeListViewVector) innerList2.getDataVector(); - innerList3.addOrGetVector(FieldType.nullable(listViewType.getType())); - - LargeListViewVector innerList4 = (LargeListViewVector) innerList3.getDataVector(); - innerList4.addOrGetVector(FieldType.nullable(listViewType.getType())); - - LargeListViewVector innerList5 = (LargeListViewVector) innerList4.getDataVector(); - innerList5.addOrGetVector(FieldType.nullable(listViewType.getType())); - - LargeListViewVector innerList6 = (LargeListViewVector) innerList5.getDataVector(); - innerList6.addOrGetVector(FieldType.nullable(scalarType.getType())); - - largeListViewVector.setInitialCapacity(128); - } - } - - @Test - public void testNestedLargeListViewVector2() throws Exception { - try (LargeListViewVector largeListViewVector = - LargeListViewVector.empty("sourceVector", allocator)) { - largeListViewVector.setInitialCapacity(1); - UnionLargeListViewWriter largeListViewWriter = largeListViewVector.getWriter(); - /* allocate memory */ - largeListViewWriter.allocate(); - - /* write one or more inner lists at index 0 */ - largeListViewWriter.setPosition(0); - largeListViewWriter.startListView(); - - largeListViewWriter.listView().startListView(); - largeListViewWriter.listView().bigInt().writeBigInt(50); - largeListViewWriter.listView().bigInt().writeBigInt(100); - largeListViewWriter.listView().bigInt().writeBigInt(200); - 
largeListViewWriter.listView().endListView(); - - largeListViewWriter.listView().startListView(); - largeListViewWriter.listView().bigInt().writeBigInt(75); - largeListViewWriter.listView().bigInt().writeBigInt(125); - largeListViewWriter.listView().endListView(); - - largeListViewWriter.endListView(); - - /* write one or more inner lists at index 1 */ - largeListViewWriter.setPosition(1); - largeListViewWriter.startListView(); - - largeListViewWriter.listView().startListView(); - largeListViewWriter.listView().bigInt().writeBigInt(15); - largeListViewWriter.listView().bigInt().writeBigInt(20); - largeListViewWriter.listView().endListView(); - - largeListViewWriter.listView().startListView(); - largeListViewWriter.listView().bigInt().writeBigInt(25); - largeListViewWriter.listView().bigInt().writeBigInt(30); - largeListViewWriter.listView().bigInt().writeBigInt(35); - largeListViewWriter.listView().endListView(); - - largeListViewWriter.endListView(); - - largeListViewVector.setValueCount(2); - - assertEquals(2, largeListViewVector.getValueCount()); - - /* get largeListViewVector value at index 0 -- the value itself is a largeListViewVector */ - Object result = largeListViewVector.getObject(0); - ArrayList> resultSet = (ArrayList>) result; - ArrayList list; - - assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ - assertEquals(3, resultSet.get(0).size()); /* size of first inner list */ - assertEquals(2, resultSet.get(1).size()); /* size of second inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(50), list.get(0)); - assertEquals(Long.valueOf(100), list.get(1)); - assertEquals(Long.valueOf(200), list.get(2)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(75), list.get(0)); - assertEquals(Long.valueOf(125), list.get(1)); - - /* get largeListViewVector value at index 1 -- the value itself is a largeListViewVector */ - result = largeListViewVector.getObject(1); - resultSet = (ArrayList>) result; - - assertEquals(2, 
resultSet.size()); /* 3 inner lists at index 1 */ - assertEquals(2, resultSet.get(0).size()); /* size of first inner list */ - assertEquals(3, resultSet.get(1).size()); /* size of second inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(15), list.get(0)); - assertEquals(Long.valueOf(20), list.get(1)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(25), list.get(0)); - assertEquals(Long.valueOf(30), list.get(1)); - assertEquals(Long.valueOf(35), list.get(2)); - - /* check underlying bitVector */ - assertFalse(largeListViewVector.isNull(0)); - assertFalse(largeListViewVector.isNull(1)); - - /* check underlying offsets */ - final ArrowBuf offsetBuffer = largeListViewVector.getOffsetBuffer(); - - /* largeListViewVector has 2 lists at index 0 and 3 lists at index 1 */ - assertEquals(0, offsetBuffer.getLong(0 * LargeListViewVector.OFFSET_WIDTH)); - assertEquals(2, offsetBuffer.getLong(1 * LargeListViewVector.OFFSET_WIDTH)); - } - } - - @Test - public void testGetBufferAddress() throws Exception { - try (LargeListViewVector largeListViewVector = LargeListViewVector.empty("vector", allocator)) { - - UnionLargeListViewWriter largeListViewWriter = largeListViewVector.getWriter(); - boolean error = false; - - largeListViewWriter.allocate(); - - largeListViewWriter.setPosition(0); - largeListViewWriter.startListView(); - largeListViewWriter.bigInt().writeBigInt(50); - largeListViewWriter.bigInt().writeBigInt(100); - largeListViewWriter.bigInt().writeBigInt(200); - largeListViewWriter.endListView(); - - largeListViewWriter.setPosition(1); - largeListViewWriter.startListView(); - largeListViewWriter.bigInt().writeBigInt(250); - largeListViewWriter.bigInt().writeBigInt(300); - largeListViewWriter.endListView(); - - largeListViewVector.setValueCount(2); - - /* check largeListViewVector contents */ - Object result = largeListViewVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - 
assertEquals(Long.valueOf(50), resultSet.get(0)); - assertEquals(Long.valueOf(100), resultSet.get(1)); - assertEquals(Long.valueOf(200), resultSet.get(2)); - - result = largeListViewVector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(250), resultSet.get(0)); - assertEquals(Long.valueOf(300), resultSet.get(1)); - - List buffers = largeListViewVector.getFieldBuffers(); - - long bitAddress = largeListViewVector.getValidityBufferAddress(); - long offsetAddress = largeListViewVector.getOffsetBufferAddress(); - long sizeAddress = largeListViewVector.getSizeBufferAddress(); - - try { - largeListViewVector.getDataBufferAddress(); - } catch (UnsupportedOperationException ue) { - error = true; - } finally { - assertTrue(error); - } - - assertEquals(3, buffers.size()); - assertEquals(bitAddress, buffers.get(0).memoryAddress()); - assertEquals(offsetAddress, buffers.get(1).memoryAddress()); - assertEquals(sizeAddress, buffers.get(2).memoryAddress()); - - /* (3+2)/2 */ - assertEquals(2.5, largeListViewVector.getDensity(), 0); - } - } - - /* - * Setting up the buffers directly needs to be validated with the base method used in - * the LargeListViewVector class where we use the approach of startListView(), - * write to the child vector and endListView(). - *

    - * To support this, we have to consider the following scenarios; - *

    - * 1. Only using directly buffer-based inserts. - * 2. Default list insertion followed by buffer-based inserts. - * 3. Buffer-based inserts followed by default list insertion. - */ - - /* Setting up buffers directly would require the following steps to be taken - * 0. Allocate buffers in largeListViewVector by calling `allocateNew` method. - * 1. Initialize the child vector using `initializeChildrenFromFields` method. - * 2. Set values in the child vector. - * 3. Set validity, offset and size buffers using `setValidity`, - * `setOffset` and `setSize` methods. - * 4. Set value count using `setValueCount` method. - */ - @Test - public void testBasicLargeListViewSet() { - - try (LargeListViewVector largeListViewVector = - LargeListViewVector.empty("sourceVector", allocator)) { - // Allocate buffers in largeListViewVector by calling `allocateNew` method. - largeListViewVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. - FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), null, null); - Field field = new Field("child-vector", fieldType, null); - largeListViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. - FieldVector fieldVector = largeListViewVector.getDataVector(); - fieldVector.clear(); - - BigIntVector childVector = (BigIntVector) fieldVector; - childVector.allocateNew(7); - - childVector.set(0, 12); - childVector.set(1, -7); - childVector.set(2, 25); - childVector.set(3, 0); - childVector.set(4, -127); - childVector.set(5, 127); - childVector.set(6, 50); - - childVector.setValueCount(7); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. 
- largeListViewVector.setOffset(0, 0); - largeListViewVector.setOffset(1, 3); - largeListViewVector.setOffset(2, 3); - largeListViewVector.setOffset(3, 7); - - largeListViewVector.setSize(0, 3); - largeListViewVector.setSize(1, 0); - largeListViewVector.setSize(2, 4); - largeListViewVector.setSize(3, 0); - - largeListViewVector.setValidity(0, 1); - largeListViewVector.setValidity(1, 0); - largeListViewVector.setValidity(2, 1); - largeListViewVector.setValidity(3, 1); - - // Set value count using `setValueCount` method. - largeListViewVector.setValueCount(4); - - final ArrowBuf offSetBuffer = largeListViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = largeListViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - // check values - assertEquals(12, ((BigIntVector) largeListViewVector.getDataVector()).get(0)); - assertEquals(-7, ((BigIntVector) largeListViewVector.getDataVector()).get(1)); - assertEquals(25, ((BigIntVector) largeListViewVector.getDataVector()).get(2)); - assertEquals(0, ((BigIntVector) largeListViewVector.getDataVector()).get(3)); - assertEquals(-127, ((BigIntVector) largeListViewVector.getDataVector()).get(4)); - assertEquals(127, ((BigIntVector) largeListViewVector.getDataVector()).get(5)); - assertEquals(50, 
((BigIntVector) largeListViewVector.getDataVector()).get(6)); - - largeListViewVector.validate(); - } - } - - @Test - public void testBasicLargeListViewSetNested() { - // Expected largeListViewVector - // [[[50,100,200],[75,125,150,175]],[[10],[15,20],[25,30,35]]] - - // Setting child vector - // [[50,100,200],[75,125,150,175],[10],[15,20],[25,30,35]] - try (LargeListViewVector largeListViewVector = - LargeListViewVector.empty("sourceVector", allocator)) { - // Allocate buffers in largeListViewVector by calling `allocateNew` method. - largeListViewVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. - FieldType fieldType = new FieldType(true, new ArrowType.LargeListView(), null, null); - FieldType childFieldType = new FieldType(true, new ArrowType.Int(64, true), null, null); - Field childField = new Field("child-vector", childFieldType, null); - List children = new ArrayList<>(); - children.add(childField); - Field field = new Field("child-vector", fieldType, children); - largeListViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. 
- FieldVector fieldVector = largeListViewVector.getDataVector(); - fieldVector.clear(); - - LargeListViewVector childVector = (LargeListViewVector) fieldVector; - UnionLargeListViewWriter largeListViewWriter = childVector.getWriter(); - largeListViewWriter.allocate(); - - largeListViewWriter.setPosition(0); - largeListViewWriter.startListView(); - - largeListViewWriter.bigInt().writeBigInt(50); - largeListViewWriter.bigInt().writeBigInt(100); - largeListViewWriter.bigInt().writeBigInt(200); - - largeListViewWriter.endListView(); - - largeListViewWriter.setPosition(1); - largeListViewWriter.startListView(); - - largeListViewWriter.bigInt().writeBigInt(75); - largeListViewWriter.bigInt().writeBigInt(125); - largeListViewWriter.bigInt().writeBigInt(150); - largeListViewWriter.bigInt().writeBigInt(175); - - largeListViewWriter.endListView(); - - largeListViewWriter.setPosition(2); - largeListViewWriter.startListView(); - - largeListViewWriter.bigInt().writeBigInt(10); - - largeListViewWriter.endListView(); - - largeListViewWriter.startListView(); - largeListViewWriter.setPosition(3); - - largeListViewWriter.bigInt().writeBigInt(15); - largeListViewWriter.bigInt().writeBigInt(20); - - largeListViewWriter.endListView(); - - largeListViewWriter.startListView(); - largeListViewWriter.setPosition(4); - - largeListViewWriter.bigInt().writeBigInt(25); - largeListViewWriter.bigInt().writeBigInt(30); - largeListViewWriter.bigInt().writeBigInt(35); - - largeListViewWriter.endListView(); - - childVector.setValueCount(5); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. - - largeListViewVector.setValidity(0, 1); - largeListViewVector.setValidity(1, 1); - - largeListViewVector.setOffset(0, 0); - largeListViewVector.setOffset(1, 2); - - largeListViewVector.setSize(0, 2); - largeListViewVector.setSize(1, 3); - - // Set value count using `setValueCount` method. 
- largeListViewVector.setValueCount(2); - - assertEquals(2, largeListViewVector.getValueCount()); - - /* get largeListViewVector value at index 0 -- the value itself is a largeListViewVector */ - Object result = largeListViewVector.getObject(0); - ArrayList> resultSet = (ArrayList>) result; - ArrayList list; - - assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ - assertEquals(3, resultSet.get(0).size()); /* size of the first inner list */ - assertEquals(4, resultSet.get(1).size()); /* size of the second inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(50), list.get(0)); - assertEquals(Long.valueOf(100), list.get(1)); - assertEquals(Long.valueOf(200), list.get(2)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(75), list.get(0)); - assertEquals(Long.valueOf(125), list.get(1)); - assertEquals(Long.valueOf(150), list.get(2)); - assertEquals(Long.valueOf(175), list.get(3)); - - /* get largeListViewVector value at index 1 -- the value itself is a largeListViewVector */ - result = largeListViewVector.getObject(1); - resultSet = (ArrayList>) result; - - assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ - assertEquals(1, resultSet.get(0).size()); /* size of the first inner list */ - assertEquals(2, resultSet.get(1).size()); /* size of the second inner list */ - assertEquals(3, resultSet.get(2).size()); /* size of the third inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(10), list.get(0)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(15), list.get(0)); - assertEquals(Long.valueOf(20), list.get(1)); - - list = resultSet.get(2); - assertEquals(Long.valueOf(25), list.get(0)); - assertEquals(Long.valueOf(30), list.get(1)); - assertEquals(Long.valueOf(35), list.get(2)); - - /* check underlying bitVector */ - assertFalse(largeListViewVector.isNull(0)); - assertFalse(largeListViewVector.isNull(1)); - - final ArrowBuf offSetBuffer = largeListViewVector.getOffsetBuffer(); - final 
ArrowBuf sizeBuffer = largeListViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(2, offSetBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(2, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - largeListViewVector.validate(); - } - } - - @Test - public void testBasicLargeListViewSetWithListViewWriter() { - try (LargeListViewVector largeListViewVector = - LargeListViewVector.empty("sourceVector", allocator)) { - // Allocate buffers in largeListViewVector by calling `allocateNew` method. - largeListViewVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. - FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), null, null); - Field field = new Field("child-vector", fieldType, null); - largeListViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. - FieldVector fieldVector = largeListViewVector.getDataVector(); - fieldVector.clear(); - - BigIntVector childVector = (BigIntVector) fieldVector; - childVector.allocateNew(7); - - childVector.set(0, 12); - childVector.set(1, -7); - childVector.set(2, 25); - childVector.set(3, 0); - childVector.set(4, -127); - childVector.set(5, 127); - childVector.set(6, 50); - - childVector.setValueCount(7); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. 
- - largeListViewVector.setValidity(0, 1); - largeListViewVector.setValidity(1, 0); - largeListViewVector.setValidity(2, 1); - largeListViewVector.setValidity(3, 1); - - largeListViewVector.setOffset(0, 0); - largeListViewVector.setOffset(1, 3); - largeListViewVector.setOffset(2, 3); - largeListViewVector.setOffset(3, 7); - - largeListViewVector.setSize(0, 3); - largeListViewVector.setSize(1, 0); - largeListViewVector.setSize(2, 4); - largeListViewVector.setSize(3, 0); - - // Set value count using `setValueCount` method. - largeListViewVector.setValueCount(4); - - final ArrowBuf offSetBuffer = largeListViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = largeListViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - // check values - assertEquals(12, ((BigIntVector) largeListViewVector.getDataVector()).get(0)); - assertEquals(-7, ((BigIntVector) largeListViewVector.getDataVector()).get(1)); - assertEquals(25, ((BigIntVector) largeListViewVector.getDataVector()).get(2)); - assertEquals(0, ((BigIntVector) largeListViewVector.getDataVector()).get(3)); - assertEquals(-127, ((BigIntVector) largeListViewVector.getDataVector()).get(4)); - assertEquals(127, ((BigIntVector) largeListViewVector.getDataVector()).get(5)); - assertEquals(50, 
((BigIntVector) largeListViewVector.getDataVector()).get(6)); - - UnionLargeListViewWriter listViewWriter = largeListViewVector.getWriter(); - - listViewWriter.setPosition(4); - listViewWriter.startListView(); - - listViewWriter.bigInt().writeBigInt(121); - listViewWriter.bigInt().writeBigInt(-71); - listViewWriter.bigInt().writeBigInt(251); - listViewWriter.endListView(); - - largeListViewVector.setValueCount(5); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - // check values - assertEquals(12, ((BigIntVector) largeListViewVector.getDataVector()).get(0)); - assertEquals(-7, ((BigIntVector) largeListViewVector.getDataVector()).get(1)); - assertEquals(25, ((BigIntVector) largeListViewVector.getDataVector()).get(2)); - assertEquals(0, ((BigIntVector) largeListViewVector.getDataVector()).get(3)); - assertEquals(-127, ((BigIntVector) largeListViewVector.getDataVector()).get(4)); - assertEquals(127, ((BigIntVector) largeListViewVector.getDataVector()).get(5)); - assertEquals(50, ((BigIntVector) largeListViewVector.getDataVector()).get(6)); - assertEquals(121, ((BigIntVector) 
largeListViewVector.getDataVector()).get(7)); - assertEquals(-71, ((BigIntVector) largeListViewVector.getDataVector()).get(8)); - assertEquals(251, ((BigIntVector) largeListViewVector.getDataVector()).get(9)); - - largeListViewVector.validate(); - } - } - - @Test - public void testConsistentChildName() throws Exception { - try (LargeListViewVector largeListViewVector = - LargeListViewVector.empty("sourceVector", allocator)) { - String emptyListStr = largeListViewVector.getField().toString(); - assertTrue(emptyListStr.contains(LargeListViewVector.DATA_VECTOR_NAME)); - - largeListViewVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - String emptyVectorStr = largeListViewVector.getField().toString(); - assertTrue(emptyVectorStr.contains(LargeListViewVector.DATA_VECTOR_NAME)); - } - } - - @Test - public void testSetInitialCapacity() { - try (final LargeListViewVector vector = LargeListViewVector.empty("", allocator)) { - vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - - vector.setInitialCapacity(512); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 512); - - vector.setInitialCapacity(512, 4); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4); - - vector.setInitialCapacity(512, 0.1); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 51); - - vector.setInitialCapacity(512, 0.01); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 5); - - vector.setInitialCapacity(5, 0.1); - vector.allocateNew(); - assertEquals(8, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 1); - - vector.validate(); - } - } - - @Test - public void testClearAndReuse() { - try (final 
LargeListViewVector vector = LargeListViewVector.empty("list", allocator)) { - BigIntVector bigIntVector = - (BigIntVector) - vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector(); - vector.setInitialCapacity(10); - vector.allocateNew(); - - vector.startNewValue(0); - bigIntVector.setSafe(0, 7); - vector.endValue(0, 1); - vector.startNewValue(1); - bigIntVector.setSafe(1, 8); - vector.endValue(1, 1); - vector.setValueCount(2); - - Object result = vector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(Long.valueOf(7), resultSet.get(0)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(Long.valueOf(8), resultSet.get(0)); - - // Clear and release the buffers to trigger a realloc when adding next value - vector.clear(); - - // The list vector should reuse a buffer when reallocating the offset buffer - vector.startNewValue(0); - bigIntVector.setSafe(0, 7); - vector.endValue(0, 1); - vector.startNewValue(1); - bigIntVector.setSafe(1, 8); - vector.endValue(1, 1); - vector.setValueCount(2); - - result = vector.getObject(0); - resultSet = (ArrayList) result; - assertEquals(Long.valueOf(7), resultSet.get(0)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(Long.valueOf(8), resultSet.get(0)); - } - } - - @Test - public void testWriterGetField() { - try (final LargeListViewVector vector = LargeListViewVector.empty("list", allocator)) { - - UnionLargeListViewWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writer.startListView(); - writer.integer().writeInt(1); - writer.integer().writeInt(2); - writer.endListView(); - vector.setValueCount(2); - - Field expectedDataField = - new Field( - BaseLargeRepeatedValueViewVector.DATA_VECTOR_NAME, - FieldType.nullable(new ArrowType.Int(32, true)), - null); - Field expectedField = - new Field( - vector.getName(), - FieldType.nullable(ArrowType.LargeListView.INSTANCE), - 
Collections.singletonList(expectedDataField)); - - assertEquals(expectedField, writer.getField()); - } - } - - @Test - public void testClose() throws Exception { - try (final LargeListViewVector vector = LargeListViewVector.empty("largelistview", allocator)) { - - UnionLargeListViewWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writer.startListView(); - writer.integer().writeInt(1); - writer.integer().writeInt(2); - writer.endListView(); - vector.setValueCount(2); - - assertTrue(vector.getBufferSize() > 0); - assertTrue(vector.getDataVector().getBufferSize() > 0); - - writer.close(); - assertEquals(0, vector.getBufferSize()); - assertEquals(0, vector.getDataVector().getBufferSize()); - - vector.validate(); - } - } - - @Test - public void testGetBufferSizeFor() { - try (final LargeListViewVector vector = LargeListViewVector.empty("largelistview", allocator)) { - - UnionLargeListViewWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writeIntValues(writer, new int[] {1, 2}); - writeIntValues(writer, new int[] {3, 4}); - writeIntValues(writer, new int[] {5, 6}); - writeIntValues(writer, new int[] {7, 8, 9, 10}); - writeIntValues(writer, new int[] {11, 12, 13, 14}); - writer.setValueCount(5); - - IntVector dataVector = (IntVector) vector.getDataVector(); - int[] indices = new int[] {0, 2, 4, 6, 10, 14}; - - for (int valueCount = 1; valueCount <= 5; valueCount++) { - int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount); - int offsetBufferSize = valueCount * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH; - int sizeBufferSize = valueCount * BaseLargeRepeatedValueViewVector.SIZE_WIDTH; - - int expectedSize = - validityBufferSize - + offsetBufferSize - + sizeBufferSize - + dataVector.getBufferSizeFor(indices[valueCount]); - assertEquals(expectedSize, vector.getBufferSizeFor(valueCount)); - } - vector.validate(); - } - } - - @Test - public void testIsEmpty() { - try (final 
LargeListViewVector vector = LargeListViewVector.empty("largelistview", allocator)) { - UnionLargeListViewWriter writer = vector.getWriter(); - writer.allocate(); - - // set values [1,2], null, [], [5,6] - writeIntValues(writer, new int[] {1, 2}); - writer.setPosition(2); - writeIntValues(writer, new int[] {}); - writeIntValues(writer, new int[] {5, 6}); - writer.setValueCount(4); - - assertFalse(vector.isEmpty(0)); - assertTrue(vector.isNull(1)); - assertTrue(vector.isEmpty(1)); - assertFalse(vector.isNull(2)); - assertTrue(vector.isEmpty(2)); - assertFalse(vector.isEmpty(3)); - - vector.validate(); - } - } - - @Test - public void testTotalCapacity() { - final FieldType type = FieldType.nullable(MinorType.INT.getType()); - try (final LargeListViewVector vector = - new LargeListViewVector("largelistview", allocator, type, null)) { - // Force the child vector to be allocated based on the type - // (this is a bad API: we have to track and repeat the type twice) - vector.addOrGetVector(type); - - // Specify the allocation size but do not actually allocate - vector.setInitialTotalCapacity(10, 100); - - // Finally, actually do the allocation - vector.allocateNewSafe(); - - // Note: allocator rounds up and can be greater than the requested allocation. 
- assertTrue(vector.getValueCapacity() >= 10); - assertTrue(vector.getDataVector().getValueCapacity() >= 100); - } - } - - @Test - public void testSetNull1() { - try (LargeListViewVector vector = LargeListViewVector.empty("largelistview", allocator)) { - UnionLargeListViewWriter writer = vector.getWriter(); - writer.allocate(); - - writer.setPosition(0); - writer.startListView(); - writer.bigInt().writeBigInt(10); - writer.bigInt().writeBigInt(20); - writer.endListView(); - - vector.setNull(1); - - writer.setPosition(2); - writer.startListView(); - writer.bigInt().writeBigInt(30); - writer.bigInt().writeBigInt(40); - writer.endListView(); - - vector.setNull(3); - vector.setNull(4); - - writer.setPosition(5); - writer.startListView(); - writer.bigInt().writeBigInt(50); - writer.bigInt().writeBigInt(60); - writer.endListView(); - - vector.setValueCount(6); - - assertFalse(vector.isNull(0)); - assertTrue(vector.isNull(1)); - assertFalse(vector.isNull(2)); - assertTrue(vector.isNull(3)); - assertTrue(vector.isNull(4)); - assertFalse(vector.isNull(5)); - - // validate buffers - - final ArrowBuf validityBuffer = vector.getValidityBuffer(); - final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = vector.getSizeBuffer(); - - assertEquals(1, BitVectorHelper.get(validityBuffer, 0)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 1)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 2)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 3)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); - - assertEquals(0, offsetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(2, offsetBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - 
assertEquals(0, offsetBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(4, offsetBuffer.getInt(5 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - - assertEquals(2, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(5 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - // validate values - - Object result = vector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(10), resultSet.get(0)); - assertEquals(Long.valueOf(20), resultSet.get(1)); - - result = vector.getObject(2); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(30), resultSet.get(0)); - assertEquals(Long.valueOf(40), resultSet.get(1)); - - result = vector.getObject(5); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(50), resultSet.get(0)); - assertEquals(Long.valueOf(60), resultSet.get(1)); - - vector.validate(); - } - } - - @Test - public void testSetNull2() { - try (LargeListViewVector vector = LargeListViewVector.empty("largelistview", allocator)) { - // validate setting nulls first and then writing values - UnionLargeListViewWriter writer = vector.getWriter(); - writer.allocate(); - - vector.setNull(0); - vector.setNull(2); - vector.setNull(4); - - writer.setPosition(1); - writer.startListView(); - writer.bigInt().writeBigInt(10); - writer.bigInt().writeBigInt(20); - writer.bigInt().writeBigInt(30); - writer.endListView(); - - writer.setPosition(3); - writer.startListView(); - 
writer.bigInt().writeBigInt(40); - writer.bigInt().writeBigInt(50); - writer.endListView(); - - writer.setPosition(5); - writer.startListView(); - writer.bigInt().writeBigInt(60); - writer.bigInt().writeBigInt(70); - writer.bigInt().writeBigInt(80); - writer.endListView(); - - vector.setValueCount(6); - - assertTrue(vector.isNull(0)); - assertFalse(vector.isNull(1)); - assertTrue(vector.isNull(2)); - assertFalse(vector.isNull(3)); - assertTrue(vector.isNull(4)); - assertFalse(vector.isNull(5)); - - // validate buffers - - final ArrowBuf validityBuffer = vector.getValidityBuffer(); - final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = vector.getSizeBuffer(); - - assertEquals(0, BitVectorHelper.get(validityBuffer, 0)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 1)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 2)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 3)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); - - assertEquals(0, offsetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offsetBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(5, offsetBuffer.getInt(5 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - - assertEquals(0, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(4 * 
BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(5 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - // validate values - - Object result = vector.getObject(1); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(10), resultSet.get(0)); - assertEquals(Long.valueOf(20), resultSet.get(1)); - assertEquals(Long.valueOf(30), resultSet.get(2)); - - result = vector.getObject(3); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(40), resultSet.get(0)); - assertEquals(Long.valueOf(50), resultSet.get(1)); - - result = vector.getObject(5); - resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(60), resultSet.get(0)); - assertEquals(Long.valueOf(70), resultSet.get(1)); - assertEquals(Long.valueOf(80), resultSet.get(2)); - - vector.validate(); - } - } - - @Test - public void testSetNull3() { - try (LargeListViewVector vector = LargeListViewVector.empty("largelistview", allocator)) { - // validate setting values first and then writing nulls - UnionLargeListViewWriter writer = vector.getWriter(); - writer.allocate(); - - writer.setPosition(1); - writer.startListView(); - writer.bigInt().writeBigInt(10); - writer.bigInt().writeBigInt(20); - writer.bigInt().writeBigInt(30); - writer.endListView(); - - writer.setPosition(3); - writer.startListView(); - writer.bigInt().writeBigInt(40); - writer.bigInt().writeBigInt(50); - writer.endListView(); - - writer.setPosition(5); - writer.startListView(); - writer.bigInt().writeBigInt(60); - writer.bigInt().writeBigInt(70); - writer.bigInt().writeBigInt(80); - writer.endListView(); - - vector.setNull(0); - vector.setNull(2); - vector.setNull(4); - - vector.setValueCount(6); - - assertTrue(vector.isNull(0)); - assertFalse(vector.isNull(1)); - assertTrue(vector.isNull(2)); - assertFalse(vector.isNull(3)); - assertTrue(vector.isNull(4)); - 
assertFalse(vector.isNull(5)); - - // validate buffers - - final ArrowBuf validityBuffer = vector.getValidityBuffer(); - final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = vector.getSizeBuffer(); - - assertEquals(0, BitVectorHelper.get(validityBuffer, 0)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 1)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 2)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 3)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); - - assertEquals(0, offsetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offsetBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(5, offsetBuffer.getInt(5 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - - assertEquals(0, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(5 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - // validate values - - Object result = vector.getObject(1); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(10), resultSet.get(0)); - assertEquals(Long.valueOf(20), resultSet.get(1)); - assertEquals(Long.valueOf(30), resultSet.get(2)); - - result = vector.getObject(3); - 
resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(40), resultSet.get(0)); - assertEquals(Long.valueOf(50), resultSet.get(1)); - - result = vector.getObject(5); - resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(60), resultSet.get(0)); - assertEquals(Long.valueOf(70), resultSet.get(1)); - assertEquals(Long.valueOf(80), resultSet.get(2)); - - vector.validate(); - } - } - - @Test - public void testOverWrite1() { - try (LargeListViewVector vector = LargeListViewVector.empty("largelistview", allocator)) { - UnionLargeListViewWriter writer = vector.getWriter(); - writer.allocate(); - - writer.setPosition(0); - writer.startListView(); - writer.bigInt().writeBigInt(10); - writer.bigInt().writeBigInt(20); - writer.bigInt().writeBigInt(30); - writer.endListView(); - - writer.setPosition(1); - writer.startListView(); - writer.bigInt().writeBigInt(40); - writer.bigInt().writeBigInt(50); - writer.endListView(); - - vector.setValueCount(2); - - writer.setPosition(0); - writer.startListView(); - writer.bigInt().writeBigInt(60); - writer.bigInt().writeBigInt(70); - writer.endListView(); - - writer.setPosition(1); - writer.startListView(); - writer.bigInt().writeBigInt(80); - writer.bigInt().writeBigInt(90); - writer.endListView(); - - vector.setValueCount(2); - - Object result = vector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(60), resultSet.get(0)); - assertEquals(Long.valueOf(70), resultSet.get(1)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(80), resultSet.get(0)); - assertEquals(Long.valueOf(90), resultSet.get(1)); - - vector.validate(); - } - } - - @Test - public void testOverwriteWithNull() { - try (LargeListViewVector vector = LargeListViewVector.empty("largelistview", allocator)) { - UnionLargeListViewWriter 
writer = vector.getWriter(); - writer.allocate(); - - ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - ArrowBuf sizeBuffer = vector.getSizeBuffer(); - - writer.setPosition(0); - writer.startListView(); - writer.bigInt().writeBigInt(10); - writer.bigInt().writeBigInt(20); - writer.bigInt().writeBigInt(30); - writer.endListView(); - - writer.setPosition(1); - writer.startListView(); - writer.bigInt().writeBigInt(40); - writer.bigInt().writeBigInt(50); - writer.endListView(); - - vector.setValueCount(2); - - assertEquals(0, offsetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offsetBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - - assertEquals(3, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - vector.setNull(0); - - assertEquals(0, offsetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - vector.setNull(1); - - assertEquals(0, offsetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - assertTrue(vector.isNull(0)); - assertTrue(vector.isNull(1)); - - writer.setPosition(0); - writer.startListView(); - writer.bigInt().writeBigInt(60); - writer.bigInt().writeBigInt(70); - writer.endListView(); - - assertEquals(0, offsetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(2, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - writer.setPosition(1); - writer.startListView(); - writer.bigInt().writeBigInt(80); - writer.bigInt().writeBigInt(90); - writer.endListView(); - - assertEquals(2, offsetBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(2, sizeBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - 
vector.setValueCount(2); - - assertFalse(vector.isNull(0)); - assertFalse(vector.isNull(1)); - - Object result = vector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(60), resultSet.get(0)); - assertEquals(Long.valueOf(70), resultSet.get(1)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(80), resultSet.get(0)); - assertEquals(Long.valueOf(90), resultSet.get(1)); - - vector.validate(); - } - } - - @Test - public void testOutOfOrderOffset1() { - // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] - try (LargeListViewVector largeListViewVector = - LargeListViewVector.empty("largelistview", allocator)) { - // Allocate buffers in largeListViewVector by calling `allocateNew` method. - largeListViewVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. - - FieldType fieldType = new FieldType(true, new ArrowType.Int(16, true), null, null); - Field field = new Field("child-vector", fieldType, null); - largeListViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. - FieldVector fieldVector = largeListViewVector.getDataVector(); - fieldVector.clear(); - - SmallIntVector childVector = (SmallIntVector) fieldVector; - - childVector.allocateNew(7); - - childVector.set(0, 0); - childVector.set(1, -127); - childVector.set(2, 127); - childVector.set(3, 50); - childVector.set(4, 12); - childVector.set(5, -7); - childVector.set(6, 25); - - childVector.setValueCount(7); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. 
- largeListViewVector.setValidity(0, 1); - largeListViewVector.setValidity(1, 0); - largeListViewVector.setValidity(2, 1); - largeListViewVector.setValidity(3, 1); - largeListViewVector.setValidity(4, 1); - - largeListViewVector.setOffset(0, 4); - largeListViewVector.setOffset(1, 7); - largeListViewVector.setOffset(2, 0); - largeListViewVector.setOffset(3, 0); - largeListViewVector.setOffset(4, 3); - - largeListViewVector.setSize(0, 3); - largeListViewVector.setSize(1, 0); - largeListViewVector.setSize(2, 4); - largeListViewVector.setSize(3, 0); - largeListViewVector.setSize(4, 2); - - // Set value count using `setValueCount` method. - largeListViewVector.setValueCount(5); - - final ArrowBuf offSetBuffer = largeListViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = largeListViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(4, offSetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offSetBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offSetBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - // check child vector - assertEquals(0, ((SmallIntVector) largeListViewVector.getDataVector()).get(0)); - assertEquals(-127, ((SmallIntVector) largeListViewVector.getDataVector()).get(1)); - assertEquals(127, ((SmallIntVector) 
largeListViewVector.getDataVector()).get(2)); - assertEquals(50, ((SmallIntVector) largeListViewVector.getDataVector()).get(3)); - assertEquals(12, ((SmallIntVector) largeListViewVector.getDataVector()).get(4)); - assertEquals(-7, ((SmallIntVector) largeListViewVector.getDataVector()).get(5)); - assertEquals(25, ((SmallIntVector) largeListViewVector.getDataVector()).get(6)); - - // check values - Object result = largeListViewVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Short.valueOf("12"), resultSet.get(0)); - assertEquals(Short.valueOf("-7"), resultSet.get(1)); - assertEquals(Short.valueOf("25"), resultSet.get(2)); - - assertTrue(largeListViewVector.isNull(1)); - - result = largeListViewVector.getObject(2); - resultSet = (ArrayList) result; - assertEquals(4, resultSet.size()); - assertEquals(Short.valueOf("0"), resultSet.get(0)); - assertEquals(Short.valueOf("-127"), resultSet.get(1)); - assertEquals(Short.valueOf("127"), resultSet.get(2)); - assertEquals(Short.valueOf("50"), resultSet.get(3)); - - assertTrue(largeListViewVector.isEmpty(3)); - - result = largeListViewVector.getObject(4); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Short.valueOf("50"), resultSet.get(0)); - assertEquals(Short.valueOf("12"), resultSet.get(1)); - - largeListViewVector.validate(); - } - } - - private int validateSizeBufferAndCalculateMinOffset( - int start, - int splitLength, - ArrowBuf fromOffsetBuffer, - ArrowBuf fromSizeBuffer, - ArrowBuf toSizeBuffer) { - int minOffset = fromOffsetBuffer.getInt((long) start * LargeListViewVector.OFFSET_WIDTH); - int fromDataLength; - int toDataLength; - - for (int i = 0; i < splitLength; i++) { - fromDataLength = fromSizeBuffer.getInt((long) (start + i) * LargeListViewVector.SIZE_WIDTH); - toDataLength = toSizeBuffer.getInt((long) (i) * LargeListViewVector.SIZE_WIDTH); - - /* validate size */ - assertEquals( - fromDataLength, - 
toDataLength, - "Different data lengths at index: " + i + " and start: " + start); - - /* calculate minimum offset */ - int currentOffset = - fromOffsetBuffer.getInt((long) (start + i) * LargeListViewVector.OFFSET_WIDTH); - if (currentOffset < minOffset) { - minOffset = currentOffset; - } - } - - return minOffset; - } - - private void validateOffsetBuffer( - int start, - int splitLength, - ArrowBuf fromOffsetBuffer, - ArrowBuf toOffsetBuffer, - int minOffset) { - int offset1; - int offset2; - - for (int i = 0; i < splitLength; i++) { - offset1 = fromOffsetBuffer.getInt((long) (start + i) * LargeListViewVector.OFFSET_WIDTH); - offset2 = toOffsetBuffer.getInt((long) (i) * LargeListViewVector.OFFSET_WIDTH); - assertEquals( - offset1 - minOffset, - offset2, - "Different offset values at index: " + i + " and start: " + start); - } - } - - private void validateDataBuffer( - int start, - int splitLength, - ArrowBuf fromOffsetBuffer, - ArrowBuf fromSizeBuffer, - BigIntVector fromDataVector, - ArrowBuf toOffsetBuffer, - BigIntVector toDataVector) { - int dataLength; - Long fromValue; - for (int i = 0; i < splitLength; i++) { - dataLength = fromSizeBuffer.getInt((long) (start + i) * LargeListViewVector.SIZE_WIDTH); - for (int j = 0; j < dataLength; j++) { - fromValue = - fromDataVector.getObject( - (fromOffsetBuffer.getInt((long) (start + i) * LargeListViewVector.OFFSET_WIDTH) - + j)); - Long toValue = - toDataVector.getObject( - (toOffsetBuffer.getInt((long) i * LargeListViewVector.OFFSET_WIDTH) + j)); - assertEquals( - fromValue, toValue, "Different data values at index: " + i + " and start: " + start); - } - } - } - - /** - * Validate split and transfer of data from fromVector to toVector. Note that this method assumes - * that the child vector is BigIntVector. 
- * - * @param start start index - * @param splitLength length of data to split and transfer - * @param fromVector fromVector - * @param toVector toVector - */ - private void validateSplitAndTransfer( - TransferPair transferPair, - int start, - int splitLength, - LargeListViewVector fromVector, - LargeListViewVector toVector) { - - transferPair.splitAndTransfer(start, splitLength); - - /* get offsetBuffer of toVector */ - final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer(); - - /* get sizeBuffer of toVector */ - final ArrowBuf toSizeBuffer = toVector.getSizeBuffer(); - - /* get dataVector of toVector */ - BigIntVector toDataVector = (BigIntVector) toVector.getDataVector(); - - /* get offsetBuffer of toVector */ - final ArrowBuf fromOffsetBuffer = fromVector.getOffsetBuffer(); - - /* get sizeBuffer of toVector */ - final ArrowBuf fromSizeBuffer = fromVector.getSizeBuffer(); - - /* get dataVector of toVector */ - BigIntVector fromDataVector = (BigIntVector) fromVector.getDataVector(); - - /* validate size buffers */ - int minOffset = - validateSizeBufferAndCalculateMinOffset( - start, splitLength, fromOffsetBuffer, fromSizeBuffer, toSizeBuffer); - /* validate offset buffers */ - validateOffsetBuffer(start, splitLength, fromOffsetBuffer, toOffsetBuffer, minOffset); - /* validate data */ - validateDataBuffer( - start, - splitLength, - fromOffsetBuffer, - fromSizeBuffer, - fromDataVector, - toOffsetBuffer, - toDataVector); - } - - @Test - public void testSplitAndTransfer() throws Exception { - try (LargeListViewVector fromVector = LargeListViewVector.empty("sourceVector", allocator)) { - - /* Explicitly add the dataVector */ - MinorType type = MinorType.BIGINT; - fromVector.addOrGetVector(FieldType.nullable(type.getType())); - - UnionLargeListViewWriter listViewWriter = fromVector.getWriter(); - - /* allocate memory */ - listViewWriter.allocate(); - - /* populate data */ - listViewWriter.setPosition(0); - listViewWriter.startListView(); - 
listViewWriter.bigInt().writeBigInt(10); - listViewWriter.bigInt().writeBigInt(11); - listViewWriter.bigInt().writeBigInt(12); - listViewWriter.endListView(); - - listViewWriter.setPosition(1); - listViewWriter.startListView(); - listViewWriter.bigInt().writeBigInt(13); - listViewWriter.bigInt().writeBigInt(14); - listViewWriter.endListView(); - - listViewWriter.setPosition(2); - listViewWriter.startListView(); - listViewWriter.bigInt().writeBigInt(15); - listViewWriter.bigInt().writeBigInt(16); - listViewWriter.bigInt().writeBigInt(17); - listViewWriter.bigInt().writeBigInt(18); - listViewWriter.endListView(); - - listViewWriter.setPosition(3); - listViewWriter.startListView(); - listViewWriter.bigInt().writeBigInt(19); - listViewWriter.endListView(); - - listViewWriter.setPosition(4); - listViewWriter.startListView(); - listViewWriter.bigInt().writeBigInt(20); - listViewWriter.bigInt().writeBigInt(21); - listViewWriter.bigInt().writeBigInt(22); - listViewWriter.bigInt().writeBigInt(23); - listViewWriter.endListView(); - - fromVector.setValueCount(5); - - /* get offset buffer */ - final ArrowBuf offsetBuffer = fromVector.getOffsetBuffer(); - - /* get size buffer */ - final ArrowBuf sizeBuffer = fromVector.getSizeBuffer(); - - /* get dataVector */ - BigIntVector dataVector = (BigIntVector) fromVector.getDataVector(); - - /* check the vector output */ - - int index = 0; - int offset; - int size = 0; - Long actual; - - /* index 0 */ - assertFalse(fromVector.isNull(index)); - offset = offsetBuffer.getInt(index * LargeListViewVector.OFFSET_WIDTH); - assertEquals(Integer.toString(0), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(10), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(11), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(12), actual); - assertEquals( - Integer.toString(3), - Integer.toString(sizeBuffer.getInt(index * 
LargeListViewVector.SIZE_WIDTH))); - - /* index 1 */ - index++; - assertFalse(fromVector.isNull(index)); - offset = offsetBuffer.getInt(index * LargeListViewVector.OFFSET_WIDTH); - assertEquals(Integer.toString(3), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(13), actual); - offset++; - size++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(14), actual); - size++; - assertEquals( - Integer.toString(size), - Integer.toString(sizeBuffer.getInt(index * LargeListViewVector.SIZE_WIDTH))); - - /* index 2 */ - size = 0; - index++; - assertFalse(fromVector.isNull(index)); - offset = offsetBuffer.getInt(index * LargeListViewVector.OFFSET_WIDTH); - assertEquals(Integer.toString(5), Integer.toString(offset)); - size++; - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(15), actual); - offset++; - size++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(16), actual); - offset++; - size++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(17), actual); - offset++; - size++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(18), actual); - assertEquals( - Integer.toString(size), - Integer.toString(sizeBuffer.getInt(index * LargeListViewVector.SIZE_WIDTH))); - - /* index 3 */ - size = 0; - index++; - assertFalse(fromVector.isNull(index)); - offset = offsetBuffer.getInt(index * LargeListViewVector.OFFSET_WIDTH); - assertEquals(Integer.toString(9), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(19), actual); - size++; - assertEquals( - Integer.toString(size), - Integer.toString(sizeBuffer.getInt(index * LargeListViewVector.SIZE_WIDTH))); - - /* index 4 */ - size = 0; - index++; - assertFalse(fromVector.isNull(index)); - offset = offsetBuffer.getInt(index * LargeListViewVector.OFFSET_WIDTH); - assertEquals(Integer.toString(10), Integer.toString(offset)); - - actual = 
dataVector.getObject(offset); - assertEquals(Long.valueOf(20), actual); - offset++; - size++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(21), actual); - offset++; - size++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(22), actual); - offset++; - size++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(23), actual); - size++; - assertEquals( - Integer.toString(size), - Integer.toString(sizeBuffer.getInt(index * LargeListViewVector.SIZE_WIDTH))); - - /* do split and transfer */ - try (LargeListViewVector toVector = LargeListViewVector.empty("toVector", allocator)) { - int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}}; - TransferPair transferPair = fromVector.makeTransferPair(toVector); - - for (final int[] transferLength : transferLengths) { - int start = transferLength[0]; - int splitLength = transferLength[1]; - validateSplitAndTransfer(transferPair, start, splitLength, fromVector, toVector); - } - } - } - } - - @Test - public void testGetTransferPairWithField() throws Exception { - try (final LargeListViewVector fromVector = LargeListViewVector.empty("listview", allocator)) { - - UnionLargeListViewWriter writer = fromVector.getWriter(); - writer.allocate(); - - // set some values - writer.startListView(); - writer.integer().writeInt(1); - writer.integer().writeInt(2); - writer.endListView(); - fromVector.setValueCount(2); - - final TransferPair transferPair = - fromVector.getTransferPair(fromVector.getField(), allocator); - final LargeListViewVector toVector = (LargeListViewVector) transferPair.getTo(); - // Field inside a new vector created by reusing a field should be the same in memory as the - // original field. 
- assertSame(toVector.getField(), fromVector.getField()); - } - } - - @Test - public void testOutOfOrderOffsetSplitAndTransfer() { - // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] - try (LargeListViewVector fromVector = LargeListViewVector.empty("fromVector", allocator)) { - // Allocate buffers in LargeListViewVector by calling `allocateNew` method. - fromVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. - - FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), null, null); - Field field = new Field("child-vector", fieldType, null); - fromVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. - FieldVector fieldVector = fromVector.getDataVector(); - fieldVector.clear(); - - BigIntVector childVector = (BigIntVector) fieldVector; - - childVector.allocateNew(7); - - childVector.set(0, 0); - childVector.set(1, -127); - childVector.set(2, 127); - childVector.set(3, 50); - childVector.set(4, 12); - childVector.set(5, -7); - childVector.set(6, 25); - - childVector.setValueCount(7); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. - fromVector.setValidity(0, 1); - fromVector.setValidity(1, 0); - fromVector.setValidity(2, 1); - fromVector.setValidity(3, 1); - fromVector.setValidity(4, 1); - - fromVector.setOffset(0, 4); - fromVector.setOffset(1, 7); - fromVector.setOffset(2, 0); - fromVector.setOffset(3, 0); - fromVector.setOffset(4, 3); - - fromVector.setSize(0, 3); - fromVector.setSize(1, 0); - fromVector.setSize(2, 4); - fromVector.setSize(3, 0); - fromVector.setSize(4, 2); - - // Set value count using `setValueCount` method. 
- fromVector.setValueCount(5); - - final ArrowBuf offSetBuffer = fromVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = fromVector.getSizeBuffer(); - - // check offset buffer - assertEquals(4, offSetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offSetBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offSetBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); - - // check child vector - assertEquals(0, ((BigIntVector) fromVector.getDataVector()).get(0)); - assertEquals(-127, ((BigIntVector) fromVector.getDataVector()).get(1)); - assertEquals(127, ((BigIntVector) fromVector.getDataVector()).get(2)); - assertEquals(50, ((BigIntVector) fromVector.getDataVector()).get(3)); - assertEquals(12, ((BigIntVector) fromVector.getDataVector()).get(4)); - assertEquals(-7, ((BigIntVector) fromVector.getDataVector()).get(5)); - assertEquals(25, ((BigIntVector) fromVector.getDataVector()).get(6)); - - // check values - Object result = fromVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(12), resultSet.get(0)); - assertEquals(Long.valueOf(-7), resultSet.get(1)); - assertEquals(Long.valueOf(25), resultSet.get(2)); - - assertTrue(fromVector.isNull(1)); - - result = 
fromVector.getObject(2); - resultSet = (ArrayList) result; - assertEquals(4, resultSet.size()); - assertEquals(Long.valueOf(0), resultSet.get(0)); - assertEquals(Long.valueOf(-127), resultSet.get(1)); - assertEquals(Long.valueOf(127), resultSet.get(2)); - assertEquals(Long.valueOf(50), resultSet.get(3)); - - assertTrue(fromVector.isEmpty(3)); - - result = fromVector.getObject(4); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(50), resultSet.get(0)); - assertEquals(Long.valueOf(12), resultSet.get(1)); - - fromVector.validate(); - - /* do split and transfer */ - try (LargeListViewVector toVector = LargeListViewVector.empty("toVector", allocator)) { - int[][] transferLengths = {{2, 3}, {0, 1}, {0, 3}}; - TransferPair transferPair = fromVector.makeTransferPair(toVector); - - for (final int[] transferLength : transferLengths) { - int start = transferLength[0]; - int splitLength = transferLength[1]; - validateSplitAndTransfer(transferPair, start, splitLength, fromVector, toVector); - } - } - } - } - - @Test - public void testRangeChildVector1() { - /* - * Non-overlapping ranges - * offsets: [0, 2] - * sizes: [4, 1] - * values: [0, 1, 2, 3] - * - * vector: [[0, 1, 2, 3], [2]] - * */ - try (LargeListViewVector largeListViewVector = - LargeListViewVector.empty("largelistview", allocator)) { - // Allocate buffers in listViewVector by calling `allocateNew` method. - largeListViewVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. - - FieldType fieldType = new FieldType(true, new ArrowType.Int(32, true), null, null); - Field field = new Field("child-vector", fieldType, null); - largeListViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. 
- FieldVector fieldVector = largeListViewVector.getDataVector(); - fieldVector.clear(); - - IntVector childVector = (IntVector) fieldVector; - - childVector.allocateNew(8); - - childVector.set(0, 0); - childVector.set(1, 1); - childVector.set(2, 2); - childVector.set(3, 3); - childVector.set(4, 4); - childVector.set(5, 5); - childVector.set(6, 6); - childVector.set(7, 7); - - childVector.setValueCount(8); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. - largeListViewVector.setValidity(0, 1); - largeListViewVector.setValidity(1, 1); - - largeListViewVector.setOffset(0, 0); - largeListViewVector.setOffset(1, 2); - - largeListViewVector.setSize(0, 4); - largeListViewVector.setSize(1, 1); - - assertEquals(8, largeListViewVector.getDataVector().getValueCount()); - - largeListViewVector.setValueCount(2); - assertEquals(4, largeListViewVector.getDataVector().getValueCount()); - - IntVector childVector1 = (IntVector) largeListViewVector.getDataVector(); - final ArrowBuf dataBuffer = childVector1.getDataBuffer(); - final ArrowBuf validityBuffer = childVector1.getValidityBuffer(); - - // yet the underneath buffer contains the original buffer - for (int i = 0; i < validityBuffer.capacity(); i++) { - assertEquals(i, dataBuffer.getInt((long) i * IntVector.TYPE_WIDTH)); - } - } - } - - @Test - public void testRangeChildVector2() { - /* - * Overlapping ranges - * offsets: [0, 2] - * sizes: [3, 1] - * values: [0, 1, 2, 3] - * - * vector: [[1, 2, 3], [2]] - * */ - try (LargeListViewVector largeListViewVector = - LargeListViewVector.empty("largelistview", allocator)) { - // Allocate buffers in listViewVector by calling `allocateNew` method. - largeListViewVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. 
- - FieldType fieldType = new FieldType(true, new ArrowType.Int(32, true), null, null); - Field field = new Field("child-vector", fieldType, null); - largeListViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. - FieldVector fieldVector = largeListViewVector.getDataVector(); - fieldVector.clear(); - - IntVector childVector = (IntVector) fieldVector; - - childVector.allocateNew(8); - - childVector.set(0, 0); - childVector.set(1, 1); - childVector.set(2, 2); - childVector.set(3, 3); - childVector.set(4, 4); - childVector.set(5, 5); - childVector.set(6, 6); - childVector.set(7, 7); - - childVector.setValueCount(8); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. - largeListViewVector.setValidity(0, 1); - largeListViewVector.setValidity(1, 1); - - largeListViewVector.setOffset(0, 1); - largeListViewVector.setOffset(1, 2); - - largeListViewVector.setSize(0, 3); - largeListViewVector.setSize(1, 1); - - assertEquals(8, largeListViewVector.getDataVector().getValueCount()); - - largeListViewVector.setValueCount(2); - assertEquals(4, largeListViewVector.getDataVector().getValueCount()); - - IntVector childVector1 = (IntVector) largeListViewVector.getDataVector(); - final ArrowBuf dataBuffer = childVector1.getDataBuffer(); - final ArrowBuf validityBuffer = childVector1.getValidityBuffer(); - - // yet the underneath buffer contains the original buffer - for (int i = 0; i < validityBuffer.capacity(); i++) { - assertEquals(i, dataBuffer.getInt((long) i * IntVector.TYPE_WIDTH)); - } - } - } - - private void writeIntValues(UnionLargeListViewWriter writer, int[] values) { - writer.startListView(); - for (int v : values) { - writer.integer().writeInt(v); - } - writer.endListView(); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java 
deleted file mode 100644 index bfe1859bf5681..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Objects; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder; -import org.apache.arrow.vector.util.ReusableByteArray; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestLargeVarBinaryVector { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(Integer.MAX_VALUE); - } - - 
@AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testSetNullableLargeVarBinaryHolder() { - try (LargeVarBinaryVector vector = new LargeVarBinaryVector("", allocator)) { - vector.allocateNew(100, 10); - - NullableLargeVarBinaryHolder nullHolder = new NullableLargeVarBinaryHolder(); - nullHolder.isSet = 0; - - NullableLargeVarBinaryHolder binHolder = new NullableLargeVarBinaryHolder(); - binHolder.isSet = 1; - - String str = "hello"; - try (ArrowBuf buf = allocator.buffer(16)) { - buf.setBytes(0, str.getBytes(StandardCharsets.UTF_8)); - - binHolder.start = 0; - binHolder.end = str.length(); - binHolder.buffer = buf; - - vector.set(0, nullHolder); - vector.set(1, binHolder); - - // verify results - assertTrue(vector.isNull(0)); - assertEquals( - str, new String(Objects.requireNonNull(vector.get(1)), StandardCharsets.UTF_8)); - } - } - } - - @Test - public void testSetNullableLargeVarBinaryHolderSafe() { - try (LargeVarBinaryVector vector = new LargeVarBinaryVector("", allocator)) { - vector.allocateNew(5, 1); - - NullableLargeVarBinaryHolder nullHolder = new NullableLargeVarBinaryHolder(); - nullHolder.isSet = 0; - - NullableLargeVarBinaryHolder binHolder = new NullableLargeVarBinaryHolder(); - binHolder.isSet = 1; - - String str = "hello world"; - try (ArrowBuf buf = allocator.buffer(16)) { - buf.setBytes(0, str.getBytes(StandardCharsets.UTF_8)); - - binHolder.start = 0; - binHolder.end = str.length(); - binHolder.buffer = buf; - - vector.setSafe(0, binHolder); - vector.setSafe(1, nullHolder); - - // verify results - assertEquals( - str, new String(Objects.requireNonNull(vector.get(0)), StandardCharsets.UTF_8)); - assertTrue(vector.isNull(1)); - } - } - } - - @Test - public void testGetBytesRepeatedly() { - try (LargeVarBinaryVector vector = new LargeVarBinaryVector("", allocator)) { - vector.allocateNew(5, 1); - - final String str = "hello world"; - final String str2 = "foo"; - vector.setSafe(0, 
str.getBytes(StandardCharsets.UTF_8)); - vector.setSafe(1, str2.getBytes(StandardCharsets.UTF_8)); - - // verify results - ReusableByteArray reusableByteArray = new ReusableByteArray(); - vector.read(0, reusableByteArray); - byte[] oldBuffer = reusableByteArray.getBuffer(); - assertArrayEquals( - str.getBytes(StandardCharsets.UTF_8), - Arrays.copyOfRange( - reusableByteArray.getBuffer(), 0, (int) reusableByteArray.getLength())); - - vector.read(1, reusableByteArray); - assertArrayEquals( - str2.getBytes(StandardCharsets.UTF_8), - Arrays.copyOfRange( - reusableByteArray.getBuffer(), 0, (int) reusableByteArray.getLength())); - - // There should not have been any reallocation since the newer value is smaller in length. - assertSame(oldBuffer, reusableByteArray.getBuffer()); - } - } - - @Test - public void testGetTransferPairWithField() { - try (BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 1000000, 1000000); - LargeVarBinaryVector v1 = new LargeVarBinaryVector("v1", childAllocator1)) { - v1.allocateNew(); - v1.setSafe(4094, "hello world".getBytes(StandardCharsets.UTF_8), 0, 11); - v1.setValueCount(4001); - - TransferPair tp = v1.getTransferPair(v1.getField(), allocator); - tp.transfer(); - LargeVarBinaryVector v2 = (LargeVarBinaryVector) tp.getTo(); - assertSame(v1.getField(), v2.getField()); - v2.clear(); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java deleted file mode 100644 index 7ea39c194c179..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java +++ /dev/null @@ -1,861 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.holders.NullableLargeVarCharHolder; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.Text; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import 
org.junit.jupiter.api.Test; - -public class TestLargeVarCharVector { - - private static final byte[] STR1 = "AAAAA1".getBytes(StandardCharsets.UTF_8); - private static final byte[] STR2 = "BBBBBBBBB2".getBytes(StandardCharsets.UTF_8); - private static final byte[] STR3 = "CCCC3".getBytes(StandardCharsets.UTF_8); - private static final byte[] STR4 = "DDDDDDDD4".getBytes(StandardCharsets.UTF_8); - private static final byte[] STR5 = "EEE5".getBytes(StandardCharsets.UTF_8); - private static final byte[] STR6 = "FFFFF6".getBytes(StandardCharsets.UTF_8); - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(Integer.MAX_VALUE); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testTransfer() { - try (BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 1000000, 1000000); - BufferAllocator childAllocator2 = allocator.newChildAllocator("child2", 1000000, 1000000); - LargeVarCharVector v1 = new LargeVarCharVector("v1", childAllocator1); - LargeVarCharVector v2 = new LargeVarCharVector("v2", childAllocator2); ) { - v1.allocateNew(); - v1.setSafe(4094, "hello world".getBytes(StandardCharsets.UTF_8), 0, 11); - v1.setValueCount(4001); - - long memoryBeforeTransfer = childAllocator1.getAllocatedMemory(); - - v1.makeTransferPair(v2).transfer(); - - assertEquals(0, childAllocator1.getAllocatedMemory()); - assertEquals(memoryBeforeTransfer, childAllocator2.getAllocatedMemory()); - } - } - - @Test - public void testCopyValueSafe() { - try (final LargeVarCharVector largeVarCharVector = - new LargeVarCharVector("myvector", allocator); - final LargeVarCharVector newLargeVarCharVector = - new LargeVarCharVector("newvector", allocator)) { - largeVarCharVector.allocateNew(10000, 1000); - - final int valueCount = 500; - populateLargeVarcharVector(largeVarCharVector, valueCount, null); - - final TransferPair tp = 
largeVarCharVector.makeTransferPair(newLargeVarCharVector); - - // new vector memory is not pre-allocated, we expect copyValueSafe work fine. - for (int i = 0; i < valueCount; i++) { - tp.copyValueSafe(i, i); - } - newLargeVarCharVector.setValueCount(valueCount); - - for (int i = 0; i < valueCount; i++) { - final boolean expectedSet = (i % 3) == 0; - if (expectedSet) { - assertFalse(largeVarCharVector.isNull(i)); - assertFalse(newLargeVarCharVector.isNull(i)); - assertArrayEquals(largeVarCharVector.get(i), newLargeVarCharVector.get(i)); - } else { - assertTrue(newLargeVarCharVector.isNull(i)); - } - } - } - } - - @Test - public void testSplitAndTransferNon() { - try (final LargeVarCharVector largeVarCharVector = - new LargeVarCharVector("myvector", allocator)) { - - largeVarCharVector.allocateNew(10000, 1000); - final int valueCount = 500; - populateLargeVarcharVector(largeVarCharVector, valueCount, null); - - final TransferPair tp = largeVarCharVector.getTransferPair(allocator); - try (LargeVarCharVector newLargeVarCharVector = (LargeVarCharVector) tp.getTo()) { - - tp.splitAndTransfer(0, 0); - assertEquals(0, newLargeVarCharVector.getValueCount()); - } - } - } - - @Test - public void testSplitAndTransferAll() { - try (final LargeVarCharVector largeVarCharVector = - new LargeVarCharVector("myvector", allocator)) { - - largeVarCharVector.allocateNew(10000, 1000); - final int valueCount = 500; - populateLargeVarcharVector(largeVarCharVector, valueCount, null); - - final TransferPair tp = largeVarCharVector.getTransferPair(allocator); - try (LargeVarCharVector newLargeVarCharVector = (LargeVarCharVector) tp.getTo()) { - - tp.splitAndTransfer(0, valueCount); - assertEquals(valueCount, newLargeVarCharVector.getValueCount()); - } - } - } - - @Test - public void testInvalidStartIndex() { - try (final LargeVarCharVector largeVarCharVector = - new LargeVarCharVector("myvector", allocator); - final LargeVarCharVector newLargeVarCharVector = - new 
LargeVarCharVector("newvector", allocator)) { - - largeVarCharVector.allocateNew(10000, 1000); - final int valueCount = 500; - populateLargeVarcharVector(largeVarCharVector, valueCount, null); - - final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector); - - IllegalArgumentException e = - assertThrows(IllegalArgumentException.class, () -> tp.splitAndTransfer(valueCount, 10)); - - assertEquals( - "Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage()); - } - } - - @Test - public void testInvalidLength() { - try (final LargeVarCharVector largeVarCharVector = - new LargeVarCharVector("myvector", allocator); - final LargeVarCharVector newLargeVarCharVector = - new LargeVarCharVector("newvector", allocator)) { - - largeVarCharVector.allocateNew(10000, 1000); - final int valueCount = 500; - populateLargeVarcharVector(largeVarCharVector, valueCount, null); - - final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector); - - IllegalArgumentException e = - assertThrows( - IllegalArgumentException.class, () -> tp.splitAndTransfer(0, valueCount * 2)); - - assertEquals( - "Invalid parameters startIndex: 0, length: 1000 for valueCount: 500", e.getMessage()); - } - } - - @Test /* LargeVarCharVector */ - public void testSizeOfValueBuffer() { - try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) { - int valueCount = 100; - int currentSize = 0; - vector.setInitialCapacity(valueCount); - vector.allocateNew(); - vector.setValueCount(valueCount); - for (int i = 0; i < valueCount; i++) { - currentSize += i; - vector.setSafe(i, new byte[i]); - } - - assertEquals(currentSize, vector.sizeOfValueBuffer()); - } - } - - @Test - public void testSetLastSetUsage() { - final byte[] STR1 = "AAAAA1".getBytes(StandardCharsets.UTF_8); - final byte[] STR2 = "BBBBBBBBB2".getBytes(StandardCharsets.UTF_8); - final byte[] STR3 = "CCCC3".getBytes(StandardCharsets.UTF_8); - final byte[] STR4 = 
"DDDDDDDD4".getBytes(StandardCharsets.UTF_8); - final byte[] STR5 = "EEE5".getBytes(StandardCharsets.UTF_8); - final byte[] STR6 = "FFFFF6".getBytes(StandardCharsets.UTF_8); - - try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) { - vector.allocateNew(1024 * 10, 1024); - - setBytes(0, STR1, vector); - setBytes(1, STR2, vector); - setBytes(2, STR3, vector); - setBytes(3, STR4, vector); - setBytes(4, STR5, vector); - setBytes(5, STR6, vector); - - /* Check current lastSet */ - assertEquals(-1, vector.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertArrayEquals(STR5, vector.get(4)); - assertArrayEquals(STR6, vector.get(5)); - - /* - * If we don't do setLastSe(5) before setValueCount(), then the latter will corrupt - * the value vector by filling in all positions [0,valuecount-1] will empty byte arrays. - * Run the test by commenting out next line and we should see incorrect vector output. 
- */ - vector.setLastSet(5); - vector.setValueCount(20); - - /* Check current lastSet */ - assertEquals(19, vector.getLastSet()); - - /* Check the vector output again */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertArrayEquals(STR5, vector.get(4)); - assertArrayEquals(STR6, vector.get(5)); - assertEquals(0, vector.getValueLength(6)); - assertEquals(0, vector.getValueLength(7)); - assertEquals(0, vector.getValueLength(8)); - assertEquals(0, vector.getValueLength(9)); - assertEquals(0, vector.getValueLength(10)); - assertEquals(0, vector.getValueLength(11)); - assertEquals(0, vector.getValueLength(12)); - assertEquals(0, vector.getValueLength(13)); - assertEquals(0, vector.getValueLength(14)); - assertEquals(0, vector.getValueLength(15)); - assertEquals(0, vector.getValueLength(16)); - assertEquals(0, vector.getValueLength(17)); - assertEquals(0, vector.getValueLength(18)); - assertEquals(0, vector.getValueLength(19)); - - /* Check offsets */ - assertEquals(0, vector.offsetBuffer.getLong(0 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(6, vector.offsetBuffer.getLong(1 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(16, vector.offsetBuffer.getLong(2 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(21, vector.offsetBuffer.getLong(3 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(30, vector.offsetBuffer.getLong(4 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(34, vector.offsetBuffer.getLong(5 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(6 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(7 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(8 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(9 * 
BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(10 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(11 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(12 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(13 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(14 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(15 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(16 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(17 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(18 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(19 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - - vector.set(19, STR6); - assertArrayEquals(STR6, vector.get(19)); - assertEquals(40, vector.offsetBuffer.getLong(19 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(46, vector.offsetBuffer.getLong(20 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - } - } - - @Test - public void testVectorAllocateNew() { - assertThrows( - OutOfMemoryException.class, - () -> { - try (RootAllocator smallAllocator = new RootAllocator(200); - LargeVarCharVector vector = new LargeVarCharVector("vec", smallAllocator)) { - vector.allocateNew(); - } - }); - } - - @Test - public void testLargeVariableVectorReallocation() { - assertThrows( - OversizedAllocationException.class, - () -> { - final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator); - // edge case 1: value count = MAX_VALUE_ALLOCATION - final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE; - final int expectedOffsetSize = 10; - try { - 
vector.allocateNew(expectedAllocationInBytes, 10); - assertTrue(expectedOffsetSize <= vector.getValueCapacity()); - assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity()); - vector.reAlloc(); - assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity()); - assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity()); - } finally { - vector.close(); - } - - // common: value count < MAX_VALUE_ALLOCATION - try { - vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0); - vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION - vector.reAlloc(); // this tests if it overflows - } finally { - vector.close(); - } - }); - } - - @Test - public void testSplitAndTransfer() { - try (final LargeVarCharVector largeVarCharVector = - new LargeVarCharVector("myvector", allocator)) { - largeVarCharVector.allocateNew(10000, 1000); - - final int valueCount = 500; - final String[] compareArray = new String[valueCount]; - - populateLargeVarcharVector(largeVarCharVector, valueCount, compareArray); - - final TransferPair tp = largeVarCharVector.getTransferPair(allocator); - try (final LargeVarCharVector newLargeVarCharVector = (LargeVarCharVector) tp.getTo()) { - final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}}; - - for (final int[] startLength : startLengths) { - final int start = startLength[0]; - final int length = startLength[1]; - tp.splitAndTransfer(start, length); - for (int i = 0; i < length; i++) { - final boolean expectedSet = ((start + i) % 3) == 0; - if (expectedSet) { - final byte[] expectedValue = compareArray[start + i].getBytes(StandardCharsets.UTF_8); - assertFalse(newLargeVarCharVector.isNull(i)); - assertArrayEquals(expectedValue, newLargeVarCharVector.get(i)); - } else { - assertTrue(newLargeVarCharVector.isNull(i)); - } - } - } - } - } - } - - @Test - public void testReallocAfterVectorTransfer() { - final byte[] STR1 = "AAAAA1".getBytes(StandardCharsets.UTF_8); - final byte[] STR2 
= "BBBBBBBBB2".getBytes(StandardCharsets.UTF_8); - - try (final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator)) { - /* 4096 values with 10 byte per record */ - vector.allocateNew(4096 * 10, 4096); - int valueCapacity = vector.getValueCapacity(); - assertTrue(valueCapacity >= 4096); - - /* populate the vector */ - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - vector.set(i, STR1); - } else { - vector.set(i, STR2); - } - } - - /* Check the vector output */ - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, vector.get(i)); - } else { - assertArrayEquals(STR2, vector.get(i)); - } - } - - /* trigger first realloc */ - vector.setSafe(valueCapacity, STR2, 0, STR2.length); - assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); - while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) { - vector.reallocDataBuffer(); - } - - /* populate the remaining vector */ - for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { - if ((i & 1) == 1) { - vector.set(i, STR1); - } else { - vector.set(i, STR2); - } - } - - /* Check the vector output */ - valueCapacity = vector.getValueCapacity(); - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, vector.get(i)); - } else { - assertArrayEquals(STR2, vector.get(i)); - } - } - - /* trigger second realloc */ - vector.setSafe(valueCapacity + 10, STR2, 0, STR2.length); - assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); - while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) { - vector.reallocDataBuffer(); - } - - /* populate the remaining vector */ - for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { - if ((i & 1) == 1) { - vector.set(i, STR1); - } else { - vector.set(i, STR2); - } - } - - /* Check the vector output */ - valueCapacity = vector.getValueCapacity(); - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, 
vector.get(i)); - } else { - assertArrayEquals(STR2, vector.get(i)); - } - } - - /* we are potentially working with 4x the size of vector buffer - * that we initially started with. Now let's transfer the vector. - */ - - TransferPair transferPair = vector.getTransferPair(allocator); - transferPair.transfer(); - try (LargeVarCharVector toVector = (LargeVarCharVector) transferPair.getTo()) { - valueCapacity = toVector.getValueCapacity(); - - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, toVector.get(i)); - } else { - assertArrayEquals(STR2, toVector.get(i)); - } - } - } - } - } - - @Test - public void testVectorLoadUnload() { - try (final LargeVarCharVector vector1 = new LargeVarCharVector("myvector", allocator)) { - - ValueVectorDataPopulator.setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6); - - assertEquals(5, vector1.getLastSet()); - vector1.setValueCount(15); - assertEquals(14, vector1.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector1.get(0)); - assertArrayEquals(STR2, vector1.get(1)); - assertArrayEquals(STR3, vector1.get(2)); - assertArrayEquals(STR4, vector1.get(3)); - assertArrayEquals(STR5, vector1.get(4)); - assertArrayEquals(STR6, vector1.get(5)); - - Field field = vector1.getField(); - String fieldName = field.getName(); - - List fields = new ArrayList<>(); - List fieldVectors = new ArrayList<>(); - - fields.add(field); - fieldVectors.add(vector1); - - Schema schema = new Schema(fields); - - VectorSchemaRoot schemaRoot1 = - new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); - VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); - - try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, allocator); ) { - - VectorLoader vectorLoader = new VectorLoader(schemaRoot2); - vectorLoader.load(recordBatch); - - LargeVarCharVector vector2 = (LargeVarCharVector) 
schemaRoot2.getVector(fieldName); - /* - * lastSet would have internally been set by VectorLoader.load() when it invokes - * loadFieldBuffers. - */ - assertEquals(14, vector2.getLastSet()); - vector2.setValueCount(25); - assertEquals(24, vector2.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector2.get(0)); - assertArrayEquals(STR2, vector2.get(1)); - assertArrayEquals(STR3, vector2.get(2)); - assertArrayEquals(STR4, vector2.get(3)); - assertArrayEquals(STR5, vector2.get(4)); - assertArrayEquals(STR6, vector2.get(5)); - } - } - } - - @Test - public void testFillEmptiesUsage() { - try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) { - - vector.allocateNew(1024 * 10, 1024); - - setBytes(0, STR1, vector); - setBytes(1, STR2, vector); - setBytes(2, STR3, vector); - setBytes(3, STR4, vector); - setBytes(4, STR5, vector); - setBytes(5, STR6, vector); - - /* Check current lastSet */ - assertEquals(-1, vector.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertArrayEquals(STR5, vector.get(4)); - assertArrayEquals(STR6, vector.get(5)); - - vector.setLastSet(5); - /* fill empty byte arrays from index [6, 9] */ - vector.fillEmpties(10); - - /* Check current lastSet */ - assertEquals(9, vector.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertArrayEquals(STR5, vector.get(4)); - assertArrayEquals(STR6, vector.get(5)); - assertEquals(0, vector.getValueLength(6)); - assertEquals(0, vector.getValueLength(7)); - assertEquals(0, vector.getValueLength(8)); - assertEquals(0, vector.getValueLength(9)); - - setBytes(10, STR1, vector); - setBytes(11, STR2, vector); - - 
vector.setLastSet(11); - /* fill empty byte arrays from index [12, 14] */ - vector.setValueCount(15); - - /* Check current lastSet */ - assertEquals(14, vector.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertArrayEquals(STR5, vector.get(4)); - assertArrayEquals(STR6, vector.get(5)); - assertEquals(0, vector.getValueLength(6)); - assertEquals(0, vector.getValueLength(7)); - assertEquals(0, vector.getValueLength(8)); - assertEquals(0, vector.getValueLength(9)); - assertArrayEquals(STR1, vector.get(10)); - assertArrayEquals(STR2, vector.get(11)); - assertEquals(0, vector.getValueLength(12)); - assertEquals(0, vector.getValueLength(13)); - assertEquals(0, vector.getValueLength(14)); - - /* Check offsets */ - assertEquals(0, vector.offsetBuffer.getLong(0 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(6, vector.offsetBuffer.getLong(1 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(16, vector.offsetBuffer.getLong(2 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(21, vector.offsetBuffer.getLong(3 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(30, vector.offsetBuffer.getLong(4 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(34, vector.offsetBuffer.getLong(5 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - - assertEquals(40, vector.offsetBuffer.getLong(6 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(7 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(8 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(9 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getLong(10 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - - assertEquals(46, vector.offsetBuffer.getLong(11 * 
BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(56, vector.offsetBuffer.getLong(12 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - - assertEquals(56, vector.offsetBuffer.getLong(13 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(56, vector.offsetBuffer.getLong(14 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - assertEquals(56, vector.offsetBuffer.getLong(15 * BaseLargeVariableWidthVector.OFFSET_WIDTH)); - } - } - - @Test - public void testGetBufferAddress1() { - try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) { - - ValueVectorDataPopulator.setVector(vector, STR1, STR2, STR3, STR4, STR5, STR6); - vector.setValueCount(15); - - /* check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertArrayEquals(STR5, vector.get(4)); - assertArrayEquals(STR6, vector.get(5)); - - List buffers = vector.getFieldBuffers(); - long bitAddress = vector.getValidityBufferAddress(); - long offsetAddress = vector.getOffsetBufferAddress(); - long dataAddress = vector.getDataBufferAddress(); - - assertEquals(3, buffers.size()); - assertEquals(bitAddress, buffers.get(0).memoryAddress()); - assertEquals(offsetAddress, buffers.get(1).memoryAddress()); - assertEquals(dataAddress, buffers.get(2).memoryAddress()); - } - } - - @Test - public void testSetNullableLargeVarCharHolder() { - try (LargeVarCharVector vector = new LargeVarCharVector("", allocator)) { - vector.allocateNew(100, 10); - - NullableLargeVarCharHolder nullHolder = new NullableLargeVarCharHolder(); - nullHolder.isSet = 0; - - NullableLargeVarCharHolder stringHolder = new NullableLargeVarCharHolder(); - stringHolder.isSet = 1; - - String str = "hello"; - ArrowBuf buf = allocator.buffer(16); - buf.setBytes(0, str.getBytes(StandardCharsets.UTF_8)); - - stringHolder.start = 0; - stringHolder.end = str.length(); - 
stringHolder.buffer = buf; - - vector.set(0, nullHolder); - vector.set(1, stringHolder); - - // verify results - assertTrue(vector.isNull(0)); - assertEquals(str, new String(Objects.requireNonNull(vector.get(1)), StandardCharsets.UTF_8)); - - buf.close(); - } - } - - @Test - public void testSetNullableLargeVarCharHolderSafe() { - try (LargeVarCharVector vector = new LargeVarCharVector("", allocator)) { - vector.allocateNew(5, 1); - - NullableLargeVarCharHolder nullHolder = new NullableLargeVarCharHolder(); - nullHolder.isSet = 0; - - NullableLargeVarCharHolder stringHolder = new NullableLargeVarCharHolder(); - stringHolder.isSet = 1; - - String str = "hello world"; - ArrowBuf buf = allocator.buffer(16); - buf.setBytes(0, str.getBytes(StandardCharsets.UTF_8)); - - stringHolder.start = 0; - stringHolder.end = str.length(); - stringHolder.buffer = buf; - - vector.setSafe(0, stringHolder); - vector.setSafe(1, nullHolder); - - // verify results - assertEquals(str, new String(Objects.requireNonNull(vector.get(0)), StandardCharsets.UTF_8)); - assertTrue(vector.isNull(1)); - - buf.close(); - } - } - - @Test - public void testGetNullFromLargeVariableWidthVector() { - try (final LargeVarCharVector largeVarCharVector = - new LargeVarCharVector("largevarcharvec", allocator); - final LargeVarBinaryVector largeVarBinaryVector = - new LargeVarBinaryVector("largevarbinary", allocator)) { - largeVarCharVector.allocateNew(10, 1); - largeVarBinaryVector.allocateNew(10, 1); - - largeVarCharVector.setNull(0); - largeVarBinaryVector.setNull(0); - - assertNull(largeVarCharVector.get(0)); - assertNull(largeVarBinaryVector.get(0)); - } - } - - @Test - public void testLargeVariableWidthVectorNullHashCode() { - try (LargeVarCharVector largeVarChVec = - new LargeVarCharVector("large var char vector", allocator)) { - largeVarChVec.allocateNew(100, 1); - largeVarChVec.setValueCount(1); - - largeVarChVec.set(0, "abc".getBytes(StandardCharsets.UTF_8)); - largeVarChVec.setNull(0); - - 
assertEquals(0, largeVarChVec.hashCode(0)); - } - } - - @Test - public void testUnloadLargeVariableWidthVector() { - try (final LargeVarCharVector largeVarCharVector = - new LargeVarCharVector("var char", allocator)) { - largeVarCharVector.allocateNew(5, 2); - largeVarCharVector.setValueCount(2); - - largeVarCharVector.set(0, "abcd".getBytes(StandardCharsets.UTF_8)); - - List bufs = largeVarCharVector.getFieldBuffers(); - assertEquals(3, bufs.size()); - - ArrowBuf offsetBuf = bufs.get(1); - ArrowBuf dataBuf = bufs.get(2); - - assertEquals(24, offsetBuf.writerIndex()); - assertEquals(4, offsetBuf.getLong(8)); - assertEquals(4, offsetBuf.getLong(16)); - - assertEquals(4, dataBuf.writerIndex()); - } - } - - @Test - public void testNullableType() { - try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) { - vector.setInitialCapacity(512); - vector.allocateNew(); - - assertTrue(vector.getValueCapacity() >= 512); - int initialCapacity = vector.getValueCapacity(); - - try { - vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); - fail("Expected out of bounds exception"); - } catch (Exception e) { - // ok - } - - vector.reAlloc(); - assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); - - vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); - assertEquals("foo", new String(vector.get(initialCapacity), StandardCharsets.UTF_8)); - } - } - - @Test - public void testGetTextRepeatedly() { - try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) { - - ValueVectorDataPopulator.setVector(vector, STR1, STR2); - vector.setValueCount(2); - - /* check the vector output */ - Text text = new Text(); - vector.read(0, text); - byte[] result = new byte[(int) text.getLength()]; - System.arraycopy(text.getBytes(), 0, result, 0, (int) text.getLength()); - assertArrayEquals(STR1, result); - vector.read(1, text); - result = new byte[(int) text.getLength()]; - System.arraycopy(text.getBytes(), 0, 
result, 0, (int) text.getLength()); - assertArrayEquals(STR2, text.getBytes()); - } - } - - @Test - public void testGetTransferPairWithField() { - try (BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 1000000, 1000000); - LargeVarCharVector v1 = new LargeVarCharVector("v1", childAllocator1)) { - v1.allocateNew(); - v1.setSafe(4094, "hello world".getBytes(StandardCharsets.UTF_8), 0, 11); - v1.setValueCount(4001); - - TransferPair tp = v1.getTransferPair(v1.getField(), allocator); - tp.transfer(); - LargeVarCharVector v2 = (LargeVarCharVector) tp.getTo(); - assertSame(v1.getField(), v2.getField()); - v2.clear(); - } - } - - private void populateLargeVarcharVector( - final LargeVarCharVector vector, int valueCount, String[] values) { - for (int i = 0; i < valueCount; i += 3) { - final String s = String.format("%010d", i); - vector.set(i, s.getBytes(StandardCharsets.UTF_8)); - if (values != null) { - values[i] = s; - } - } - vector.setValueCount(valueCount); - } - - public static void setBytes(int index, byte[] bytes, LargeVarCharVector vector) { - final long currentOffset = - vector.offsetBuffer.getLong((long) index * BaseLargeVariableWidthVector.OFFSET_WIDTH); - - BitVectorHelper.setBit(vector.validityBuffer, index); - vector.offsetBuffer.setLong( - (long) (index + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH, - currentOffset + bytes.length); - vector.valueBuffer.setBytes(currentOffset, bytes, 0, bytes.length); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java deleted file mode 100644 index 1d6fa39f9ea4f..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ /dev/null @@ -1,1208 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.complex.BaseRepeatedValueVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.holders.DurationHolder; -import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; -import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import 
org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestListVector { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testCopyFrom() throws Exception { - try (ListVector inVector = ListVector.empty("input", allocator); - ListVector outVector = ListVector.empty("output", allocator)) { - UnionListWriter writer = inVector.getWriter(); - writer.allocate(); - - // populate input vector with the following records - // [1, 2, 3] - // null - // [] - writer.setPosition(0); // optional - writer.startList(); - writer.bigInt().writeBigInt(1); - writer.bigInt().writeBigInt(2); - writer.bigInt().writeBigInt(3); - writer.endList(); - - writer.setPosition(2); - writer.startList(); - writer.endList(); - - writer.setValueCount(3); - - // copy values from input to output - outVector.allocateNew(); - for (int i = 0; i < 3; i++) { - outVector.copyFrom(i, i, inVector); - } - outVector.setValueCount(3); - - // assert the output vector is correct - FieldReader reader = outVector.getReader(); - assertTrue(reader.isSet(), "shouldn't be null"); - reader.setPosition(1); - assertFalse(reader.isSet(), "should be null"); - reader.setPosition(2); - assertTrue(reader.isSet(), "shouldn't be null"); - - /* index 0 */ - Object result = outVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(1), resultSet.get(0)); - assertEquals(Long.valueOf(2), resultSet.get(1)); - assertEquals(Long.valueOf(3), resultSet.get(2)); - - /* index 1 */ - result = outVector.getObject(1); - assertNull(result); - - /* index 2 */ - result = outVector.getObject(2); - 
resultSet = (ArrayList) result; - assertEquals(0, resultSet.size()); - - /* 3+0+0/3 */ - assertEquals(1.0D, inVector.getDensity(), 0); - } - } - - @Test - public void testSetLastSetUsage() throws Exception { - try (ListVector listVector = ListVector.empty("input", allocator)) { - - /* Explicitly add the dataVector */ - MinorType type = MinorType.BIGINT; - listVector.addOrGetVector(FieldType.nullable(type.getType())); - - /* allocate memory */ - listVector.allocateNew(); - - /* get inner buffers; validityBuffer and offsetBuffer */ - - ArrowBuf validityBuffer = listVector.getValidityBuffer(); - ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); - - /* get the underlying data vector -- BigIntVector */ - BigIntVector dataVector = (BigIntVector) listVector.getDataVector(); - - /* check current lastSet */ - assertEquals(Integer.toString(-1), Integer.toString(listVector.getLastSet())); - - int index = 0; - int offset; - - /* write [10, 11, 12] to the list vector at index 0 */ - BitVectorHelper.setBit(validityBuffer, index); - dataVector.setSafe(0, 1, 10); - dataVector.setSafe(1, 1, 11); - dataVector.setSafe(2, 1, 12); - offsetBuffer.setInt((index + 1) * ListVector.OFFSET_WIDTH, 3); - - index += 1; - - /* write [13, 14] to the list vector at index 1 */ - BitVectorHelper.setBit(validityBuffer, index); - dataVector.setSafe(3, 1, 13); - dataVector.setSafe(4, 1, 14); - offsetBuffer.setInt((index + 1) * ListVector.OFFSET_WIDTH, 5); - - index += 1; - - /* write [15, 16, 17] to the list vector at index 2 */ - BitVectorHelper.setBit(validityBuffer, index); - dataVector.setSafe(5, 1, 15); - dataVector.setSafe(6, 1, 16); - dataVector.setSafe(7, 1, 17); - offsetBuffer.setInt((index + 1) * ListVector.OFFSET_WIDTH, 8); - - /* check current lastSet */ - assertEquals(Integer.toString(-1), Integer.toString(listVector.getLastSet())); - - /* set lastset and arbitrary valuecount for list vector. 
- * - * NOTE: if we don't execute setLastSet() before setLastValueCount(), then - * the latter will corrupt the offsetBuffer and thus the accessor will not - * retrieve the correct values from underlying dataBuffer. Run the test - * by commenting out next line and we should see failures from 5th assert - * onwards. This is why doing setLastSet() is important before setValueCount() - * once the vector has been loaded. - * - * Another important thing to remember is the value of lastSet itself. - * Even though the listVector has elements till index 2 only, the lastSet should - * be set as 3. This is because the offsetBuffer has valid offsets filled till index 3. - * If we do setLastSet(2), the offsetBuffer at index 3 will contain incorrect value - * after execution of setValueCount(). - * - * correct state of the listVector - * bitvector {1, 1, 1, 0, 0.... } - * offsetvector {0, 3, 5, 8, 8, 8.....} - * datavector { [10, 11, 12], - * [13, 14], - * [15, 16, 17] - * } - * - * if we don't do setLastSet() before setValueCount --> incorrect state - * bitvector {1, 1, 1, 0, 0.... } - * offsetvector {0, 0, 0, 0, 0, 0.....} - * datavector { [10, 11, 12], - * [13, 14], - * [15, 16, 17] - * } - * - * if we do setLastSet(2) before setValueCount --> incorrect state - * bitvector {1, 1, 1, 0, 0.... 
} - * offsetvector {0, 3, 5, 5, 5, 5.....} - * datavector { [10, 11, 12], - * [13, 14], - * [15, 16, 17] - * } - */ - listVector.setLastSet(2); - listVector.setValueCount(10); - - /* (3+2+3)/10 */ - assertEquals(0.8D, listVector.getDensity(), 0); - - index = 0; - offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(0), Integer.toString(offset)); - - Long actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(10), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(11), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(12), actual); - - index++; - offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(3), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(13), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(14), actual); - - index++; - offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(5), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(15), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(16), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(17), actual); - - index++; - offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(8), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertNull(actual); - } - } - - @Test - public void testSplitAndTransfer() throws Exception { - try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { - - /* Explicitly add the dataVector */ - MinorType type = MinorType.BIGINT; - listVector.addOrGetVector(FieldType.nullable(type.getType())); - - UnionListWriter listWriter = listVector.getWriter(); - - /* allocate memory */ - 
listWriter.allocate(); - - /* populate data */ - listWriter.setPosition(0); - listWriter.startList(); - listWriter.bigInt().writeBigInt(10); - listWriter.bigInt().writeBigInt(11); - listWriter.bigInt().writeBigInt(12); - listWriter.endList(); - - listWriter.setPosition(1); - listWriter.startList(); - listWriter.bigInt().writeBigInt(13); - listWriter.bigInt().writeBigInt(14); - listWriter.endList(); - - listWriter.setPosition(2); - listWriter.startList(); - listWriter.bigInt().writeBigInt(15); - listWriter.bigInt().writeBigInt(16); - listWriter.bigInt().writeBigInt(17); - listWriter.bigInt().writeBigInt(18); - listWriter.endList(); - - listWriter.setPosition(3); - listWriter.startList(); - listWriter.bigInt().writeBigInt(19); - listWriter.endList(); - - listWriter.setPosition(4); - listWriter.startList(); - listWriter.bigInt().writeBigInt(20); - listWriter.bigInt().writeBigInt(21); - listWriter.bigInt().writeBigInt(22); - listWriter.bigInt().writeBigInt(23); - listWriter.endList(); - - listVector.setValueCount(5); - - assertEquals(4, listVector.getLastSet()); - - /* get offset buffer */ - final ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); - - /* get dataVector */ - BigIntVector dataVector = (BigIntVector) listVector.getDataVector(); - - /* check the vector output */ - - int index = 0; - int offset; - Long actual; - - /* index 0 */ - assertFalse(listVector.isNull(index)); - offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(0), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(10), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(11), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(12), actual); - - /* index 1 */ - index++; - assertFalse(listVector.isNull(index)); - offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(3), 
Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(13), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(14), actual); - - /* index 2 */ - index++; - assertFalse(listVector.isNull(index)); - offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(5), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(15), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(16), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(17), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(18), actual); - - /* index 3 */ - index++; - assertFalse(listVector.isNull(index)); - offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(9), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(19), actual); - - /* index 4 */ - index++; - assertFalse(listVector.isNull(index)); - offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(10), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(20), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(21), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(22), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(23), actual); - - /* index 5 */ - index++; - assertTrue(listVector.isNull(index)); - offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH); - assertEquals(Integer.toString(14), Integer.toString(offset)); - - /* do split and transfer */ - try (ListVector toVector = ListVector.empty("toVector", allocator)) { - - TransferPair transferPair = listVector.makeTransferPair(toVector); 
- - int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}}; - - for (final int[] transferLength : transferLengths) { - int start = transferLength[0]; - int splitLength = transferLength[1]; - - int dataLength1 = 0; - int dataLength2 = 0; - - int offset1 = 0; - int offset2 = 0; - - transferPair.splitAndTransfer(start, splitLength); - - /* get offsetBuffer of toVector */ - final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer(); - - /* get dataVector of toVector */ - BigIntVector dataVector1 = (BigIntVector) toVector.getDataVector(); - - for (int i = 0; i < splitLength; i++) { - dataLength1 = - offsetBuffer.getInt((start + i + 1) * ListVector.OFFSET_WIDTH) - - offsetBuffer.getInt((start + i) * ListVector.OFFSET_WIDTH); - dataLength2 = - toOffsetBuffer.getInt((i + 1) * ListVector.OFFSET_WIDTH) - - toOffsetBuffer.getInt(i * ListVector.OFFSET_WIDTH); - - assertEquals( - dataLength1, - dataLength2, - "Different data lengths at index: " + i + " and start: " + start); - - offset1 = offsetBuffer.getInt((start + i) * ListVector.OFFSET_WIDTH); - offset2 = toOffsetBuffer.getInt(i * ListVector.OFFSET_WIDTH); - - for (int j = 0; j < dataLength1; j++) { - assertEquals( - dataVector.getObject(offset1), - dataVector1.getObject(offset2), - "Different data at indexes: " + offset1 + " and " + offset2); - - offset1++; - offset2++; - } - } - } - } - } - } - - @Test - public void testNestedListVector() throws Exception { - try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { - - UnionListWriter listWriter = listVector.getWriter(); - - /* allocate memory */ - listWriter.allocate(); - - /* the dataVector that backs a listVector will also be a - * listVector for this test. 
- */ - - /* write one or more inner lists at index 0 */ - listWriter.setPosition(0); - listWriter.startList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(50); - listWriter.list().bigInt().writeBigInt(100); - listWriter.list().bigInt().writeBigInt(200); - listWriter.list().endList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(75); - listWriter.list().bigInt().writeBigInt(125); - listWriter.list().bigInt().writeBigInt(150); - listWriter.list().bigInt().writeBigInt(175); - listWriter.list().endList(); - - listWriter.endList(); - - /* write one or more inner lists at index 1 */ - listWriter.setPosition(1); - listWriter.startList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(10); - listWriter.list().endList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(15); - listWriter.list().bigInt().writeBigInt(20); - listWriter.list().endList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(25); - listWriter.list().bigInt().writeBigInt(30); - listWriter.list().bigInt().writeBigInt(35); - listWriter.list().endList(); - - listWriter.endList(); - - assertEquals(1, listVector.getLastSet()); - - listVector.setValueCount(2); - - assertEquals(2, listVector.getValueCount()); - - /* get listVector value at index 0 -- the value itself is a listvector */ - Object result = listVector.getObject(0); - ArrayList> resultSet = (ArrayList>) result; - ArrayList list; - - assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ - assertEquals(3, resultSet.get(0).size()); /* size of first inner list */ - assertEquals(4, resultSet.get(1).size()); /* size of second inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(50), list.get(0)); - assertEquals(Long.valueOf(100), list.get(1)); - assertEquals(Long.valueOf(200), list.get(2)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(75), list.get(0)); - 
assertEquals(Long.valueOf(125), list.get(1)); - assertEquals(Long.valueOf(150), list.get(2)); - assertEquals(Long.valueOf(175), list.get(3)); - - /* get listVector value at index 1 -- the value itself is a listvector */ - result = listVector.getObject(1); - resultSet = (ArrayList>) result; - - assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ - assertEquals(1, resultSet.get(0).size()); /* size of first inner list */ - assertEquals(2, resultSet.get(1).size()); /* size of second inner list */ - assertEquals(3, resultSet.get(2).size()); /* size of third inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(10), list.get(0)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(15), list.get(0)); - assertEquals(Long.valueOf(20), list.get(1)); - - list = resultSet.get(2); - assertEquals(Long.valueOf(25), list.get(0)); - assertEquals(Long.valueOf(30), list.get(1)); - assertEquals(Long.valueOf(35), list.get(2)); - - /* check underlying bitVector */ - assertFalse(listVector.isNull(0)); - assertFalse(listVector.isNull(1)); - - /* check underlying offsets */ - final ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); - - /* listVector has 2 lists at index 0 and 3 lists at index 1 */ - assertEquals(0, offsetBuffer.getInt(0 * ListVector.OFFSET_WIDTH)); - assertEquals(2, offsetBuffer.getInt(1 * ListVector.OFFSET_WIDTH)); - assertEquals(5, offsetBuffer.getInt(2 * ListVector.OFFSET_WIDTH)); - } - } - - @Test - public void testNestedListVector1() throws Exception { - try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { - - MinorType listType = MinorType.LIST; - MinorType scalarType = MinorType.BIGINT; - - listVector.addOrGetVector(FieldType.nullable(listType.getType())); - - ListVector innerList1 = (ListVector) listVector.getDataVector(); - innerList1.addOrGetVector(FieldType.nullable(listType.getType())); - - ListVector innerList2 = (ListVector) innerList1.getDataVector(); - 
innerList2.addOrGetVector(FieldType.nullable(listType.getType())); - - ListVector innerList3 = (ListVector) innerList2.getDataVector(); - innerList3.addOrGetVector(FieldType.nullable(listType.getType())); - - ListVector innerList4 = (ListVector) innerList3.getDataVector(); - innerList4.addOrGetVector(FieldType.nullable(listType.getType())); - - ListVector innerList5 = (ListVector) innerList4.getDataVector(); - innerList5.addOrGetVector(FieldType.nullable(listType.getType())); - - ListVector innerList6 = (ListVector) innerList5.getDataVector(); - innerList6.addOrGetVector(FieldType.nullable(scalarType.getType())); - - listVector.setInitialCapacity(128); - } - } - - @Test - public void testNestedListVector2() throws Exception { - try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { - listVector.setInitialCapacity(1); - UnionListWriter listWriter = listVector.getWriter(); - /* allocate memory */ - listWriter.allocate(); - - /* write one or more inner lists at index 0 */ - listWriter.setPosition(0); - listWriter.startList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(50); - listWriter.list().bigInt().writeBigInt(100); - listWriter.list().bigInt().writeBigInt(200); - listWriter.list().endList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(75); - listWriter.list().bigInt().writeBigInt(125); - listWriter.list().endList(); - - listWriter.endList(); - - /* write one or more inner lists at index 1 */ - listWriter.setPosition(1); - listWriter.startList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(15); - listWriter.list().bigInt().writeBigInt(20); - listWriter.list().endList(); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(25); - listWriter.list().bigInt().writeBigInt(30); - listWriter.list().bigInt().writeBigInt(35); - listWriter.list().endList(); - - listWriter.endList(); - - assertEquals(1, listVector.getLastSet()); - - 
listVector.setValueCount(2); - - assertEquals(2, listVector.getValueCount()); - - /* get listVector value at index 0 -- the value itself is a listvector */ - Object result = listVector.getObject(0); - ArrayList> resultSet = (ArrayList>) result; - ArrayList list; - - assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ - assertEquals(3, resultSet.get(0).size()); /* size of first inner list */ - assertEquals(2, resultSet.get(1).size()); /* size of second inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(50), list.get(0)); - assertEquals(Long.valueOf(100), list.get(1)); - assertEquals(Long.valueOf(200), list.get(2)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(75), list.get(0)); - assertEquals(Long.valueOf(125), list.get(1)); - - /* get listVector value at index 1 -- the value itself is a listvector */ - result = listVector.getObject(1); - resultSet = (ArrayList>) result; - - assertEquals(2, resultSet.size()); /* 3 inner lists at index 1 */ - assertEquals(2, resultSet.get(0).size()); /* size of first inner list */ - assertEquals(3, resultSet.get(1).size()); /* size of second inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(15), list.get(0)); - assertEquals(Long.valueOf(20), list.get(1)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(25), list.get(0)); - assertEquals(Long.valueOf(30), list.get(1)); - assertEquals(Long.valueOf(35), list.get(2)); - - /* check underlying bitVector */ - assertFalse(listVector.isNull(0)); - assertFalse(listVector.isNull(1)); - - /* check underlying offsets */ - final ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); - - /* listVector has 2 lists at index 0 and 3 lists at index 1 */ - assertEquals(0, offsetBuffer.getInt(0 * ListVector.OFFSET_WIDTH)); - assertEquals(2, offsetBuffer.getInt(1 * ListVector.OFFSET_WIDTH)); - assertEquals(4, offsetBuffer.getInt(2 * ListVector.OFFSET_WIDTH)); - } - } - - @Test - public void testGetBufferAddress() throws Exception { 
- try (ListVector listVector = ListVector.empty("vector", allocator)) { - - UnionListWriter listWriter = listVector.getWriter(); - boolean error = false; - - listWriter.allocate(); - - listWriter.setPosition(0); - listWriter.startList(); - listWriter.bigInt().writeBigInt(50); - listWriter.bigInt().writeBigInt(100); - listWriter.bigInt().writeBigInt(200); - listWriter.endList(); - - listWriter.setPosition(1); - listWriter.startList(); - listWriter.bigInt().writeBigInt(250); - listWriter.bigInt().writeBigInt(300); - listWriter.endList(); - - listVector.setValueCount(2); - - /* check listVector contents */ - Object result = listVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(50), resultSet.get(0)); - assertEquals(Long.valueOf(100), resultSet.get(1)); - assertEquals(Long.valueOf(200), resultSet.get(2)); - - result = listVector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(250), resultSet.get(0)); - assertEquals(Long.valueOf(300), resultSet.get(1)); - - List buffers = listVector.getFieldBuffers(); - - long bitAddress = listVector.getValidityBufferAddress(); - long offsetAddress = listVector.getOffsetBufferAddress(); - - try { - listVector.getDataBufferAddress(); - } catch (UnsupportedOperationException ue) { - error = true; - } finally { - assertTrue(error); - } - - assertEquals(2, buffers.size()); - assertEquals(bitAddress, buffers.get(0).memoryAddress()); - assertEquals(offsetAddress, buffers.get(1).memoryAddress()); - - /* (3+2)/2 */ - assertEquals(2.5, listVector.getDensity(), 0); - } - } - - @Test - public void testConsistentChildName() throws Exception { - try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { - String emptyListStr = listVector.getField().toString(); - assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME)); - - 
listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - String emptyVectorStr = listVector.getField().toString(); - assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME)); - } - } - - @Test - public void testSetInitialCapacity() { - try (final ListVector vector = ListVector.empty("", allocator)) { - vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - - /* - * use the default multiplier of 5, - * 512 * 5 => 2560 * 4 => 10240 bytes => 16KB => 4096 value capacity. - */ - vector.setInitialCapacity(512); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 5); - - /* use density as 4 */ - vector.setInitialCapacity(512, 4); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4); - - /* - * inner value capacity we pass to data vector is 512 * 0.1 => 51 - * For an int vector this is 204 bytes of memory for data buffer - * and 7 bytes for validity buffer. - * and with power of 2 allocation, we allocate 256 bytes and 8 bytes - * for the data buffer and validity buffer of the inner vector. Thus - * value capacity of inner vector is 64 - */ - vector.setInitialCapacity(512, 0.1); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 51); - - /* - * inner value capacity we pass to data vector is 512 * 0.01 => 5 - * For an int vector this is 20 bytes of memory for data buffer - * and 1 byte for validity buffer. - * and with power of 2 allocation, we allocate 32 bytes and 1 bytes - * for the data buffer and validity buffer of the inner vector. 
Thus - * value capacity of inner vector is 8 - */ - vector.setInitialCapacity(512, 0.01); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 5); - - /* - * inner value capacity we pass to data vector is 5 * 0.1 => 0 - * which is then rounded off to 1. So we pass value count as 1 - * to the inner int vector. - * the offset buffer of the list vector is allocated for 6 values - * which is 24 bytes and then rounded off to 32 bytes (8 values) - * the validity buffer of the list vector is allocated for 5 - * values which is 1 byte. This is why value capacity of the list - * vector is 7 as we take the min of validity buffer value capacity - * and offset buffer value capacity. - */ - vector.setInitialCapacity(5, 0.1); - vector.allocateNew(); - assertEquals(7, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 1); - } - } - - @Test - public void testClearAndReuse() { - try (final ListVector vector = ListVector.empty("list", allocator)) { - BigIntVector bigIntVector = - (BigIntVector) - vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector(); - vector.setInitialCapacity(10); - vector.allocateNew(); - - vector.startNewValue(0); - bigIntVector.setSafe(0, 7); - vector.endValue(0, 1); - vector.startNewValue(1); - bigIntVector.setSafe(1, 8); - vector.endValue(1, 1); - vector.setValueCount(2); - - Object result = vector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(Long.valueOf(7), resultSet.get(0)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(Long.valueOf(8), resultSet.get(0)); - - // Clear and release the buffers to trigger a realloc when adding next value - vector.clear(); - - // The list vector should reuse a buffer when reallocating the offset buffer - vector.startNewValue(0); - bigIntVector.setSafe(0, 7); - vector.endValue(0, 1); - vector.startNewValue(1); - 
bigIntVector.setSafe(1, 8); - vector.endValue(1, 1); - vector.setValueCount(2); - - result = vector.getObject(0); - resultSet = (ArrayList) result; - assertEquals(Long.valueOf(7), resultSet.get(0)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(Long.valueOf(8), resultSet.get(0)); - } - } - - @Test - public void testWriterGetField() { - try (final ListVector vector = ListVector.empty("list", allocator)) { - - UnionListWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writer.startList(); - writer.integer().writeInt(1); - writer.integer().writeInt(2); - writer.endList(); - vector.setValueCount(2); - - Field expectedDataField = - new Field( - BaseRepeatedValueVector.DATA_VECTOR_NAME, - FieldType.nullable(new ArrowType.Int(32, true)), - null); - Field expectedField = - new Field( - vector.getName(), - FieldType.nullable(ArrowType.List.INSTANCE), - Arrays.asList(expectedDataField)); - - assertEquals(expectedField, writer.getField()); - } - } - - @Test - public void testWriterGetTimestampMilliTZField() { - try (final ListVector vector = ListVector.empty("list", allocator)) { - org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); - writer.allocate(); - - writer.startList(); - writer.timeStampMilliTZ().writeTimeStampMilliTZ(1000L); - writer.timeStampMilliTZ().writeTimeStampMilliTZ(2000L); - writer.endList(); - vector.setValueCount(1); - - Field expectedDataField = - new Field( - BaseRepeatedValueVector.DATA_VECTOR_NAME, - FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")), - null); - Field expectedField = - new Field( - vector.getName(), - FieldType.nullable(ArrowType.List.INSTANCE), - Arrays.asList(expectedDataField)); - - assertEquals(expectedField, writer.getField()); - } - } - - @Test - public void testWriterUsingHolderGetTimestampMilliTZField() { - try (final ListVector vector = ListVector.empty("list", allocator)) { - 
org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); - writer.allocate(); - - TimeStampMilliTZHolder holder = new TimeStampMilliTZHolder(); - holder.timezone = "SomeFakeTimeZone"; - writer.startList(); - holder.value = 12341234L; - writer.timeStampMilliTZ().write(holder); - holder.value = 55555L; - writer.timeStampMilliTZ().write(holder); - - // Writing with a different timezone should throw - holder.timezone = "AsdfTimeZone"; - holder.value = 77777; - IllegalArgumentException ex = - assertThrows( - IllegalArgumentException.class, () -> writer.timeStampMilliTZ().write(holder)); - assertEquals( - "holder.timezone: AsdfTimeZone not equal to vector timezone: SomeFakeTimeZone", - ex.getMessage()); - - writer.endList(); - vector.setValueCount(1); - - Field expectedDataField = - new Field( - BaseRepeatedValueVector.DATA_VECTOR_NAME, - FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "SomeFakeTimeZone")), - null); - Field expectedField = - new Field( - vector.getName(), - FieldType.nullable(ArrowType.List.INSTANCE), - Arrays.asList(expectedDataField)); - - assertEquals(expectedField, writer.getField()); - } - } - - @Test - public void testWriterGetDurationField() { - try (final ListVector vector = ListVector.empty("list", allocator)) { - org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); - writer.allocate(); - - DurationHolder durationHolder = new DurationHolder(); - durationHolder.unit = TimeUnit.MILLISECOND; - - writer.startList(); - durationHolder.value = 812374L; - writer.duration().write(durationHolder); - durationHolder.value = 143451L; - writer.duration().write(durationHolder); - - // Writing with a different unit should throw - durationHolder.unit = TimeUnit.SECOND; - durationHolder.value = 8888888; - IllegalArgumentException ex = - assertThrows( - IllegalArgumentException.class, () -> writer.duration().write(durationHolder)); - assertEquals("holder.unit: SECOND not equal to vector unit: 
MILLISECOND", ex.getMessage()); - - writer.endList(); - vector.setValueCount(1); - - Field expectedDataField = - new Field( - BaseRepeatedValueVector.DATA_VECTOR_NAME, - FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), - null); - Field expectedField = - new Field( - vector.getName(), - FieldType.nullable(ArrowType.List.INSTANCE), - Arrays.asList(expectedDataField)); - - assertEquals(expectedField, writer.getField()); - } - } - - @Test - public void testWriterGetFixedSizeBinaryField() throws Exception { - // Adapted from: TestComplexWriter.java:fixedSizeBinaryWriters - // test values - int numValues = 10; - int byteWidth = 9; - byte[][] values = new byte[numValues][byteWidth]; - for (int i = 0; i < numValues; i++) { - for (int j = 0; j < byteWidth; j++) { - values[i][j] = ((byte) i); - } - } - ArrowBuf[] bufs = new ArrowBuf[numValues]; - for (int i = 0; i < numValues; i++) { - bufs[i] = allocator.buffer(byteWidth); - bufs[i].setBytes(0, values[i]); - } - - try (final ListVector vector = ListVector.empty("list", allocator)) { - org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); - writer.allocate(); - - FixedSizeBinaryHolder binHolder = new FixedSizeBinaryHolder(); - binHolder.byteWidth = byteWidth; - writer.startList(); - for (int i = 0; i < numValues; i++) { - binHolder.buffer = bufs[i]; - writer.fixedSizeBinary().write(binHolder); - } - - // Writing with a different byteWidth should throw - // Note just reusing the last buffer value since that won't matter here anyway - binHolder.byteWidth = 3; - IllegalArgumentException ex = - assertThrows( - IllegalArgumentException.class, () -> writer.fixedSizeBinary().write(binHolder)); - assertEquals("holder.byteWidth: 3 not equal to vector byteWidth: 9", ex.getMessage()); - - writer.endList(); - vector.setValueCount(1); - - Field expectedDataField = - new Field( - BaseRepeatedValueVector.DATA_VECTOR_NAME, - FieldType.nullable(new ArrowType.FixedSizeBinary(byteWidth)), - null); - 
Field expectedField = - new Field( - vector.getName(), - FieldType.nullable(ArrowType.List.INSTANCE), - Arrays.asList(expectedDataField)); - - assertEquals(expectedField, writer.getField()); - } - AutoCloseables.close(bufs); - } - - @Test - public void testClose() throws Exception { - try (final ListVector vector = ListVector.empty("list", allocator)) { - - UnionListWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writer.startList(); - writer.integer().writeInt(1); - writer.integer().writeInt(2); - writer.endList(); - vector.setValueCount(2); - - assertTrue(vector.getBufferSize() > 0); - assertTrue(vector.getDataVector().getBufferSize() > 0); - - writer.close(); - assertEquals(0, vector.getBufferSize()); - assertEquals(0, vector.getDataVector().getBufferSize()); - } - } - - @Test - public void testGetBufferSizeFor() { - try (final ListVector vector = ListVector.empty("list", allocator)) { - - UnionListWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writeIntValues(writer, new int[] {1, 2}); - writeIntValues(writer, new int[] {3, 4}); - writeIntValues(writer, new int[] {5, 6}); - writeIntValues(writer, new int[] {7, 8, 9, 10}); - writeIntValues(writer, new int[] {11, 12, 13, 14}); - writer.setValueCount(5); - - IntVector dataVector = (IntVector) vector.getDataVector(); - int[] indices = new int[] {0, 2, 4, 6, 10, 14}; - - for (int valueCount = 1; valueCount <= 5; valueCount++) { - int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount); - int offsetBufferSize = (valueCount + 1) * BaseRepeatedValueVector.OFFSET_WIDTH; - - int expectedSize = - validityBufferSize - + offsetBufferSize - + dataVector.getBufferSizeFor(indices[valueCount]); - assertEquals(expectedSize, vector.getBufferSizeFor(valueCount)); - } - } - } - - @Test - public void testIsEmpty() { - try (final ListVector vector = ListVector.empty("list", allocator)) { - UnionListWriter writer = vector.getWriter(); - 
writer.allocate(); - - // set values [1,2], null, [], [5,6] - writeIntValues(writer, new int[] {1, 2}); - writer.setPosition(2); - writeIntValues(writer, new int[] {}); - writeIntValues(writer, new int[] {5, 6}); - writer.setValueCount(4); - - assertFalse(vector.isEmpty(0)); - assertTrue(vector.isNull(1)); - assertTrue(vector.isEmpty(1)); - assertFalse(vector.isNull(2)); - assertTrue(vector.isEmpty(2)); - assertFalse(vector.isEmpty(3)); - } - } - - @Test - public void testTotalCapacity() { - final FieldType type = FieldType.nullable(MinorType.INT.getType()); - try (final ListVector vector = new ListVector("list", allocator, type, null)) { - // Force the child vector to be allocated based on the type - // (this is a bad API: we have to track and repeat the type twice) - vector.addOrGetVector(type); - - // Specify the allocation size but do not actually allocate - vector.setInitialTotalCapacity(10, 100); - - // Finally actually do the allocation - vector.allocateNewSafe(); - - // Note: allocator rounds up and can be greater than the requested allocation. - assertTrue(vector.getValueCapacity() >= 10); - assertTrue(vector.getDataVector().getValueCapacity() >= 100); - } - } - - @Test - public void testGetTransferPairWithField() { - try (ListVector fromVector = ListVector.empty("input", allocator)) { - UnionListWriter writer = fromVector.getWriter(); - writer.allocate(); - writer.setPosition(0); // optional - writer.startList(); - writer.bigInt().writeBigInt(1); - writer.bigInt().writeBigInt(2); - writer.bigInt().writeBigInt(3); - writer.endList(); - writer.setValueCount(1); - final TransferPair transferPair = - fromVector.getTransferPair(fromVector.getField(), allocator); - final ListVector toVector = (ListVector) transferPair.getTo(); - // Field inside a new vector created by reusing a field should be the same in memory as the - // original field. 
- assertSame(toVector.getField(), fromVector.getField()); - } - } - - private void writeIntValues(UnionListWriter writer, int[] values) { - writer.startList(); - for (int v : values) { - writer.integer().writeInt(v); - } - writer.endList(); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java deleted file mode 100644 index 639585fc48d0a..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java +++ /dev/null @@ -1,2226 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.BaseRepeatedValueVector; -import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.impl.UnionListViewWriter; -import org.apache.arrow.vector.holders.DurationHolder; -import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestListViewVector { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testBasicListViewVector() { - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - UnionListViewWriter listViewWriter = listViewVector.getWriter(); - - /* allocate memory */ - listViewWriter.allocate(); - - /* write the first list at index 0 */ - listViewWriter.setPosition(0); - listViewWriter.startListView(); - - 
listViewWriter.bigInt().writeBigInt(12); - listViewWriter.bigInt().writeBigInt(-7); - listViewWriter.bigInt().writeBigInt(25); - listViewWriter.endListView(); - - /* the second list at index 1 is null (we are not setting any)*/ - - /* write the third list at index 2 */ - listViewWriter.setPosition(2); - listViewWriter.startListView(); - - listViewWriter.bigInt().writeBigInt(0); - listViewWriter.bigInt().writeBigInt(-127); - listViewWriter.bigInt().writeBigInt(127); - listViewWriter.bigInt().writeBigInt(50); - listViewWriter.endListView(); - - /* write the fourth list at index 3 (empty list) */ - listViewWriter.setPosition(3); - listViewWriter.startListView(); - listViewWriter.endListView(); - - /* write the fifth list at index 4 */ - listViewWriter.setPosition(4); - listViewWriter.startListView(); - listViewWriter.bigInt().writeBigInt(1); - listViewWriter.bigInt().writeBigInt(2); - listViewWriter.bigInt().writeBigInt(3); - listViewWriter.bigInt().writeBigInt(4); - listViewWriter.endListView(); - - listViewWriter.setValueCount(5); - // check value count - assertEquals(5, listViewVector.getValueCount()); - - /* get vector at index 0 -- the value is a BigIntVector*/ - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - final FieldVector dataVec = listViewVector.getDataVector(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * 
BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check data vector - assertEquals(12, ((BigIntVector) dataVec).get(0)); - assertEquals(-7, ((BigIntVector) dataVec).get(1)); - assertEquals(25, ((BigIntVector) dataVec).get(2)); - assertEquals(0, ((BigIntVector) dataVec).get(3)); - assertEquals(-127, ((BigIntVector) dataVec).get(4)); - assertEquals(127, ((BigIntVector) dataVec).get(5)); - assertEquals(50, ((BigIntVector) dataVec).get(6)); - assertEquals(1, ((BigIntVector) dataVec).get(7)); - assertEquals(2, ((BigIntVector) dataVec).get(8)); - assertEquals(3, ((BigIntVector) dataVec).get(9)); - assertEquals(4, ((BigIntVector) dataVec).get(10)); - - listViewVector.validate(); - } - } - - @Test - public void testImplicitNullVectors() { - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - UnionListViewWriter listViewWriter = listViewVector.getWriter(); - /* allocate memory */ - listViewWriter.allocate(); - - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - /* write the first list at index 0 */ - listViewWriter.setPosition(0); - listViewWriter.startList(); - - listViewWriter.bigInt().writeBigInt(12); - listViewWriter.bigInt().writeBigInt(-7); - listViewWriter.bigInt().writeBigInt(25); - listViewWriter.endListView(); - - int offSet0 = offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH); - int size0 = sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH); - - // after the first list is written, - // the initial offset must be 0, - // the size must be 3 (as there are 3 elements in the array), - // the lastSet must be 0 since, the first list is written at index 0. 
- - assertEquals(0, offSet0); - assertEquals(3, size0); - - listViewWriter.setPosition(5); - listViewWriter.startListView(); - - // writing the 6th list at index 5, - // and the list items from index 1 through 4 are not populated. - // but since there is a gap between the 0th and 5th list, in terms - // of buffer allocation, the offset and size buffers must be updated - // to reflect the implicit null vectors. - - for (int i = 1; i < 5; i++) { - int offSet = offSetBuffer.getInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH); - int size = sizeBuffer.getInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH); - // Since the list is not written, the offset and size must equal to child vector's size - // i.e., 3, and size should be 0 as the list is not written. - // And the last set value is the value currently being written, which is 5. - assertEquals(0, offSet); - assertEquals(0, size); - } - - listViewWriter.bigInt().writeBigInt(12); - listViewWriter.bigInt().writeBigInt(25); - listViewWriter.endListView(); - - int offSet5 = offSetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH); - int size5 = sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH); - - assertEquals(3, offSet5); - assertEquals(2, size5); - - listViewWriter.setPosition(10); - listViewWriter.startListView(); - - // writing the 11th list at index 10, - // and the list items from index 6 through 10 are not populated. - // but since there is a gap between the 5th and 11th list, in terms - // of buffer allocation, the offset and size buffers must be updated - // to reflect the implicit null vectors. - for (int i = 6; i < 10; i++) { - int offSet = offSetBuffer.getInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH); - int size = sizeBuffer.getInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH); - // Since the list is not written, the offset and size must equal to 0 - // and size should be 0 as the list is not written. - // And the last set value is the value currently being written, which is 10. 
- assertEquals(0, offSet); - assertEquals(0, size); - } - - listViewWriter.bigInt().writeBigInt(12); - listViewWriter.endListView(); - - int offSet11 = offSetBuffer.getInt(10 * BaseRepeatedValueViewVector.OFFSET_WIDTH); - int size11 = sizeBuffer.getInt(10 * BaseRepeatedValueViewVector.SIZE_WIDTH); - - assertEquals(5, offSet11); - assertEquals(1, size11); - - listViewVector.setValueCount(11); - - listViewVector.validate(); - } - } - - @Test - public void testNestedListViewVector() { - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - UnionListViewWriter listViewWriter = listViewVector.getWriter(); - - /* allocate memory */ - listViewWriter.allocate(); - - /* the dataVector that backs a listVector will also be a - * listVector for this test. - */ - - /* write one or more inner lists at index 0 */ - listViewWriter.setPosition(0); - listViewWriter.startListView(); - - listViewWriter.listView().startListView(); - listViewWriter.listView().bigInt().writeBigInt(50); - listViewWriter.listView().bigInt().writeBigInt(100); - listViewWriter.listView().bigInt().writeBigInt(200); - listViewWriter.listView().endListView(); - - listViewWriter.listView().startListView(); - listViewWriter.listView().bigInt().writeBigInt(75); - listViewWriter.listView().bigInt().writeBigInt(125); - listViewWriter.listView().bigInt().writeBigInt(150); - listViewWriter.listView().bigInt().writeBigInt(175); - listViewWriter.listView().endListView(); - - listViewWriter.endListView(); - - /* write one or more inner lists at index 1 */ - listViewWriter.setPosition(1); - listViewWriter.startListView(); - - listViewWriter.listView().startListView(); - listViewWriter.listView().bigInt().writeBigInt(10); - listViewWriter.listView().endListView(); - - listViewWriter.listView().startListView(); - listViewWriter.listView().bigInt().writeBigInt(15); - listViewWriter.listView().bigInt().writeBigInt(20); - listViewWriter.listView().endListView(); - - 
listViewWriter.listView().startListView(); - listViewWriter.listView().bigInt().writeBigInt(25); - listViewWriter.listView().bigInt().writeBigInt(30); - listViewWriter.listView().bigInt().writeBigInt(35); - listViewWriter.listView().endListView(); - - listViewWriter.endListView(); - - listViewVector.setValueCount(2); - - // [[[50,100,200],[75,125,150,175]], [[10],[15,20],[25,30,35]]] - - assertEquals(2, listViewVector.getValueCount()); - - /* get listViewVector value at index 0 -- the value itself is a listViewVector */ - Object result = listViewVector.getObject(0); - ArrayList> resultSet = (ArrayList>) result; - ArrayList list; - - assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ - assertEquals(3, resultSet.get(0).size()); /* size of the first inner list */ - assertEquals(4, resultSet.get(1).size()); /* size of the second inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(50), list.get(0)); - assertEquals(Long.valueOf(100), list.get(1)); - assertEquals(Long.valueOf(200), list.get(2)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(75), list.get(0)); - assertEquals(Long.valueOf(125), list.get(1)); - assertEquals(Long.valueOf(150), list.get(2)); - assertEquals(Long.valueOf(175), list.get(3)); - - /* get listViewVector value at index 1 -- the value itself is a listViewVector */ - result = listViewVector.getObject(1); - resultSet = (ArrayList>) result; - - assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ - assertEquals(1, resultSet.get(0).size()); /* size of the first inner list */ - assertEquals(2, resultSet.get(1).size()); /* size of the second inner list */ - assertEquals(3, resultSet.get(2).size()); /* size of the third inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(10), list.get(0)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(15), list.get(0)); - assertEquals(Long.valueOf(20), list.get(1)); - - list = resultSet.get(2); - assertEquals(Long.valueOf(25), 
list.get(0)); - assertEquals(Long.valueOf(30), list.get(1)); - assertEquals(Long.valueOf(35), list.get(2)); - - /* check underlying bitVector */ - assertFalse(listViewVector.isNull(0)); - assertFalse(listViewVector.isNull(1)); - - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(2, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - listViewVector.validate(); - } - } - - @Test - public void testNestedListVector() throws Exception { - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - - MinorType listType = MinorType.LISTVIEW; - MinorType scalarType = MinorType.BIGINT; - - listViewVector.addOrGetVector(FieldType.nullable(listType.getType())); - - ListViewVector innerList1 = (ListViewVector) listViewVector.getDataVector(); - innerList1.addOrGetVector(FieldType.nullable(listType.getType())); - - ListViewVector innerList2 = (ListViewVector) innerList1.getDataVector(); - innerList2.addOrGetVector(FieldType.nullable(listType.getType())); - - ListViewVector innerList3 = (ListViewVector) innerList2.getDataVector(); - innerList3.addOrGetVector(FieldType.nullable(listType.getType())); - - ListViewVector innerList4 = (ListViewVector) innerList3.getDataVector(); - innerList4.addOrGetVector(FieldType.nullable(listType.getType())); - - ListViewVector innerList5 = (ListViewVector) innerList4.getDataVector(); - innerList5.addOrGetVector(FieldType.nullable(listType.getType())); - - ListViewVector innerList6 = (ListViewVector) innerList5.getDataVector(); - innerList6.addOrGetVector(FieldType.nullable(scalarType.getType())); - - 
listViewVector.setInitialCapacity(128); - - listViewVector.validate(); - } - } - - private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) { - for (int i = 0; i < bufValues.length; i++) { - buffer.setInt(i * bufWidth, bufValues[i]); - } - } - - /* - * Setting up the buffers directly needs to be validated with the base method used in - * the ListVector class where we use the approach of startListView(), - * write to the child vector and endListView(). - *

    - * To support this, we have to consider the following scenarios; - *

    - * 1. Only using directly buffer-based inserts. - * 2. Default list insertion followed by buffer-based inserts. - * 3. Buffer-based inserts followed by default list insertion. - */ - - /* Setting up buffers directly would require the following steps to be taken - * 0. Allocate buffers in listViewVector by calling `allocateNew` method. - * 1. Initialize the child vector using `initializeChildrenFromFields` method. - * 2. Set values in the child vector. - * 3. Set validity, offset and size buffers using `setValidity`, - * `setOffset` and `setSize` methods. - * 4. Set value count using `setValueCount` method. - */ - @Test - public void testBasicListViewSet() { - - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - // Allocate buffers in listViewVector by calling `allocateNew` method. - listViewVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. - FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), null, null); - Field field = new Field("child-vector", fieldType, null); - listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. - FieldVector fieldVector = listViewVector.getDataVector(); - fieldVector.clear(); - - BigIntVector childVector = (BigIntVector) fieldVector; - childVector.allocateNew(7); - - childVector.set(0, 12); - childVector.set(1, -7); - childVector.set(2, 25); - childVector.set(3, 0); - childVector.set(4, -127); - childVector.set(5, 127); - childVector.set(6, 50); - - childVector.setValueCount(7); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. 
- listViewVector.setOffset(0, 0); - listViewVector.setOffset(1, 3); - listViewVector.setOffset(2, 3); - listViewVector.setOffset(3, 7); - - listViewVector.setSize(0, 3); - listViewVector.setSize(1, 0); - listViewVector.setSize(2, 4); - listViewVector.setSize(3, 0); - - listViewVector.setValidity(0, 1); - listViewVector.setValidity(1, 0); - listViewVector.setValidity(2, 1); - listViewVector.setValidity(3, 1); - - // Set value count using `setValueCount` method. - listViewVector.setValueCount(4); - - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check values - assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); - assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); - assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); - assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); - assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); - assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); - assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); - - listViewVector.validate(); - } - } - - @Test - public void testBasicListViewSetNested() 
{ - // Expected listview - // [[[50,100,200],[75,125,150,175]],[[10],[15,20],[25,30,35]]] - - // Setting child vector - // [[50,100,200],[75,125,150,175],[10],[15,20],[25,30,35]] - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - // Allocate buffers in listViewVector by calling `allocateNew` method. - listViewVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. - FieldType fieldType = new FieldType(true, new ArrowType.ListView(), null, null); - FieldType childFieldType = new FieldType(true, new ArrowType.Int(64, true), null, null); - Field childField = new Field("child-vector", childFieldType, null); - List children = new ArrayList<>(); - children.add(childField); - Field field = new Field("child-vector", fieldType, children); - listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. - FieldVector fieldVector = listViewVector.getDataVector(); - fieldVector.clear(); - - ListViewVector childVector = (ListViewVector) fieldVector; - UnionListViewWriter listViewWriter = childVector.getWriter(); - listViewWriter.allocate(); - - listViewWriter.setPosition(0); - listViewWriter.startListView(); - - listViewWriter.bigInt().writeBigInt(50); - listViewWriter.bigInt().writeBigInt(100); - listViewWriter.bigInt().writeBigInt(200); - - listViewWriter.endListView(); - - listViewWriter.setPosition(1); - listViewWriter.startListView(); - - listViewWriter.bigInt().writeBigInt(75); - listViewWriter.bigInt().writeBigInt(125); - listViewWriter.bigInt().writeBigInt(150); - listViewWriter.bigInt().writeBigInt(175); - - listViewWriter.endListView(); - - listViewWriter.setPosition(2); - listViewWriter.startListView(); - - listViewWriter.bigInt().writeBigInt(10); - - listViewWriter.endListView(); - - listViewWriter.startListView(); - listViewWriter.setPosition(3); - - listViewWriter.bigInt().writeBigInt(15); - 
listViewWriter.bigInt().writeBigInt(20); - - listViewWriter.endListView(); - - listViewWriter.startListView(); - listViewWriter.setPosition(4); - - listViewWriter.bigInt().writeBigInt(25); - listViewWriter.bigInt().writeBigInt(30); - listViewWriter.bigInt().writeBigInt(35); - - listViewWriter.endListView(); - - childVector.setValueCount(5); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. - - listViewVector.setValidity(0, 1); - listViewVector.setValidity(1, 1); - - listViewVector.setOffset(0, 0); - listViewVector.setOffset(1, 2); - - listViewVector.setSize(0, 2); - listViewVector.setSize(1, 3); - - // Set value count using `setValueCount` method. - listViewVector.setValueCount(2); - - assertEquals(2, listViewVector.getValueCount()); - - /* get listViewVector value at index 0 -- the value itself is a listViewVector */ - Object result = listViewVector.getObject(0); - ArrayList> resultSet = (ArrayList>) result; - ArrayList list; - - assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ - assertEquals(3, resultSet.get(0).size()); /* size of the first inner list */ - assertEquals(4, resultSet.get(1).size()); /* size of the second inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(50), list.get(0)); - assertEquals(Long.valueOf(100), list.get(1)); - assertEquals(Long.valueOf(200), list.get(2)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(75), list.get(0)); - assertEquals(Long.valueOf(125), list.get(1)); - assertEquals(Long.valueOf(150), list.get(2)); - assertEquals(Long.valueOf(175), list.get(3)); - - /* get listViewVector value at index 1 -- the value itself is a listViewVector */ - result = listViewVector.getObject(1); - resultSet = (ArrayList>) result; - - assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ - assertEquals(1, resultSet.get(0).size()); /* size of the first inner list */ - assertEquals(2, resultSet.get(1).size()); /* size of the second inner 
list */ - assertEquals(3, resultSet.get(2).size()); /* size of the third inner list */ - - list = resultSet.get(0); - assertEquals(Long.valueOf(10), list.get(0)); - - list = resultSet.get(1); - assertEquals(Long.valueOf(15), list.get(0)); - assertEquals(Long.valueOf(20), list.get(1)); - - list = resultSet.get(2); - assertEquals(Long.valueOf(25), list.get(0)); - assertEquals(Long.valueOf(30), list.get(1)); - assertEquals(Long.valueOf(35), list.get(2)); - - /* check underlying bitVector */ - assertFalse(listViewVector.isNull(0)); - assertFalse(listViewVector.isNull(1)); - - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(2, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - listViewVector.validate(); - } - } - - @Test - public void testBasicListViewSetWithListViewWriter() { - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - // Allocate buffers in listViewVector by calling `allocateNew` method. - listViewVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. - FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), null, null); - Field field = new Field("child-vector", fieldType, null); - listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. 
- FieldVector fieldVector = listViewVector.getDataVector(); - fieldVector.clear(); - - BigIntVector childVector = (BigIntVector) fieldVector; - childVector.allocateNew(7); - - childVector.set(0, 12); - childVector.set(1, -7); - childVector.set(2, 25); - childVector.set(3, 0); - childVector.set(4, -127); - childVector.set(5, 127); - childVector.set(6, 50); - - childVector.setValueCount(7); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. - - listViewVector.setValidity(0, 1); - listViewVector.setValidity(1, 0); - listViewVector.setValidity(2, 1); - listViewVector.setValidity(3, 1); - - listViewVector.setOffset(0, 0); - listViewVector.setOffset(1, 3); - listViewVector.setOffset(2, 3); - listViewVector.setOffset(3, 7); - - listViewVector.setSize(0, 3); - listViewVector.setSize(1, 0); - listViewVector.setSize(2, 4); - listViewVector.setSize(3, 0); - - // Set value count using `setValueCount` method. - listViewVector.setValueCount(4); - - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check values - assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); - assertEquals(-7, ((BigIntVector) 
listViewVector.getDataVector()).get(1)); - assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); - assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); - assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); - assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); - assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); - - UnionListViewWriter listViewWriter = listViewVector.getWriter(); - - listViewWriter.setPosition(4); - listViewWriter.startListView(); - - listViewWriter.bigInt().writeBigInt(121); - listViewWriter.bigInt().writeBigInt(-71); - listViewWriter.bigInt().writeBigInt(251); - listViewWriter.endListView(); - - listViewVector.setValueCount(5); - - // check offset buffer - assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check values - assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); - assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); - assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); - assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); - assertEquals(-127, 
((BigIntVector) listViewVector.getDataVector()).get(4)); - assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); - assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); - assertEquals(121, ((BigIntVector) listViewVector.getDataVector()).get(7)); - assertEquals(-71, ((BigIntVector) listViewVector.getDataVector()).get(8)); - assertEquals(251, ((BigIntVector) listViewVector.getDataVector()).get(9)); - - listViewVector.validate(); - } - } - - @Test - public void testGetBufferAddress() throws Exception { - try (ListViewVector listViewVector = ListViewVector.empty("vector", allocator)) { - - UnionListViewWriter listViewWriter = listViewVector.getWriter(); - boolean error = false; - - listViewWriter.allocate(); - - listViewWriter.setPosition(0); - listViewWriter.startListView(); - listViewWriter.bigInt().writeBigInt(50); - listViewWriter.bigInt().writeBigInt(100); - listViewWriter.bigInt().writeBigInt(200); - listViewWriter.endListView(); - - listViewWriter.setPosition(1); - listViewWriter.startListView(); - listViewWriter.bigInt().writeBigInt(250); - listViewWriter.bigInt().writeBigInt(300); - listViewWriter.endListView(); - - listViewVector.setValueCount(2); - - /* check listVector contents */ - Object result = listViewVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(50), resultSet.get(0)); - assertEquals(Long.valueOf(100), resultSet.get(1)); - assertEquals(Long.valueOf(200), resultSet.get(2)); - - result = listViewVector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(250), resultSet.get(0)); - assertEquals(Long.valueOf(300), resultSet.get(1)); - - List buffers = listViewVector.getFieldBuffers(); - - long bitAddress = listViewVector.getValidityBufferAddress(); - long offsetAddress = listViewVector.getOffsetBufferAddress(); - long sizeAddress = listViewVector.getSizeBufferAddress(); - - 
try { - listViewVector.getDataBufferAddress(); - } catch (UnsupportedOperationException ue) { - error = true; - } finally { - assertTrue(error); - } - - assertEquals(3, buffers.size()); - assertEquals(bitAddress, buffers.get(0).memoryAddress()); - assertEquals(offsetAddress, buffers.get(1).memoryAddress()); - assertEquals(sizeAddress, buffers.get(2).memoryAddress()); - - /* (3+2)/2 */ - assertEquals(2.5, listViewVector.getDensity(), 0); - listViewVector.validate(); - } - } - - @Test - public void testConsistentChildName() throws Exception { - try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { - String emptyListStr = listViewVector.getField().toString(); - assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME)); - - listViewVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - String emptyVectorStr = listViewVector.getField().toString(); - assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME)); - } - } - - @Test - public void testSetInitialCapacity() { - try (final ListViewVector vector = ListViewVector.empty("", allocator)) { - vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - - vector.setInitialCapacity(512); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 512); - - vector.setInitialCapacity(512, 4); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4); - - vector.setInitialCapacity(512, 0.1); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 51); - - vector.setInitialCapacity(512, 0.01); - vector.allocateNew(); - assertEquals(512, vector.getValueCapacity()); - assertTrue(vector.getDataVector().getValueCapacity() >= 5); - - vector.setInitialCapacity(5, 0.1); - vector.allocateNew(); - assertEquals(8, vector.getValueCapacity()); - 
assertTrue(vector.getDataVector().getValueCapacity() >= 1); - - vector.validate(); - } - } - - @Test - public void testClearAndReuse() { - try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { - BigIntVector bigIntVector = - (BigIntVector) - vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector(); - vector.setInitialCapacity(10); - vector.allocateNew(); - - vector.startNewValue(0); - bigIntVector.setSafe(0, 7); - vector.endValue(0, 1); - vector.startNewValue(1); - bigIntVector.setSafe(1, 8); - vector.endValue(1, 1); - vector.setValueCount(2); - - Object result = vector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(Long.valueOf(7), resultSet.get(0)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(Long.valueOf(8), resultSet.get(0)); - - // Clear and release the buffers to trigger a realloc when adding next value - vector.clear(); - - // The list vector should reuse a buffer when reallocating the offset buffer - vector.startNewValue(0); - bigIntVector.setSafe(0, 7); - vector.endValue(0, 1); - vector.startNewValue(1); - bigIntVector.setSafe(1, 8); - vector.endValue(1, 1); - vector.setValueCount(2); - - result = vector.getObject(0); - resultSet = (ArrayList) result; - assertEquals(Long.valueOf(7), resultSet.get(0)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(Long.valueOf(8), resultSet.get(0)); - - vector.validate(); - } - } - - @Test - public void testWriterGetField() { - // adopted from ListVector test cases - try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { - - UnionListViewWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writer.startListView(); - writer.integer().writeInt(1); - writer.integer().writeInt(2); - writer.endListView(); - vector.setValueCount(2); - - Field expectedDataField = - new Field( - BaseRepeatedValueVector.DATA_VECTOR_NAME, - 
FieldType.nullable(new ArrowType.Int(32, true)), - null); - Field expectedField = - new Field( - vector.getName(), - FieldType.nullable(ArrowType.ListView.INSTANCE), - Arrays.asList(expectedDataField)); - - assertEquals(expectedField, writer.getField()); - - vector.validate(); - } - } - - @Test - public void testWriterUsingHolderGetTimestampMilliTZField() { - // adopted from ListVector test cases - try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { - org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); - writer.allocate(); - - TimeStampMilliTZHolder holder = new TimeStampMilliTZHolder(); - holder.timezone = "SomeFakeTimeZone"; - writer.startListView(); - holder.value = 12341234L; - writer.timeStampMilliTZ().write(holder); - holder.value = 55555L; - writer.timeStampMilliTZ().write(holder); - - // Writing with a different timezone should throw - holder.timezone = "AsdfTimeZone"; - holder.value = 77777; - IllegalArgumentException ex = - assertThrows( - IllegalArgumentException.class, () -> writer.timeStampMilliTZ().write(holder)); - assertEquals( - "holder.timezone: AsdfTimeZone not equal to vector timezone: SomeFakeTimeZone", - ex.getMessage()); - - writer.endListView(); - vector.setValueCount(1); - - Field expectedDataField = - new Field( - BaseRepeatedValueVector.DATA_VECTOR_NAME, - FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "SomeFakeTimeZone")), - null); - Field expectedField = - new Field( - vector.getName(), - FieldType.nullable(ArrowType.ListView.INSTANCE), - Arrays.asList(expectedDataField)); - - assertEquals(expectedField, writer.getField()); - - vector.validate(); - } - } - - @Test - public void testWriterGetDurationField() { - // adopted from ListVector test cases - try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { - org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); - writer.allocate(); - - DurationHolder 
durationHolder = new DurationHolder(); - durationHolder.unit = TimeUnit.MILLISECOND; - - writer.startListView(); - durationHolder.value = 812374L; - writer.duration().write(durationHolder); - durationHolder.value = 143451L; - writer.duration().write(durationHolder); - - // Writing with a different unit should throw - durationHolder.unit = TimeUnit.SECOND; - durationHolder.value = 8888888; - IllegalArgumentException ex = - assertThrows( - IllegalArgumentException.class, () -> writer.duration().write(durationHolder)); - assertEquals("holder.unit: SECOND not equal to vector unit: MILLISECOND", ex.getMessage()); - - writer.endListView(); - vector.setValueCount(1); - - Field expectedDataField = - new Field( - BaseRepeatedValueVector.DATA_VECTOR_NAME, - FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), - null); - Field expectedField = - new Field( - vector.getName(), - FieldType.nullable(ArrowType.ListView.INSTANCE), - Arrays.asList(expectedDataField)); - - assertEquals(expectedField, writer.getField()); - - vector.validate(); - } - } - - @Test - public void testClose() throws Exception { - try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { - - UnionListViewWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writer.startListView(); - writer.integer().writeInt(1); - writer.integer().writeInt(2); - writer.endListView(); - vector.setValueCount(2); - - assertTrue(vector.getBufferSize() > 0); - assertTrue(vector.getDataVector().getBufferSize() > 0); - - writer.close(); - assertEquals(0, vector.getBufferSize()); - assertEquals(0, vector.getDataVector().getBufferSize()); - - vector.validate(); - } - } - - @Test - public void testGetBufferSizeFor() { - try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { - - UnionListViewWriter writer = vector.getWriter(); - writer.allocate(); - - // set some values - writeIntValues(writer, new int[] {1, 2}); - writeIntValues(writer, new 
int[] {3, 4}); - writeIntValues(writer, new int[] {5, 6}); - writeIntValues(writer, new int[] {7, 8, 9, 10}); - writeIntValues(writer, new int[] {11, 12, 13, 14}); - writer.setValueCount(5); - - IntVector dataVector = (IntVector) vector.getDataVector(); - int[] indices = new int[] {0, 2, 4, 6, 10, 14}; - - for (int valueCount = 1; valueCount <= 5; valueCount++) { - int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount); - int offsetBufferSize = valueCount * BaseRepeatedValueViewVector.OFFSET_WIDTH; - int sizeBufferSize = valueCount * BaseRepeatedValueViewVector.SIZE_WIDTH; - - int expectedSize = - validityBufferSize - + offsetBufferSize - + sizeBufferSize - + dataVector.getBufferSizeFor(indices[valueCount]); - assertEquals(expectedSize, vector.getBufferSizeFor(valueCount)); - } - vector.validate(); - } - } - - @Test - public void testIsEmpty() { - try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { - UnionListViewWriter writer = vector.getWriter(); - writer.allocate(); - - // set values [1,2], null, [], [5,6] - writeIntValues(writer, new int[] {1, 2}); - writer.setPosition(2); - writeIntValues(writer, new int[] {}); - writeIntValues(writer, new int[] {5, 6}); - writer.setValueCount(4); - - assertFalse(vector.isEmpty(0)); - assertTrue(vector.isNull(1)); - assertTrue(vector.isEmpty(1)); - assertFalse(vector.isNull(2)); - assertTrue(vector.isEmpty(2)); - assertFalse(vector.isEmpty(3)); - - vector.validate(); - } - } - - @Test - public void testTotalCapacity() { - // adopted from ListVector test cases - final FieldType type = FieldType.nullable(MinorType.INT.getType()); - try (final ListViewVector vector = new ListViewVector("listview", allocator, type, null)) { - // Force the child vector to be allocated based on the type - // (this is a bad API: we have to track and repeat the type twice) - vector.addOrGetVector(type); - - // Specify the allocation size but do not allocate - vector.setInitialTotalCapacity(10, 100); - 
- // Finally, actually do the allocation - vector.allocateNewSafe(); - - // Note: allocator rounds up and can be greater than the requested allocation. - assertTrue(vector.getValueCapacity() >= 10); - assertTrue(vector.getDataVector().getValueCapacity() >= 100); - - vector.validate(); - } - } - - @Test - public void testSetNull1() { - try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { - UnionListViewWriter writer = vector.getWriter(); - writer.allocate(); - - writer.setPosition(0); - writer.startListView(); - writer.bigInt().writeBigInt(10); - writer.bigInt().writeBigInt(20); - writer.endListView(); - - vector.setNull(1); - - writer.setPosition(2); - writer.startListView(); - writer.bigInt().writeBigInt(30); - writer.bigInt().writeBigInt(40); - writer.endListView(); - - vector.setNull(3); - vector.setNull(4); - - writer.setPosition(5); - writer.startListView(); - writer.bigInt().writeBigInt(50); - writer.bigInt().writeBigInt(60); - writer.endListView(); - - vector.setValueCount(6); - - assertFalse(vector.isNull(0)); - assertTrue(vector.isNull(1)); - assertFalse(vector.isNull(2)); - assertTrue(vector.isNull(3)); - assertTrue(vector.isNull(4)); - assertFalse(vector.isNull(5)); - - // validate buffers - - final ArrowBuf validityBuffer = vector.getValidityBuffer(); - final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = vector.getSizeBuffer(); - - assertEquals(1, BitVectorHelper.get(validityBuffer, 0)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 1)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 2)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 3)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); - - assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(2, offsetBuffer.getInt(2 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(4, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // validate values - - Object result = vector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(10), resultSet.get(0)); - assertEquals(Long.valueOf(20), resultSet.get(1)); - - result = vector.getObject(2); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(30), resultSet.get(0)); - assertEquals(Long.valueOf(40), resultSet.get(1)); - - result = vector.getObject(5); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(50), resultSet.get(0)); - assertEquals(Long.valueOf(60), resultSet.get(1)); - - vector.validate(); - } - } - - @Test - public void testSetNull2() { - try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { - // validate setting nulls first and then writing values - UnionListViewWriter writer = vector.getWriter(); - writer.allocate(); - - vector.setNull(0); - vector.setNull(2); - vector.setNull(4); - - writer.setPosition(1); - writer.startListView(); - writer.bigInt().writeBigInt(10); - writer.bigInt().writeBigInt(20); - writer.bigInt().writeBigInt(30); - writer.endListView(); - - 
writer.setPosition(3); - writer.startListView(); - writer.bigInt().writeBigInt(40); - writer.bigInt().writeBigInt(50); - writer.endListView(); - - writer.setPosition(5); - writer.startListView(); - writer.bigInt().writeBigInt(60); - writer.bigInt().writeBigInt(70); - writer.bigInt().writeBigInt(80); - writer.endListView(); - - vector.setValueCount(6); - - assertTrue(vector.isNull(0)); - assertFalse(vector.isNull(1)); - assertTrue(vector.isNull(2)); - assertFalse(vector.isNull(3)); - assertTrue(vector.isNull(4)); - assertFalse(vector.isNull(5)); - - // validate buffers - - final ArrowBuf validityBuffer = vector.getValidityBuffer(); - final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = vector.getSizeBuffer(); - - assertEquals(0, BitVectorHelper.get(validityBuffer, 0)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 1)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 2)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 3)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); - - assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(5, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(4 * 
BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // validate values - - Object result = vector.getObject(1); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(10), resultSet.get(0)); - assertEquals(Long.valueOf(20), resultSet.get(1)); - assertEquals(Long.valueOf(30), resultSet.get(2)); - - result = vector.getObject(3); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(40), resultSet.get(0)); - assertEquals(Long.valueOf(50), resultSet.get(1)); - - result = vector.getObject(5); - resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(60), resultSet.get(0)); - assertEquals(Long.valueOf(70), resultSet.get(1)); - assertEquals(Long.valueOf(80), resultSet.get(2)); - - vector.validate(); - } - } - - @Test - public void testSetNull3() { - try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { - // validate setting values first and then writing nulls - UnionListViewWriter writer = vector.getWriter(); - writer.allocate(); - - writer.setPosition(1); - writer.startListView(); - writer.bigInt().writeBigInt(10); - writer.bigInt().writeBigInt(20); - writer.bigInt().writeBigInt(30); - writer.endListView(); - - writer.setPosition(3); - writer.startListView(); - writer.bigInt().writeBigInt(40); - writer.bigInt().writeBigInt(50); - writer.endListView(); - - writer.setPosition(5); - writer.startListView(); - writer.bigInt().writeBigInt(60); - writer.bigInt().writeBigInt(70); - writer.bigInt().writeBigInt(80); - writer.endListView(); - - vector.setNull(0); - vector.setNull(2); - vector.setNull(4); - - vector.setValueCount(6); - - assertTrue(vector.isNull(0)); - assertFalse(vector.isNull(1)); - assertTrue(vector.isNull(2)); - assertFalse(vector.isNull(3)); - assertTrue(vector.isNull(4)); - assertFalse(vector.isNull(5)); - - // validate 
buffers - - final ArrowBuf validityBuffer = vector.getValidityBuffer(); - final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = vector.getSizeBuffer(); - - assertEquals(0, BitVectorHelper.get(validityBuffer, 0)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 1)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 2)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 3)); - assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); - assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); - - assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(5, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(3, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // validate values - - Object result = vector.getObject(1); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(10), resultSet.get(0)); - assertEquals(Long.valueOf(20), resultSet.get(1)); - assertEquals(Long.valueOf(30), resultSet.get(2)); - - result = vector.getObject(3); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(40), 
resultSet.get(0)); - assertEquals(Long.valueOf(50), resultSet.get(1)); - - result = vector.getObject(5); - resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(60), resultSet.get(0)); - assertEquals(Long.valueOf(70), resultSet.get(1)); - assertEquals(Long.valueOf(80), resultSet.get(2)); - - vector.validate(); - } - } - - @Test - public void testOverWrite1() { - try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { - UnionListViewWriter writer = vector.getWriter(); - writer.allocate(); - - writer.setPosition(0); - writer.startListView(); - writer.bigInt().writeBigInt(10); - writer.bigInt().writeBigInt(20); - writer.bigInt().writeBigInt(30); - writer.endListView(); - - writer.setPosition(1); - writer.startListView(); - writer.bigInt().writeBigInt(40); - writer.bigInt().writeBigInt(50); - writer.endListView(); - - vector.setValueCount(2); - - writer.setPosition(0); - writer.startListView(); - writer.bigInt().writeBigInt(60); - writer.bigInt().writeBigInt(70); - writer.endListView(); - - writer.setPosition(1); - writer.startListView(); - writer.bigInt().writeBigInt(80); - writer.bigInt().writeBigInt(90); - writer.endListView(); - - vector.setValueCount(2); - - Object result = vector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(60), resultSet.get(0)); - assertEquals(Long.valueOf(70), resultSet.get(1)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(80), resultSet.get(0)); - assertEquals(Long.valueOf(90), resultSet.get(1)); - - vector.validate(); - } - } - - @Test - public void testOverwriteWithNull() { - try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { - UnionListViewWriter writer = vector.getWriter(); - writer.allocate(); - - ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - ArrowBuf sizeBuffer = 
vector.getSizeBuffer(); - - writer.setPosition(0); - writer.startListView(); - writer.bigInt().writeBigInt(10); - writer.bigInt().writeBigInt(20); - writer.bigInt().writeBigInt(30); - writer.endListView(); - - writer.setPosition(1); - writer.startListView(); - writer.bigInt().writeBigInt(40); - writer.bigInt().writeBigInt(50); - writer.endListView(); - - vector.setValueCount(2); - - assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - vector.setNull(0); - - assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - vector.setNull(1); - - assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - assertTrue(vector.isNull(0)); - assertTrue(vector.isNull(1)); - - writer.setPosition(0); - writer.startListView(); - writer.bigInt().writeBigInt(60); - writer.bigInt().writeBigInt(70); - writer.endListView(); - - assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - writer.setPosition(1); - writer.startListView(); - writer.bigInt().writeBigInt(80); - writer.bigInt().writeBigInt(90); - writer.endListView(); - - assertEquals(2, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(2, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - vector.setValueCount(2); - - assertFalse(vector.isNull(0)); - assertFalse(vector.isNull(1)); - - Object result = vector.getObject(0); - ArrayList resultSet = (ArrayList) result; - 
assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(60), resultSet.get(0)); - assertEquals(Long.valueOf(70), resultSet.get(1)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(80), resultSet.get(0)); - assertEquals(Long.valueOf(90), resultSet.get(1)); - - vector.validate(); - } - } - - @Test - public void testOutOfOrderOffset1() { - // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - // Allocate buffers in listViewVector by calling `allocateNew` method. - listViewVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. - - FieldType fieldType = new FieldType(true, new ArrowType.Int(16, true), null, null); - Field field = new Field("child-vector", fieldType, null); - listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. - FieldVector fieldVector = listViewVector.getDataVector(); - fieldVector.clear(); - - SmallIntVector childVector = (SmallIntVector) fieldVector; - - childVector.allocateNew(7); - - childVector.set(0, 0); - childVector.set(1, -127); - childVector.set(2, 127); - childVector.set(3, 50); - childVector.set(4, 12); - childVector.set(5, -7); - childVector.set(6, 25); - - childVector.setValueCount(7); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. 
- listViewVector.setValidity(0, 1); - listViewVector.setValidity(1, 0); - listViewVector.setValidity(2, 1); - listViewVector.setValidity(3, 1); - listViewVector.setValidity(4, 1); - - listViewVector.setOffset(0, 4); - listViewVector.setOffset(1, 7); - listViewVector.setOffset(2, 0); - listViewVector.setOffset(3, 0); - listViewVector.setOffset(4, 3); - - listViewVector.setSize(0, 3); - listViewVector.setSize(1, 0); - listViewVector.setSize(2, 4); - listViewVector.setSize(3, 0); - listViewVector.setSize(4, 2); - - // Set value count using `setValueCount` method. - listViewVector.setValueCount(5); - - final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); - - // check offset buffer - assertEquals(4, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check child vector - assertEquals(0, ((SmallIntVector) listViewVector.getDataVector()).get(0)); - assertEquals(-127, ((SmallIntVector) listViewVector.getDataVector()).get(1)); - assertEquals(127, ((SmallIntVector) listViewVector.getDataVector()).get(2)); - assertEquals(50, ((SmallIntVector) listViewVector.getDataVector()).get(3)); - assertEquals(12, 
((SmallIntVector) listViewVector.getDataVector()).get(4)); - assertEquals(-7, ((SmallIntVector) listViewVector.getDataVector()).get(5)); - assertEquals(25, ((SmallIntVector) listViewVector.getDataVector()).get(6)); - - // check values - Object result = listViewVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Short.valueOf("12"), resultSet.get(0)); - assertEquals(Short.valueOf("-7"), resultSet.get(1)); - assertEquals(Short.valueOf("25"), resultSet.get(2)); - - assertTrue(listViewVector.isNull(1)); - - result = listViewVector.getObject(2); - resultSet = (ArrayList) result; - assertEquals(4, resultSet.size()); - assertEquals(Short.valueOf("0"), resultSet.get(0)); - assertEquals(Short.valueOf("-127"), resultSet.get(1)); - assertEquals(Short.valueOf("127"), resultSet.get(2)); - assertEquals(Short.valueOf("50"), resultSet.get(3)); - - assertTrue(listViewVector.isEmpty(3)); - - result = listViewVector.getObject(4); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Short.valueOf("50"), resultSet.get(0)); - assertEquals(Short.valueOf("12"), resultSet.get(1)); - - listViewVector.validate(); - } - } - - private int validateSizeBufferAndCalculateMinOffset( - int start, - int splitLength, - ArrowBuf fromOffsetBuffer, - ArrowBuf fromSizeBuffer, - ArrowBuf toSizeBuffer) { - int minOffset = fromOffsetBuffer.getInt((long) start * ListViewVector.OFFSET_WIDTH); - int fromDataLength; - int toDataLength; - - for (int i = 0; i < splitLength; i++) { - fromDataLength = fromSizeBuffer.getInt((long) (start + i) * ListViewVector.SIZE_WIDTH); - toDataLength = toSizeBuffer.getInt((long) (i) * ListViewVector.SIZE_WIDTH); - - /* validate size */ - assertEquals( - fromDataLength, - toDataLength, - "Different data lengths at index: " + i + " and start: " + start); - - /* calculate minimum offset */ - int currentOffset = fromOffsetBuffer.getInt((long) (start + i) * ListViewVector.OFFSET_WIDTH); - 
if (currentOffset < minOffset) { - minOffset = currentOffset; - } - } - - return minOffset; - } - - private void validateOffsetBuffer( - int start, - int splitLength, - ArrowBuf fromOffsetBuffer, - ArrowBuf toOffsetBuffer, - int minOffset) { - int offset1; - int offset2; - - for (int i = 0; i < splitLength; i++) { - offset1 = fromOffsetBuffer.getInt((long) (start + i) * ListViewVector.OFFSET_WIDTH); - offset2 = toOffsetBuffer.getInt((long) (i) * ListViewVector.OFFSET_WIDTH); - assertEquals( - offset1 - minOffset, - offset2, - "Different offset values at index: " + i + " and start: " + start); - } - } - - private void validateDataBuffer( - int start, - int splitLength, - ArrowBuf fromOffsetBuffer, - ArrowBuf fromSizeBuffer, - BigIntVector fromDataVector, - ArrowBuf toOffsetBuffer, - BigIntVector toDataVector) { - int dataLength; - Long fromValue; - for (int i = 0; i < splitLength; i++) { - dataLength = fromSizeBuffer.getInt((long) (start + i) * ListViewVector.SIZE_WIDTH); - for (int j = 0; j < dataLength; j++) { - fromValue = - fromDataVector.getObject( - (fromOffsetBuffer.getInt((long) (start + i) * ListViewVector.OFFSET_WIDTH) + j)); - Long toValue = - toDataVector.getObject( - (toOffsetBuffer.getInt((long) i * ListViewVector.OFFSET_WIDTH) + j)); - assertEquals( - fromValue, toValue, "Different data values at index: " + i + " and start: " + start); - } - } - } - - /** - * Validate split and transfer of data from fromVector to toVector. Note that this method assumes - * that the child vector is BigIntVector. 
- * - * @param start start index - * @param splitLength length of data to split and transfer - * @param fromVector fromVector - * @param toVector toVector - */ - private void validateSplitAndTransfer( - TransferPair transferPair, - int start, - int splitLength, - ListViewVector fromVector, - ListViewVector toVector) { - - transferPair.splitAndTransfer(start, splitLength); - - /* get offsetBuffer of toVector */ - final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer(); - - /* get sizeBuffer of toVector */ - final ArrowBuf toSizeBuffer = toVector.getSizeBuffer(); - - /* get dataVector of toVector */ - BigIntVector toDataVector = (BigIntVector) toVector.getDataVector(); - - /* get offsetBuffer of toVector */ - final ArrowBuf fromOffsetBuffer = fromVector.getOffsetBuffer(); - - /* get sizeBuffer of toVector */ - final ArrowBuf fromSizeBuffer = fromVector.getSizeBuffer(); - - /* get dataVector of toVector */ - BigIntVector fromDataVector = (BigIntVector) fromVector.getDataVector(); - - /* validate size buffers */ - int minOffset = - validateSizeBufferAndCalculateMinOffset( - start, splitLength, fromOffsetBuffer, fromSizeBuffer, toSizeBuffer); - /* validate offset buffers */ - validateOffsetBuffer(start, splitLength, fromOffsetBuffer, toOffsetBuffer, minOffset); - /* validate data */ - validateDataBuffer( - start, - splitLength, - fromOffsetBuffer, - fromSizeBuffer, - fromDataVector, - toOffsetBuffer, - toDataVector); - } - - @Test - public void testSplitAndTransfer() throws Exception { - try (ListViewVector fromVector = ListViewVector.empty("sourceVector", allocator)) { - - /* Explicitly add the dataVector */ - MinorType type = MinorType.BIGINT; - fromVector.addOrGetVector(FieldType.nullable(type.getType())); - - UnionListViewWriter listViewWriter = fromVector.getWriter(); - - /* allocate memory */ - listViewWriter.allocate(); - - /* populate data */ - listViewWriter.setPosition(0); - listViewWriter.startListView(); - listViewWriter.bigInt().writeBigInt(10); - 
listViewWriter.bigInt().writeBigInt(11); - listViewWriter.bigInt().writeBigInt(12); - listViewWriter.endListView(); - - listViewWriter.setPosition(1); - listViewWriter.startListView(); - listViewWriter.bigInt().writeBigInt(13); - listViewWriter.bigInt().writeBigInt(14); - listViewWriter.endListView(); - - listViewWriter.setPosition(2); - listViewWriter.startListView(); - listViewWriter.bigInt().writeBigInt(15); - listViewWriter.bigInt().writeBigInt(16); - listViewWriter.bigInt().writeBigInt(17); - listViewWriter.bigInt().writeBigInt(18); - listViewWriter.endListView(); - - listViewWriter.setPosition(3); - listViewWriter.startListView(); - listViewWriter.bigInt().writeBigInt(19); - listViewWriter.endListView(); - - listViewWriter.setPosition(4); - listViewWriter.startListView(); - listViewWriter.bigInt().writeBigInt(20); - listViewWriter.bigInt().writeBigInt(21); - listViewWriter.bigInt().writeBigInt(22); - listViewWriter.bigInt().writeBigInt(23); - listViewWriter.endListView(); - - fromVector.setValueCount(5); - - /* get offset buffer */ - final ArrowBuf offsetBuffer = fromVector.getOffsetBuffer(); - - /* get size buffer */ - final ArrowBuf sizeBuffer = fromVector.getSizeBuffer(); - - /* get dataVector */ - BigIntVector dataVector = (BigIntVector) fromVector.getDataVector(); - - /* check the vector output */ - - int index = 0; - int offset; - int size = 0; - Long actual; - - /* index 0 */ - assertFalse(fromVector.isNull(index)); - offset = offsetBuffer.getInt(index * ListViewVector.OFFSET_WIDTH); - assertEquals(Integer.toString(0), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(10), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(11), actual); - offset++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(12), actual); - assertEquals( - Integer.toString(3), - Integer.toString(sizeBuffer.getInt(index * ListViewVector.SIZE_WIDTH))); - - /* index 1 */ - 
index++; - assertFalse(fromVector.isNull(index)); - offset = offsetBuffer.getInt(index * ListViewVector.OFFSET_WIDTH); - assertEquals(Integer.toString(3), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(13), actual); - offset++; - size++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(14), actual); - size++; - assertEquals( - Integer.toString(size), - Integer.toString(sizeBuffer.getInt(index * ListViewVector.SIZE_WIDTH))); - - /* index 2 */ - size = 0; - index++; - assertFalse(fromVector.isNull(index)); - offset = offsetBuffer.getInt(index * ListViewVector.OFFSET_WIDTH); - assertEquals(Integer.toString(5), Integer.toString(offset)); - size++; - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(15), actual); - offset++; - size++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(16), actual); - offset++; - size++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(17), actual); - offset++; - size++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(18), actual); - assertEquals( - Integer.toString(size), - Integer.toString(sizeBuffer.getInt(index * ListViewVector.SIZE_WIDTH))); - - /* index 3 */ - size = 0; - index++; - assertFalse(fromVector.isNull(index)); - offset = offsetBuffer.getInt(index * ListViewVector.OFFSET_WIDTH); - assertEquals(Integer.toString(9), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(19), actual); - size++; - assertEquals( - Integer.toString(size), - Integer.toString(sizeBuffer.getInt(index * ListViewVector.SIZE_WIDTH))); - - /* index 4 */ - size = 0; - index++; - assertFalse(fromVector.isNull(index)); - offset = offsetBuffer.getInt(index * ListViewVector.OFFSET_WIDTH); - assertEquals(Integer.toString(10), Integer.toString(offset)); - - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(20), actual); - offset++; - size++; - actual = 
dataVector.getObject(offset); - assertEquals(Long.valueOf(21), actual); - offset++; - size++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(22), actual); - offset++; - size++; - actual = dataVector.getObject(offset); - assertEquals(Long.valueOf(23), actual); - size++; - assertEquals( - Integer.toString(size), - Integer.toString(sizeBuffer.getInt(index * ListViewVector.SIZE_WIDTH))); - - /* do split and transfer */ - try (ListViewVector toVector = ListViewVector.empty("toVector", allocator)) { - int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}}; - TransferPair transferPair = fromVector.makeTransferPair(toVector); - - for (final int[] transferLength : transferLengths) { - int start = transferLength[0]; - int splitLength = transferLength[1]; - validateSplitAndTransfer(transferPair, start, splitLength, fromVector, toVector); - } - } - } - } - - @Test - public void testOutOfOrderOffsetSplitAndTransfer() { - // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] - try (ListViewVector fromVector = ListViewVector.empty("fromVector", allocator)) { - // Allocate buffers in listViewVector by calling `allocateNew` method. - fromVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. - - FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), null, null); - Field field = new Field("child-vector", fieldType, null); - fromVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. 
- FieldVector fieldVector = fromVector.getDataVector(); - fieldVector.clear(); - - BigIntVector childVector = (BigIntVector) fieldVector; - - childVector.allocateNew(7); - - childVector.set(0, 0); - childVector.set(1, -127); - childVector.set(2, 127); - childVector.set(3, 50); - childVector.set(4, 12); - childVector.set(5, -7); - childVector.set(6, 25); - - childVector.setValueCount(7); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. - fromVector.setValidity(0, 1); - fromVector.setValidity(1, 0); - fromVector.setValidity(2, 1); - fromVector.setValidity(3, 1); - fromVector.setValidity(4, 1); - - fromVector.setOffset(0, 4); - fromVector.setOffset(1, 7); - fromVector.setOffset(2, 0); - fromVector.setOffset(3, 0); - fromVector.setOffset(4, 3); - - fromVector.setSize(0, 3); - fromVector.setSize(1, 0); - fromVector.setSize(2, 4); - fromVector.setSize(3, 0); - fromVector.setSize(4, 2); - - // Set value count using `setValueCount` method. 
- fromVector.setValueCount(5); - - final ArrowBuf offSetBuffer = fromVector.getOffsetBuffer(); - final ArrowBuf sizeBuffer = fromVector.getSizeBuffer(); - - // check offset buffer - assertEquals(4, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(7, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(0, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - assertEquals(3, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); - - // check size buffer - assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - assertEquals(2, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); - - // check child vector - assertEquals(0, ((BigIntVector) fromVector.getDataVector()).get(0)); - assertEquals(-127, ((BigIntVector) fromVector.getDataVector()).get(1)); - assertEquals(127, ((BigIntVector) fromVector.getDataVector()).get(2)); - assertEquals(50, ((BigIntVector) fromVector.getDataVector()).get(3)); - assertEquals(12, ((BigIntVector) fromVector.getDataVector()).get(4)); - assertEquals(-7, ((BigIntVector) fromVector.getDataVector()).get(5)); - assertEquals(25, ((BigIntVector) fromVector.getDataVector()).get(6)); - - // check values - Object result = fromVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - assertEquals(Long.valueOf(12), resultSet.get(0)); - assertEquals(Long.valueOf(-7), resultSet.get(1)); - assertEquals(Long.valueOf(25), resultSet.get(2)); - - assertTrue(fromVector.isNull(1)); - - result = fromVector.getObject(2); - resultSet = (ArrayList) result; - 
assertEquals(4, resultSet.size()); - assertEquals(Long.valueOf(0), resultSet.get(0)); - assertEquals(Long.valueOf(-127), resultSet.get(1)); - assertEquals(Long.valueOf(127), resultSet.get(2)); - assertEquals(Long.valueOf(50), resultSet.get(3)); - - assertTrue(fromVector.isEmpty(3)); - - result = fromVector.getObject(4); - resultSet = (ArrayList) result; - assertEquals(2, resultSet.size()); - assertEquals(Long.valueOf(50), resultSet.get(0)); - assertEquals(Long.valueOf(12), resultSet.get(1)); - - fromVector.validate(); - - /* do split and transfer */ - try (ListViewVector toVector = ListViewVector.empty("toVector", allocator)) { - int[][] transferLengths = {{2, 3}, {0, 1}, {0, 3}}; - TransferPair transferPair = fromVector.makeTransferPair(toVector); - - for (final int[] transferLength : transferLengths) { - int start = transferLength[0]; - int splitLength = transferLength[1]; - validateSplitAndTransfer(transferPair, start, splitLength, fromVector, toVector); - } - } - } - } - - @Test - public void testRangeChildVector1() { - /* - * Non-overlapping ranges - * offsets: [0, 2] - * sizes: [4, 1] - * values: [0, 1, 2, 3] - * - * vector: [[0, 1, 2, 3], [2]] - * */ - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - // Allocate buffers in listViewVector by calling `allocateNew` method. - listViewVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. - - FieldType fieldType = new FieldType(true, new ArrowType.Int(32, true), null, null); - Field field = new Field("child-vector", fieldType, null); - listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. 
- FieldVector fieldVector = listViewVector.getDataVector(); - fieldVector.clear(); - - IntVector childVector = (IntVector) fieldVector; - - childVector.allocateNew(8); - - childVector.set(0, 0); - childVector.set(1, 1); - childVector.set(2, 2); - childVector.set(3, 3); - childVector.set(4, 4); - childVector.set(5, 5); - childVector.set(6, 6); - childVector.set(7, 7); - - childVector.setValueCount(8); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. - listViewVector.setValidity(0, 1); - listViewVector.setValidity(1, 1); - - listViewVector.setOffset(0, 0); - listViewVector.setOffset(1, 2); - - listViewVector.setSize(0, 4); - listViewVector.setSize(1, 1); - - assertEquals(8, listViewVector.getDataVector().getValueCount()); - - listViewVector.setValueCount(2); - assertEquals(4, listViewVector.getDataVector().getValueCount()); - - IntVector childVector1 = (IntVector) listViewVector.getDataVector(); - final ArrowBuf dataBuffer = childVector1.getDataBuffer(); - final ArrowBuf validityBuffer = childVector1.getValidityBuffer(); - - // yet the underneath buffer contains the original buffer - for (int i = 0; i < validityBuffer.capacity(); i++) { - assertEquals(i, dataBuffer.getInt((long) i * IntVector.TYPE_WIDTH)); - } - } - } - - @Test - public void testRangeChildVector2() { - /* - * Overlapping ranges - * offsets: [0, 2] - * sizes: [3, 1] - * values: [0, 1, 2, 3] - * - * vector: [[1, 2, 3], [2]] - * */ - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - // Allocate buffers in listViewVector by calling `allocateNew` method. - listViewVector.allocateNew(); - - // Initialize the child vector using `initializeChildrenFromFields` method. 
- - FieldType fieldType = new FieldType(true, new ArrowType.Int(32, true), null, null); - Field field = new Field("child-vector", fieldType, null); - listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); - - // Set values in the child vector. - FieldVector fieldVector = listViewVector.getDataVector(); - fieldVector.clear(); - - IntVector childVector = (IntVector) fieldVector; - - childVector.allocateNew(8); - - childVector.set(0, 0); - childVector.set(1, 1); - childVector.set(2, 2); - childVector.set(3, 3); - childVector.set(4, 4); - childVector.set(5, 5); - childVector.set(6, 6); - childVector.set(7, 7); - - childVector.setValueCount(8); - - // Set validity, offset and size buffers using `setValidity`, - // `setOffset` and `setSize` methods. - listViewVector.setValidity(0, 1); - listViewVector.setValidity(1, 1); - - listViewVector.setOffset(0, 1); - listViewVector.setOffset(1, 2); - - listViewVector.setSize(0, 3); - listViewVector.setSize(1, 1); - - assertEquals(8, listViewVector.getDataVector().getValueCount()); - - listViewVector.setValueCount(2); - assertEquals(4, listViewVector.getDataVector().getValueCount()); - - IntVector childVector1 = (IntVector) listViewVector.getDataVector(); - final ArrowBuf dataBuffer = childVector1.getDataBuffer(); - final ArrowBuf validityBuffer = childVector1.getValidityBuffer(); - - // yet the underneath buffer contains the original buffer - for (int i = 0; i < validityBuffer.capacity(); i++) { - assertEquals(i, dataBuffer.getInt((long) i * IntVector.TYPE_WIDTH)); - } - } - } - - private void writeIntValues(UnionListViewWriter writer, int[] values) { - writer.startListView(); - for (int v : values) { - writer.integer().writeInt(v); - } - writer.endListView(); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java deleted file mode 100644 index a4197c50b5bff..0000000000000 --- 
a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ /dev/null @@ -1,1244 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.impl.UnionMapReader; -import org.apache.arrow.vector.complex.impl.UnionMapWriter; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import 
org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestMapVector { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - public T getResultKey(Map resultStruct) { - assertTrue(resultStruct.containsKey(MapVector.KEY_NAME)); - return resultStruct.get(MapVector.KEY_NAME); - } - - public T getResultValue(Map resultStruct) { - assertTrue(resultStruct.containsKey(MapVector.VALUE_NAME)); - return resultStruct.get(MapVector.VALUE_NAME); - } - - @Test - public void testBasicOperation() { - int count = 5; - try (MapVector mapVector = MapVector.empty("map", allocator, false)) { - mapVector.allocateNew(); - UnionMapWriter mapWriter = mapVector.getWriter(); - for (int i = 0; i < count; i++) { - mapWriter.startMap(); - for (int j = 0; j < i + 1; j++) { - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(j); - mapWriter.value().integer().writeInt(j); - mapWriter.endEntry(); - } - mapWriter.endMap(); - } - mapWriter.setValueCount(count); - UnionMapReader mapReader = mapVector.getReader(); - for (int i = 0; i < count; i++) { - mapReader.setPosition(i); - for (int j = 0; j < i + 1; j++) { - mapReader.next(); - assertEquals(j, mapReader.key().readLong().longValue(), "record: " + i); - assertEquals(j, mapReader.value().readInteger().intValue()); - } - } - } - } - - @Test - public void testBasicOperationNulls() { - int count = 6; - try (MapVector mapVector = MapVector.empty("map", allocator, false)) { - mapVector.allocateNew(); - UnionMapWriter mapWriter = mapVector.getWriter(); - for (int i = 0; i < count; 
i++) { - // i == 1 is a NULL - if (i != 1) { - mapWriter.setPosition(i); - mapWriter.startMap(); - // i == 3 is an empty map - if (i != 3) { - for (int j = 0; j < i + 1; j++) { - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(j); - // i == 5 maps to a NULL value - if (i != 5) { - mapWriter.value().integer().writeInt(j); - } - mapWriter.endEntry(); - } - } - mapWriter.endMap(); - } - } - mapWriter.setValueCount(count); - UnionMapReader mapReader = mapVector.getReader(); - for (int i = 0; i < count; i++) { - mapReader.setPosition(i); - if (i == 1) { - assertFalse(mapReader.isSet()); - } else { - if (i == 3) { - JsonStringArrayList result = (JsonStringArrayList) mapReader.readObject(); - assertTrue(result.isEmpty()); - } else { - for (int j = 0; j < i + 1; j++) { - mapReader.next(); - assertEquals(j, mapReader.key().readLong().longValue(), "record: " + i); - if (i == 5) { - assertFalse(mapReader.value().isSet()); - } else { - assertEquals(j, mapReader.value().readInteger().intValue()); - } - } - } - } - } - } - } - - @Test - public void testCopyFrom() throws Exception { - try (MapVector inVector = MapVector.empty("input", allocator, false); - MapVector outVector = MapVector.empty("output", allocator, false)) { - UnionMapWriter writer = inVector.getWriter(); - writer.allocate(); - - // populate input vector with the following records - // {1 -> 11, 2 -> 22, 3 -> 33} - // null - // {2 -> null} - writer.setPosition(0); // optional - writer.startMap(); - writer.startEntry(); - writer.key().bigInt().writeBigInt(1); - writer.value().bigInt().writeBigInt(11); - writer.endEntry(); - writer.startEntry(); - writer.key().bigInt().writeBigInt(2); - writer.value().bigInt().writeBigInt(22); - writer.endEntry(); - writer.startEntry(); - writer.key().bigInt().writeBigInt(3); - writer.value().bigInt().writeBigInt(33); - writer.endEntry(); - writer.endMap(); - - writer.setPosition(2); - writer.startMap(); - writer.startEntry(); - writer.key().bigInt().writeBigInt(2); - 
writer.endEntry(); - writer.endMap(); - - writer.setValueCount(3); - - // copy values from input to output - outVector.allocateNew(); - for (int i = 0; i < 3; i++) { - outVector.copyFrom(i, i, inVector); - } - outVector.setValueCount(3); - - // assert the output vector is correct - FieldReader reader = outVector.getReader(); - assertTrue(reader.isSet(), "shouldn't be null"); - reader.setPosition(1); - assertFalse(reader.isSet(), "should be null"); - reader.setPosition(2); - assertTrue(reader.isSet(), "shouldn't be null"); - - /* index 0 */ - Object result = outVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - assertEquals(3, resultSet.size()); - Map resultStruct = (Map) resultSet.get(0); - assertEquals(1L, getResultKey(resultStruct)); - assertEquals(11L, getResultValue(resultStruct)); - resultStruct = (Map) resultSet.get(1); - assertEquals(2L, getResultKey(resultStruct)); - assertEquals(22L, getResultValue(resultStruct)); - resultStruct = (Map) resultSet.get(2); - assertEquals(3L, getResultKey(resultStruct)); - assertEquals(33L, getResultValue(resultStruct)); - - /* index 1 */ - result = outVector.getObject(1); - assertNull(result); - - /* index 2 */ - result = outVector.getObject(2); - resultSet = (ArrayList) result; - assertEquals(1, resultSet.size()); - resultStruct = (Map) resultSet.get(0); - assertEquals(2L, getResultKey(resultStruct)); - assertFalse(resultStruct.containsKey(MapVector.VALUE_NAME)); - } - } - - @Test - public void testSplitAndTransfer() throws Exception { - try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) { - - /* Explicitly add the map child vectors */ - FieldType type = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - AddOrGetResult addResult = mapVector.addOrGetVector(type); - FieldType keyType = new FieldType(false, MinorType.BIGINT.getType(), null, null); - FieldType valueType = FieldType.nullable(MinorType.FLOAT8.getType()); - addResult.getVector().addOrGet(MapVector.KEY_NAME, 
keyType, BigIntVector.class); - addResult.getVector().addOrGet(MapVector.VALUE_NAME, valueType, Float8Vector.class); - - UnionMapWriter mapWriter = mapVector.getWriter(); - - /* allocate memory */ - mapWriter.allocate(); - - /* populate data */ - mapWriter.setPosition(0); - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(10); - mapWriter.value().float8().writeFloat8(1.0); - mapWriter.endEntry(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(11); - mapWriter.value().float8().writeFloat8(1.1); - mapWriter.endEntry(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(12); - mapWriter.value().float8().writeFloat8(1.2); - mapWriter.endEntry(); - mapWriter.endMap(); - - mapWriter.setPosition(1); - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(13); - mapWriter.value().float8().writeFloat8(1.3); - mapWriter.endEntry(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(14); - mapWriter.value().float8().writeFloat8(1.4); - mapWriter.endEntry(); - mapWriter.endMap(); - - mapWriter.setPosition(2); - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(15); - mapWriter.value().float8().writeFloat8(1.5); - mapWriter.endEntry(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(16); - mapWriter.value().float8().writeFloat8(1.6); - mapWriter.endEntry(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(17); - mapWriter.value().float8().writeFloat8(1.7); - mapWriter.endEntry(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(18); - mapWriter.value().float8().writeFloat8(1.8); - mapWriter.endEntry(); - mapWriter.endMap(); - - mapWriter.setPosition(3); - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(19); - mapWriter.value().float8().writeFloat8(1.9); - mapWriter.endEntry(); - mapWriter.endMap(); - - mapWriter.setPosition(4); - 
mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(20); - mapWriter.value().float8().writeFloat8(2.0); - mapWriter.endEntry(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(21); - mapWriter.value().float8().writeFloat8(2.1); - mapWriter.endEntry(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(22); - mapWriter.value().float8().writeFloat8(2.2); - mapWriter.endEntry(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(23); - mapWriter.value().float8().writeFloat8(2.3); - mapWriter.endEntry(); - mapWriter.endMap(); - - mapVector.setValueCount(5); - - assertEquals(4, mapVector.getLastSet()); - - /* get offset buffer */ - final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer(); - - /* get dataVector */ - StructVector dataVector = (StructVector) mapVector.getDataVector(); - - /* check the vector output */ - int index = 0; - int offset; - Map result; - - /* index 0 */ - assertFalse(mapVector.isNull(index)); - offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH); - assertEquals(Integer.toString(0), Integer.toString(offset)); - - result = dataVector.getObject(offset); - assertEquals(10L, getResultKey(result)); - assertEquals(1.0, getResultValue(result)); - offset++; - result = dataVector.getObject(offset); - assertEquals(11L, getResultKey(result)); - assertEquals(1.1, getResultValue(result)); - offset++; - result = dataVector.getObject(offset); - assertEquals(12L, getResultKey(result)); - assertEquals(1.2, getResultValue(result)); - - /* index 1 */ - index++; - assertFalse(mapVector.isNull(index)); - offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH); - assertEquals(Integer.toString(3), Integer.toString(offset)); - - result = dataVector.getObject(offset); - assertEquals(13L, getResultKey(result)); - assertEquals(1.3, getResultValue(result)); - offset++; - result = dataVector.getObject(offset); - assertEquals(14L, getResultKey(result)); - assertEquals(1.4, 
getResultValue(result)); - - /* index 2 */ - index++; - assertFalse(mapVector.isNull(index)); - offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH); - assertEquals(Integer.toString(5), Integer.toString(offset)); - - result = dataVector.getObject(offset); - assertEquals(15L, getResultKey(result)); - assertEquals(1.5, getResultValue(result)); - offset++; - result = dataVector.getObject(offset); - assertEquals(16L, getResultKey(result)); - assertEquals(1.6, getResultValue(result)); - offset++; - result = dataVector.getObject(offset); - assertEquals(17L, getResultKey(result)); - assertEquals(1.7, getResultValue(result)); - offset++; - result = dataVector.getObject(offset); - assertEquals(18L, getResultKey(result)); - assertEquals(1.8, getResultValue(result)); - - /* index 3 */ - index++; - assertFalse(mapVector.isNull(index)); - offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH); - assertEquals(Integer.toString(9), Integer.toString(offset)); - - result = dataVector.getObject(offset); - assertEquals(19L, getResultKey(result)); - assertEquals(1.9, getResultValue(result)); - - /* index 4 */ - index++; - assertFalse(mapVector.isNull(index)); - offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH); - assertEquals(Integer.toString(10), Integer.toString(offset)); - - result = dataVector.getObject(offset); - assertEquals(20L, getResultKey(result)); - assertEquals(2.0, getResultValue(result)); - offset++; - result = dataVector.getObject(offset); - assertEquals(21L, getResultKey(result)); - assertEquals(2.1, getResultValue(result)); - offset++; - result = dataVector.getObject(offset); - assertEquals(22L, getResultKey(result)); - assertEquals(2.2, getResultValue(result)); - offset++; - result = dataVector.getObject(offset); - assertEquals(23L, getResultKey(result)); - assertEquals(2.3, getResultValue(result)); - - /* index 5 */ - index++; - assertTrue(mapVector.isNull(index)); - offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH); - 
assertEquals(Integer.toString(14), Integer.toString(offset)); - - /* do split and transfer */ - try (MapVector toVector = MapVector.empty("toVector", allocator, false)) { - - TransferPair transferPair = mapVector.makeTransferPair(toVector); - - int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}}; - - for (final int[] transferLength : transferLengths) { - int start = transferLength[0]; - int splitLength = transferLength[1]; - - int dataLength1 = 0; - int dataLength2 = 0; - - int offset1 = 0; - int offset2 = 0; - - transferPair.splitAndTransfer(start, splitLength); - - /* get offsetBuffer of toVector */ - final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer(); - - /* get dataVector of toVector */ - StructVector dataVector1 = (StructVector) toVector.getDataVector(); - - for (int i = 0; i < splitLength; i++) { - dataLength1 = - offsetBuffer.getInt((start + i + 1) * MapVector.OFFSET_WIDTH) - - offsetBuffer.getInt((start + i) * MapVector.OFFSET_WIDTH); - dataLength2 = - toOffsetBuffer.getInt((i + 1) * MapVector.OFFSET_WIDTH) - - toOffsetBuffer.getInt(i * MapVector.OFFSET_WIDTH); - - assertEquals( - dataLength1, - dataLength2, - "Different data lengths at index: " + i + " and start: " + start); - - offset1 = offsetBuffer.getInt((start + i) * MapVector.OFFSET_WIDTH); - offset2 = toOffsetBuffer.getInt(i * MapVector.OFFSET_WIDTH); - - for (int j = 0; j < dataLength1; j++) { - assertEquals( - dataVector.getObject(offset1), - dataVector1.getObject(offset2), - "Different data at indexes: " + offset1 + " and " + offset2); - - offset1++; - offset2++; - } - } - } - } - } - } - - @Test - public void testMapWithListValue() throws Exception { - try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) { - - UnionMapWriter mapWriter = mapVector.getWriter(); - ListWriter valueWriter; - - /* allocate memory */ - mapWriter.allocate(); - - /* the dataVector that backs a listVector will also be a - * listVector for this test. 
- */ - - /* write one or more maps index 0 */ - mapWriter.setPosition(0); - mapWriter.startMap(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(1); - valueWriter = mapWriter.value().list(); - valueWriter.startList(); - valueWriter.bigInt().writeBigInt(50); - valueWriter.bigInt().writeBigInt(100); - valueWriter.bigInt().writeBigInt(200); - valueWriter.endList(); - mapWriter.endEntry(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(2); - valueWriter = mapWriter.value().list(); - valueWriter.startList(); - valueWriter.bigInt().writeBigInt(75); - valueWriter.bigInt().writeBigInt(125); - valueWriter.bigInt().writeBigInt(150); - valueWriter.bigInt().writeBigInt(175); - valueWriter.endList(); - mapWriter.endEntry(); - - mapWriter.endMap(); - - /* write one or more maps at index 1 */ - mapWriter.setPosition(1); - mapWriter.startMap(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(3); - valueWriter = mapWriter.value().list(); - valueWriter.startList(); - valueWriter.bigInt().writeBigInt(10); - valueWriter.endList(); - mapWriter.endEntry(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(4); - valueWriter = mapWriter.value().list(); - valueWriter.startList(); - valueWriter.bigInt().writeBigInt(15); - valueWriter.bigInt().writeBigInt(20); - valueWriter.endList(); - mapWriter.endEntry(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(5); - valueWriter = mapWriter.value().list(); - valueWriter.startList(); - valueWriter.bigInt().writeBigInt(25); - valueWriter.bigInt().writeBigInt(30); - valueWriter.bigInt().writeBigInt(35); - valueWriter.endList(); - mapWriter.endEntry(); - - mapWriter.endMap(); - - assertEquals(1, mapVector.getLastSet()); - - mapWriter.setValueCount(2); - - assertEquals(2, mapVector.getValueCount()); - - // Get mapVector element at index 0 - Object result = mapVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - - // 2 map entries at index 0 - 
assertEquals(2, resultSet.size()); - - // First Map entry - Map resultStruct = (Map) resultSet.get(0); - assertEquals(1L, getResultKey(resultStruct)); - ArrayList list = (ArrayList) getResultValue(resultStruct); - assertEquals(3, list.size()); // value is a list with 3 elements - assertEquals(Long.valueOf(50), list.get(0)); - assertEquals(Long.valueOf(100), list.get(1)); - assertEquals(Long.valueOf(200), list.get(2)); - - // Second Map entry - resultStruct = (Map) resultSet.get(1); - list = (ArrayList) getResultValue(resultStruct); - assertEquals(4, list.size()); // value is a list with 4 elements - assertEquals(Long.valueOf(75), list.get(0)); - assertEquals(Long.valueOf(125), list.get(1)); - assertEquals(Long.valueOf(150), list.get(2)); - assertEquals(Long.valueOf(175), list.get(3)); - - // Get mapVector element at index 1 - result = mapVector.getObject(1); - resultSet = (ArrayList) result; - - // First Map entry - resultStruct = (Map) resultSet.get(0); - assertEquals(3L, getResultKey(resultStruct)); - list = (ArrayList) getResultValue(resultStruct); - assertEquals(1, list.size()); // value is a list with 1 element - assertEquals(Long.valueOf(10), list.get(0)); - - // Second Map entry - resultStruct = (Map) resultSet.get(1); - assertEquals(4L, getResultKey(resultStruct)); - list = (ArrayList) getResultValue(resultStruct); - assertEquals(2, list.size()); // value is a list with 1 element - assertEquals(Long.valueOf(15), list.get(0)); - assertEquals(Long.valueOf(20), list.get(1)); - - // Third Map entry - resultStruct = (Map) resultSet.get(2); - assertEquals(5L, getResultKey(resultStruct)); - list = (ArrayList) getResultValue(resultStruct); - assertEquals(3, list.size()); // value is a list with 1 element - assertEquals(Long.valueOf(25), list.get(0)); - assertEquals(Long.valueOf(30), list.get(1)); - assertEquals(Long.valueOf(35), list.get(2)); - - /* check underlying bitVector */ - assertFalse(mapVector.isNull(0)); - assertFalse(mapVector.isNull(1)); - - /* check 
underlying offsets */ - final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer(); - - /* mapVector has 2 entries at index 0 and 3 entries at index 1 */ - assertEquals(0, offsetBuffer.getInt(0 * MapVector.OFFSET_WIDTH)); - assertEquals(2, offsetBuffer.getInt(1 * MapVector.OFFSET_WIDTH)); - assertEquals(5, offsetBuffer.getInt(2 * MapVector.OFFSET_WIDTH)); - } - } - - @Test - public void testMapWithMapValue() throws Exception { - try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) { - - UnionMapWriter mapWriter = mapVector.getWriter(); - MapWriter valueWriter; - - // we are essentially writing Map> - // populate map vector with the following four records - // [ - // null, - // [1:[50: 100, 200:400], 2:[75: 175, 150: 250]], - // [3:[10: 20], 4:[15: 20], 5:[25: 30, 35: null]], - // [8:[15: 30, 10: 20]] - // ] - - /* write null at index 0 */ - mapWriter.setPosition(0); - mapWriter.writeNull(); - - /* write one or more maps at index 1 */ - mapWriter.setPosition(1); - mapWriter.startMap(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(1); - valueWriter = mapWriter.value().map(false); - valueWriter.startMap(); - writeEntry(valueWriter, 50, 100L); - writeEntry(valueWriter, 200, 400L); - valueWriter.endMap(); - mapWriter.endEntry(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(2); - valueWriter = mapWriter.value().map(false); - valueWriter.startMap(); - writeEntry(valueWriter, 75, 175L); - writeEntry(valueWriter, 150, 250L); - valueWriter.endMap(); - mapWriter.endEntry(); - - mapWriter.endMap(); - - /* write one or more maps at index 2 */ - mapWriter.setPosition(2); - mapWriter.startMap(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(3); - valueWriter = mapWriter.value().map(true); - valueWriter.startMap(); - writeEntry(valueWriter, 10, 20L); - valueWriter.endMap(); - mapWriter.endEntry(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(4); - valueWriter = 
mapWriter.value().map(false); - valueWriter.startMap(); - writeEntry(valueWriter, 15, 20L); - valueWriter.endMap(); - mapWriter.endEntry(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(5); - valueWriter = mapWriter.value().map(false); - valueWriter.startMap(); - writeEntry(valueWriter, 25, 30L); - writeEntry(valueWriter, 35, (Long) null); - valueWriter.endMap(); - mapWriter.endEntry(); - - mapWriter.endMap(); - - /* write one or more maps at index 3 */ - mapWriter.setPosition(3); - mapWriter.startMap(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(8); - valueWriter = mapWriter.value().map(); - valueWriter.startMap(); - writeEntry(valueWriter, 15, 30L); - writeEntry(valueWriter, 10, 20L); - valueWriter.endMap(); - mapWriter.endEntry(); - - mapWriter.endMap(); - - assertEquals(3, mapVector.getLastSet()); - - mapWriter.setValueCount(4); - - assertEquals(4, mapVector.getValueCount()); - - // Get mapVector element at index 0 - Object result = mapVector.getObject(0); - assertNull(result); - - // Get mapVector element at index 1 - result = mapVector.getObject(1); - ArrayList resultSet = (ArrayList) result; - - // 2 map entries at index 0 - assertEquals(2, resultSet.size()); - - // First Map entry - Map resultStruct = (Map) resultSet.get(0); - assertEquals(1L, getResultKey(resultStruct)); - ArrayList> list = (ArrayList>) getResultValue(resultStruct); - assertEquals(2, list.size()); // value is a list of 2 two maps - Map innerMap = list.get(0); - assertEquals(50L, getResultKey(innerMap)); - assertEquals(100L, getResultValue(innerMap)); - innerMap = list.get(1); - assertEquals(200L, getResultKey(innerMap)); - assertEquals(400L, getResultValue(innerMap)); - - // Second Map entry - resultStruct = (Map) resultSet.get(1); - assertEquals(2L, getResultKey(resultStruct)); - list = (ArrayList>) getResultValue(resultStruct); - assertEquals(2, list.size()); // value is a list of two maps - innerMap = list.get(0); - assertEquals(75L, 
getResultKey(innerMap)); - assertEquals(175L, getResultValue(innerMap)); - innerMap = list.get(1); - assertEquals(150L, getResultKey(innerMap)); - assertEquals(250L, getResultValue(innerMap)); - - // Get mapVector element at index 2 - result = mapVector.getObject(2); - resultSet = (ArrayList) result; - - // 3 map entries at index 1 - assertEquals(3, resultSet.size()); - - // First Map entry - resultStruct = (Map) resultSet.get(0); - assertEquals(3L, getResultKey(resultStruct)); - list = (ArrayList>) getResultValue(resultStruct); - assertEquals(1, list.size()); // value is a list of maps with 1 element - innerMap = list.get(0); - assertEquals(10L, getResultKey(innerMap)); - assertEquals(20L, getResultValue(innerMap)); - - // Second Map entry - resultStruct = (Map) resultSet.get(1); - assertEquals(4L, getResultKey(resultStruct)); - list = (ArrayList>) getResultValue(resultStruct); - assertEquals(1, list.size()); // value is a list of maps with 1 element - innerMap = list.get(0); - assertEquals(15L, getResultKey(innerMap)); - assertEquals(20L, getResultValue(innerMap)); - - // Third Map entry - resultStruct = (Map) resultSet.get(2); - assertEquals(5L, getResultKey(resultStruct)); - list = (ArrayList>) getResultValue(resultStruct); - assertEquals(2, list.size()); // value is a list of maps with 2 elements - innerMap = list.get(0); - assertEquals(25L, getResultKey(innerMap)); - assertEquals(30L, getResultValue(innerMap)); - innerMap = list.get(1); - assertEquals(35L, getResultKey(innerMap)); - assertNull(innerMap.get(MapVector.VALUE_NAME)); - - // Get mapVector element at index 3 - result = mapVector.getObject(3); - resultSet = (ArrayList) result; - - // only 1 map entry at index 3 - assertEquals(1, resultSet.size()); - - resultStruct = (Map) resultSet.get(0); - assertEquals(8L, getResultKey(resultStruct)); - list = (ArrayList>) getResultValue(resultStruct); - assertEquals(2, list.size()); // value is a list of 2 maps - innerMap = list.get(0); - assertEquals(15L, 
getResultKey(innerMap)); - assertEquals(30L, getResultValue(innerMap)); - innerMap = list.get(1); - assertEquals(10L, getResultKey(innerMap)); - assertEquals(20L, getResultValue(innerMap)); - - /* check underlying bitVector */ - assertTrue(mapVector.isNull(0)); - assertFalse(mapVector.isNull(1)); - assertFalse(mapVector.isNull(2)); - assertFalse(mapVector.isNull(3)); - - /* check underlying offsets */ - final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer(); - - // mapVector has 0 entries at index 0, 2 entries at index 1, 3 entries at index 2, - // and 1 entry at index 3 - assertEquals(0, offsetBuffer.getInt(0 * MapVector.OFFSET_WIDTH)); - assertEquals(0, offsetBuffer.getInt(1 * MapVector.OFFSET_WIDTH)); - assertEquals(2, offsetBuffer.getInt(2 * MapVector.OFFSET_WIDTH)); - assertEquals(5, offsetBuffer.getInt(3 * MapVector.OFFSET_WIDTH)); - assertEquals(6, offsetBuffer.getInt(4 * MapVector.OFFSET_WIDTH)); - } - } - - @Test - public void testMapWithMapKeyAndMapValue() throws Exception { - try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) { - - UnionMapWriter mapWriter = mapVector.getWriter(); - MapWriter keyWriter; - MapWriter valueWriter; - - // we are essentially writing Map, Map> - // populate map vector with the following two records - // [ - // [[5: 10, 20: 40]:[50: 100, 200: 400], [50: 100]:[75: 175, 150: 250]], - // [[1: 2]:[10: 20], [30: 40]:[15: 20], [50: 60, 70: null]:[25: 30, 35: null], [5: null]: - // null] - // ] - - mapWriter.setPosition(0); - mapWriter.startMap(); - - mapWriter.startEntry(); - keyWriter = mapWriter.key().map(false); - keyWriter.startMap(); - writeEntry(keyWriter, 5, 10); - writeEntry(keyWriter, 20, 40); - keyWriter.endMap(); - valueWriter = mapWriter.value().map(false); - valueWriter.startMap(); - writeEntry(valueWriter, 50, 100L); - writeEntry(valueWriter, 200, 400L); - valueWriter.endMap(); - mapWriter.endEntry(); - - mapWriter.startEntry(); - keyWriter = mapWriter.key().map(false); - 
keyWriter.startMap(); - writeEntry(keyWriter, 50, 100); - keyWriter.endMap(); - valueWriter = mapWriter.value().map(false); - valueWriter.startMap(); - writeEntry(valueWriter, 75, 175L); - writeEntry(valueWriter, 150, 250L); - valueWriter.endMap(); - mapWriter.endEntry(); - - mapWriter.endMap(); - - /* write one or more maps at index 1 */ - mapWriter.setPosition(1); - mapWriter.startMap(); - - mapWriter.startEntry(); - keyWriter = mapWriter.key().map(false); - keyWriter.startMap(); - writeEntry(keyWriter, 1, 2); - keyWriter.endMap(); - valueWriter = mapWriter.value().map(true); - valueWriter.startMap(); - writeEntry(valueWriter, 10, 20L); - valueWriter.endMap(); - mapWriter.endEntry(); - - mapWriter.startEntry(); - keyWriter = mapWriter.key().map(false); - keyWriter.startMap(); - writeEntry(keyWriter, 30, 40); - keyWriter.endMap(); - valueWriter = mapWriter.value().map(false); - valueWriter.startMap(); - writeEntry(valueWriter, 15, 20L); - valueWriter.endMap(); - mapWriter.endEntry(); - - mapWriter.startEntry(); - keyWriter = mapWriter.key().map(false); - keyWriter.startMap(); - writeEntry(keyWriter, 50, 60); - writeEntry(keyWriter, 70, (Integer) null); - keyWriter.endMap(); - valueWriter = mapWriter.value().map(false); - valueWriter.startMap(); - writeEntry(valueWriter, 25, 30L); - writeEntry(valueWriter, 35, (Long) null); - valueWriter.endMap(); - mapWriter.endEntry(); - - mapWriter.startEntry(); - keyWriter = mapWriter.key().map(false); - keyWriter.startMap(); - writeEntry(keyWriter, 5, (Integer) null); - keyWriter.endMap(); - valueWriter = mapWriter.value().map(false); - valueWriter.writeNull(); - mapWriter.endEntry(); - - mapWriter.endMap(); - - assertEquals(1, mapVector.getLastSet()); - - mapWriter.setValueCount(2); - - assertEquals(2, mapVector.getValueCount()); - - // Get mapVector element at index 0 - Object result = mapVector.getObject(0); - ArrayList resultSet = (ArrayList) result; - - // 2 map entries at index 0 - assertEquals(2, resultSet.size()); - - 
// First Map entry - Map>> resultStruct = (Map>>) resultSet.get(0); - ArrayList> list = getResultKey(resultStruct); - assertEquals(2, list.size()); // key is a list of 2 two maps - Map innerMap = list.get(0); - assertEquals(5, getResultKey(innerMap)); - assertEquals(10, getResultValue(innerMap)); - innerMap = list.get(1); - assertEquals(20, getResultKey(innerMap)); - assertEquals(40, getResultValue(innerMap)); - - list = getResultValue(resultStruct); - assertEquals(2, list.size()); // value is a list of 2 two maps - innerMap = list.get(0); - assertEquals(50L, getResultKey(innerMap)); - assertEquals(100L, getResultValue(innerMap)); - innerMap = list.get(1); - assertEquals(200L, getResultKey(innerMap)); - assertEquals(400L, getResultValue(innerMap)); - - // Second Map entry - resultStruct = (Map>>) resultSet.get(1); - list = getResultKey(resultStruct); - assertEquals(1, list.size()); // key is a list of 1 two map - innerMap = list.get(0); - assertEquals(50, getResultKey(innerMap)); - assertEquals(100, getResultValue(innerMap)); - - list = getResultValue(resultStruct); - assertEquals(2, list.size()); // value is a list of two maps - innerMap = list.get(0); - assertEquals(75L, getResultKey(innerMap)); - assertEquals(175L, getResultValue(innerMap)); - innerMap = list.get(1); - assertEquals(150L, getResultKey(innerMap)); - assertEquals(250L, getResultValue(innerMap)); - - // Get mapVector element at index 1 - result = mapVector.getObject(1); - resultSet = (ArrayList) result; - - // 4 map entries at index 1 - assertEquals(4, resultSet.size()); - - // First Map entry - resultStruct = (Map>>) resultSet.get(0); - list = getResultKey(resultStruct); - assertEquals(1, list.size()); // key is a list of 1 map - innerMap = list.get(0); - assertEquals(1, getResultKey(innerMap)); - assertEquals(2, getResultValue(innerMap)); - - list = getResultValue(resultStruct); - assertEquals(1, list.size()); // value is a list of maps with 1 element - innerMap = list.get(0); - assertEquals(10L, 
getResultKey(innerMap)); - assertEquals(20L, getResultValue(innerMap)); - - // Second Map entry - resultStruct = (Map>>) resultSet.get(1); - list = getResultKey(resultStruct); - assertEquals(1, list.size()); // key is a list of 1 map - innerMap = list.get(0); - assertEquals(30, getResultKey(innerMap)); - assertEquals(40, getResultValue(innerMap)); - - list = getResultValue(resultStruct); - assertEquals(1, list.size()); // value is a list of maps with 1 element - innerMap = list.get(0); - assertEquals(15L, getResultKey(innerMap)); - assertEquals(20L, getResultValue(innerMap)); - - // Third Map entry - resultStruct = (Map>>) resultSet.get(2); - list = getResultKey(resultStruct); - assertEquals(2, list.size()); // key is a list of two maps - innerMap = list.get(0); - assertEquals(50, getResultKey(innerMap)); - assertEquals(60, getResultValue(innerMap)); - innerMap = list.get(1); - assertEquals(70, getResultKey(innerMap)); - assertNull(innerMap.get(MapVector.VALUE_NAME)); - - list = getResultValue(resultStruct); - assertEquals(2, list.size()); // value is a list of maps with 2 elements - innerMap = list.get(0); - assertEquals(25L, getResultKey(innerMap)); - assertEquals(30L, getResultValue(innerMap)); - innerMap = list.get(1); - assertEquals(35L, getResultKey(innerMap)); - assertNull(innerMap.get(MapVector.VALUE_NAME)); - - // Fourth Map entry - resultStruct = (Map>>) resultSet.get(3); - list = getResultKey(resultStruct); - assertEquals(1, list.size()); // key is a list of two maps - innerMap = list.get(0); - assertEquals(5, getResultKey(innerMap)); - assertNull(innerMap.get(MapVector.VALUE_NAME)); - - assertNull(resultStruct.get(MapVector.VALUE_NAME)); - - /* check underlying bitVector */ - assertFalse(mapVector.isNull(0)); - assertFalse(mapVector.isNull(1)); - - /* check underlying offsets */ - final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer(); - - /* mapVector has 2 entries at index 0 and 4 entries at index 1 */ - assertEquals(0, offsetBuffer.getInt(0)); - 
assertEquals(2, offsetBuffer.getInt(MapVector.OFFSET_WIDTH)); - assertEquals(6, offsetBuffer.getInt(2 * MapVector.OFFSET_WIDTH)); - } - } - - private void writeEntry(MapWriter writer, long key, Long value) { - writer.startEntry(); - writer.key().bigInt().writeBigInt(key); - if (value != null) { - writer.value().bigInt().writeBigInt(value); - } - writer.endEntry(); - } - - private void writeEntry(MapWriter writer, int key, Integer value) { - writer.startEntry(); - writer.key().integer().writeInt(key); - if (value != null) { - writer.value().integer().writeInt(value); - } - writer.endEntry(); - } - - @Test - public void testClearAndReuse() { - try (final MapVector vector = MapVector.empty("map", allocator, false)) { - vector.allocateNew(); - UnionMapWriter mapWriter = vector.getWriter(); - - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(1); - mapWriter.value().integer().writeInt(11); - mapWriter.endEntry(); - mapWriter.endMap(); - - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(2); - mapWriter.value().integer().writeInt(22); - mapWriter.endEntry(); - mapWriter.endMap(); - - mapWriter.setValueCount(2); - - Object result = vector.getObject(0); - ArrayList resultSet = (ArrayList) result; - Map resultStruct = (Map) resultSet.get(0); - assertEquals(1L, getResultKey(resultStruct)); - assertEquals(11, getResultValue(resultStruct)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - resultStruct = (Map) resultSet.get(0); - assertEquals(2L, getResultKey(resultStruct)); - assertEquals(22, getResultValue(resultStruct)); - - // Clear and release the buffers to trigger a realloc when adding next value - vector.clear(); - mapWriter = new UnionMapWriter(vector); - - // The map vector should reuse a buffer when reallocating the offset buffer - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(3); - mapWriter.value().integer().writeInt(33); - 
mapWriter.endEntry(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(4); - mapWriter.value().integer().writeInt(44); - mapWriter.endEntry(); - mapWriter.endMap(); - - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(5); - mapWriter.value().integer().writeInt(55); - mapWriter.endEntry(); - mapWriter.endMap(); - - mapWriter.setValueCount(2); - - result = vector.getObject(0); - resultSet = (ArrayList) result; - resultStruct = (Map) resultSet.get(0); - assertEquals(3L, getResultKey(resultStruct)); - assertEquals(33, getResultValue(resultStruct)); - resultStruct = (Map) resultSet.get(1); - assertEquals(4L, getResultKey(resultStruct)); - assertEquals(44, getResultValue(resultStruct)); - - result = vector.getObject(1); - resultSet = (ArrayList) result; - resultStruct = (Map) resultSet.get(0); - assertEquals(5L, getResultKey(resultStruct)); - assertEquals(55, getResultValue(resultStruct)); - } - } - - @Test - public void testGetTransferPair() { - try (MapVector mapVector = MapVector.empty("mapVector", allocator, false)) { - - FieldType type = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - AddOrGetResult addResult = mapVector.addOrGetVector(type); - FieldType keyType = new FieldType(false, MinorType.BIGINT.getType(), null, null); - FieldType valueType = FieldType.nullable(MinorType.FLOAT8.getType()); - addResult.getVector().addOrGet(MapVector.KEY_NAME, keyType, BigIntVector.class); - addResult.getVector().addOrGet(MapVector.VALUE_NAME, valueType, Float8Vector.class); - mapVector.allocateNew(); - mapVector.setValueCount(0); - - assertEquals(-1, mapVector.getLastSet()); - TransferPair tp = mapVector.getTransferPair(mapVector.getName(), allocator, null); - tp.transfer(); - ValueVector vector = tp.getTo(); - assertSame(vector.getClass(), mapVector.getClass()); - vector.clear(); - } - } - - @Test - public void testGetTransferPairWithField() { - try (MapVector mapVector = MapVector.empty("mapVector", 
allocator, false)) { - - FieldType type = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - AddOrGetResult addResult = mapVector.addOrGetVector(type); - FieldType keyType = new FieldType(false, MinorType.BIGINT.getType(), null, null); - FieldType valueType = FieldType.nullable(MinorType.FLOAT8.getType()); - addResult.getVector().addOrGet(MapVector.KEY_NAME, keyType, BigIntVector.class); - addResult.getVector().addOrGet(MapVector.VALUE_NAME, valueType, Float8Vector.class); - mapVector.allocateNew(); - mapVector.setValueCount(0); - - assertEquals(-1, mapVector.getLastSet()); - TransferPair tp = mapVector.getTransferPair(mapVector.getField(), allocator); - tp.transfer(); - MapVector toVector = (MapVector) tp.getTo(); - assertSame(toVector.getField(), mapVector.getField()); - toVector.clear(); - } - } - - @Test - public void testGetTransferPairWithFieldAndCallBack() { - SchemaChangeCallBack callBack = new SchemaChangeCallBack(); - try (MapVector mapVector = MapVector.empty("mapVector", allocator, false)) { - - FieldType type = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - AddOrGetResult addResult = mapVector.addOrGetVector(type); - FieldType keyType = new FieldType(false, MinorType.BIGINT.getType(), null, null); - FieldType valueType = FieldType.nullable(MinorType.FLOAT8.getType()); - addResult.getVector().addOrGet(MapVector.KEY_NAME, keyType, BigIntVector.class); - addResult.getVector().addOrGet(MapVector.VALUE_NAME, valueType, Float8Vector.class); - mapVector.allocateNew(); - mapVector.setValueCount(0); - - assertEquals(-1, mapVector.getLastSet()); - TransferPair tp = mapVector.getTransferPair(mapVector.getField(), allocator, callBack); - tp.transfer(); - MapVector toVector = (MapVector) tp.getTo(); - assertSame(toVector.getField(), mapVector.getField()); - toVector.clear(); - } - } - - @Test - public void testMakeTransferPairPreserveNullability() { - Field intField = new Field("int", FieldType.notNullable(MinorType.INT.getType()), 
null); - List fields = Collections.singletonList(intField); - Field structField = - new Field("struct", FieldType.notNullable(ArrowType.Struct.INSTANCE), fields); - Field structField2 = - new Field("struct", FieldType.notNullable(ArrowType.Struct.INSTANCE), fields); - FieldVector vec = structField.createVector(allocator); - - TransferPair tp = vec.getTransferPair(structField2, allocator); - tp.transfer(); - - FieldVector res = (FieldVector) tp.getTo(); - - assertEquals(intField, vec.getField().getChildren().get(0)); - assertEquals(intField, res.getField().getChildren().get(0)); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java b/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java deleted file mode 100644 index 1715aa4344ceb..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.lang.reflect.Field; -import java.net.URLClassLoader; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link NullCheckingForGet}. */ -public class TestNullCheckingForGet { - - /** - * Get a copy of the current class loader. - * - * @return the newly created class loader. - */ - private ClassLoader copyClassLoader() { - ClassLoader curClassLoader = this.getClass().getClassLoader(); - if (curClassLoader instanceof URLClassLoader) { - // for Java 1.8 - return new URLClassLoader(((URLClassLoader) curClassLoader).getURLs(), null); - } - - // for Java 1.9 and Java 11. - return null; - } - - /** - * Get the value of flag {@link NullCheckingForGet#NULL_CHECKING_ENABLED}. - * - * @param classLoader the class loader from which to get the flag value. - * @return value of the flag. - */ - private boolean getFlagValue(ClassLoader classLoader) throws Exception { - Class clazz = classLoader.loadClass("org.apache.arrow.vector.NullCheckingForGet"); - Field field = clazz.getField("NULL_CHECKING_ENABLED"); - return (Boolean) field.get(null); - } - - /** - * Ensure the flag for null checking is enabled by default. This will protect users from JVM - * crashes. - */ - @Test - public void testDefaultValue() throws Exception { - ClassLoader classLoader = copyClassLoader(); - if (classLoader != null) { - boolean nullCheckingEnabled = getFlagValue(classLoader); - assertTrue(nullCheckingEnabled); - } - } - - /** - * Test setting the null checking flag by the system property. - * - * @throws Exception if loading class {@link NullCheckingForGet#NULL_CHECKING_ENABLED} fails. 
- */ - @Test - public void testEnableSysProperty() throws Exception { - String sysProperty = System.getProperty("arrow.enable_null_check_for_get"); - System.setProperty("arrow.enable_null_check_for_get", "false"); - - ClassLoader classLoader = copyClassLoader(); - if (classLoader != null) { - boolean nullCheckingEnabled = getFlagValue(classLoader); - assertFalse(nullCheckingEnabled); - } - - // restore system property - if (sysProperty != null) { - System.setProperty("arrow.enable_null_check_for_get", sysProperty); - } else { - System.clearProperty("arrow.enable_null_check_for_get"); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestOpaqueExtensionType.java b/java/vector/src/test/java/org/apache/arrow/vector/TestOpaqueExtensionType.java deleted file mode 100644 index 9fd9b580b361f..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestOpaqueExtensionType.java +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertInstanceOf; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Collections; -import java.util.stream.Stream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.extension.InvalidExtensionMetadataException; -import org.apache.arrow.vector.extension.OpaqueType; -import org.apache.arrow.vector.extension.OpaqueVector; -import org.apache.arrow.vector.ipc.ArrowStreamReader; -import org.apache.arrow.vector.ipc.ArrowStreamWriter; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.MethodSource; -import org.junit.jupiter.params.provider.ValueSource; - -class TestOpaqueExtensionType { - BufferAllocator allocator; - - @BeforeEach - void beforeEach() { - allocator = new RootAllocator(); - } - - @AfterEach - void afterEach() { - 
allocator.close(); - } - - @ParameterizedTest - @ValueSource( - strings = { - "{\"type_name\": \"\", \"vendor_name\": \"\"}", - "{\"type_name\": \"\", \"vendor_name\": \"\", \"extra_field\": 42}", - "{\"type_name\": \"array\", \"vendor_name\": \"postgresql\"}", - "{\"type_name\": \"foo.bar\", \"vendor_name\": \"postgresql\"}", - }) - void testDeserializeValid(String serialized) { - ArrowType storageType = Types.MinorType.NULL.getType(); - OpaqueType type = new OpaqueType(storageType, "", ""); - - assertDoesNotThrow(() -> type.deserialize(storageType, serialized)); - } - - @ParameterizedTest - @ValueSource( - strings = { - "", - "{\"type_name\": \"\"}", - "{\"vendor_name\": \"\"}", - "{\"type_name\": null, \"vendor_name\": \"\"}", - "{\"type_name\": \"\", \"vendor_name\": null}", - "{\"type_name\": 42, \"vendor_name\": \"\"}", - "{\"type_name\": \"\", \"vendor_name\": 42}", - "{\"type_name\": \"\", \"vendor_name\": \"\"", - }) - void testDeserializeInvalid(String serialized) { - ArrowType storageType = Types.MinorType.NULL.getType(); - OpaqueType type = new OpaqueType(storageType, "", ""); - - assertThrows( - InvalidExtensionMetadataException.class, () -> type.deserialize(storageType, serialized)); - } - - @ParameterizedTest - @MethodSource("storageType") - void testRoundTrip(ArrowType storageType) { - OpaqueType type = new OpaqueType(storageType, "foo", "bar"); - assertEquals(storageType, type.storageType()); - assertEquals("foo", type.typeName()); - if (storageType.isComplex()) { - assertThrows( - UnsupportedOperationException.class, - () -> type.getNewVector("name", FieldType.nullable(type), allocator)); - } else { - assertDoesNotThrow(() -> type.getNewVector("name", FieldType.nullable(type), allocator)) - .close(); - } - - String serialized = assertDoesNotThrow(type::serialize); - OpaqueType holder = new OpaqueType(Types.MinorType.NULL.getType(), "", ""); - OpaqueType deserialized = (OpaqueType) holder.deserialize(storageType, serialized); - assertEquals(type, 
deserialized); - assertNotEquals(holder, deserialized); - } - - @ParameterizedTest - @MethodSource("storageType") - void testIpcRoundTrip(ArrowType storageType) { - OpaqueType.ensureRegistered(); - - OpaqueType type = new OpaqueType(storageType, "foo", "bar"); - Schema schema = new Schema(Collections.singletonList(Field.nullable("unknown", type))); - byte[] serialized = schema.serializeAsMessage(); - Schema deseralized = Schema.deserializeMessage(ByteBuffer.wrap(serialized)); - assertEquals(schema, deseralized); - } - - @Test - void testVectorType() throws IOException { - OpaqueType.ensureRegistered(); - - ArrowType storageType = Types.MinorType.VARBINARY.getType(); - OpaqueType type = new OpaqueType(storageType, "foo", "bar"); - try (FieldVector vector = type.getNewVector("field", FieldType.nullable(type), allocator)) { - OpaqueVector opaque = assertInstanceOf(OpaqueVector.class, vector); - assertEquals("field", opaque.getField().getName()); - assertEquals(type, opaque.getField().getType()); - - VarBinaryVector binary = - assertInstanceOf(VarBinaryVector.class, opaque.getUnderlyingVector()); - binary.setSafe(0, new byte[] {0, 1, 2, 3}); - binary.setNull(1); - opaque.setValueCount(2); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (VectorSchemaRoot root = new VectorSchemaRoot(Collections.singletonList(opaque)); - ArrowStreamWriter writer = - new ArrowStreamWriter(root, new DictionaryProvider.MapDictionaryProvider(), baos)) { - writer.start(); - writer.writeBatch(); - } - - try (ArrowStreamReader reader = - new ArrowStreamReader(new ByteArrayInputStream(baos.toByteArray()), allocator)) { - assertTrue(reader.loadNextBatch()); - VectorSchemaRoot root = reader.getVectorSchemaRoot(); - assertEquals(2, root.getRowCount()); - assertEquals(new Schema(Collections.singletonList(opaque.getField())), root.getSchema()); - - OpaqueVector actual = assertInstanceOf(OpaqueVector.class, root.getVector("field")); - assertFalse(actual.isNull(0)); - 
assertTrue(actual.isNull(1)); - assertArrayEquals(new byte[] {0, 1, 2, 3}, (byte[]) actual.getObject(0)); - assertNull(actual.getObject(1)); - } - } - } - - static Stream storageType() { - return Stream.of( - Types.MinorType.NULL.getType(), - Types.MinorType.BIGINT.getType(), - Types.MinorType.BIT.getType(), - Types.MinorType.VARBINARY.getType(), - Types.MinorType.VARCHAR.getType(), - Types.MinorType.LIST.getType(), - new ArrowType.Decimal(12, 4, 128)); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java deleted file mode 100644 index 19d6535e97158..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertThrows; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.OutOfMemoryException; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** This class tests cases where we expect to receive {@link OutOfMemoryException}. */ -public class TestOutOfMemoryForValueVector { - - private static final String EMPTY_SCHEMA_PATH = ""; - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(200); // Start with low memory limit - } - - @Test - public void variableWidthVectorAllocateNew() { - assertThrows( - OutOfMemoryException.class, - () -> { - try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(); - } - }); - } - - @Test - public void variableWidthVectorAllocateNewCustom() { - assertThrows( - OutOfMemoryException.class, - () -> { - try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(2342, 234); - } - }); - } - - @Test - public void fixedWidthVectorAllocateNew() { - assertThrows( - OutOfMemoryException.class, - () -> { - try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(); - } - }); - } - - @Test - public void fixedWidthVectorAllocateNewCustom() { - assertThrows( - OutOfMemoryException.class, - () -> { - try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(2342); - } - }); - } - - @AfterEach - public void terminate() { - allocator.close(); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java deleted file mode 100644 index 13f3a4a15fe97..0000000000000 --- 
a/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** - * This class tests that OversizedAllocationException occurs when a large memory is allocated for a - * vector. Typically, arrow allows the allocation of the size of at most Integer.MAX_VALUE, but this - * might cause OOM in tests. Thus, the max allocation size is limited to 1 KB in this class. Please - * see the surefire option in pom.xml. 
- */ -public class TestOversizedAllocationForValueVector { - - private static final String EMPTY_SCHEMA_PATH = ""; - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testFixedVectorReallocation() { - assertThrows( - OversizedAllocationException.class, - () -> { - final UInt4Vector vector = new UInt4Vector(EMPTY_SCHEMA_PATH, allocator); - // edge case 1: buffer size = max value capacity - final int expectedValueCapacity = - checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 4); - try { - vector.allocateNew(expectedValueCapacity); - assertEquals(expectedValueCapacity, vector.getValueCapacity()); - vector.reAlloc(); - assertEquals(expectedValueCapacity * 2, vector.getValueCapacity()); - } finally { - vector.close(); - } - - // common case: value count < max value capacity - try { - vector.allocateNew(checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 8)); - vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION - vector.reAlloc(); // this should throw an IOOB - } finally { - vector.close(); - } - }); - } - - @Test - public void testBitVectorReallocation() { - assertThrows( - OversizedAllocationException.class, - () -> { - final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - // edge case 1: buffer size ~ max value capacity - final int expectedValueCapacity = 1 << 29; - try { - vector.allocateNew(expectedValueCapacity); - assertEquals(expectedValueCapacity, vector.getValueCapacity()); - vector.reAlloc(); - assertEquals(expectedValueCapacity * 2, vector.getValueCapacity()); - } finally { - vector.close(); - } - - // common: value count < MAX_VALUE_ALLOCATION - try { - vector.allocateNew(expectedValueCapacity); - for (int i = 0; i < 3; i++) { - vector.reAlloc(); // expand buffer size - } - assertEquals(Integer.MAX_VALUE, 
vector.getValueCapacity()); - vector.reAlloc(); // buffer size ~ max allocation - assertEquals(Integer.MAX_VALUE, vector.getValueCapacity()); - vector.reAlloc(); // overflow - } finally { - vector.close(); - } - }); - } - - @Test - public void testVariableVectorReallocation() { - assertThrows( - OversizedAllocationException.class, - () -> { - final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator); - // edge case 1: value count = MAX_VALUE_ALLOCATION - final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE; - final int expectedOffsetSize = 10; - try { - vector.allocateNew(expectedAllocationInBytes, 10); - assertTrue(expectedOffsetSize <= vector.getValueCapacity()); - assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity()); - vector.reAlloc(); - assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity()); - assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity()); - } finally { - vector.close(); - } - - // common: value count < MAX_VALUE_ALLOCATION - try { - vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0); - vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION - vector.reAlloc(); // this tests if it overflows - } finally { - vector.close(); - } - }); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java b/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java deleted file mode 100644 index 37f59f82cbd2f..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotEquals; - -import java.time.Duration; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.Period; -import java.time.temporal.ChronoUnit; -import org.junit.jupiter.api.Test; - -public class TestPeriodDuration { - - @Test - public void testBasics() { - PeriodDuration pd1 = new PeriodDuration(Period.of(1, 2, 3), Duration.ofNanos(123)); - PeriodDuration pdEq1 = new PeriodDuration(Period.of(1, 2, 3), Duration.ofNanos(123)); - PeriodDuration pd2 = new PeriodDuration(Period.of(1, 2, 3), Duration.ofNanos(12)); - PeriodDuration pd3 = new PeriodDuration(Period.of(-1, -2, -3), Duration.ofNanos(-123)); - - assertEquals(pd1, pdEq1); - assertEquals(pd1.hashCode(), pdEq1.hashCode()); - - assertNotEquals(pd1, pd2); - assertNotEquals(pd1.hashCode(), pd2.hashCode()); - assertNotEquals(pd1, pd3); - assertNotEquals(pd1.hashCode(), pd3.hashCode()); - } - - @Test - public void testToISO8601IntervalString() { - assertEquals("P0D", new PeriodDuration(Period.ZERO, Duration.ZERO).toISO8601IntervalString()); - assertEquals( - "P1Y2M3D", new PeriodDuration(Period.of(1, 2, 3), Duration.ZERO).toISO8601IntervalString()); - assertEquals( - "PT0.000000123S", - new PeriodDuration(Period.ZERO, 
Duration.ofNanos(123)).toISO8601IntervalString()); - assertEquals( - "PT1.000000123S", - new PeriodDuration(Period.ZERO, Duration.ofSeconds(1).withNanos(123)) - .toISO8601IntervalString()); - assertEquals( - "PT1H1.000000123S", - new PeriodDuration(Period.ZERO, Duration.ofSeconds(3601).withNanos(123)) - .toISO8601IntervalString()); - assertEquals( - "PT24H1M1.000000123S", - new PeriodDuration(Period.ZERO, Duration.ofSeconds(86461).withNanos(123)) - .toISO8601IntervalString()); - assertEquals( - "P1Y2M3DT24H1M1.000000123S", - new PeriodDuration(Period.of(1, 2, 3), Duration.ofSeconds(86461).withNanos(123)) - .toISO8601IntervalString()); - - assertEquals( - "P-1Y-2M-3D", - new PeriodDuration(Period.of(-1, -2, -3), Duration.ZERO).toISO8601IntervalString()); - assertEquals( - "PT-0.000000123S", - new PeriodDuration(Period.ZERO, Duration.ofNanos(-123)).toISO8601IntervalString()); - assertEquals( - "PT-24H-1M-0.999999877S", - new PeriodDuration(Period.ZERO, Duration.ofSeconds(-86461).withNanos(123)) - .toISO8601IntervalString()); - assertEquals( - "P-1Y-2M-3DT-0.999999877S", - new PeriodDuration(Period.of(-1, -2, -3), Duration.ofSeconds(-1).withNanos(123)) - .toISO8601IntervalString()); - } - - @Test - public void testTemporalAccessor() { - LocalDate date = LocalDate.of(2024, 1, 2); - PeriodDuration pd1 = new PeriodDuration(Period.ofYears(1), Duration.ZERO); - assertEquals(LocalDate.of(2025, 1, 2), pd1.addTo(date)); - - LocalDateTime dateTime = LocalDateTime.of(2024, 1, 2, 3, 4); - PeriodDuration pd2 = new PeriodDuration(Period.ZERO, Duration.ofMinutes(1)); - assertEquals(LocalDateTime.of(2024, 1, 2, 3, 3), pd2.subtractFrom(dateTime)); - - PeriodDuration pd3 = - new PeriodDuration(Period.of(1, 2, 3), Duration.ofSeconds(86461).withNanos(123)); - assertEquals(pd3.get(ChronoUnit.YEARS), 1); - assertEquals(pd3.get(ChronoUnit.MONTHS), 2); - assertEquals(pd3.get(ChronoUnit.DAYS), 3); - assertEquals(pd3.get(ChronoUnit.SECONDS), 86461); - assertEquals(pd3.get(ChronoUnit.NANOS), 
123); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestRunEndEncodedVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestRunEndEncodedVector.java deleted file mode 100644 index adf51c07301f3..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestRunEndEncodedVector.java +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.List; -import java.util.function.Function; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.compare.Range; -import org.apache.arrow.vector.compare.RangeEqualsVisitor; -import org.apache.arrow.vector.complex.RunEndEncodedVector; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType.RunEndEncoded; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestRunEndEncodedVector { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testInitializeChildrenFromFields() { - final FieldType valueType = FieldType.notNullable(Types.MinorType.BIGINT.getType()); - final FieldType runEndType = FieldType.notNullable(Types.MinorType.INT.getType()); - final Field valueField = new Field("value", valueType, null); - final Field runEndField = new Field("ree", runEndType, null); - - try (RunEndEncodedVector reeVector = RunEndEncodedVector.empty("empty", allocator)) { - reeVector.initializeChildrenFromFields(List.of(runEndField, valueField)); - reeVector.validate(); - } - } - - /** Create REE vector with constant value. 
*/ - @Test - public void testConstantValueVector() { - final Field runEndEncodedField = createBigIntRunEndEncodedField("constant"); - int logicalValueCount = 100; - - // constant vector - try (RunEndEncodedVector reeVector = - new RunEndEncodedVector(runEndEncodedField, allocator, null)) { - Long value = 65536L; - setConstantVector(reeVector, value, logicalValueCount); - assertEquals(logicalValueCount, reeVector.getValueCount()); - for (int i = 0; i < logicalValueCount; i++) { - assertEquals(value, reeVector.getObject(i)); - } - } - - // constant null vector - try (RunEndEncodedVector reeVector = - new RunEndEncodedVector(runEndEncodedField, allocator, null)) { - setConstantVector(reeVector, null, logicalValueCount); - assertEquals(logicalValueCount, reeVector.getValueCount()); - // Null count is always 0 for run-end encoded array - assertEquals(0, reeVector.getNullCount()); - for (int i = 0; i < logicalValueCount; i++) { - assertTrue(reeVector.isNull(i)); - assertNull(reeVector.getObject(i)); - } - } - } - - @Test - public void testBasicRunEndEncodedVector() { - try (RunEndEncodedVector reeVector = - new RunEndEncodedVector(createBigIntRunEndEncodedField("basic"), allocator, null)) { - - // Create REE vector representing: - // [null, 2, 2, null, null, null, 4, 4, 4, 4, null, null, null, null, null]. - int runCount = 5; - final int logicalValueCount = - setBasicVector(reeVector, runCount, i -> i % 2 == 0 ? 
null : i + 1, i -> i + 1); - - assertEquals(15, reeVector.getValueCount()); - checkBasic(runCount, reeVector); - // test index out of bound - assertThrows(IndexOutOfBoundsException.class, () -> reeVector.getObject(-1)); - assertThrows(IndexOutOfBoundsException.class, () -> reeVector.getObject(logicalValueCount)); - } - } - - private static void checkBasic(int runCount, RunEndEncodedVector reeVector) { - int index = 0; - for (int run = 0; run < runCount; run++) { - long expectedRunValue = (long) run + 1; - for (int j = 0; j <= run; j++) { - if (run % 2 == 0) { - assertNull(reeVector.getObject(index)); - } else { - assertEquals(expectedRunValue, reeVector.getObject(index)); - } - index++; - } - } - } - - @Test - public void testRangeCompare() { - // test compare same constant vector - RunEndEncodedVector constantVector = - new RunEndEncodedVector(createBigIntRunEndEncodedField("constant"), allocator, null); - int logicalValueCount = 15; - - setConstantVector(constantVector, 1L, logicalValueCount); - - assertTrue( - constantVector.accept( - new RangeEqualsVisitor(constantVector, constantVector), - new Range(0, 0, logicalValueCount))); - assertTrue( - constantVector.accept( - new RangeEqualsVisitor(constantVector, constantVector), new Range(1, 1, 14))); - assertTrue( - constantVector.accept( - new RangeEqualsVisitor(constantVector, constantVector), new Range(1, 2, 13))); - assertFalse( - constantVector.accept( - new RangeEqualsVisitor(constantVector, constantVector), new Range(1, 10, 10))); - assertFalse( - constantVector.accept( - new RangeEqualsVisitor(constantVector, constantVector), new Range(10, 1, 10))); - - // Create REE vector representing: [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5]. 
- RunEndEncodedVector reeVector = - new RunEndEncodedVector(createBigIntRunEndEncodedField("basic"), allocator, null); - setBasicVector(reeVector, 5, i -> i + 1, i -> i + 1); - - assertTrue( - reeVector.accept( - new RangeEqualsVisitor(reeVector, reeVector), new Range(0, 0, logicalValueCount))); - assertTrue( - reeVector.accept( - new RangeEqualsVisitor(reeVector, reeVector), new Range(2, 2, logicalValueCount - 2))); - assertFalse( - reeVector.accept( - new RangeEqualsVisitor(reeVector, reeVector), new Range(1, 2, logicalValueCount - 2))); - - assertFalse( - reeVector.accept( - new RangeEqualsVisitor(reeVector, constantVector), new Range(0, 0, logicalValueCount))); - - // Create REE vector representing: [2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5]. - RunEndEncodedVector reeVector2 = - new RunEndEncodedVector(createBigIntRunEndEncodedField("basic"), allocator, null); - setBasicVector(reeVector2, 4, i -> i + 2, i -> i + 2); - - assertTrue( - reeVector.accept( - new RangeEqualsVisitor(reeVector, reeVector2), new Range(1, 0, logicalValueCount - 1))); - - constantVector.close(); - reeVector.close(); - reeVector2.close(); - } - - private static Field createBigIntRunEndEncodedField(String fieldName) { - final FieldType valueType = FieldType.notNullable(Types.MinorType.BIGINT.getType()); - final FieldType runEndType = FieldType.notNullable(Types.MinorType.INT.getType()); - - final Field valueField = new Field("value", valueType, null); - final Field runEndField = new Field("ree", runEndType, null); - - return new Field( - fieldName, FieldType.notNullable(RunEndEncoded.INSTANCE), List.of(runEndField, valueField)); - } - - private static void setConstantVector( - RunEndEncodedVector constantVector, Long value, long logicalValueCount) { - setBasicVector(constantVector, 1, i -> value, i -> logicalValueCount); - } - - private static int setBasicVector( - RunEndEncodedVector reeVector, - int runCount, - Function runValueSupplier, - Function runLengthSupplier) { - 
reeVector.allocateNew(); - reeVector.setInitialCapacity(runCount); - int end = 0; - for (int i = 0; i < runCount; i++) { - Long runValue = runValueSupplier.apply((long) i); - if (runValue == null) { - reeVector.getValuesVector().setNull(i); - } else { - ((BigIntVector) reeVector.getValuesVector()).set(i, runValue); - } - - Long runLength = runLengthSupplier.apply((long) i); - assert runLength != null && runLength > 0; - end += runLength; - ((IntVector) reeVector.getRunEndsVector()).set(i, end); - } - - final int logicalValueCount = end; - reeVector.getValuesVector().setValueCount(runCount); - reeVector.getRunEndsVector().setValueCount(runCount); - reeVector.setValueCount(logicalValueCount); - return logicalValueCount; - } - - @Test - public void testTransfer() { - // constant vector - try (RunEndEncodedVector reeVector = - new RunEndEncodedVector(createBigIntRunEndEncodedField("constant"), allocator, null)) { - Long value = 65536L; - int logicalValueCount = 100; - setConstantVector(reeVector, value, logicalValueCount); - assertEquals(logicalValueCount, reeVector.getValueCount()); - for (int i = 0; i < logicalValueCount; i++) { - assertEquals(value, reeVector.getObject(i)); - } - - TransferPair transferPair = reeVector.getTransferPair(allocator); - transferPair.transfer(); - assertEquals(0, reeVector.getValueCount()); - assertEquals(0, reeVector.getValuesVector().getValueCount()); - assertEquals(0, reeVector.getRunEndsVector().getValueCount()); - try (RunEndEncodedVector toVector = (RunEndEncodedVector) transferPair.getTo()) { - assertEquals(logicalValueCount, toVector.getValueCount()); - for (int i = 0; i < logicalValueCount; i++) { - assertEquals(value, toVector.getObject(i)); - } - } - } - - // basic run end encoded vector - try (RunEndEncodedVector reeVector = - new RunEndEncodedVector(createBigIntRunEndEncodedField("basic"), allocator, null)) { - // Create REE vector representing: - // [null, 2, 2, null, null, null, 4, 4, 4, 4, null, null, null, null, null]. 
- int runCount = 5; - final int logicalValueCount = - setBasicVector(reeVector, runCount, i -> i % 2 == 0 ? null : i + 1, i -> i + 1); - - assertEquals(15, reeVector.getValueCount()); - checkBasic(runCount, reeVector); - - TransferPair transferPair = reeVector.getTransferPair(allocator); - transferPair.transfer(); - assertEquals(0, reeVector.getValueCount()); - assertEquals(0, reeVector.getValuesVector().getValueCount()); - assertEquals(0, reeVector.getRunEndsVector().getValueCount()); - try (RunEndEncodedVector toVector = (RunEndEncodedVector) transferPair.getTo()) { - assertEquals(logicalValueCount, toVector.getValueCount()); - checkBasic(runCount, toVector); - } - } - } - - @Test - public void testSplitAndTransfer() { - // test compare same constant vector - try (RunEndEncodedVector constantVector = - new RunEndEncodedVector(createBigIntRunEndEncodedField("constant"), allocator, null)) { - int logicalValueCount = 15; - - setConstantVector(constantVector, 1L, logicalValueCount); - - try (RunEndEncodedVector toVector = RunEndEncodedVector.empty("constant", allocator)) { - TransferPair transferPair = constantVector.makeTransferPair(toVector); - int startIndex = 1; - int transferLength = 10; - transferPair.splitAndTransfer(startIndex, transferLength); - - toVector.validate(); - assertEquals(transferLength, toVector.getValueCount()); - assertTrue( - constantVector.accept( - new RangeEqualsVisitor(constantVector, toVector), new Range(1, 0, transferLength))); - } - } - - try (RunEndEncodedVector reeVector = - new RunEndEncodedVector(createBigIntRunEndEncodedField("ree"), allocator, null)) { - - setBasicVector(reeVector, 5, i -> i + 1, i -> i + 1); - - int[][] transferConfigs = {{0, 0}, {0, 1}, {0, 9}, {1, 0}, {1, 10}, {1, 14}}; - - try (RunEndEncodedVector toVector = RunEndEncodedVector.empty("ree", allocator)) { - TransferPair transferPair = reeVector.makeTransferPair(toVector); - for (final int[] transferConfig : transferConfigs) { - int startIndex = 
transferConfig[0]; - int transferLength = transferConfig[1]; - transferPair.splitAndTransfer(startIndex, transferLength); - - toVector.validate(); - assertEquals(transferLength, toVector.getValueCount()); - assertTrue( - reeVector.accept( - new RangeEqualsVisitor(reeVector, toVector), - new Range(startIndex, 0, transferLength))); - } - } - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java deleted file mode 100644 index adf4eba10cb39..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java +++ /dev/null @@ -1,980 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static java.util.Arrays.asList; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListViewVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.Struct; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestSplitAndTransfer { - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - private void populateVarcharVector( - final VarCharVector vector, int valueCount, String[] compareArray) { - for (int i = 0; i < valueCount; i += 3) { - final String s = String.format("%010d", i); - vector.set(i, s.getBytes(StandardCharsets.UTF_8)); - if (compareArray != null) { - compareArray[i] = s; - } - } - 
vector.setValueCount(valueCount); - } - - private void populateBaseVariableWidthViewVector( - final BaseVariableWidthViewVector vector, int valueCount, String[] compareArray) { - for (int i = 0; i < valueCount; i += 3) { - final String s = String.format("%010d", i); - vector.set(i, s.getBytes(StandardCharsets.UTF_8)); - if (compareArray != null) { - compareArray[i] = s; - } - } - vector.setValueCount(valueCount); - } - - private void populateIntVector(final IntVector vector, int valueCount) { - for (int i = 0; i < valueCount; i++) { - vector.set(i, i); - } - vector.setValueCount(valueCount); - } - - private void populateDenseUnionVector(final DenseUnionVector vector, int valueCount) { - VarCharVector varCharVector = - vector.addOrGet("varchar", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); - BigIntVector intVector = - vector.addOrGet("int", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - - for (int i = 0; i < valueCount; i++) { - vector.setTypeId(i, (byte) (i % 2)); - if (i % 2 == 0) { - final String s = String.format("%010d", i); - varCharVector.setSafe(i / 2, s.getBytes(StandardCharsets.UTF_8)); - } else { - intVector.setSafe(i / 2, i); - } - } - vector.setValueCount(valueCount); - } - - @Test - public void testWithEmptyVector() { - // MapVector use TransferImpl from ListVector - ListVector listVector = ListVector.empty("", allocator); - TransferPair transferPair = listVector.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 0); - assertEquals(0, transferPair.getTo().getValueCount()); - // BaseFixedWidthVector - IntVector intVector = new IntVector("", allocator); - transferPair = intVector.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 0); - assertEquals(0, transferPair.getTo().getValueCount()); - // BaseVariableWidthVector - VarCharVector varCharVector = new VarCharVector("", allocator); - transferPair = varCharVector.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 0); - 
assertEquals(0, transferPair.getTo().getValueCount()); - // BaseVariableWidthViewVector: ViewVarCharVector - ViewVarCharVector viewVarCharVector = new ViewVarCharVector("", allocator); - transferPair = viewVarCharVector.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 0); - assertEquals(0, transferPair.getTo().getValueCount()); - // BaseVariableWidthVector: ViewVarBinaryVector - ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("", allocator); - transferPair = viewVarBinaryVector.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 0); - assertEquals(0, transferPair.getTo().getValueCount()); - // BaseLargeVariableWidthVector - LargeVarCharVector largeVarCharVector = new LargeVarCharVector("", allocator); - transferPair = largeVarCharVector.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 0); - assertEquals(0, transferPair.getTo().getValueCount()); - // StructVector - StructVector structVector = StructVector.empty("", allocator); - transferPair = structVector.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 0); - assertEquals(0, transferPair.getTo().getValueCount()); - // FixedSizeListVector - FixedSizeListVector fixedSizeListVector = FixedSizeListVector.empty("", 0, allocator); - transferPair = fixedSizeListVector.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 0); - assertEquals(0, transferPair.getTo().getValueCount()); - // FixedSizeBinaryVector - FixedSizeBinaryVector fixedSizeBinaryVector = new FixedSizeBinaryVector("", allocator, 4); - transferPair = fixedSizeBinaryVector.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 0); - assertEquals(0, transferPair.getTo().getValueCount()); - // UnionVector - UnionVector unionVector = UnionVector.empty("", allocator); - transferPair = unionVector.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 0); - assertEquals(0, transferPair.getTo().getValueCount()); - // DenseUnionVector - DenseUnionVector duv = 
DenseUnionVector.empty("", allocator); - transferPair = duv.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 0); - assertEquals(0, transferPair.getTo().getValueCount()); - - // non empty from vector - - // BaseFixedWidthVector - IntVector fromIntVector = new IntVector("", allocator); - fromIntVector.allocateNew(100); - populateIntVector(fromIntVector, 100); - transferPair = fromIntVector.getTransferPair(allocator); - IntVector toIntVector = (IntVector) transferPair.getTo(); - transferPair.splitAndTransfer(0, 0); - assertEquals(0, toIntVector.getValueCount()); - - transferPair.splitAndTransfer(50, 0); - assertEquals(0, toIntVector.getValueCount()); - - transferPair.splitAndTransfer(100, 0); - assertEquals(0, toIntVector.getValueCount()); - fromIntVector.clear(); - toIntVector.clear(); - - // DenseUnionVector - DenseUnionVector fromDuv = DenseUnionVector.empty("", allocator); - populateDenseUnionVector(fromDuv, 100); - transferPair = fromDuv.getTransferPair(allocator); - DenseUnionVector toDUV = (DenseUnionVector) transferPair.getTo(); - transferPair.splitAndTransfer(0, 0); - assertEquals(0, toDUV.getValueCount()); - - transferPair.splitAndTransfer(50, 0); - assertEquals(0, toDUV.getValueCount()); - - transferPair.splitAndTransfer(100, 0); - assertEquals(0, toDUV.getValueCount()); - fromDuv.clear(); - toDUV.clear(); - } - - @Test - public void testWithNullVector() { - int valueCount = 123; - int startIndex = 10; - NullVector fromNullVector = new NullVector("nullVector"); - fromNullVector.setValueCount(valueCount); - TransferPair transferPair = fromNullVector.getTransferPair(fromNullVector.getAllocator()); - transferPair.splitAndTransfer(startIndex, valueCount - startIndex); - NullVector toNullVector = (NullVector) transferPair.getTo(); - - assertEquals(valueCount - startIndex, toNullVector.getValueCount()); - // no allocations to clear for NullVector - } - - @Test - public void testWithZeroVector() { - ZeroVector fromZeroVector = new 
ZeroVector("zeroVector"); - TransferPair transferPair = fromZeroVector.getTransferPair(fromZeroVector.getAllocator()); - transferPair.splitAndTransfer(0, 0); - ZeroVector toZeroVector = (ZeroVector) transferPair.getTo(); - - assertEquals(0, toZeroVector.getValueCount()); - // no allocations to clear for ZeroVector - } - - @Test - public void testListVectorWithEmptyMapVector() { - // List not null>> - int valueCount = 1; - List children = new ArrayList<>(); - children.add(new Field("key", FieldType.notNullable(new ArrowType.Utf8()), null)); - children.add(new Field("value", FieldType.nullable(new ArrowType.Utf8()), null)); - Field structField = - new Field("entries", FieldType.notNullable(ArrowType.Struct.INSTANCE), children); - - Field mapField = - new Field("element", FieldType.notNullable(new ArrowType.Map(false)), asList(structField)); - - Field listField = new Field("list", FieldType.nullable(new ArrowType.List()), asList(mapField)); - - ListVector fromListVector = (ListVector) listField.createVector(allocator); - fromListVector.allocateNew(); - fromListVector.setValueCount(valueCount); - - // child vector is empty - MapVector dataVector = (MapVector) fromListVector.getDataVector(); - dataVector.allocateNew(); - // unset capacity to mimic observed failure mode - dataVector.getOffsetBuffer().capacity(0); - - TransferPair transferPair = fromListVector.getTransferPair(fromListVector.getAllocator()); - transferPair.splitAndTransfer(0, valueCount); - ListVector toListVector = (ListVector) transferPair.getTo(); - - assertEquals(valueCount, toListVector.getValueCount()); - fromListVector.clear(); - toListVector.clear(); - } - - @Test /* VarCharVector */ - public void test() throws Exception { - try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { - varCharVector.allocateNew(10000, 1000); - - final int valueCount = 500; - final String[] compareArray = new String[valueCount]; - - populateVarcharVector(varCharVector, valueCount, 
compareArray); - - final TransferPair tp = varCharVector.getTransferPair(allocator); - final VarCharVector newVarCharVector = (VarCharVector) tp.getTo(); - final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}}; - - for (final int[] startLength : startLengths) { - final int start = startLength[0]; - final int length = startLength[1]; - tp.splitAndTransfer(start, length); - for (int i = 0; i < length; i++) { - final boolean expectedSet = ((start + i) % 3) == 0; - if (expectedSet) { - final byte[] expectedValue = compareArray[start + i].getBytes(StandardCharsets.UTF_8); - assertFalse(newVarCharVector.isNull(i)); - assertArrayEquals(expectedValue, newVarCharVector.get(i)); - } else { - assertTrue(newVarCharVector.isNull(i)); - } - } - newVarCharVector.clear(); - } - } - } - - private void testView(BaseVariableWidthViewVector vector) { - vector.allocateNew(10000, 1000); - final int valueCount = 500; - final String[] compareArray = new String[valueCount]; - - populateBaseVariableWidthViewVector(vector, valueCount, compareArray); - - final TransferPair tp = vector.getTransferPair(allocator); - final BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo(); - ; - final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}}; - - for (final int[] startLength : startLengths) { - final int start = startLength[0]; - final int length = startLength[1]; - tp.splitAndTransfer(start, length); - for (int i = 0; i < length; i++) { - final boolean expectedSet = ((start + i) % 3) == 0; - if (expectedSet) { - final byte[] expectedValue = compareArray[start + i].getBytes(StandardCharsets.UTF_8); - assertFalse(newVector.isNull(i)); - assertArrayEquals(expectedValue, newVector.get(i)); - } else { - assertTrue(newVector.isNull(i)); - } - } - newVector.clear(); - } - } - - @Test - public void testUtf8View() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - testView(viewVarCharVector); - 
} - } - - @Test - public void testBinaryView() throws Exception { - try (final ViewVarBinaryVector viewVarBinaryVector = - new ViewVarBinaryVector("myvector", allocator)) { - testView(viewVarBinaryVector); - } - } - - @Test - public void testMemoryConstrainedTransfer() { - try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { - allocator.setLimit(32768); /* set limit of 32KB */ - - varCharVector.allocateNew(10000, 1000); - - final int valueCount = 1000; - - populateVarcharVector(varCharVector, valueCount, null); - - final TransferPair tp = varCharVector.getTransferPair(allocator); - final VarCharVector newVarCharVector = (VarCharVector) tp.getTo(); - final int[][] startLengths = {{0, 700}, {700, 299}}; - - for (final int[] startLength : startLengths) { - final int start = startLength[0]; - final int length = startLength[1]; - tp.splitAndTransfer(start, length); - newVarCharVector.clear(); - } - } - } - - private void testMemoryConstrainedTransferInViews(BaseVariableWidthViewVector vector) { - // Here we have the target vector being transferred with a long string - // hence, the data buffer will be allocated. 
- // The default data buffer allocation takes - // BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * - // BaseVariableWidthViewVector.ELEMENT_SIZE - // set limit = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * - // BaseVariableWidthViewVector.ELEMENT_SIZE - final int setLimit = - BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION - * BaseVariableWidthViewVector.ELEMENT_SIZE; - allocator.setLimit(setLimit); - - vector.allocateNew(16000, 1000); - - final int valueCount = 1000; - - populateBaseVariableWidthViewVector(vector, valueCount, null); - - final TransferPair tp = vector.getTransferPair(allocator); - final BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo(); - - final int[][] startLengths = {{0, 700}, {700, 299}}; - - for (final int[] startLength : startLengths) { - final int start = startLength[0]; - final int length = startLength[1]; - tp.splitAndTransfer(start, length); - newVector.clear(); - } - } - - @Test - public void testMemoryConstrainedTransferInUtf8Views() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - testMemoryConstrainedTransferInViews(viewVarCharVector); - } - } - - @Test - public void testMemoryConstrainedTransferInBinaryViews() { - try (final ViewVarBinaryVector viewVarBinaryVector = - new ViewVarBinaryVector("myvector", allocator)) { - testMemoryConstrainedTransferInViews(viewVarBinaryVector); - } - } - - @Test - public void testTransfer() { - try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { - varCharVector.allocateNew(10000, 1000); - - final int valueCount = 500; - final String[] compareArray = new String[valueCount]; - populateVarcharVector(varCharVector, valueCount, compareArray); - - final TransferPair tp = varCharVector.getTransferPair(allocator); - final VarCharVector newVarCharVector = (VarCharVector) tp.getTo(); - tp.transfer(); - - assertEquals(0, varCharVector.valueCount); - 
assertEquals(valueCount, newVarCharVector.valueCount); - - for (int i = 0; i < valueCount; i++) { - final boolean expectedSet = (i % 3) == 0; - if (expectedSet) { - final byte[] expectedValue = compareArray[i].getBytes(StandardCharsets.UTF_8); - assertFalse(newVarCharVector.isNull(i)); - assertArrayEquals(expectedValue, newVarCharVector.get(i)); - } else { - assertTrue(newVarCharVector.isNull(i)); - } - } - - newVarCharVector.clear(); - } - } - - private void testTransferInViews(BaseVariableWidthViewVector vector) { - vector.allocateNew(16000, 1000); - - final int valueCount = 500; - final String[] compareArray = new String[valueCount]; - populateBaseVariableWidthViewVector(vector, valueCount, compareArray); - - final TransferPair tp = vector.getTransferPair(allocator); - final BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo(); - tp.transfer(); - - assertEquals(0, vector.valueCount); - assertEquals(valueCount, newVector.valueCount); - - for (int i = 0; i < valueCount; i++) { - final boolean expectedSet = (i % 3) == 0; - if (expectedSet) { - final byte[] expectedValue = compareArray[i].getBytes(StandardCharsets.UTF_8); - assertFalse(newVector.isNull(i)); - assertArrayEquals(expectedValue, newVector.get(i)); - } else { - assertTrue(newVector.isNull(i)); - } - } - - newVector.clear(); - } - - @Test - public void testTransferInUtf8Views() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - testTransferInViews(viewVarCharVector); - } - } - - @Test - public void testTransferInBinaryViews() { - try (final ViewVarBinaryVector viewVarBinaryVector = - new ViewVarBinaryVector("myvector", allocator)) { - testTransferInViews(viewVarBinaryVector); - } - } - - @Test - public void testCopyValueSafe() { - try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); - final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) { - 
varCharVector.allocateNew(10000, 1000); - - final int valueCount = 500; - populateVarcharVector(varCharVector, valueCount, null); - - final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); - - // new vector memory is not pre-allocated, we expect copyValueSafe work fine. - for (int i = 0; i < valueCount; i++) { - tp.copyValueSafe(i, i); - } - newVarCharVector.setValueCount(valueCount); - - for (int i = 0; i < valueCount; i++) { - final boolean expectedSet = (i % 3) == 0; - if (expectedSet) { - assertFalse(varCharVector.isNull(i)); - assertFalse(newVarCharVector.isNull(i)); - assertArrayEquals(varCharVector.get(i), newVarCharVector.get(i)); - } else { - assertTrue(newVarCharVector.isNull(i)); - } - } - } - } - - @Test - public void testSplitAndTransferNon() { - try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { - - varCharVector.allocateNew(10000, 1000); - final int valueCount = 500; - populateVarcharVector(varCharVector, valueCount, null); - - final TransferPair tp = varCharVector.getTransferPair(allocator); - VarCharVector newVarCharVector = (VarCharVector) tp.getTo(); - - tp.splitAndTransfer(0, 0); - assertEquals(0, newVarCharVector.getValueCount()); - - newVarCharVector.clear(); - } - } - - private void testSplitAndTransferNonInViews(BaseVariableWidthViewVector vector) { - vector.allocateNew(16000, 1000); - final int valueCount = 500; - populateBaseVariableWidthViewVector(vector, valueCount, null); - - final TransferPair tp = vector.getTransferPair(allocator); - BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo(); - - tp.splitAndTransfer(0, 0); - assertEquals(0, newVector.getValueCount()); - - newVector.clear(); - } - - @Test - public void testSplitAndTransferNonInUtf8Views() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - testSplitAndTransferNonInViews(viewVarCharVector); - } - } - - @Test - public void 
testSplitAndTransferNonInBinaryViews() { - try (final ViewVarBinaryVector viewVarBinaryVector = - new ViewVarBinaryVector("myvector", allocator)) { - testSplitAndTransferNonInViews(viewVarBinaryVector); - } - } - - @Test - public void testSplitAndTransferAll() { - try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { - - varCharVector.allocateNew(10000, 1000); - final int valueCount = 500; - populateVarcharVector(varCharVector, valueCount, null); - - final TransferPair tp = varCharVector.getTransferPair(allocator); - VarCharVector newVarCharVector = (VarCharVector) tp.getTo(); - - tp.splitAndTransfer(0, valueCount); - assertEquals(valueCount, newVarCharVector.getValueCount()); - - newVarCharVector.clear(); - } - } - - private void testSplitAndTransferAllInViews(BaseVariableWidthViewVector vector) { - vector.allocateNew(16000, 1000); - final int valueCount = 500; - populateBaseVariableWidthViewVector(vector, valueCount, null); - - final TransferPair tp = vector.getTransferPair(allocator); - BaseVariableWidthViewVector newViewVarCharVector = (BaseVariableWidthViewVector) tp.getTo(); - - tp.splitAndTransfer(0, valueCount); - assertEquals(valueCount, newViewVarCharVector.getValueCount()); - - newViewVarCharVector.clear(); - } - - @Test - public void testSplitAndTransferAllInUtf8Views() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - testSplitAndTransferAllInViews(viewVarCharVector); - } - } - - @Test - public void testSplitAndTransferAllInBinaryViews() { - try (final ViewVarBinaryVector viewVarBinaryVector = - new ViewVarBinaryVector("myvector", allocator)) { - testSplitAndTransferAllInViews(viewVarBinaryVector); - } - } - - @Test - public void testInvalidStartIndex() { - try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); - final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) { - - varCharVector.allocateNew(10000, 1000); - 
final int valueCount = 500; - populateVarcharVector(varCharVector, valueCount, null); - - final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); - - IllegalArgumentException e = - assertThrows(IllegalArgumentException.class, () -> tp.splitAndTransfer(valueCount, 10)); - - assertEquals( - "Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage()); - - newVarCharVector.clear(); - } - } - - private void testInvalidStartIndexInViews( - BaseVariableWidthViewVector vector, BaseVariableWidthViewVector newVector) { - vector.allocateNew(16000, 1000); - final int valueCount = 500; - populateBaseVariableWidthViewVector(vector, valueCount, null); - - final TransferPair tp = vector.makeTransferPair(newVector); - - IllegalArgumentException e = - assertThrows(IllegalArgumentException.class, () -> tp.splitAndTransfer(valueCount, 10)); - - assertEquals( - "Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage()); - - newVector.clear(); - } - - @Test - public void testInvalidStartIndexInUtf8Views() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); - final ViewVarCharVector newViewVarCharVector = - new ViewVarCharVector("newvector", allocator)) { - testInvalidStartIndexInViews(viewVarCharVector, newViewVarCharVector); - } - } - - @Test - public void testInvalidStartIndexInBinaryViews() { - try (final ViewVarBinaryVector viewVarBinaryVector = - new ViewVarBinaryVector("myvector", allocator); - final ViewVarBinaryVector newViewVarBinaryVector = - new ViewVarBinaryVector("newvector", allocator)) { - testInvalidStartIndexInViews(viewVarBinaryVector, newViewVarBinaryVector); - } - } - - @Test - public void testInvalidLength() { - try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); - final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) { - - varCharVector.allocateNew(10000, 1000); - final int valueCount = 500; 
- populateVarcharVector(varCharVector, valueCount, null); - - final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); - - IllegalArgumentException e = - assertThrows( - IllegalArgumentException.class, () -> tp.splitAndTransfer(0, valueCount * 2)); - - assertEquals( - "Invalid parameters startIndex: 0, length: 1000 for valueCount: 500", e.getMessage()); - - newVarCharVector.clear(); - } - } - - private void testInvalidLengthInViews( - BaseVariableWidthViewVector vector, BaseVariableWidthViewVector newVector) { - vector.allocateNew(16000, 1000); - final int valueCount = 500; - populateBaseVariableWidthViewVector(vector, valueCount, null); - - final TransferPair tp = vector.makeTransferPair(newVector); - - IllegalArgumentException e = - assertThrows(IllegalArgumentException.class, () -> tp.splitAndTransfer(0, valueCount * 2)); - - assertEquals( - "Invalid parameters startIndex: 0, length: 1000 for valueCount: 500", e.getMessage()); - - newVector.clear(); - } - - @Test - public void testInvalidLengthInUtf8Views() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); - final ViewVarCharVector newViewVarCharVector = - new ViewVarCharVector("newvector", allocator)) { - testInvalidLengthInViews(viewVarCharVector, newViewVarCharVector); - } - } - - @Test - public void testInvalidLengthInBinaryViews() { - try (final ViewVarBinaryVector viewVarBinaryVector = - new ViewVarBinaryVector("myvector", allocator); - final ViewVarBinaryVector newViewVarBinaryVector = - new ViewVarBinaryVector("newvector", allocator)) { - testInvalidLengthInViews(viewVarBinaryVector, newViewVarBinaryVector); - } - } - - @Test - public void testZeroStartIndexAndLength() { - try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); - final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) { - - varCharVector.allocateNew(0, 0); - final int valueCount = 0; - 
populateVarcharVector(varCharVector, valueCount, null); - - final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); - - tp.splitAndTransfer(0, 0); - assertEquals(valueCount, newVarCharVector.getValueCount()); - - newVarCharVector.clear(); - } - } - - private void testZeroStartIndexAndLengthInViews( - BaseVariableWidthViewVector vector, BaseVariableWidthViewVector newVector) { - vector.allocateNew(0, 0); - final int valueCount = 0; - populateBaseVariableWidthViewVector(vector, valueCount, null); - - final TransferPair tp = vector.makeTransferPair(newVector); - - tp.splitAndTransfer(0, 0); - assertEquals(valueCount, newVector.getValueCount()); - - newVector.clear(); - } - - @Test - public void testZeroStartIndexAndLengthInUtf8Views() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); - final ViewVarCharVector newViewVarCharVector = - new ViewVarCharVector("newvector", allocator)) { - testZeroStartIndexAndLengthInViews(viewVarCharVector, newViewVarCharVector); - } - } - - @Test - public void testZeroStartIndexAndLengthInBinaryViews() { - try (final ViewVarBinaryVector viewVarBinaryVector = - new ViewVarBinaryVector("myvector", allocator); - final ViewVarBinaryVector newViewVarBinaryVector = - new ViewVarBinaryVector("newvector", allocator)) { - testZeroStartIndexAndLengthInViews(viewVarBinaryVector, newViewVarBinaryVector); - } - } - - @Test - public void testZeroLength() { - try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); - final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) { - - varCharVector.allocateNew(10000, 1000); - final int valueCount = 500; - populateVarcharVector(varCharVector, valueCount, null); - - final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); - - tp.splitAndTransfer(500, 0); - assertEquals(0, newVarCharVector.getValueCount()); - - newVarCharVector.clear(); - } - } - - private void 
testZeroLengthInViews( - BaseVariableWidthViewVector vector, BaseVariableWidthViewVector newVector) { - vector.allocateNew(16000, 1000); - final int valueCount = 500; - populateBaseVariableWidthViewVector(vector, valueCount, null); - - final TransferPair tp = vector.makeTransferPair(newVector); - - tp.splitAndTransfer(500, 0); - assertEquals(0, newVector.getValueCount()); - - newVector.clear(); - } - - @Test - public void testZeroLengthInUtf8Views() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); - final ViewVarCharVector newViewVarCharVector = - new ViewVarCharVector("newvector", allocator)) { - testZeroLengthInViews(viewVarCharVector, newViewVarCharVector); - } - } - - @Test - public void testZeroLengthInBinaryViews() { - try (final ViewVarBinaryVector viewVarBinaryVector = - new ViewVarBinaryVector("myvector", allocator); - final ViewVarBinaryVector newViewVarBinaryVector = - new ViewVarBinaryVector("newvector", allocator)) { - testZeroLengthInViews(viewVarBinaryVector, newViewVarBinaryVector); - } - } - - @Test - public void testUnionVectorZeroStartIndexAndLength() { - try (final UnionVector unionVector = UnionVector.empty("myvector", allocator); - final UnionVector newUnionVector = UnionVector.empty("newvector", allocator)) { - - unionVector.allocateNew(); - final int valueCount = 0; - unionVector.setValueCount(valueCount); - - final TransferPair tp = unionVector.makeTransferPair(newUnionVector); - - tp.splitAndTransfer(0, 0); - assertEquals(valueCount, newUnionVector.getValueCount()); - - newUnionVector.clear(); - } - } - - @Test - public void testFixedWidthVectorZeroStartIndexAndLength() { - try (final IntVector intVector = new IntVector("myvector", allocator); - final IntVector newIntVector = new IntVector("newvector", allocator)) { - - intVector.allocateNew(0); - final int valueCount = 0; - intVector.setValueCount(valueCount); - - final TransferPair tp = intVector.makeTransferPair(newIntVector); - - 
tp.splitAndTransfer(0, 0); - assertEquals(valueCount, newIntVector.getValueCount()); - - newIntVector.clear(); - } - } - - @Test - public void testBitVectorZeroStartIndexAndLength() { - try (final BitVector bitVector = new BitVector("myvector", allocator); - final BitVector newBitVector = new BitVector("newvector", allocator)) { - - bitVector.allocateNew(0); - final int valueCount = 0; - bitVector.setValueCount(valueCount); - - final TransferPair tp = bitVector.makeTransferPair(newBitVector); - - tp.splitAndTransfer(0, 0); - assertEquals(valueCount, newBitVector.getValueCount()); - - newBitVector.clear(); - } - } - - @Test - public void testFixedSizeListVectorZeroStartIndexAndLength() { - try (final FixedSizeListVector listVector = FixedSizeListVector.empty("list", 4, allocator); - final FixedSizeListVector newListVector = - FixedSizeListVector.empty("newList", 4, allocator)) { - - listVector.allocateNew(); - final int valueCount = 0; - listVector.setValueCount(valueCount); - - final TransferPair tp = listVector.makeTransferPair(newListVector); - - tp.splitAndTransfer(0, 0); - assertEquals(valueCount, newListVector.getValueCount()); - - newListVector.clear(); - } - } - - @Test - public void testListVectorZeroStartIndexAndLength() { - try (final ListVector listVector = ListVector.empty("list", allocator); - final ListVector newListVector = ListVector.empty("newList", allocator)) { - - listVector.allocateNew(); - final int valueCount = 0; - listVector.setValueCount(valueCount); - - final TransferPair tp = listVector.makeTransferPair(newListVector); - - tp.splitAndTransfer(0, 0); - assertEquals(valueCount, newListVector.getValueCount()); - - newListVector.clear(); - } - } - - @Test - public void testLargeListViewVectorZeroStartIndexAndLength() { - try (final LargeListViewVector listVector = - LargeListViewVector.empty("largelistview", allocator); - final LargeListViewVector newListVector = LargeListViewVector.empty("newList", allocator)) { - - 
listVector.allocateNew(); - final int valueCount = 0; - listVector.setValueCount(valueCount); - - final TransferPair tp = listVector.makeTransferPair(newListVector); - - tp.splitAndTransfer(0, 0); - assertEquals(valueCount, newListVector.getValueCount()); - - newListVector.clear(); - } - } - - @Test - public void testStructVectorZeroStartIndexAndLength() { - Map metadata = new HashMap<>(); - metadata.put("k1", "v1"); - FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata); - try (final StructVector structVector = new StructVector("structvec", allocator, type, null); - final StructVector newStructVector = - new StructVector("newStructvec", allocator, type, null)) { - - structVector.allocateNew(); - final int valueCount = 0; - structVector.setValueCount(valueCount); - - final TransferPair tp = structVector.makeTransferPair(newStructVector); - - tp.splitAndTransfer(0, 0); - assertEquals(valueCount, newStructVector.getValueCount()); - - newStructVector.clear(); - } - } - - @Test - public void testMapVectorZeroStartIndexAndLength() { - Map metadata = new HashMap<>(); - metadata.put("k1", "v1"); - FieldType type = new FieldType(true, new ArrowType.Map(false), null, metadata); - try (final MapVector mapVector = new MapVector("mapVec", allocator, type, null); - final MapVector newMapVector = new MapVector("newMapVec", allocator, type, null)) { - - mapVector.allocateNew(); - final int valueCount = 0; - mapVector.setValueCount(valueCount); - - final TransferPair tp = mapVector.makeTransferPair(newMapVector); - - tp.splitAndTransfer(0, 0); - assertEquals(valueCount, newMapVector.getValueCount()); - - newMapVector.clear(); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java deleted file mode 100644 index 4ef0fbe2d9932..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ /dev/null @@ -1,366 +0,0 @@ -/* 
- * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.AbstractStructVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.impl.NullableStructWriter; -import org.apache.arrow.vector.complex.writer.Float8Writer; -import org.apache.arrow.vector.complex.writer.IntWriter; -import org.apache.arrow.vector.holders.ComplexHolder; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType.Struct; -import org.apache.arrow.vector.types.pojo.Field; -import 
org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestStructVector { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testFieldMetadata() throws Exception { - Map metadata = new HashMap<>(); - metadata.put("k1", "v1"); - FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata); - try (StructVector vector = new StructVector("struct", allocator, type, null)) { - assertEquals(vector.getField().getMetadata(), type.getMetadata()); - } - } - - @Test - public void testMakeTransferPair() { - try (final StructVector s1 = StructVector.empty("s1", allocator); - final StructVector s2 = StructVector.empty("s2", allocator)) { - s1.addOrGet("struct_child", FieldType.nullable(MinorType.INT.getType()), IntVector.class); - s1.makeTransferPair(s2); - final FieldVector child = s1.getChild("struct_child"); - final FieldVector toChild = - s2.addOrGet("struct_child", child.getField().getFieldType(), child.getClass()); - assertEquals(0, toChild.getValueCapacity()); - assertEquals(0, toChild.getDataBuffer().capacity()); - assertEquals(0, toChild.getValidityBuffer().capacity()); - } - } - - @Test - public void testAllocateAfterReAlloc() throws Exception { - Map metadata = new HashMap<>(); - metadata.put("k1", "v1"); - FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata); - try (StructVector vector = new StructVector("struct", allocator, type, null)) { - MinorType childtype = MinorType.INT; - vector.addOrGet("intchild", FieldType.nullable(childtype.getType()), IntVector.class); - - /* - * Allocate the default size, and then, reAlloc. This should double the allocation. 
- */ - vector.allocateNewSafe(); // Initial allocation - vector.reAlloc(); // Double the allocation size of self, and all children. - long savedValidityBufferCapacity = vector.getValidityBuffer().capacity(); - int savedValueCapacity = vector.getValueCapacity(); - - /* - * Clear and allocate again. - */ - vector.clear(); - vector.allocateNewSafe(); - - /* - * Verify that the buffer sizes haven't changed. - */ - assertEquals(vector.getValidityBuffer().capacity(), savedValidityBufferCapacity); - assertEquals(vector.getValueCapacity(), savedValueCapacity); - } - } - - @Test - public void testReadNullValue() { - Map metadata = new HashMap<>(); - metadata.put("k1", "v1"); - FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata); - try (StructVector vector = new StructVector("struct", allocator, type, null)) { - MinorType childtype = MinorType.INT; - vector.addOrGet("intchild", FieldType.nullable(childtype.getType()), IntVector.class); - vector.setValueCount(2); - - IntVector intVector = (IntVector) vector.getChild("intchild"); - intVector.setSafe(0, 100); - vector.setIndexDefined(0); - intVector.setNull(1); - vector.setNull(1); - - ComplexHolder holder = new ComplexHolder(); - vector.get(0, holder); - assertNotEquals(0, holder.isSet); - assertNotNull(holder.reader); - - vector.get(1, holder); - assertEquals(0, holder.isSet); - assertNull(holder.reader); - } - } - - @Test - public void testGetPrimitiveVectors() { - FieldType type = new FieldType(true, Struct.INSTANCE, null, null); - try (StructVector vector = new StructVector("struct", allocator, type, null)) { - - // add list vector - vector.addOrGet("list", FieldType.nullable(MinorType.LIST.getType()), ListVector.class); - ListVector listVector = vector.addOrGetList("list"); - listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - - // add union vector - vector.addOrGet("union", FieldType.nullable(MinorType.UNION.getType()), UnionVector.class); - UnionVector unionVector = 
vector.addOrGetUnion("union"); - unionVector.addVector(new BigIntVector("bigInt", allocator)); - unionVector.addVector(new SmallIntVector("smallInt", allocator)); - - // add varchar vector - vector.addOrGet( - "varchar", FieldType.nullable(MinorType.VARCHAR.getType()), VarCharVector.class); - - List primitiveVectors = vector.getPrimitiveVectors(); - assertEquals(4, primitiveVectors.size()); - assertEquals(MinorType.INT, primitiveVectors.get(0).getMinorType()); - assertEquals(MinorType.BIGINT, primitiveVectors.get(1).getMinorType()); - assertEquals(MinorType.SMALLINT, primitiveVectors.get(2).getMinorType()); - assertEquals(MinorType.VARCHAR, primitiveVectors.get(3).getMinorType()); - } - } - - @Test - public void testAddOrGetComplexChildVectors() { - FieldType type = new FieldType(true, Struct.INSTANCE, null, null); - try (StructVector vector = new StructVector("struct", allocator, type, null)) { - - vector.addOrGetList("list"); - vector.addOrGetFixedSizeList("fixedList", 2); - vector.addOrGetUnion("union"); - vector.addOrGetStruct("struct"); - vector.addOrGetMap("map", true); - - List children = vector.getChildrenFromFields(); - assertEquals(5, children.size()); - assertEquals(MinorType.LIST, children.get(0).getMinorType()); - assertEquals(MinorType.FIXED_SIZE_LIST, children.get(1).getMinorType()); - assertEquals(MinorType.UNION, children.get(2).getMinorType()); - assertEquals(MinorType.STRUCT, children.get(3).getMinorType()); - assertEquals(MinorType.MAP, children.get(4).getMinorType()); - } - } - - @Test - public void testAddChildVectorsWithDuplicatedFieldNamesForConflictPolicyAppend() { - final FieldType type = new FieldType(true, Struct.INSTANCE, null, null); - try (StructVector vector = - new StructVector( - "struct", - allocator, - type, - null, - AbstractStructVector.ConflictPolicy.CONFLICT_APPEND, - true)) { - final List initFields = new ArrayList<>(); - - // Add a bit more fields to test against stability of the internal field - // ordering mechanism of 
StructVector - initFields.add(Field.nullable("varchar1", MinorType.VARCHAR.getType())); - initFields.add(Field.nullable("int1", MinorType.INT.getType())); - initFields.add(Field.nullable("varchar2", MinorType.VARCHAR.getType())); - initFields.add(Field.nullable("int2", MinorType.INT.getType())); - initFields.add(Field.nullable("varchar3", MinorType.VARCHAR.getType())); - initFields.add(Field.nullable("int3", MinorType.INT.getType())); - initFields.add(Field.nullable("uncertain-type", MinorType.INT.getType())); - - // To ensure duplicated field names don't mess up the original field order - // in the struct vector - initFields.add(Field.nullable("varchar1", MinorType.VARCHAR.getType())); - initFields.add(Field.nullable("varchar2", MinorType.VARCHAR.getType())); - initFields.add(Field.nullable("varchar3", MinorType.VARCHAR.getType())); - initFields.add(Field.nullable("uncertain-type", MinorType.VARCHAR.getType())); - - vector.initializeChildrenFromFields(initFields); - - List children = vector.getChildrenFromFields(); - assertEquals(11, children.size()); - assertEquals("varchar1", children.get(0).getName()); - assertEquals("int1", children.get(1).getName()); - assertEquals("varchar2", children.get(2).getName()); - assertEquals("int2", children.get(3).getName()); - assertEquals("varchar3", children.get(4).getName()); - assertEquals("int3", children.get(5).getName()); - assertEquals("uncertain-type", children.get(6).getName()); - assertEquals("varchar1", children.get(7).getName()); - assertEquals("varchar2", children.get(8).getName()); - assertEquals("varchar3", children.get(9).getName()); - assertEquals("uncertain-type", children.get(10).getName()); - assertEquals(MinorType.VARCHAR, children.get(0).getMinorType()); - assertEquals(MinorType.INT, children.get(1).getMinorType()); - assertEquals(MinorType.VARCHAR, children.get(2).getMinorType()); - assertEquals(MinorType.INT, children.get(3).getMinorType()); - assertEquals(MinorType.VARCHAR, 
children.get(4).getMinorType()); - assertEquals(MinorType.INT, children.get(5).getMinorType()); - assertEquals(MinorType.INT, children.get(6).getMinorType()); - assertEquals(MinorType.VARCHAR, children.get(7).getMinorType()); - assertEquals(MinorType.VARCHAR, children.get(8).getMinorType()); - assertEquals(MinorType.VARCHAR, children.get(9).getMinorType()); - assertEquals(MinorType.VARCHAR, children.get(10).getMinorType()); - } - } - - @Test - public void testAddChildVectorsWithDuplicatedFieldNamesForConflictPolicyReplace() { - final FieldType type = new FieldType(true, Struct.INSTANCE, null, null); - try (StructVector vector = - new StructVector( - "struct", - allocator, - type, - null, - AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE, - true)) { - final List initFields = new ArrayList<>(); - - // Add a bit more fields to test against stability of the internal field - // ordering mechanism of StructVector - initFields.add(Field.nullable("varchar1", MinorType.VARCHAR.getType())); - initFields.add(Field.nullable("int1", MinorType.INT.getType())); - initFields.add(Field.nullable("varchar2", MinorType.VARCHAR.getType())); - initFields.add(Field.nullable("int2", MinorType.INT.getType())); - initFields.add(Field.nullable("varchar3", MinorType.VARCHAR.getType())); - initFields.add(Field.nullable("int3", MinorType.INT.getType())); - initFields.add(Field.nullable("uncertain-type", MinorType.INT.getType())); - - // To ensure duplicated field names don't mess up the original field order - // in the struct vector - initFields.add(Field.nullable("varchar1", MinorType.VARCHAR.getType())); - initFields.add(Field.nullable("varchar2", MinorType.VARCHAR.getType())); - initFields.add(Field.nullable("varchar3", MinorType.VARCHAR.getType())); - initFields.add(Field.nullable("uncertain-type", MinorType.VARCHAR.getType())); - - vector.initializeChildrenFromFields(initFields); - - List children = vector.getChildrenFromFields(); - assertEquals(7, children.size()); - 
assertEquals("varchar1", children.get(0).getName()); - assertEquals("int1", children.get(1).getName()); - assertEquals("varchar2", children.get(2).getName()); - assertEquals("int2", children.get(3).getName()); - assertEquals("varchar3", children.get(4).getName()); - assertEquals("int3", children.get(5).getName()); - assertEquals("uncertain-type", children.get(6).getName()); - assertEquals(MinorType.VARCHAR, children.get(0).getMinorType()); - assertEquals(MinorType.INT, children.get(1).getMinorType()); - assertEquals(MinorType.VARCHAR, children.get(2).getMinorType()); - assertEquals(MinorType.INT, children.get(3).getMinorType()); - assertEquals(MinorType.VARCHAR, children.get(4).getMinorType()); - assertEquals(MinorType.INT, children.get(5).getMinorType()); - assertEquals(MinorType.VARCHAR, children.get(6).getMinorType()); - } - } - - @Test - public void testTypedGetters() { - try (final StructVector s1 = StructVector.empty("s1", allocator)) { - s1.addOrGet("struct_child", FieldType.nullable(MinorType.INT.getType()), IntVector.class); - assertEquals(IntVector.class, s1.getChild("struct_child", IntVector.class).getClass()); - assertEquals(IntVector.class, s1.getVectorById(0, IntVector.class).getClass()); - } - } - - @Test - public void testGetTransferPair() { - try (final StructVector fromVector = simpleStructVector("s1", allocator)) { - TransferPair tp = fromVector.getTransferPair(fromVector.getField(), allocator); - final StructVector toVector = (StructVector) tp.getTo(); - // Field inside a new vector created by reusing a field should be the same in memory as the - // original field. 
- assertSame(toVector.getField(), fromVector.getField()); - toVector.clear(); - } - } - - @Test - public void testGetTransferPairWithFieldAndCallBack() { - SchemaChangeCallBack callBack = new SchemaChangeCallBack(); - try (final StructVector fromVector = simpleStructVector("s1", allocator)) { - TransferPair tp = fromVector.getTransferPair(fromVector.getField(), allocator, callBack); - final StructVector toVector = (StructVector) tp.getTo(); - // Field inside a new vector created by reusing a field should be the same in memory as the - // original field. - assertSame(toVector.getField(), fromVector.getField()); - toVector.clear(); - } - } - - private StructVector simpleStructVector(String name, BufferAllocator allocator) { - final String INT_COL = "struct_int_child"; - final String FLT_COL = "struct_flt_child"; - StructVector structVector = StructVector.empty(name, allocator); - final int size = 6; // number of structs - - NullableStructWriter structWriter = structVector.getWriter(); - structVector.addOrGet( - INT_COL, FieldType.nullable(Types.MinorType.INT.getType()), IntVector.class); - structVector.addOrGet( - FLT_COL, FieldType.nullable(Types.MinorType.INT.getType()), IntVector.class); - structVector.allocateNew(); - IntWriter intWriter = structWriter.integer(INT_COL); - Float8Writer float8Writer = structWriter.float8(FLT_COL); - - for (int i = 0; i < size; i++) { - structWriter.setPosition(i); - structWriter.start(); - intWriter.writeInt(i); - float8Writer.writeFloat8(i * .1); - structWriter.end(); - } - - structWriter.setValueCount(size); - - return structVector; - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java deleted file mode 100644 index 75c6df1d6a496..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more 
- * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.util.Random; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.IntervalUnit; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestTypeLayout { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(Integer.MAX_VALUE); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testTypeBufferCount() { - ArrowType type = new ArrowType.Int(8, true); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Union(UnionMode.Sparse, new int[2]); - assertEquals( - TypeLayout.getTypeBufferCount(type), - 
TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Union(UnionMode.Dense, new int[1]); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Struct(); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.List(); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.FixedSizeList(5); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Map(false); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Decimal(10, 10, 128); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Decimal(10, 10, 256); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.FixedSizeBinary(5); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Bool(); - assertEquals( - TypeLayout.getTypeBufferCount(type), - 
TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Binary(); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Utf8(); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Null(); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Date(DateUnit.DAY); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Time(TimeUnit.MILLISECOND, 32); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Interval(IntervalUnit.DAY_TIME); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - - type = new ArrowType.Duration(TimeUnit.MILLISECOND); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - } - - private String generateRandomString(int length) { - Random random = new Random(); - StringBuilder sb = new StringBuilder(length); - for (int i = 0; i < length; i++) { - sb.append(random.nextInt(10)); // 0-9 - } - return sb.toString(); - } - - @Test - public void testTypeBufferCountInVectorsWithVariadicBuffers() { - // empty vector - try (ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - ArrowType type = viewVarCharVector.getMinorType().getType(); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - } - // vector with long strings - try (ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - viewVarCharVector.allocateNew(32, 6); - - 
viewVarCharVector.setSafe(0, generateRandomString(8).getBytes()); - viewVarCharVector.setSafe(1, generateRandomString(12).getBytes()); - viewVarCharVector.setSafe(2, generateRandomString(14).getBytes()); - viewVarCharVector.setSafe(3, generateRandomString(18).getBytes()); - viewVarCharVector.setSafe(4, generateRandomString(22).getBytes()); - viewVarCharVector.setSafe(5, generateRandomString(24).getBytes()); - - viewVarCharVector.setValueCount(6); - - ArrowType type = viewVarCharVector.getMinorType().getType(); - assertEquals( - TypeLayout.getTypeBufferCount(type), - TypeLayout.getTypeLayout(type).getBufferLayouts().size()); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java deleted file mode 100644 index 6c05073c16844..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java +++ /dev/null @@ -1,553 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.VectorWithOrdinal; -import org.apache.arrow.vector.complex.impl.UnionWriter; -import org.apache.arrow.vector.holders.NullableBitHolder; -import org.apache.arrow.vector.holders.NullableFloat4Holder; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.holders.NullableUInt4Holder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestUnionVector { - private static final String EMPTY_SCHEMA_PATH = ""; - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testUnionVector() throws Exception { - - final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); - uInt4Holder.value = 100; - uInt4Holder.isSet = 1; - - try (UnionVector unionVector = - new UnionVector( - 
EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) { - unionVector.allocateNew(); - - // write some data - unionVector.setType(0, MinorType.UINT4); - unionVector.setSafe(0, uInt4Holder); - unionVector.setType(2, MinorType.UINT4); - unionVector.setSafe(2, uInt4Holder); - unionVector.setValueCount(4); - - // check that what we wrote is correct - assertEquals(4, unionVector.getValueCount()); - - assertEquals(false, unionVector.isNull(0)); - assertEquals(100, unionVector.getObject(0)); - - assertNull(unionVector.getObject(1)); - - assertEquals(false, unionVector.isNull(2)); - assertEquals(100, unionVector.getObject(2)); - - assertNull(unionVector.getObject(3)); - } - } - - @Test - public void testUnionVectorMapValue() throws Exception { - try (UnionVector unionVector = - new UnionVector( - EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) { - unionVector.allocateNew(); - - UnionWriter writer = (UnionWriter) unionVector.getWriter(); - - // populate map vector with the following two records - // [ - // null, - // [[1: 2], [3: 4], [5: null]] - // ] - - writer.setPosition(0); - writer.writeNull(); - - writer.setPosition(1); - writer.startMap(); - - writer.startEntry(); - writer.key().integer().writeInt(1); - writer.value().integer().writeInt(2); - writer.endEntry(); - - writer.startEntry(); - writer.key().integer().writeInt(3); - writer.value().integer().writeInt(4); - writer.endEntry(); - - writer.startEntry(); - writer.key().integer().writeInt(5); - writer.endEntry(); - - writer.endMap(); - - unionVector.setValueCount(2); - - // check that what we wrote is correct - assertEquals(2, unionVector.getValueCount()); - - // first entry - assertNull(unionVector.getObject(0)); - - // second entry - List> resultList = (List>) unionVector.getObject(1); - assertEquals(3, resultList.size()); - - Map resultMap = resultList.get(0); - assertEquals(1, (int) resultMap.get(MapVector.KEY_NAME)); - assertEquals(2, (int) 
resultMap.get(MapVector.VALUE_NAME)); - - resultMap = resultList.get(1); - assertEquals(3, (int) resultMap.get(MapVector.KEY_NAME)); - assertEquals(4, (int) resultMap.get(MapVector.VALUE_NAME)); - - resultMap = resultList.get(2); - assertEquals(5, (int) resultMap.get(MapVector.KEY_NAME)); - assertNull(resultMap.get(MapVector.VALUE_NAME)); - } - } - - @Test - public void testTransfer() throws Exception { - try (UnionVector srcVector = - new UnionVector( - EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) { - srcVector.allocateNew(); - - // write some data - srcVector.setType(0, MinorType.INT); - srcVector.setSafe(0, newIntHolder(5)); - srcVector.setType(1, MinorType.BIT); - srcVector.setSafe(1, newBitHolder(false)); - srcVector.setType(3, MinorType.INT); - srcVector.setSafe(3, newIntHolder(10)); - srcVector.setType(5, MinorType.BIT); - srcVector.setSafe(5, newBitHolder(false)); - srcVector.setValueCount(6); - - try (UnionVector destVector = - new UnionVector( - EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) { - TransferPair pair = srcVector.makeTransferPair(destVector); - - // Creating the transfer should transfer the type of the field at least. 
- assertEquals(srcVector.getField(), destVector.getField()); - - // transfer - pair.transfer(); - - assertEquals(srcVector.getField(), destVector.getField()); - - // now check the values are transferred - assertEquals(6, destVector.getValueCount()); - - assertFalse(destVector.isNull(0)); - assertEquals(5, destVector.getObject(0)); - - assertFalse(destVector.isNull(1)); - assertEquals(false, destVector.getObject(1)); - - assertNull(destVector.getObject(2)); - - assertFalse(destVector.isNull(3)); - assertEquals(10, destVector.getObject(3)); - - assertNull(destVector.getObject(4)); - - assertFalse(destVector.isNull(5)); - assertEquals(false, destVector.getObject(5)); - } - } - } - - @Test - public void testSplitAndTransfer() throws Exception { - try (UnionVector sourceVector = - new UnionVector( - EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) { - - sourceVector.allocateNew(); - - /* populate the UnionVector */ - sourceVector.setType(0, MinorType.INT); - sourceVector.setSafe(0, newIntHolder(5)); - sourceVector.setType(1, MinorType.INT); - sourceVector.setSafe(1, newIntHolder(10)); - sourceVector.setType(2, MinorType.INT); - sourceVector.setSafe(2, newIntHolder(15)); - sourceVector.setType(3, MinorType.INT); - sourceVector.setSafe(3, newIntHolder(20)); - sourceVector.setType(4, MinorType.INT); - sourceVector.setSafe(4, newIntHolder(25)); - sourceVector.setType(5, MinorType.INT); - sourceVector.setSafe(5, newIntHolder(30)); - sourceVector.setType(6, MinorType.INT); - sourceVector.setSafe(6, newIntHolder(35)); - sourceVector.setType(7, MinorType.INT); - sourceVector.setSafe(7, newIntHolder(40)); - sourceVector.setType(8, MinorType.INT); - sourceVector.setSafe(8, newIntHolder(45)); - sourceVector.setType(9, MinorType.INT); - sourceVector.setSafe(9, newIntHolder(50)); - sourceVector.setValueCount(10); - - /* check the vector output */ - assertEquals(10, sourceVector.getValueCount()); - assertEquals(false, sourceVector.isNull(0)); - 
assertEquals(5, sourceVector.getObject(0)); - assertEquals(false, sourceVector.isNull(1)); - assertEquals(10, sourceVector.getObject(1)); - assertEquals(false, sourceVector.isNull(2)); - assertEquals(15, sourceVector.getObject(2)); - assertEquals(false, sourceVector.isNull(3)); - assertEquals(20, sourceVector.getObject(3)); - assertEquals(false, sourceVector.isNull(4)); - assertEquals(25, sourceVector.getObject(4)); - assertEquals(false, sourceVector.isNull(5)); - assertEquals(30, sourceVector.getObject(5)); - assertEquals(false, sourceVector.isNull(6)); - assertEquals(35, sourceVector.getObject(6)); - assertEquals(false, sourceVector.isNull(7)); - assertEquals(40, sourceVector.getObject(7)); - assertEquals(false, sourceVector.isNull(8)); - assertEquals(45, sourceVector.getObject(8)); - assertEquals(false, sourceVector.isNull(9)); - assertEquals(50, sourceVector.getObject(9)); - - try (UnionVector toVector = - new UnionVector( - EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) { - - final TransferPair transferPair = sourceVector.makeTransferPair(toVector); - - final int[][] transferLengths = {{0, 3}, {3, 1}, {4, 2}, {6, 1}, {7, 1}, {8, 2}}; - - for (final int[] transferLength : transferLengths) { - final int start = transferLength[0]; - final int length = transferLength[1]; - - transferPair.splitAndTransfer(start, length); - - /* check the toVector output after doing the splitAndTransfer */ - for (int i = 0; i < length; i++) { - assertEquals( - sourceVector.getObject(start + i), - toVector.getObject(i), - "Different data at indexes: " + (start + i) + "and " + i); - } - } - } - } - } - - @Test - public void testSplitAndTransferWithMixedVectors() throws Exception { - try (UnionVector sourceVector = - new UnionVector( - EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) { - - sourceVector.allocateNew(); - - /* populate the UnionVector */ - sourceVector.setType(0, MinorType.INT); - sourceVector.setSafe(0, 
newIntHolder(5)); - - sourceVector.setType(1, MinorType.FLOAT4); - sourceVector.setSafe(1, newFloat4Holder(5.5f)); - - sourceVector.setType(2, MinorType.INT); - sourceVector.setSafe(2, newIntHolder(10)); - - sourceVector.setType(3, MinorType.FLOAT4); - sourceVector.setSafe(3, newFloat4Holder(10.5f)); - - sourceVector.setType(4, MinorType.INT); - sourceVector.setSafe(4, newIntHolder(15)); - - sourceVector.setType(5, MinorType.FLOAT4); - sourceVector.setSafe(5, newFloat4Holder(15.5f)); - - sourceVector.setType(6, MinorType.INT); - sourceVector.setSafe(6, newIntHolder(20)); - - sourceVector.setType(7, MinorType.FLOAT4); - sourceVector.setSafe(7, newFloat4Holder(20.5f)); - - sourceVector.setType(8, MinorType.INT); - sourceVector.setSafe(8, newIntHolder(30)); - - sourceVector.setType(9, MinorType.FLOAT4); - sourceVector.setSafe(9, newFloat4Holder(30.5f)); - sourceVector.setValueCount(10); - - /* check the vector output */ - assertEquals(10, sourceVector.getValueCount()); - assertEquals(false, sourceVector.isNull(0)); - assertEquals(5, sourceVector.getObject(0)); - assertEquals(false, sourceVector.isNull(1)); - assertEquals(5.5f, sourceVector.getObject(1)); - assertEquals(false, sourceVector.isNull(2)); - assertEquals(10, sourceVector.getObject(2)); - assertEquals(false, sourceVector.isNull(3)); - assertEquals(10.5f, sourceVector.getObject(3)); - assertEquals(false, sourceVector.isNull(4)); - assertEquals(15, sourceVector.getObject(4)); - assertEquals(false, sourceVector.isNull(5)); - assertEquals(15.5f, sourceVector.getObject(5)); - assertEquals(false, sourceVector.isNull(6)); - assertEquals(20, sourceVector.getObject(6)); - assertEquals(false, sourceVector.isNull(7)); - assertEquals(20.5f, sourceVector.getObject(7)); - assertEquals(false, sourceVector.isNull(8)); - assertEquals(30, sourceVector.getObject(8)); - assertEquals(false, sourceVector.isNull(9)); - assertEquals(30.5f, sourceVector.getObject(9)); - - try (UnionVector toVector = - new UnionVector( - 
EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) { - - final TransferPair transferPair = sourceVector.makeTransferPair(toVector); - - final int[][] transferLengths = {{0, 2}, {2, 1}, {3, 2}, {5, 3}, {8, 2}}; - - for (final int[] transferLength : transferLengths) { - final int start = transferLength[0]; - final int length = transferLength[1]; - - transferPair.splitAndTransfer(start, length); - - /* check the toVector output after doing the splitAndTransfer */ - for (int i = 0; i < length; i++) { - assertEquals( - sourceVector.getObject(start + i), - toVector.getObject(i), - "Different values at index: " + i); - } - } - } - } - } - - @Test - public void testGetFieldTypeInfo() throws Exception { - Map metadata = new HashMap<>(); - metadata.put("key1", "value1"); - - int[] typeIds = new int[2]; - typeIds[0] = MinorType.INT.ordinal(); - typeIds[1] = MinorType.VARCHAR.ordinal(); - - List children = new ArrayList<>(); - children.add(new Field("int", FieldType.nullable(MinorType.INT.getType()), null)); - children.add(new Field("varchar", FieldType.nullable(MinorType.VARCHAR.getType()), null)); - - final FieldType fieldType = - new FieldType( - false, new ArrowType.Union(UnionMode.Sparse, typeIds), /*dictionary=*/ null, metadata); - final Field field = new Field("union", fieldType, children); - - MinorType minorType = MinorType.UNION; - UnionVector vector = (UnionVector) minorType.getNewVector(field, allocator, null); - vector.initializeChildrenFromFields(children); - - assertTrue(vector.getField().equals(field)); - - // Union has 2 child vectors - assertEquals(2, vector.size()); - - // Check child field 0 - VectorWithOrdinal intChild = vector.getChildVectorWithOrdinal("int"); - assertEquals(0, intChild.ordinal); - assertEquals(intChild.vector.getField(), children.get(0)); - - // Check child field 1 - VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar"); - assertEquals(1, varcharChild.ordinal); - 
assertEquals(varcharChild.vector.getField(), children.get(1)); - } - - @Test - public void testGetBufferAddress() throws Exception { - try (UnionVector vector = - new UnionVector( - EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) { - boolean error = false; - - vector.allocateNew(); - - /* populate the UnionVector */ - vector.setType(0, MinorType.INT); - vector.setSafe(0, newIntHolder(5)); - - vector.setType(1, MinorType.FLOAT4); - vector.setSafe(1, newFloat4Holder(5.5f)); - - vector.setType(2, MinorType.INT); - vector.setSafe(2, newIntHolder(10)); - - vector.setType(3, MinorType.FLOAT4); - vector.setSafe(3, newFloat4Holder(10.5f)); - - vector.setValueCount(10); - - /* check the vector output */ - assertEquals(10, vector.getValueCount()); - assertEquals(false, vector.isNull(0)); - assertEquals(5, vector.getObject(0)); - assertEquals(false, vector.isNull(1)); - assertEquals(5.5f, vector.getObject(1)); - assertEquals(false, vector.isNull(2)); - assertEquals(10, vector.getObject(2)); - assertEquals(false, vector.isNull(3)); - assertEquals(10.5f, vector.getObject(3)); - - List buffers = vector.getFieldBuffers(); - - try { - vector.getOffsetBufferAddress(); - } catch (UnsupportedOperationException ue) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - try { - vector.getDataBufferAddress(); - } catch (UnsupportedOperationException ue) { - error = true; - } finally { - assertTrue(error); - } - - assertEquals(1, buffers.size()); - } - } - - @Test - public void testSetGetNull() { - try (UnionVector srcVector = - new UnionVector( - EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) { - srcVector.allocateNew(); - - final NullableIntHolder holder = new NullableIntHolder(); - holder.isSet = 1; - holder.value = 5; - - // write some data - srcVector.setType(0, MinorType.INT); - srcVector.setSafe(0, holder); - - assertFalse(srcVector.isNull(0)); - - holder.isSet = 0; - srcVector.setSafe(0, holder); - - 
assertNull(srcVector.getObject(0)); - } - } - - @Test - public void testCreateNewVectorWithoutTypeExceptionThrown() { - try (UnionVector vector = - new UnionVector( - EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) { - IllegalArgumentException e1 = - assertThrows(IllegalArgumentException.class, () -> vector.getTimeStampMilliTZVector()); - assertEquals( - "No TimeStampMilliTZ present. Provide ArrowType argument to create a new vector", - e1.getMessage()); - - IllegalArgumentException e2 = - assertThrows(IllegalArgumentException.class, () -> vector.getDurationVector()); - assertEquals( - "No Duration present. Provide ArrowType argument to create a new vector", - e2.getMessage()); - - IllegalArgumentException e3 = - assertThrows(IllegalArgumentException.class, () -> vector.getFixedSizeBinaryVector()); - assertEquals( - "No FixedSizeBinary present. Provide ArrowType argument to create a new vector", - e3.getMessage()); - - IllegalArgumentException e4 = - assertThrows(IllegalArgumentException.class, () -> vector.getDecimalVector()); - assertEquals( - "No Decimal present. Provide ArrowType argument to create a new vector", e4.getMessage()); - } - } - - private static NullableIntHolder newIntHolder(int value) { - final NullableIntHolder holder = new NullableIntHolder(); - holder.isSet = 1; - holder.value = value; - return holder; - } - - private static NullableBitHolder newBitHolder(boolean value) { - final NullableBitHolder holder = new NullableBitHolder(); - holder.isSet = 1; - holder.value = value ? 
1 : 0; - return holder; - } - - private static NullableFloat4Holder newFloat4Holder(float value) { - final NullableFloat4Holder holder = new NullableFloat4Holder(); - holder.isSet = 1; - holder.value = value; - return holder; - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java b/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java deleted file mode 100644 index 3845652ad0280..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; - -public class TestUtils { - - public static VarCharVector newVarCharVector(String name, BufferAllocator allocator) { - return (VarCharVector) - FieldType.nullable(new ArrowType.Utf8()).createNewSingleVector(name, allocator, null); - } - - public static ViewVarCharVector newViewVarCharVector(String name, BufferAllocator allocator) { - return (ViewVarCharVector) - FieldType.nullable(new ArrowType.Utf8View()).createNewSingleVector(name, allocator, null); - } - - public static VarBinaryVector newVarBinaryVector(String name, BufferAllocator allocator) { - return (VarBinaryVector) - FieldType.nullable(new ArrowType.Binary()).createNewSingleVector(name, allocator, null); - } - - public static ViewVarBinaryVector newViewVarBinaryVector(String name, BufferAllocator allocator) { - return (ViewVarBinaryVector) - FieldType.nullable(new ArrowType.BinaryView()).createNewSingleVector(name, allocator, null); - } - - public static T newVector( - Class c, String name, ArrowType type, BufferAllocator allocator) { - return c.cast(FieldType.nullable(type).createNewSingleVector(name, allocator, null)); - } - - public static T newVector( - Class c, String name, MinorType type, BufferAllocator allocator) { - return c.cast(FieldType.nullable(type.getType()).createNewSingleVector(name, allocator, null)); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java deleted file mode 100644 index 83e470ae2581d..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ /dev/null @@ -1,3749 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.TestUtils.newVarBinaryVector; -import static org.apache.arrow.vector.TestUtils.newVarCharVector; -import static org.apache.arrow.vector.TestUtils.newVector; -import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.ByteBuffer; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.memory.rounding.DefaultRoundingPolicy; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.CommonUtil; -import 
org.apache.arrow.vector.compare.Range; -import org.apache.arrow.vector.compare.RangeEqualsVisitor; -import org.apache.arrow.vector.compare.VectorEqualsVisitor; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListViewVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.impl.NullableStructWriter; -import org.apache.arrow.vector.complex.impl.UnionLargeListViewWriter; -import org.apache.arrow.vector.complex.impl.UnionListViewWriter; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.holders.NullableUInt4Holder; -import org.apache.arrow.vector.holders.NullableVarBinaryHolder; -import org.apache.arrow.vector.holders.NullableVarCharHolder; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.OversizedAllocationException; -import org.apache.arrow.vector.util.ReusableByteArray; -import org.apache.arrow.vector.util.Text; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestValueVector { - - private static final String EMPTY_SCHEMA_PATH = ""; - - private 
BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - private static final Charset utf8Charset = StandardCharsets.UTF_8; - private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset); - private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset); - private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset); - private static final byte[] STR4 = "DDDDDDDD4".getBytes(utf8Charset); - private static final byte[] STR5 = "EEE5".getBytes(utf8Charset); - private static final byte[] STR6 = "FFFFF6".getBytes(utf8Charset); - private static final int MAX_VALUE_COUNT = - (int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 7); - private static final int MAX_VALUE_COUNT_8BYTE = (int) (MAX_VALUE_COUNT / 2); - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - /* - * Tests for Fixed-Width vectors - * - * Covered types as of now - * - * -- UInt4Vector - * -- IntVector - * -- Float4Vector - * -- Float8Vector - * - * -- UInt4Vector - * -- IntVector - * -- Float4Vector - * - * TODO: - * - * -- SmallIntVector - * -- BigIntVector - * -- TinyIntVector - */ - - @Test /* UInt4Vector */ - public void testFixedType1() { - - // Create a new value vector for 1024 integers. 
- try (final UInt4Vector vector = new UInt4Vector(EMPTY_SCHEMA_PATH, allocator)) { - - boolean error = false; - - vector.allocateNew(1024); - int initialCapacity = vector.getValueCapacity(); - assertTrue(initialCapacity >= 1024); - - // Put and set a few values - vector.setSafe(0, 100); - vector.setSafe(1, 101); - vector.setSafe(100, 102); - vector.setSafe(1022, 103); - vector.setSafe(1023, 104); - - assertEquals(100, vector.get(0)); - assertEquals(101, vector.get(1)); - assertEquals(102, vector.get(100)); - assertEquals(103, vector.get(1022)); - assertEquals(104, vector.get(1023)); - - try { - vector.set(initialCapacity, 10000); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - try { - vector.get(initialCapacity); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - /* this should trigger a realloc() */ - vector.setSafe(initialCapacity, 10000); - - /* underlying buffer should now be able to store double the number of values */ - assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); - - /* check vector data after realloc */ - assertEquals(100, vector.get(0)); - assertEquals(101, vector.get(1)); - assertEquals(102, vector.get(100)); - assertEquals(103, vector.get(1022)); - assertEquals(104, vector.get(1023)); - assertEquals(10000, vector.get(initialCapacity)); - - /* reset the vector */ - int capacityBeforeReset = vector.getValueCapacity(); - vector.reset(); - - /* capacity shouldn't change after reset */ - assertEquals(capacityBeforeReset, vector.getValueCapacity()); - - /* vector data should have been zeroed out */ - for (int i = 0; i < capacityBeforeReset; i++) { - // TODO: test vector.get(i) is 0 after unsafe get added - assertTrue(vector.isNull(i), "non-zero data not expected at index: " + i); - } - } - } - - @Test - public void testNoOverFlowWithUINT() { - try (final UInt8Vector uInt8Vector = new UInt8Vector("uint8", 
allocator); - final UInt4Vector uInt4Vector = new UInt4Vector("uint4", allocator); - final UInt1Vector uInt1Vector = new UInt1Vector("uint1", allocator)) { - - long[] longValues = new long[] {Long.MIN_VALUE, Long.MAX_VALUE, -1L}; - uInt8Vector.allocateNew(3); - uInt8Vector.setValueCount(3); - for (int i = 0; i < longValues.length; i++) { - uInt8Vector.set(i, longValues[i]); - long readValue = uInt8Vector.getObjectNoOverflow(i).longValue(); - assertEquals(readValue, longValues[i]); - } - - int[] intValues = new int[] {Integer.MIN_VALUE, Integer.MAX_VALUE, -1}; - uInt4Vector.allocateNew(3); - uInt4Vector.setValueCount(3); - for (int i = 0; i < intValues.length; i++) { - uInt4Vector.set(i, intValues[i]); - int actualValue = (int) UInt4Vector.getNoOverflow(uInt4Vector.getDataBuffer(), i); - assertEquals(intValues[i], actualValue); - } - - byte[] byteValues = new byte[] {Byte.MIN_VALUE, Byte.MAX_VALUE, -1}; - uInt1Vector.allocateNew(3); - uInt1Vector.setValueCount(3); - for (int i = 0; i < byteValues.length; i++) { - uInt1Vector.set(i, byteValues[i]); - byte actualValue = (byte) UInt1Vector.getNoOverflow(uInt1Vector.getDataBuffer(), i); - assertEquals(byteValues[i], actualValue); - } - } - } - - @Test /* IntVector */ - public void testFixedType2() { - try (final IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { - boolean error = false; - int initialCapacity = 16; - - /* we should not throw exception for these values of capacity */ - intVector.setInitialCapacity(MAX_VALUE_COUNT - 1); - intVector.setInitialCapacity(MAX_VALUE_COUNT); - - try { - intVector.setInitialCapacity(MAX_VALUE_COUNT * 2); - } catch (OversizedAllocationException oe) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - intVector.setInitialCapacity(initialCapacity); - /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ - assertEquals(0, intVector.getValueCapacity()); - - /* allocate 64 bytes (16 * 4) */ - 
intVector.allocateNew(); - /* underlying buffer should be able to store 16 values */ - assertTrue(intVector.getValueCapacity() >= initialCapacity); - initialCapacity = intVector.getValueCapacity(); - - /* populate the vector */ - int j = 1; - for (int i = 0; i < initialCapacity; i += 2) { - intVector.set(i, j); - j++; - } - - try { - intVector.set(initialCapacity, j); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - /* check vector contents */ - j = 1; - for (int i = 0; i < initialCapacity; i += 2) { - assertEquals(j, intVector.get(i), "unexpected value at index: " + i); - j++; - } - - try { - intVector.get(initialCapacity); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - /* this should trigger a realloc() */ - intVector.setSafe(initialCapacity, j); - - /* underlying buffer should now be able to store double the number of values */ - assertTrue(intVector.getValueCapacity() >= initialCapacity * 2); - - /* vector data should still be intact after realloc */ - j = 1; - for (int i = 0; i <= initialCapacity; i += 2) { - assertEquals(j, intVector.get(i), "unexpected value at index: " + i); - j++; - } - - /* reset the vector */ - int capacityBeforeRealloc = intVector.getValueCapacity(); - intVector.reset(); - - /* capacity shouldn't change after reset */ - assertEquals(capacityBeforeRealloc, intVector.getValueCapacity()); - - /* vector data should have been zeroed out */ - for (int i = 0; i < capacityBeforeRealloc; i++) { - assertTrue(intVector.isNull(i), "non-zero data not expected at index: " + i); - } - } - } - - @Test /* VarCharVector */ - public void testSizeOfValueBuffer() { - try (final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - int valueCount = 100; - int currentSize = 0; - vector.setInitialCapacity(valueCount); - vector.allocateNew(); - vector.setValueCount(valueCount); - for (int i = 0; i < 
valueCount; i++) { - currentSize += i; - vector.setSafe(i, new byte[i]); - } - - assertEquals(currentSize, vector.sizeOfValueBuffer()); - } - } - - @Test - public void testFixedFloat2() { - try (final Float2Vector floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) { - boolean error = false; - int initialCapacity = 16; - - /* we should not throw exception for these values of capacity */ - floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1); - floatVector.setInitialCapacity(MAX_VALUE_COUNT); - - try { - floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4); - } catch (OversizedAllocationException oe) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - floatVector.setInitialCapacity(initialCapacity); - /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ - assertEquals(0, floatVector.getValueCapacity()); - - /* allocate 32 bytes (16 * 2) */ - floatVector.allocateNew(); - /* underlying buffer should be able to store 16 values */ - assertTrue(floatVector.getValueCapacity() >= initialCapacity); - initialCapacity = floatVector.getValueCapacity(); - - floatVector.zeroVector(); - - /* populate the floatVector */ - floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f) - floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f) - floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f) - floatVector.set(6, (short) 0x901d); // Float16.toFloat16(-0.000502109527588f) - floatVector.set(8, (short) 0x121c); // Float16.toFloat16(+0.00074577331543f) - floatVector.set(10, (short) 0x921c); // Float16.toFloat16(-0.00074577331543f) - floatVector.set(12, (short) 0x501c); // Float16.toFloat16(+32.875f) - floatVector.set(14, (short) 0xd01c); // Float16.toFloat16(-32.875f) - - try { - floatVector.set(initialCapacity, (short) 0x141c); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - /* check 
vector contents */ - assertEquals((short) 0x101c, floatVector.get(0)); - assertEquals((short) 0x901c, floatVector.get(2)); - assertEquals((short) 0x101d, floatVector.get(4)); - assertEquals((short) 0x901d, floatVector.get(6)); - assertEquals((short) 0x121c, floatVector.get(8)); - assertEquals((short) 0x921c, floatVector.get(10)); - assertEquals((short) 0x501c, floatVector.get(12)); - assertEquals((short) 0xd01c, floatVector.get(14)); - - try { - floatVector.get(initialCapacity); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - } - - /* this should trigger a realloc() */ - floatVector.setSafe(initialCapacity, (short) 0x141c); // Float16.toFloat16(+0.00100326538086f) - - /* underlying buffer should now be able to store double the number of values */ - assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); - - /* vector data should still be intact after realloc */ - assertEquals((short) 0x101c, floatVector.get(0)); - assertEquals((short) 0x901c, floatVector.get(2)); - assertEquals((short) 0x101d, floatVector.get(4)); - assertEquals((short) 0x901d, floatVector.get(6)); - assertEquals((short) 0x121c, floatVector.get(8)); - assertEquals((short) 0x921c, floatVector.get(10)); - assertEquals((short) 0x501c, floatVector.get(12)); - assertEquals((short) 0xd01c, floatVector.get(14)); - assertEquals((short) 0x141c, floatVector.get(initialCapacity)); - - /* reset the vector */ - int capacityBeforeReset = floatVector.getValueCapacity(); - floatVector.reset(); - - /* capacity shouldn't change after reset */ - assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); - - /* vector data should be zeroed out */ - for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue(floatVector.isNull(i), "non-zero data not expected at index: " + i); - } - } - } - - @Test - public void testFixedFloat2WithPossibleTruncate() { - try (final Float2Vector floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) { - boolean 
error = false; - int initialCapacity = 16; - - /* we should not throw exception for these values of capacity */ - floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1); - floatVector.setInitialCapacity(MAX_VALUE_COUNT); - - try { - floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4); - } catch (OversizedAllocationException oe) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - floatVector.setInitialCapacity(initialCapacity); - /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ - assertEquals(0, floatVector.getValueCapacity()); - - /* allocate 32 bytes (16 * 2) */ - floatVector.allocateNew(); - /* underlying buffer should be able to store 16 values */ - assertTrue(floatVector.getValueCapacity() >= initialCapacity); - initialCapacity = floatVector.getValueCapacity(); - - floatVector.zeroVector(); - - /* populate the floatVector */ - floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f) - floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f) - floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f) - floatVector.setWithPossibleTruncate(6, 2049.0f); // in f32=2049.000000, out f16=2048 - floatVector.setWithPossibleTruncate(8, 4098.0f); // in f32=4098.000000, out f16=4096 - floatVector.setWithPossibleTruncate(10, 8196.0f); // in f32=8196.000000, out f16=8192 - floatVector.setWithPossibleTruncate(12, 16392.0f); // in f32=16392.000000, out f16=16384 - floatVector.setWithPossibleTruncate(14, 32784.0f); // in f32=32784.000000, out f16=32768 - - try { - floatVector.setWithPossibleTruncate( - initialCapacity, 1.618034f); // in f32=1.618034, out f16=1.6181641 - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - /* check vector contents */ - assertEquals((short) 0x101c, floatVector.get(0)); - assertEquals((short) 0x901c, floatVector.get(2)); - assertEquals((short) 0x101d, 
floatVector.get(4)); - assertEquals(2048.0f, floatVector.getValueAsFloat(6), 0); - assertEquals(4096.0f, floatVector.getValueAsFloat(8), 0); - assertEquals(8192.0f, floatVector.getValueAsFloat(10), 0); - assertEquals(16384.0f, floatVector.getValueAsDouble(12), 0); - assertEquals(32768.0f, floatVector.getValueAsDouble(14), 0); - - try { - floatVector.get(initialCapacity); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - } - - /* this should trigger a realloc() */ - floatVector.setSafeWithPossibleTruncate( - initialCapacity, 1.618034f); // in f32=1.618034, out f16=1.6181641 - - /* underlying buffer should now be able to store double the number of values */ - assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); - - /* vector data should still be intact after realloc */ - assertEquals((short) 0x101c, floatVector.get(0)); - assertEquals((short) 0x901c, floatVector.get(2)); - assertEquals((short) 0x101d, floatVector.get(4)); - assertEquals(2048.0f, floatVector.getValueAsFloat(6), 0); - assertEquals(4096.0f, floatVector.getValueAsFloat(8), 0); - assertEquals(8192.0f, floatVector.getValueAsFloat(10), 0); - assertEquals(16384.0f, floatVector.getValueAsDouble(12), 0); - assertEquals(32768.0f, floatVector.getValueAsDouble(14), 0); - assertEquals(1.6181641f, floatVector.getValueAsDouble(initialCapacity), 0); - - /* reset the vector */ - int capacityBeforeReset = floatVector.getValueCapacity(); - floatVector.reset(); - - /* capacity shouldn't change after reset */ - assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); - - /* vector data should be zeroed out */ - for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue(floatVector.isNull(i), "non-zero data not expected at index: " + i); - } - } - } - - @Test /* Float4Vector */ - public void testFixedType3() { - try (final Float4Vector floatVector = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) { - boolean error = false; - int initialCapacity = 16; - 
- /* we should not throw exception for these values of capacity */ - floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1); - floatVector.setInitialCapacity(MAX_VALUE_COUNT); - - try { - floatVector.setInitialCapacity(MAX_VALUE_COUNT * 2); - } catch (OversizedAllocationException oe) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - floatVector.setInitialCapacity(initialCapacity); - /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ - assertEquals(0, floatVector.getValueCapacity()); - - /* allocate 64 bytes (16 * 4) */ - floatVector.allocateNew(); - /* underlying buffer should be able to store 16 values */ - assertTrue(floatVector.getValueCapacity() >= initialCapacity); - initialCapacity = floatVector.getValueCapacity(); - - floatVector.zeroVector(); - - /* populate the floatVector */ - floatVector.set(0, 1.5f); - floatVector.set(2, 2.5f); - floatVector.set(4, 3.3f); - floatVector.set(6, 4.8f); - floatVector.set(8, 5.6f); - floatVector.set(10, 6.6f); - floatVector.set(12, 7.8f); - floatVector.set(14, 8.5f); - - try { - floatVector.set(initialCapacity, 9.5f); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - /* check vector contents */ - assertEquals(1.5f, floatVector.get(0), 0); - assertEquals(2.5f, floatVector.get(2), 0); - assertEquals(3.3f, floatVector.get(4), 0); - assertEquals(4.8f, floatVector.get(6), 0); - assertEquals(5.6f, floatVector.get(8), 0); - assertEquals(6.6f, floatVector.get(10), 0); - assertEquals(7.8f, floatVector.get(12), 0); - assertEquals(8.5f, floatVector.get(14), 0); - - try { - floatVector.get(initialCapacity); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - /* this should trigger a realloc() */ - floatVector.setSafe(initialCapacity, 9.5f); - - /* underlying buffer should now be able to store double the number of values */ - 
assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); - - /* vector data should still be intact after realloc */ - assertEquals(1.5f, floatVector.get(0), 0); - assertEquals(2.5f, floatVector.get(2), 0); - assertEquals(3.3f, floatVector.get(4), 0); - assertEquals(4.8f, floatVector.get(6), 0); - assertEquals(5.6f, floatVector.get(8), 0); - assertEquals(6.6f, floatVector.get(10), 0); - assertEquals(7.8f, floatVector.get(12), 0); - assertEquals(8.5f, floatVector.get(14), 0); - assertEquals(9.5f, floatVector.get(initialCapacity), 0); - - /* reset the vector */ - int capacityBeforeReset = floatVector.getValueCapacity(); - floatVector.reset(); - - /* capacity shouldn't change after reset */ - assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); - - /* vector data should be zeroed out */ - for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue(floatVector.isNull(i), "non-zero data not expected at index: " + i); - } - } - } - - @Test /* Float8Vector */ - public void testFixedType4() { - try (final Float8Vector floatVector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { - boolean error = false; - int initialCapacity = 16; - - /* we should not throw exception for these values of capacity */ - floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE - 1); - floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE); - - try { - floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE * 2); - } catch (OversizedAllocationException oe) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - floatVector.setInitialCapacity(initialCapacity); - /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ - assertEquals(0, floatVector.getValueCapacity()); - - /* allocate 128 bytes (16 * 8) */ - floatVector.allocateNew(); - /* underlying buffer should be able to store 16 values */ - assertTrue(floatVector.getValueCapacity() >= initialCapacity); - initialCapacity = floatVector.getValueCapacity(); - - /* populate 
the vector */ - floatVector.set(0, 1.55); - floatVector.set(2, 2.53); - floatVector.set(4, 3.36); - floatVector.set(6, 4.82); - floatVector.set(8, 5.67); - floatVector.set(10, 6.67); - floatVector.set(12, 7.87); - floatVector.set(14, 8.56); - - try { - floatVector.set(initialCapacity, 9.53); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - /* check floatVector contents */ - assertEquals(1.55, floatVector.get(0), 0); - assertEquals(2.53, floatVector.get(2), 0); - assertEquals(3.36, floatVector.get(4), 0); - assertEquals(4.82, floatVector.get(6), 0); - assertEquals(5.67, floatVector.get(8), 0); - assertEquals(6.67, floatVector.get(10), 0); - assertEquals(7.87, floatVector.get(12), 0); - assertEquals(8.56, floatVector.get(14), 0); - - try { - floatVector.get(initialCapacity); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - /* this should trigger a realloc() */ - floatVector.setSafe(initialCapacity, 9.53); - - /* underlying buffer should now be able to store double the number of values */ - assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); - - /* vector data should still be intact after realloc */ - assertEquals(1.55, floatVector.get(0), 0); - assertEquals(2.53, floatVector.get(2), 0); - assertEquals(3.36, floatVector.get(4), 0); - assertEquals(4.82, floatVector.get(6), 0); - assertEquals(5.67, floatVector.get(8), 0); - assertEquals(6.67, floatVector.get(10), 0); - assertEquals(7.87, floatVector.get(12), 0); - assertEquals(8.56, floatVector.get(14), 0); - assertEquals(9.53, floatVector.get(initialCapacity), 0); - - /* reset the vector */ - int capacityBeforeReset = floatVector.getValueCapacity(); - floatVector.reset(); - - /* capacity shouldn't change after reset */ - assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); - - /* vector data should be zeroed out */ - for (int i = 0; i < 
capacityBeforeReset; i++) { - assertTrue(floatVector.isNull(i), "non-zero data not expected at index: " + i); - } - } - } - - @Test /* UInt4Vector */ - public void testNullableFixedType1() { - - // Create a new value vector for 1024 integers. - try (final UInt4Vector vector = - newVector( - UInt4Vector.class, EMPTY_SCHEMA_PATH, new ArrowType.Int(32, false), allocator); ) { - boolean error = false; - int initialCapacity = 1024; - - vector.setInitialCapacity(initialCapacity); - /* no memory allocation has happened yet */ - assertEquals(0, vector.getValueCapacity()); - - vector.allocateNew(); - assertTrue(vector.getValueCapacity() >= initialCapacity); - initialCapacity = vector.getValueCapacity(); - - // Put and set a few values - vector.set(0, 100); - vector.set(1, 101); - vector.set(100, 102); - vector.set(initialCapacity - 2, 103); - vector.set(initialCapacity - 1, 104); - - /* check vector contents */ - assertEquals(100, vector.get(0)); - assertEquals(101, vector.get(1)); - assertEquals(102, vector.get(100)); - assertEquals(103, vector.get(initialCapacity - 2)); - assertEquals(104, vector.get(initialCapacity - 1)); - - /* check unset bits/null values */ - for (int i = 2, j = 101; i <= 99 || j <= initialCapacity - 3; i++, j++) { - if (i <= 99) { - assertTrue(vector.isNull(i)); - } - if (j <= initialCapacity - 3) { - assertTrue(vector.isNull(j)); - } - } - - try { - vector.set(initialCapacity, 10000); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - try { - vector.get(initialCapacity); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - /* should trigger a realloc of the underlying bitvector and valuevector */ - vector.setSafe(initialCapacity, 10000); - - /* check new capacity */ - assertTrue(vector.getValueCapacity() >= initialCapacity * 2); - - /* vector contents should still be intact after realloc */ - assertEquals(100, 
vector.get(0)); - assertEquals(101, vector.get(1)); - assertEquals(102, vector.get(100)); - assertEquals(103, vector.get(initialCapacity - 2)); - assertEquals(104, vector.get(initialCapacity - 1)); - assertEquals(10000, vector.get(initialCapacity)); - - /* check unset bits/null values */ - for (int i = 2, j = 101; i < 99 || j < initialCapacity - 3; i++, j++) { - if (i <= 99) { - assertTrue(vector.isNull(i)); - } - if (j <= initialCapacity - 3) { - assertTrue(vector.isNull(j)); - } - } - - /* reset the vector */ - int capacityBeforeReset = vector.getValueCapacity(); - vector.reset(); - - /* capacity shouldn't change after reset */ - assertEquals(capacityBeforeReset, vector.getValueCapacity()); - - /* vector data should be zeroed out */ - for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); - } - } - } - - @Test /* Float4Vector */ - public void testNullableFixedType2() { - // Create a new value vector for 1024 integers - try (final Float4Vector vector = - newVector(Float4Vector.class, EMPTY_SCHEMA_PATH, MinorType.FLOAT4, allocator); ) { - boolean error = false; - int initialCapacity = 16; - - vector.setInitialCapacity(initialCapacity); - /* no memory allocation has happened yet */ - assertEquals(0, vector.getValueCapacity()); - - vector.allocateNew(); - assertTrue(vector.getValueCapacity() >= initialCapacity); - initialCapacity = vector.getValueCapacity(); - - /* populate the vector */ - vector.set(0, 100.5f); - vector.set(2, 201.5f); - vector.set(4, 300.3f); - vector.set(6, 423.8f); - vector.set(8, 555.6f); - vector.set(10, 66.6f); - vector.set(12, 78.8f); - vector.set(14, 89.5f); - - try { - vector.set(initialCapacity, 90.5f); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - /* check vector contents */ - assertEquals(100.5f, vector.get(0), 0); - assertTrue(vector.isNull(1)); - assertEquals(201.5f, vector.get(2), 0); - 
assertTrue(vector.isNull(3)); - assertEquals(300.3f, vector.get(4), 0); - assertTrue(vector.isNull(5)); - assertEquals(423.8f, vector.get(6), 0); - assertTrue(vector.isNull(7)); - assertEquals(555.6f, vector.get(8), 0); - assertTrue(vector.isNull(9)); - assertEquals(66.6f, vector.get(10), 0); - assertTrue(vector.isNull(11)); - assertEquals(78.8f, vector.get(12), 0); - assertTrue(vector.isNull(13)); - assertEquals(89.5f, vector.get(14), 0); - assertTrue(vector.isNull(15)); - - try { - vector.get(initialCapacity); - } catch (IndexOutOfBoundsException ie) { - error = true; - } finally { - assertTrue(error); - error = false; - } - - /* this should trigger a realloc() */ - vector.setSafe(initialCapacity, 90.5f); - - /* underlying buffer should now be able to store double the number of values */ - assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); - - /* vector data should still be intact after realloc */ - assertEquals(100.5f, vector.get(0), 0); - assertTrue(vector.isNull(1)); - assertEquals(201.5f, vector.get(2), 0); - assertTrue(vector.isNull(3)); - assertEquals(300.3f, vector.get(4), 0); - assertTrue(vector.isNull(5)); - assertEquals(423.8f, vector.get(6), 0); - assertTrue(vector.isNull(7)); - assertEquals(555.6f, vector.get(8), 0); - assertTrue(vector.isNull(9)); - assertEquals(66.6f, vector.get(10), 0); - assertTrue(vector.isNull(11)); - assertEquals(78.8f, vector.get(12), 0); - assertTrue(vector.isNull(13)); - assertEquals(89.5f, vector.get(14), 0); - assertTrue(vector.isNull(15)); - - /* reset the vector */ - int capacityBeforeReset = vector.getValueCapacity(); - vector.reset(); - - /* capacity shouldn't change after reset */ - assertEquals(capacityBeforeReset, vector.getValueCapacity()); - - /* vector data should be zeroed out */ - for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); - } - } - } - - @Test /* IntVector */ - public void testNullableFixedType3() { - // Create a new 
value vector for 1024 integers - try (final IntVector vector = - newVector(IntVector.class, EMPTY_SCHEMA_PATH, MinorType.INT, allocator)) { - int initialCapacity = 1024; - - /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ - assertEquals(0, vector.getValueCapacity()); - /* allocate space for 4KB data (1024 * 4) */ - vector.allocateNew(initialCapacity); - /* underlying buffer should be able to store 1024 values */ - assertTrue(vector.getValueCapacity() >= initialCapacity); - initialCapacity = vector.getValueCapacity(); - - vector.set(0, 1); - vector.set(1, 2); - vector.set(100, 3); - vector.set(1022, 4); - vector.set(1023, 5); - - /* check vector contents */ - int j = 1; - for (int i = 0; i <= 1023; i++) { - if ((i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) { - assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); - } else { - assertFalse(vector.isNull(i), "null data not expected at index: " + i); - assertEquals(j, vector.get(i), "unexpected value at index: " + i); - j++; - } - } - - vector.setValueCount(1024); - - List buffers = vector.getFieldBuffers(); - - assertEquals(2, buffers.size()); - - ArrowBuf validityVectorBuf = buffers.get(0); - - /* bitvector tracks 1024 integers --> 1024 bits --> 128 bytes */ - assertTrue(validityVectorBuf.readableBytes() >= 128); - assertEquals(3, validityVectorBuf.getByte(0)); // 1st and second bit defined - for (int i = 1; i < 12; i++) { - assertEquals(0, validityVectorBuf.getByte(i)); // nothing defined until 100 - } - assertEquals(16, validityVectorBuf.getByte(12)); // 100th bit is defined (12 * 8 + 4) - for (int i = 13; i < 127; i++) { - assertEquals(0, validityVectorBuf.getByte(i)); // nothing defined between 100th and 1022nd - } - assertEquals(-64, validityVectorBuf.getByte(127)); // 1022nd and 1023rd bit defined - - /* this should trigger a realloc() */ - vector.setSafe(initialCapacity, 6); - - /* underlying buffer should now be able to store double the number of 
values */ - assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); - - /* vector data should still be intact after realloc */ - j = 1; - for (int i = 0; i < (initialCapacity * 2); i++) { - if ((i > 1023 && i != initialCapacity) || (i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) { - assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); - } else { - assertFalse(vector.isNull(i), "null data not expected at index: " + i); - assertEquals(j, vector.get(i), "unexpected value at index: " + i); - j++; - } - } - - /* reset the vector */ - int capacityBeforeReset = vector.getValueCapacity(); - vector.reset(); - - /* capacity shouldn't change after reset */ - assertEquals(capacityBeforeReset, vector.getValueCapacity()); - - /* vector data should have been zeroed out */ - for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); - } - - vector.allocateNew(initialCapacity * 4); - // vector has been erased - for (int i = 0; i < initialCapacity * 4; i++) { - assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); - } - } - } - - @Test /* IntVector */ - public void testNullableFixedType4() { - try (final IntVector vector = - newVector(IntVector.class, EMPTY_SCHEMA_PATH, MinorType.INT, allocator)) { - - /* no memory allocation has happened yet */ - assertEquals(0, vector.getValueCapacity()); - - vector.allocateNew(); - int valueCapacity = vector.getValueCapacity(); - assertEquals(vector.INITIAL_VALUE_ALLOCATION, valueCapacity); - - int baseValue = 20000; - - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - vector.set(i, baseValue + i); - } - } - - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertFalse(vector.isNull(i), "unexpected null value at index: " + i); - assertEquals((baseValue + i), vector.get(i), "unexpected value at index: " + i); - } else { - assertTrue(vector.isNull(i), "unexpected non-null value at index: " + i); - } - 
} - - vector.setSafe(valueCapacity, 20000000); - assertTrue(vector.getValueCapacity() >= valueCapacity * 2); - - for (int i = 0; i < vector.getValueCapacity(); i++) { - if (i == valueCapacity) { - assertFalse(vector.isNull(i), "unexpected null value at index: " + i); - assertEquals(20000000, vector.get(i), "unexpected value at index: " + i); - } else if (i < valueCapacity) { - if ((i & 1) == 1) { - assertFalse(vector.isNull(i), "unexpected null value at index: " + i); - assertEquals((baseValue + i), vector.get(i), "unexpected value at index: " + i); - } - } else { - assertTrue(vector.isNull(i), "unexpected non-null value at index: " + i); - } - } - - vector.zeroVector(); - - for (int i = 0; i < vector.getValueCapacity(); i += 2) { - vector.set(i, baseValue + i); - } - - for (int i = 0; i < vector.getValueCapacity(); i++) { - if (i % 2 == 0) { - assertFalse(vector.isNull(i), "unexpected null value at index: " + i); - assertEquals((baseValue + i), vector.get(i), "unexpected value at index: " + i); - } else { - assertTrue(vector.isNull(i), "unexpected non-null value at index: " + i); - } - } - - int valueCapacityBeforeRealloc = vector.getValueCapacity(); - vector.setSafe(valueCapacityBeforeRealloc + 1000, 400000000); - assertTrue(vector.getValueCapacity() >= valueCapacity * 4); - - for (int i = 0; i < vector.getValueCapacity(); i++) { - if (i == (valueCapacityBeforeRealloc + 1000)) { - assertFalse(vector.isNull(i), "unexpected null value at index: " + i); - assertEquals(400000000, vector.get(i), "unexpected value at index: " + i); - } else if (i < valueCapacityBeforeRealloc && (i % 2) == 0) { - assertFalse(vector.isNull(i), "unexpected null value at index: " + i); - assertEquals(baseValue + i, vector.get(i), "unexpected value at index: " + i); - } else { - assertTrue(vector.isNull(i), "unexpected non-null value at index: " + i); - } - } - - /* reset the vector */ - int valueCapacityBeforeReset = vector.getValueCapacity(); - vector.reset(); - - /* capacity shouldn't 
change after reset */ - assertEquals(valueCapacityBeforeReset, vector.getValueCapacity()); - - /* vector data should be zeroed out */ - for (int i = 0; i < valueCapacityBeforeReset; i++) { - assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); - } - } - } - - /* - * Tests for Variable Width Vectors - * - * Covered types as of now - * - * -- VarCharVector - * -- VarBinaryVector - * - * TODO: - * - * -- VarCharVector - * -- VarBinaryVector - */ - - /** - * ARROW-7831: this checks that a slice taken off a buffer is still readable after that buffer's - * allocator is closed. - */ - @Test /* VarCharVector */ - public void testSplitAndTransfer1() { - try (final VarCharVector targetVector = newVarCharVector("split-target", allocator)) { - try (final VarCharVector sourceVector = newVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, STR1); - sourceVector.set(1, STR2); - sourceVector.set(2, STR3); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int offsetRefCnt = sourceVector.getOffsetBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - // split and transfer with slice starting at the beginning: this should not allocate - // anything new - sourceVector.splitAndTransferTo(0, 2, targetVector); - assertEquals(allocatedMem, allocator.getAllocatedMemory()); - // The validity and offset buffers are sliced from a same buffer.See - // BaseFixedWidthVector#allocateBytes. - // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. - // The refcnt of the - // offset buffer is increased as well for the same reason. This amounts to a total of 2. 
- assertEquals(validityRefCnt + 2, sourceVector.getValidityBuffer().refCnt()); - assertEquals(offsetRefCnt + 2, sourceVector.getOffsetBuffer().refCnt()); - assertEquals(dataRefCnt + 1, sourceVector.getDataBuffer().refCnt()); - } - assertArrayEquals(STR1, targetVector.get(0)); - assertArrayEquals(STR2, targetVector.get(1)); - } - } - - /** - * ARROW-7831: this checks that a vector that got sliced is still readable after the slice's - * allocator got closed. - */ - @Test /* VarCharVector */ - public void testSplitAndTransfer2() { - try (final VarCharVector sourceVector = newVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - try (final VarCharVector targetVector = newVarCharVector("split-target", allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, STR1); - sourceVector.set(1, STR2); - sourceVector.set(2, STR3); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int offsetRefCnt = sourceVector.getOffsetBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - // split and transfer with slice starting at the beginning: this should not allocate - // anything new - sourceVector.splitAndTransferTo(0, 2, targetVector); - assertEquals(allocatedMem, allocator.getAllocatedMemory()); - // The validity and offset buffers are sliced from a same buffer.See - // BaseFixedWidthVector#allocateBytes. - // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. - // The refcnt of the - // offset buffer is increased as well for the same reason. This amounts to a total of 2. 
- assertEquals(validityRefCnt + 2, sourceVector.getValidityBuffer().refCnt()); - assertEquals(offsetRefCnt + 2, sourceVector.getOffsetBuffer().refCnt()); - assertEquals(dataRefCnt + 1, sourceVector.getDataBuffer().refCnt()); - } - assertArrayEquals(STR1, sourceVector.get(0)); - assertArrayEquals(STR2, sourceVector.get(1)); - assertArrayEquals(STR3, sourceVector.get(2)); - } - } - - /** - * ARROW-7831: this checks an offset splitting optimization, in the case where all the values up - * to the start of the slice are null/empty, which avoids allocation for the offset buffer. - */ - @Test /* VarCharVector */ - public void testSplitAndTransfer3() { - try (final VarCharVector targetVector = newVarCharVector("split-target", allocator); - final VarCharVector sourceVector = newVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, new byte[0]); - sourceVector.setNull(1); - sourceVector.set(2, STR1); - sourceVector.set(3, STR2); - sourceVector.set(4, STR3); - sourceVector.setValueCount(5); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int offsetRefCnt = sourceVector.getOffsetBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(2, 2, targetVector); - // because the offset starts at 0 since the first 2 values are empty/null, the allocation only - // consists in - // the size needed for the validity buffer - final long validitySize = - DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY.getRoundedSize( - BaseValueVector.getValidityBufferSizeFromCount(2)); - assertEquals(allocatedMem + validitySize, allocator.getAllocatedMemory()); - // The validity and offset buffers are sliced from a same buffer.See - // BaseFixedWidthVector#allocateBytes. 
- // Since values up to the startIndex are empty/null, the offset buffer doesn't need to be - // reallocated and - // therefore its refcnt is increased by 1. - assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); - assertEquals(offsetRefCnt + 1, sourceVector.getOffsetBuffer().refCnt()); - assertEquals(dataRefCnt + 1, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR1, targetVector.get(0)); - assertArrayEquals(STR2, targetVector.get(1)); - } - } - - /** - * ARROW-7831: ensures that data is transferred from one allocator to another in case of 0-index - * start special cases. - */ - @Test /* VarCharVector */ - public void testSplitAndTransfer4() { - try (final BufferAllocator targetAllocator = - allocator.newChildAllocator("target-alloc", 256, 256); - final VarCharVector targetVector = newVarCharVector("split-target", targetAllocator)) { - try (final BufferAllocator sourceAllocator = - allocator.newChildAllocator("source-alloc", 256, 256); - final VarCharVector sourceVector = newVarCharVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { - sourceVector.allocateNew(50, 3); - - sourceVector.set(0, STR1); - sourceVector.set(1, STR2); - sourceVector.set(2, STR3); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int offsetRefCnt = sourceVector.getOffsetBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - // split and transfer with slice starting at the beginning: this should not allocate - // anything new - sourceVector.splitAndTransferTo(0, 2, targetVector); - assertEquals(allocatedMem, allocator.getAllocatedMemory()); - // Unlike testSplitAndTransfer1 where the buffers originated from the same allocator, the - // refcnts of each - // buffers for this test should be the same as what the source allocator ended up with. 
- assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - assertEquals(offsetRefCnt, sourceVector.getOffsetBuffer().refCnt()); - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - } - assertArrayEquals(STR1, targetVector.get(0)); - assertArrayEquals(STR2, targetVector.get(1)); - } - } - - @Test /* VarCharVector */ - public void testNullableVarType1() { - - // Create a new value vector for 1024 integers. - try (final VarCharVector vector = newVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(1024 * 10, 1024); - - vector.set(0, STR1); - vector.set(1, STR2); - vector.set(2, STR3); - vector.setSafe(3, STR3, 1, STR3.length - 1); - vector.setSafe(4, STR3, 2, STR3.length - 2); - ByteBuffer str3ByteBuffer = ByteBuffer.wrap(STR3); - vector.setSafe(5, str3ByteBuffer, 1, STR3.length - 1); - vector.setSafe(6, str3ByteBuffer, 2, STR3.length - 2); - - // Set with convenience function - Text txt = new Text("foo"); - vector.setSafe(7, txt); - - // Check the sample strings. - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(3)); - assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(4)); - assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(5)); - assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(6)); - - // Check returning a Text object - assertEquals(txt, vector.getObject(7)); - - // Ensure null value throws. 
- assertNull(vector.get(8)); - } - } - - @Test - public void testGetTextRepeatedly() { - try (final VarCharVector vector = new VarCharVector("myvector", allocator)) { - - ValueVectorDataPopulator.setVector(vector, STR1, STR2); - vector.setValueCount(2); - - /* check the vector output */ - Text text = new Text(); - vector.read(0, text); - assertArrayEquals(STR1, text.getBytes()); - vector.read(1, text); - assertArrayEquals(STR2, text.getBytes()); - } - } - - @Test /* VarBinaryVector */ - public void testNullableVarType2() { - - // Create a new value vector for 1024 integers. - try (final VarBinaryVector vector = newVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(1024 * 10, 1024); - - vector.set(0, STR1); - vector.set(1, STR2); - vector.set(2, STR3); - vector.setSafe(3, STR3, 1, STR3.length - 1); - vector.setSafe(4, STR3, 2, STR3.length - 2); - ByteBuffer str3ByteBuffer = ByteBuffer.wrap(STR3); - vector.setSafe(5, str3ByteBuffer, 1, STR3.length - 1); - vector.setSafe(6, str3ByteBuffer, 2, STR3.length - 2); - - // Check the sample strings. - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(3)); - assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(4)); - assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(5)); - assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(6)); - - // Ensure null value throws. - assertNull(vector.get(7)); - } - } - - @Test - public void testReallocateCheckSuccess() { - assertThrows( - OversizedAllocationException.class, - () -> { - // Create a new value vector for 1024 integers. - try (final VarBinaryVector vector = newVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(1024 * 10, 1024); - - vector.set(0, STR1); - // Check the sample strings. 
- assertArrayEquals(STR1, vector.get(0)); - - // update the index offset to a larger one - ArrowBuf offsetBuf = vector.getOffsetBuffer(); - offsetBuf.setInt(VarBinaryVector.OFFSET_WIDTH, Integer.MAX_VALUE - 5); - - vector.setValueLengthSafe(1, 6); - } - }); - } - - @Test - public void testGetBytesRepeatedly() { - try (VarBinaryVector vector = new VarBinaryVector("", allocator)) { - vector.allocateNew(5, 1); - - final String str = "hello world"; - final String str2 = "foo"; - vector.setSafe(0, str.getBytes(StandardCharsets.UTF_8)); - vector.setSafe(1, str2.getBytes(StandardCharsets.UTF_8)); - - // verify results - ReusableByteArray reusableByteArray = new ReusableByteArray(); - vector.read(0, reusableByteArray); - assertArrayEquals( - str.getBytes(StandardCharsets.UTF_8), - Arrays.copyOfRange( - reusableByteArray.getBuffer(), 0, (int) reusableByteArray.getLength())); - byte[] oldBuffer = reusableByteArray.getBuffer(); - - vector.read(1, reusableByteArray); - assertArrayEquals( - str2.getBytes(StandardCharsets.UTF_8), - Arrays.copyOfRange( - reusableByteArray.getBuffer(), 0, (int) reusableByteArray.getLength())); - - // There should not have been any reallocation since the newer value is smaller in length. - assertSame(oldBuffer, reusableByteArray.getBuffer()); - } - } - - /* - * generic tests - * - * -- lastSet() and setValueCount() - * -- fillEmpties() - * -- VectorLoader and VectorUnloader - * -- some realloc tests - * - * TODO: - * - * The realloc() related tests below should be moved up and we need to - * add realloc related tests (edge cases) for more vector types. 
- */ - - @Test /* Float8Vector */ - public void testReallocAfterVectorTransfer1() { - try (final Float8Vector vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { - int initialCapacity = 4096; - - /* use the default capacity; 4096*8 => 32KB */ - vector.setInitialCapacity(initialCapacity); - vector.allocateNew(); - - assertTrue(vector.getValueCapacity() >= initialCapacity); - initialCapacity = vector.getValueCapacity(); - - double baseValue = 100.375; - - for (int i = 0; i < initialCapacity; i++) { - vector.setSafe(i, baseValue + (double) i); - } - - /* the above setSafe calls should not have triggered a realloc as - * we are within the capacity. check the vector contents - */ - assertEquals(initialCapacity, vector.getValueCapacity()); - - for (int i = 0; i < initialCapacity; i++) { - double value = vector.get(i); - assertEquals(baseValue + (double) i, value, 0); - } - - /* this should trigger a realloc */ - vector.setSafe(initialCapacity, baseValue + (double) initialCapacity); - assertTrue(vector.getValueCapacity() >= initialCapacity * 2); - int capacityAfterRealloc1 = vector.getValueCapacity(); - - for (int i = initialCapacity + 1; i < capacityAfterRealloc1; i++) { - vector.setSafe(i, baseValue + (double) i); - } - - for (int i = 0; i < capacityAfterRealloc1; i++) { - double value = vector.get(i); - assertEquals(baseValue + (double) i, value, 0); - } - - /* this should trigger a realloc */ - vector.setSafe(capacityAfterRealloc1, baseValue + (double) capacityAfterRealloc1); - assertTrue(vector.getValueCapacity() >= initialCapacity * 4); - int capacityAfterRealloc2 = vector.getValueCapacity(); - - for (int i = capacityAfterRealloc1 + 1; i < capacityAfterRealloc2; i++) { - vector.setSafe(i, baseValue + (double) i); - } - - for (int i = 0; i < capacityAfterRealloc2; i++) { - double value = vector.get(i); - assertEquals(baseValue + (double) i, value, 0); - } - - /* at this point we are working with a 128KB buffer data for this - * vector. 
now let's transfer this vector - */ - - TransferPair transferPair = vector.getTransferPair(allocator); - transferPair.transfer(); - - Float8Vector toVector = (Float8Vector) transferPair.getTo(); - - /* now let's realloc the toVector */ - toVector.reAlloc(); - assertTrue(toVector.getValueCapacity() >= initialCapacity * 8); - - for (int i = 0; i < toVector.getValueCapacity(); i++) { - if (i < capacityAfterRealloc2) { - assertEquals(baseValue + (double) i, toVector.get(i), 0); - } else { - assertTrue(toVector.isNull(i)); - } - } - - toVector.close(); - } - } - - @Test /* Float8Vector */ - public void testReallocAfterVectorTransfer2() { - try (final Float8Vector vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) { - int initialCapacity = 4096; - - vector.allocateNew(initialCapacity); - assertTrue(vector.getValueCapacity() >= initialCapacity); - initialCapacity = vector.getValueCapacity(); - - double baseValue = 100.375; - - for (int i = 0; i < initialCapacity; i++) { - vector.setSafe(i, baseValue + (double) i); - } - - /* the above setSafe calls should not have triggered a realloc as - * we are within the capacity. 
check the vector contents - */ - assertEquals(initialCapacity, vector.getValueCapacity()); - - for (int i = 0; i < initialCapacity; i++) { - double value = vector.get(i); - assertEquals(baseValue + (double) i, value, 0); - } - - /* this should trigger a realloc */ - vector.setSafe(initialCapacity, baseValue + (double) initialCapacity); - assertTrue(vector.getValueCapacity() >= initialCapacity * 2); - int capacityAfterRealloc1 = vector.getValueCapacity(); - - for (int i = initialCapacity + 1; i < capacityAfterRealloc1; i++) { - vector.setSafe(i, baseValue + (double) i); - } - - for (int i = 0; i < capacityAfterRealloc1; i++) { - double value = vector.get(i); - assertEquals(baseValue + (double) i, value, 0); - } - - /* this should trigger a realloc */ - vector.setSafe(capacityAfterRealloc1, baseValue + (double) capacityAfterRealloc1); - assertTrue(vector.getValueCapacity() >= initialCapacity * 4); - int capacityAfterRealloc2 = vector.getValueCapacity(); - - for (int i = capacityAfterRealloc1 + 1; i < capacityAfterRealloc2; i++) { - vector.setSafe(i, baseValue + (double) i); - } - - for (int i = 0; i < capacityAfterRealloc2; i++) { - double value = vector.get(i); - assertEquals(baseValue + (double) i, value, 0); - } - - /* at this point we are working with a 128KB buffer data for this - * vector. 
now let's transfer this vector - */ - - TransferPair transferPair = vector.getTransferPair(allocator); - transferPair.transfer(); - - Float8Vector toVector = (Float8Vector) transferPair.getTo(); - - /* check toVector contents before realloc */ - for (int i = 0; i < toVector.getValueCapacity(); i++) { - assertFalse(toVector.isNull(i), "unexpected null value at index: " + i); - double value = toVector.get(i); - assertEquals(baseValue + (double) i, value, 0, "unexpected value at index: " + i); - } - - /* now let's realloc the toVector and check contents again */ - toVector.reAlloc(); - assertTrue(toVector.getValueCapacity() >= initialCapacity * 8); - - for (int i = 0; i < toVector.getValueCapacity(); i++) { - if (i < capacityAfterRealloc2) { - assertFalse(toVector.isNull(i), "unexpected null value at index: " + i); - double value = toVector.get(i); - assertEquals(baseValue + (double) i, value, 0, "unexpected value at index: " + i); - } else { - assertTrue(toVector.isNull(i), "unexpected non-null value at index: " + i); - } - } - - toVector.close(); - } - } - - @Test /* VarCharVector */ - public void testReallocAfterVectorTransfer3() { - try (final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - /* 4096 values with 10 byte per record */ - vector.allocateNew(4096 * 10, 4096); - int valueCapacity = vector.getValueCapacity(); - assertTrue(valueCapacity >= 4096); - - /* populate the vector */ - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - vector.set(i, STR1); - } else { - vector.set(i, STR2); - } - } - - /* Check the vector output */ - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, vector.get(i)); - } else { - assertArrayEquals(STR2, vector.get(i)); - } - } - - /* trigger first realloc */ - vector.setSafe(valueCapacity, STR2, 0, STR2.length); - assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); - while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) { - 
vector.reallocDataBuffer(); - } - - /* populate the remaining vector */ - for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { - if ((i & 1) == 1) { - vector.set(i, STR1); - } else { - vector.set(i, STR2); - } - } - - /* Check the vector output */ - valueCapacity = vector.getValueCapacity(); - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, vector.get(i)); - } else { - assertArrayEquals(STR2, vector.get(i)); - } - } - - /* trigger second realloc */ - vector.setSafe(valueCapacity + 10, STR2, 0, STR2.length); - assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); - while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) { - vector.reallocDataBuffer(); - } - - /* populate the remaining vector */ - for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { - if ((i & 1) == 1) { - vector.set(i, STR1); - } else { - vector.set(i, STR2); - } - } - - /* Check the vector output */ - valueCapacity = vector.getValueCapacity(); - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, vector.get(i)); - } else { - assertArrayEquals(STR2, vector.get(i)); - } - } - - /* we are potentially working with 4x the size of vector buffer - * that we initially started with. Now let's transfer the vector. 
- */ - - TransferPair transferPair = vector.getTransferPair(allocator); - transferPair.transfer(); - VarCharVector toVector = (VarCharVector) transferPair.getTo(); - valueCapacity = toVector.getValueCapacity(); - - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, toVector.get(i)); - } else { - assertArrayEquals(STR2, toVector.get(i)); - } - } - - toVector.close(); - } - } - - @Test /* IntVector */ - public void testReallocAfterVectorTransfer4() { - try (final IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { - - /* 4096 values */ - vector.allocateNew(4096); - int valueCapacity = vector.getValueCapacity(); - assertTrue(valueCapacity >= 4096); - - /* populate the vector */ - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 0) { - vector.set(i, 1000 + i); - } - } - - /* Check the vector output */ - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 0) { - assertEquals(1000 + i, vector.get(i)); - } else { - assertTrue(vector.isNull(i)); - } - } - - /* trigger first realloc */ - vector.setSafe(valueCapacity, 10000000); - assertTrue(vector.getValueCapacity() >= valueCapacity * 2); - - /* populate the remaining vector */ - for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { - if ((i & 1) == 0) { - vector.set(i, 1000 + i); - } - } - - /* Check the vector output */ - valueCapacity = vector.getValueCapacity(); - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 0) { - assertEquals(1000 + i, vector.get(i)); - } else { - assertTrue(vector.isNull(i)); - } - } - - /* trigger second realloc */ - vector.setSafe(valueCapacity, 10000000); - assertTrue(vector.getValueCapacity() >= valueCapacity * 2); - - /* populate the remaining vector */ - for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { - if ((i & 1) == 0) { - vector.set(i, 1000 + i); - } - } - - /* Check the vector output */ - valueCapacity = vector.getValueCapacity(); - for (int i = 0; i < valueCapacity; i++) { - if 
((i & 1) == 0) { - assertEquals(1000 + i, vector.get(i)); - } else { - assertTrue(vector.isNull(i)); - } - } - - /* we are potentially working with 4x the size of vector buffer - * that we initially started with. Now let's transfer the vector. - */ - - TransferPair transferPair = vector.getTransferPair(allocator); - transferPair.transfer(); - IntVector toVector = (IntVector) transferPair.getTo(); - /* value capacity of source and target vectors should be same after - * the transfer. - */ - assertEquals(valueCapacity, toVector.getValueCapacity()); - - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 0) { - assertEquals(1000 + i, toVector.get(i)); - } else { - assertTrue(toVector.isNull(i)); - } - } - - toVector.close(); - } - } - - @Test - public void testReAllocFixedWidthVector() { - // Create a new value vector for 1024 integers - try (final Float4Vector vector = - newVector(Float4Vector.class, EMPTY_SCHEMA_PATH, MinorType.FLOAT4, allocator)) { - vector.allocateNew(1024); - - assertTrue(vector.getValueCapacity() >= 1024); - int initialCapacity = vector.getValueCapacity(); - - // Put values in indexes that fall within the initial allocation - vector.setSafe(0, 100.1f); - vector.setSafe(100, 102.3f); - vector.setSafe(1023, 104.5f); - - // Now try to put values in space that falls beyond the initial allocation - vector.setSafe(2000, 105.5f); - - // Check valueCapacity is more than initial allocation - assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); - - assertEquals(100.1f, vector.get(0), 0); - assertEquals(102.3f, vector.get(100), 0); - assertEquals(104.5f, vector.get(1023), 0); - assertEquals(105.5f, vector.get(2000), 0); - - // Set the valueCount to be more than valueCapacity of current allocation. 
This is possible - // for ValueVectors - // as we don't call setSafe for null values, but we do call setValueCount when all values are - // inserted into the - // vector - vector.setValueCount(vector.getValueCapacity() + 200); - } - } - - @Test - public void testReAllocVariableWidthVector() { - try (final VarCharVector vector = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { - vector.setInitialCapacity(4095); - vector.allocateNew(); - - int initialCapacity = vector.getValueCapacity(); - assertTrue(initialCapacity >= 4095); - - /* Put values in indexes that fall within the initial allocation */ - vector.setSafe(0, STR1, 0, STR1.length); - vector.setSafe(initialCapacity - 1, STR2, 0, STR2.length); - - /* the above set calls should NOT have triggered a realloc */ - assertEquals(initialCapacity, vector.getValueCapacity()); - - /* Now try to put values in space that falls beyond the initial allocation */ - vector.setSafe(initialCapacity + 200, STR3, 0, STR3.length); - - /* Check valueCapacity is more than initial allocation */ - assertTrue(initialCapacity * 2 <= vector.getValueCapacity()); - - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(initialCapacity - 1)); - assertArrayEquals(STR3, vector.get(initialCapacity + 200)); - - // Set the valueCount to be more than valueCapacity of current allocation. This is possible - // for ValueVectors - // as we don't call setSafe for null values, but we do call setValueCount when the current - // batch is processed. 
- vector.setValueCount(vector.getValueCapacity() + 200); - } - } - - @Test - public void testFillEmptiesNotOverfill() { - try (final VarCharVector vector = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { - vector.setInitialCapacity(4095); - vector.allocateNew(); - - int initialCapacity = vector.getValueCapacity(); - assertTrue(initialCapacity >= 4095); - - vector.setSafe(4094, "hello".getBytes(StandardCharsets.UTF_8), 0, 5); - /* the above set method should NOT have triggered a realloc */ - assertEquals(initialCapacity, vector.getValueCapacity()); - - long bufSizeBefore = vector.getFieldBuffers().get(1).capacity(); - vector.setValueCount(initialCapacity); - assertEquals(bufSizeBefore, vector.getFieldBuffers().get(1).capacity()); - assertEquals(initialCapacity, vector.getValueCapacity()); - } - } - - @Test - public void testSetSafeWithArrowBufNoExcessAllocs() { - final int numValues = BaseFixedWidthVector.INITIAL_VALUE_ALLOCATION * 2; - final byte[] valueBytes = "hello world".getBytes(StandardCharsets.UTF_8); - final int valueBytesLength = valueBytes.length; - final int isSet = 1; - - try (final VarCharVector fromVector = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator); - final VarCharVector toVector = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { - /* - * Populate the from vector with 'numValues' with byte-arrays, each of size 'valueBytesLength'. - */ - fromVector.setInitialCapacity(numValues); - fromVector.allocateNew(); - for (int i = 0; i < numValues; ++i) { - fromVector.setSafe(i, valueBytes, 0 /*start*/, valueBytesLength); - } - fromVector.setValueCount(numValues); - ArrowBuf fromDataBuffer = fromVector.getDataBuffer(); - assertTrue(numValues * valueBytesLength <= fromDataBuffer.capacity()); - - /* - * Copy the entries one-by-one from 'fromVector' to 'toVector', but use the setSafe with - * ArrowBuf API (instead of setSafe with byte-array). 
- */ - toVector.setInitialCapacity(numValues); - toVector.allocateNew(); - for (int i = 0; i < numValues; i++) { - int start = fromVector.getStartOffset(i); - int end = fromVector.getStartOffset(i + 1); - toVector.setSafe(i, isSet, start, end, fromDataBuffer); - } - - /* - * Since the 'fromVector' and 'toVector' have the same initial capacity, and were populated - * with the same varchar elements, the allocations and hence, the final capacity should be - * the same. - */ - assertEquals(fromDataBuffer.capacity(), toVector.getDataBuffer().capacity()); - } - } - - @Test - public void testCopyFromWithNulls() { - try (final VarCharVector vector = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator); - final VarCharVector vector2 = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { - - vector.setInitialCapacity(4095); - vector.allocateNew(); - int capacity = vector.getValueCapacity(); - assertTrue(capacity >= 4095); - - for (int i = 0; i < capacity; i++) { - if (i % 3 == 0) { - continue; - } - byte[] b = Integer.toString(i).getBytes(StandardCharsets.UTF_8); - vector.setSafe(i, b, 0, b.length); - } - - /* NO reAlloc() should have happened in setSafe() */ - assertEquals(capacity, vector.getValueCapacity()); - - vector.setValueCount(capacity); - - for (int i = 0; i < capacity; i++) { - if (i % 3 == 0) { - assertNull(vector.getObject(i)); - } else { - assertEquals( - Integer.toString(i), - vector.getObject(i).toString(), - "unexpected value at index: " + i); - } - } - - vector2.setInitialCapacity(4095); - vector2.allocateNew(); - int capacity2 = vector2.getValueCapacity(); - assertEquals(capacity2, capacity); - - for (int i = 0; i < capacity; i++) { - vector2.copyFromSafe(i, i, vector); - if (i % 3 == 0) { - assertNull(vector2.getObject(i)); - } else { - assertEquals( - Integer.toString(i), - vector2.getObject(i).toString(), - "unexpected value at index: " + i); - } - } - - /* NO reAlloc() should have happened in 
copyFrom */ - assertEquals(capacity, vector2.getValueCapacity()); - - vector2.setValueCount(capacity); - - for (int i = 0; i < capacity; i++) { - if (i % 3 == 0) { - assertNull(vector2.getObject(i)); - } else { - assertEquals( - Integer.toString(i), - vector2.getObject(i).toString(), - "unexpected value at index: " + i); - } - } - } - } - - @Test - public void testCopyFromWithNulls1() { - try (final VarCharVector vector = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator); - final VarCharVector vector2 = - newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) { - - vector.setInitialCapacity(4095); - vector.allocateNew(); - int capacity = vector.getValueCapacity(); - assertTrue(capacity >= 4095); - - for (int i = 0; i < capacity; i++) { - if (i % 3 == 0) { - continue; - } - byte[] b = Integer.toString(i).getBytes(StandardCharsets.UTF_8); - vector.setSafe(i, b, 0, b.length); - } - - /* NO reAlloc() should have happened in setSafe() */ - assertEquals(capacity, vector.getValueCapacity()); - - vector.setValueCount(capacity); - - for (int i = 0; i < capacity; i++) { - if (i % 3 == 0) { - assertNull(vector.getObject(i)); - } else { - assertEquals( - Integer.toString(i), - vector.getObject(i).toString(), - "unexpected value at index: " + i); - } - } - - /* set lesser initial capacity than actually needed - * to trigger reallocs in copyFromSafe() - */ - vector2.allocateNew(1024 * 10, 1024); - - int capacity2 = vector2.getValueCapacity(); - assertTrue(capacity2 >= 1024); - assertTrue(capacity2 <= capacity); - - for (int i = 0; i < capacity; i++) { - vector2.copyFromSafe(i, i, vector); - if (i % 3 == 0) { - assertNull(vector2.getObject(i)); - } else { - assertEquals( - Integer.toString(i), - vector2.getObject(i).toString(), - "unexpected value at index: " + i); - } - } - - /* 2 reAllocs should have happened in copyFromSafe() */ - assertEquals(capacity, vector2.getValueCapacity()); - - vector2.setValueCount(capacity); - 
- for (int i = 0; i < capacity; i++) { - if (i % 3 == 0) { - assertNull(vector2.getObject(i)); - } else { - assertEquals( - Integer.toString(i), - vector2.getObject(i).toString(), - "unexpected value at index: " + i); - } - } - } - } - - @Test - public void testSetLastSetUsage() { - try (final VarCharVector vector = new VarCharVector("myvector", allocator)) { - vector.allocateNew(1024 * 10, 1024); - - setBytes(0, STR1, vector); - setBytes(1, STR2, vector); - setBytes(2, STR3, vector); - setBytes(3, STR4, vector); - setBytes(4, STR5, vector); - setBytes(5, STR6, vector); - - /* Check current lastSet */ - assertEquals(-1, vector.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertArrayEquals(STR5, vector.get(4)); - assertArrayEquals(STR6, vector.get(5)); - - /* - * If we don't do setLastSe(5) before setValueCount(), then the latter will corrupt - * the value vector by filling in all positions [0,valuecount-1] will empty byte arrays. - * Run the test by commenting out next line and we should see incorrect vector output. 
- */ - vector.setLastSet(5); - vector.setValueCount(20); - - /* Check current lastSet */ - assertEquals(19, vector.getLastSet()); - - /* Check the vector output again */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertArrayEquals(STR5, vector.get(4)); - assertArrayEquals(STR6, vector.get(5)); - - assertEquals(0, vector.getValueLength(6)); - assertEquals(0, vector.getValueLength(7)); - assertEquals(0, vector.getValueLength(8)); - assertEquals(0, vector.getValueLength(9)); - assertEquals(0, vector.getValueLength(10)); - assertEquals(0, vector.getValueLength(11)); - assertEquals(0, vector.getValueLength(12)); - assertEquals(0, vector.getValueLength(13)); - assertEquals(0, vector.getValueLength(14)); - assertEquals(0, vector.getValueLength(15)); - assertEquals(0, vector.getValueLength(16)); - assertEquals(0, vector.getValueLength(17)); - assertEquals(0, vector.getValueLength(18)); - assertEquals(0, vector.getValueLength(19)); - - /* Check offsets */ - assertEquals(0, vector.offsetBuffer.getInt(0 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(6, vector.offsetBuffer.getInt(1 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(16, vector.offsetBuffer.getInt(2 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(21, vector.offsetBuffer.getInt(3 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(30, vector.offsetBuffer.getInt(4 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(34, vector.offsetBuffer.getInt(5 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(6 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(7 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(8 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(9 * BaseVariableWidthVector.OFFSET_WIDTH)); - 
assertEquals(40, vector.offsetBuffer.getInt(10 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(11 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(12 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(13 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(14 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(15 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(16 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(17 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(18 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(19 * BaseVariableWidthVector.OFFSET_WIDTH)); - - vector.set(19, STR6); - assertArrayEquals(STR6, vector.get(19)); - assertEquals(40, vector.offsetBuffer.getInt(19 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(46, vector.offsetBuffer.getInt(20 * BaseVariableWidthVector.OFFSET_WIDTH)); - } - } - - @Test - public void testVectorLoadUnload() { - - try (final VarCharVector vector1 = new VarCharVector("myvector", allocator)) { - - setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6); - - assertEquals(5, vector1.getLastSet()); - vector1.setValueCount(15); - assertEquals(14, vector1.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector1.get(0)); - assertArrayEquals(STR2, vector1.get(1)); - assertArrayEquals(STR3, vector1.get(2)); - assertArrayEquals(STR4, vector1.get(3)); - assertArrayEquals(STR5, vector1.get(4)); - assertArrayEquals(STR6, vector1.get(5)); - - Field field = vector1.getField(); - String fieldName = field.getName(); - - List fields = new ArrayList<>(); - List fieldVectors = new ArrayList<>(); - - fields.add(field); - fieldVectors.add(vector1); - - Schema schema = new 
Schema(fields); - - VectorSchemaRoot schemaRoot1 = - new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); - VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); - - try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - BufferAllocator finalVectorsAllocator = - allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); - VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); ) { - - VectorLoader vectorLoader = new VectorLoader(schemaRoot2); - vectorLoader.load(recordBatch); - - VarCharVector vector2 = (VarCharVector) schemaRoot2.getVector(fieldName); - /* - * lastSet would have internally been set by VectorLoader.load() when it invokes - * loadFieldBuffers. - */ - assertEquals(14, vector2.getLastSet()); - vector2.setValueCount(25); - assertEquals(24, vector2.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector2.get(0)); - assertArrayEquals(STR2, vector2.get(1)); - assertArrayEquals(STR3, vector2.get(2)); - assertArrayEquals(STR4, vector2.get(3)); - assertArrayEquals(STR5, vector2.get(4)); - assertArrayEquals(STR6, vector2.get(5)); - } - } - } - - @Test - public void testFillEmptiesUsage() { - try (final VarCharVector vector = new VarCharVector("myvector", allocator)) { - - vector.allocateNew(1024 * 10, 1024); - - setBytes(0, STR1, vector); - setBytes(1, STR2, vector); - setBytes(2, STR3, vector); - setBytes(3, STR4, vector); - setBytes(4, STR5, vector); - setBytes(5, STR6, vector); - - /* Check current lastSet */ - assertEquals(-1, vector.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertArrayEquals(STR5, vector.get(4)); - assertArrayEquals(STR6, vector.get(5)); - - vector.setLastSet(5); - /* fill empty byte arrays from index [6, 9] */ - vector.fillEmpties(10); - - /* Check current 
lastSet */ - assertEquals(9, vector.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertArrayEquals(STR5, vector.get(4)); - assertArrayEquals(STR6, vector.get(5)); - assertEquals(0, vector.getValueLength(6)); - assertEquals(0, vector.getValueLength(7)); - assertEquals(0, vector.getValueLength(8)); - assertEquals(0, vector.getValueLength(9)); - - setBytes(10, STR1, vector); - setBytes(11, STR2, vector); - - vector.setLastSet(11); - /* fill empty byte arrays from index [12, 14] */ - vector.setValueCount(15); - - /* Check current lastSet */ - assertEquals(14, vector.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertArrayEquals(STR5, vector.get(4)); - assertArrayEquals(STR6, vector.get(5)); - assertEquals(0, vector.getValueLength(6)); - assertEquals(0, vector.getValueLength(7)); - assertEquals(0, vector.getValueLength(8)); - assertEquals(0, vector.getValueLength(9)); - assertArrayEquals(STR1, vector.get(10)); - assertArrayEquals(STR2, vector.get(11)); - assertEquals(0, vector.getValueLength(12)); - assertEquals(0, vector.getValueLength(13)); - assertEquals(0, vector.getValueLength(14)); - - /* Check offsets */ - assertEquals(0, vector.offsetBuffer.getInt(0 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(6, vector.offsetBuffer.getInt(1 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(16, vector.offsetBuffer.getInt(2 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(21, vector.offsetBuffer.getInt(3 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(30, vector.offsetBuffer.getInt(4 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(34, vector.offsetBuffer.getInt(5 * 
BaseVariableWidthVector.OFFSET_WIDTH)); - - assertEquals(40, vector.offsetBuffer.getInt(6 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(7 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(8 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(9 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(40, vector.offsetBuffer.getInt(10 * BaseVariableWidthVector.OFFSET_WIDTH)); - - assertEquals(46, vector.offsetBuffer.getInt(11 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(56, vector.offsetBuffer.getInt(12 * BaseVariableWidthVector.OFFSET_WIDTH)); - - assertEquals(56, vector.offsetBuffer.getInt(13 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(56, vector.offsetBuffer.getInt(14 * BaseVariableWidthVector.OFFSET_WIDTH)); - assertEquals(56, vector.offsetBuffer.getInt(15 * BaseVariableWidthVector.OFFSET_WIDTH)); - } - } - - @Test /* VarCharVector */ - public void testGetBufferAddress1() { - - try (final VarCharVector vector = new VarCharVector("myvector", allocator)) { - - setVector(vector, STR1, STR2, STR3, STR4, STR5, STR6); - vector.setValueCount(15); - - /* check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertArrayEquals(STR5, vector.get(4)); - assertArrayEquals(STR6, vector.get(5)); - - List buffers = vector.getFieldBuffers(); - long bitAddress = vector.getValidityBufferAddress(); - long offsetAddress = vector.getOffsetBufferAddress(); - long dataAddress = vector.getDataBufferAddress(); - - assertEquals(3, buffers.size()); - assertEquals(bitAddress, buffers.get(0).memoryAddress()); - assertEquals(offsetAddress, buffers.get(1).memoryAddress()); - assertEquals(dataAddress, buffers.get(2).memoryAddress()); - } - } - - @Test /* IntVector */ - public void testGetBufferAddress2() { - try 
(final IntVector vector = new IntVector("myvector", allocator)) { - boolean error = false; - vector.allocateNew(16); - - /* populate the vector */ - for (int i = 0; i < 16; i += 2) { - vector.set(i, i + 10); - } - - /* check the vector output */ - for (int i = 0; i < 16; i += 2) { - assertEquals(i + 10, vector.get(i)); - } - - List buffers = vector.getFieldBuffers(); - long bitAddress = vector.getValidityBufferAddress(); - long dataAddress = vector.getDataBufferAddress(); - - try { - vector.getOffsetBufferAddress(); - } catch (UnsupportedOperationException ue) { - error = true; - } finally { - assertTrue(error); - } - - assertEquals(2, buffers.size()); - assertEquals(bitAddress, buffers.get(0).memoryAddress()); - assertEquals(dataAddress, buffers.get(1).memoryAddress()); - } - } - - @Test - public void testMultipleClose() { - BufferAllocator vectorAllocator = - allocator.newChildAllocator("vector_allocator", 0, Long.MAX_VALUE); - IntVector vector = - newVector(IntVector.class, EMPTY_SCHEMA_PATH, MinorType.INT, vectorAllocator); - vector.close(); - vectorAllocator.close(); - vector.close(); - vectorAllocator.close(); - } - - /* this method is used by the tests to bypass the vector set methods that manipulate - * lastSet. The method is to test the lastSet property and that's why we load the vector - * in a way that lastSet is not set automatically. 
- */ - public static void setBytes(int index, byte[] bytes, VarCharVector vector) { - final int currentOffset = - vector.offsetBuffer.getInt(index * BaseVariableWidthVector.OFFSET_WIDTH); - - BitVectorHelper.setBit(vector.validityBuffer, index); - vector.offsetBuffer.setInt( - (index + 1) * BaseVariableWidthVector.OFFSET_WIDTH, currentOffset + bytes.length); - vector.valueBuffer.setBytes(currentOffset, bytes, 0, bytes.length); - } - - @Test /* VarCharVector */ - public void testSetInitialCapacity() { - try (final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - - /* use the default 8 data bytes on average per element */ - int defaultCapacity = BaseValueVector.INITIAL_VALUE_ALLOCATION - 1; - vector.setInitialCapacity(defaultCapacity); - vector.allocateNew(); - assertEquals(defaultCapacity, vector.getValueCapacity()); - assertEquals( - CommonUtil.nextPowerOfTwo(defaultCapacity * 8), vector.getDataBuffer().capacity()); - - vector.setInitialCapacity(defaultCapacity, 1); - vector.allocateNew(); - assertEquals(defaultCapacity, vector.getValueCapacity()); - assertEquals(CommonUtil.nextPowerOfTwo(defaultCapacity), vector.getDataBuffer().capacity()); - - vector.setInitialCapacity(defaultCapacity, 0.1); - vector.allocateNew(); - assertEquals(defaultCapacity, vector.getValueCapacity()); - assertEquals( - CommonUtil.nextPowerOfTwo((int) (defaultCapacity * 0.1)), - vector.getDataBuffer().capacity()); - - vector.setInitialCapacity(defaultCapacity, 0.01); - vector.allocateNew(); - assertEquals(defaultCapacity, vector.getValueCapacity()); - assertEquals( - CommonUtil.nextPowerOfTwo((int) (defaultCapacity * 0.01)), - vector.getDataBuffer().capacity()); - - vector.setInitialCapacity(5, 0.01); - vector.allocateNew(); - assertEquals(5, vector.getValueCapacity()); - assertEquals(2, vector.getDataBuffer().capacity()); - } - } - - @Test - public void testDefaultAllocNewAll() { - int defaultCapacity = BaseValueVector.INITIAL_VALUE_ALLOCATION; - int expectedSize; 
- long beforeSize; - try (BufferAllocator childAllocator = - allocator.newChildAllocator("defaultAllocs", 0, Long.MAX_VALUE); - final IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, childAllocator); - final BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, childAllocator); - final BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, childAllocator); - final DecimalVector decimalVector = - new DecimalVector(EMPTY_SCHEMA_PATH, childAllocator, 38, 6); - final VarCharVector varCharVector = new VarCharVector(EMPTY_SCHEMA_PATH, childAllocator)) { - - // verify that the wastage is within bounds for IntVector. - beforeSize = childAllocator.getAllocatedMemory(); - intVector.allocateNew(); - assertTrue(intVector.getValueCapacity() >= defaultCapacity); - expectedSize = - (defaultCapacity * IntVector.TYPE_WIDTH) - + BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity); - assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); - - // verify that the wastage is within bounds for BigIntVector. - beforeSize = childAllocator.getAllocatedMemory(); - bigIntVector.allocateNew(); - assertTrue(bigIntVector.getValueCapacity() >= defaultCapacity); - expectedSize = - (defaultCapacity * bigIntVector.TYPE_WIDTH) - + BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity); - assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); - - // verify that the wastage is within bounds for DecimalVector. - beforeSize = childAllocator.getAllocatedMemory(); - decimalVector.allocateNew(); - assertTrue(decimalVector.getValueCapacity() >= defaultCapacity); - expectedSize = - (defaultCapacity * decimalVector.TYPE_WIDTH) - + BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity); - assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); - - // verify that the wastage is within bounds for VarCharVector. 
- // var char vector have an offsets array that is 1 less than defaultCapacity - beforeSize = childAllocator.getAllocatedMemory(); - varCharVector.allocateNew(); - assertTrue(varCharVector.getValueCapacity() >= defaultCapacity - 1); - expectedSize = - (defaultCapacity * VarCharVector.OFFSET_WIDTH) - + BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity) - + defaultCapacity * 8; - // wastage should be less than 5%. - assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); - - // verify that the wastage is within bounds for BitVector. - beforeSize = childAllocator.getAllocatedMemory(); - bitVector.allocateNew(); - assertTrue(bitVector.getValueCapacity() >= defaultCapacity); - expectedSize = BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity) * 2; - assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05); - } - } - - @Test - public void testSetNullableVarCharHolder() { - try (VarCharVector vector = new VarCharVector("", allocator)) { - vector.allocateNew(100, 10); - - NullableVarCharHolder nullHolder = new NullableVarCharHolder(); - nullHolder.isSet = 0; - - NullableVarCharHolder stringHolder = new NullableVarCharHolder(); - stringHolder.isSet = 1; - - String str = "hello"; - ArrowBuf buf = allocator.buffer(16); - buf.setBytes(0, str.getBytes(StandardCharsets.UTF_8)); - - stringHolder.start = 0; - stringHolder.end = str.length(); - stringHolder.buffer = buf; - - vector.set(0, nullHolder); - vector.set(1, stringHolder); - - // verify results - assertTrue(vector.isNull(0)); - assertEquals(str, new String(vector.get(1), StandardCharsets.UTF_8)); - - buf.close(); - } - } - - @Test - public void testSetNullableVarCharHolderSafe() { - try (VarCharVector vector = new VarCharVector("", allocator)) { - vector.allocateNew(5, 1); - - NullableVarCharHolder nullHolder = new NullableVarCharHolder(); - nullHolder.isSet = 0; - - NullableVarCharHolder stringHolder = new NullableVarCharHolder(); - 
stringHolder.isSet = 1; - - String str = "hello world"; - ArrowBuf buf = allocator.buffer(16); - buf.setBytes(0, str.getBytes(StandardCharsets.UTF_8)); - - stringHolder.start = 0; - stringHolder.end = str.length(); - stringHolder.buffer = buf; - - vector.setSafe(0, stringHolder); - vector.setSafe(1, nullHolder); - - // verify results - assertEquals(str, new String(vector.get(0), StandardCharsets.UTF_8)); - assertTrue(vector.isNull(1)); - - buf.close(); - } - } - - @Test - public void testSetNullableVarBinaryHolder() { - try (VarBinaryVector vector = new VarBinaryVector("", allocator)) { - vector.allocateNew(100, 10); - - NullableVarBinaryHolder nullHolder = new NullableVarBinaryHolder(); - nullHolder.isSet = 0; - - NullableVarBinaryHolder binHolder = new NullableVarBinaryHolder(); - binHolder.isSet = 1; - - String str = "hello"; - ArrowBuf buf = allocator.buffer(16); - buf.setBytes(0, str.getBytes(StandardCharsets.UTF_8)); - - binHolder.start = 0; - binHolder.end = str.length(); - binHolder.buffer = buf; - - vector.set(0, nullHolder); - vector.set(1, binHolder); - - // verify results - assertTrue(vector.isNull(0)); - assertEquals(str, new String(vector.get(1), StandardCharsets.UTF_8)); - - buf.close(); - } - } - - @Test - public void testSetNullableVarBinaryHolderSafe() { - try (VarBinaryVector vector = new VarBinaryVector("", allocator)) { - vector.allocateNew(5, 1); - - NullableVarBinaryHolder nullHolder = new NullableVarBinaryHolder(); - nullHolder.isSet = 0; - - NullableVarBinaryHolder binHolder = new NullableVarBinaryHolder(); - binHolder.isSet = 1; - - String str = "hello world"; - ArrowBuf buf = allocator.buffer(16); - buf.setBytes(0, str.getBytes(StandardCharsets.UTF_8)); - - binHolder.start = 0; - binHolder.end = str.length(); - binHolder.buffer = buf; - - vector.setSafe(0, binHolder); - vector.setSafe(1, nullHolder); - - // verify results - assertEquals(str, new String(vector.get(0), StandardCharsets.UTF_8)); - assertTrue(vector.isNull(1)); - - 
buf.close(); - } - } - - @Test - public void testGetPointerFixedWidth() { - final int vectorLength = 100; - try (IntVector vec1 = new IntVector("vec1", allocator); - IntVector vec2 = new IntVector("vec2", allocator)) { - vec1.allocateNew(vectorLength); - vec2.allocateNew(vectorLength); - - for (int i = 0; i < vectorLength; i++) { - if (i % 10 == 0) { - vec1.setNull(i); - vec2.setNull(i); - } else { - vec1.set(i, i * 1234); - vec2.set(i, i * 1234); - } - } - - ArrowBufPointer ptr1 = new ArrowBufPointer(); - ArrowBufPointer ptr2 = new ArrowBufPointer(); - - for (int i = 0; i < vectorLength; i++) { - vec1.getDataPointer(i, ptr1); - vec2.getDataPointer(i, ptr2); - - if (i % 10 == 0) { - assertNull(ptr1.getBuf()); - assertNull(ptr2.getBuf()); - } - - assertTrue(ptr1.equals(ptr2)); - assertTrue(ptr2.equals(ptr2)); - } - } - } - - @Test - public void testGetPointerVariableWidth() { - final String[] sampleData = - new String[] {"abc", "123", "def", null, "hello", "aaaaa", "world", "2019", null, "0717"}; - - try (VarCharVector vec1 = new VarCharVector("vec1", allocator); - VarCharVector vec2 = new VarCharVector("vec2", allocator)) { - vec1.allocateNew(sampleData.length * 10, sampleData.length); - vec2.allocateNew(sampleData.length * 10, sampleData.length); - - for (int i = 0; i < sampleData.length; i++) { - String str = sampleData[i]; - if (str != null) { - vec1.set(i, sampleData[i].getBytes(StandardCharsets.UTF_8)); - vec2.set(i, sampleData[i].getBytes(StandardCharsets.UTF_8)); - } else { - vec1.setNull(i); - vec2.setNull(i); - } - } - - ArrowBufPointer ptr1 = new ArrowBufPointer(); - ArrowBufPointer ptr2 = new ArrowBufPointer(); - - for (int i = 0; i < sampleData.length; i++) { - vec1.getDataPointer(i, ptr1); - vec2.getDataPointer(i, ptr2); - - assertTrue(ptr1.equals(ptr2)); - assertTrue(ptr2.equals(ptr2)); - } - } - } - - @Test - public void testGetNullFromVariableWidthVector() { - try (final VarCharVector varCharVector = new VarCharVector("varcharvec", allocator); - 
final VarBinaryVector varBinaryVector = new VarBinaryVector("varbinary", allocator)) { - varCharVector.allocateNew(10, 1); - varBinaryVector.allocateNew(10, 1); - - varCharVector.setNull(0); - varBinaryVector.setNull(0); - - assertNull(varCharVector.get(0)); - assertNull(varBinaryVector.get(0)); - } - } - - @Test - public void testZeroVectorEquals() { - try (final ZeroVector vector1 = new ZeroVector("vector"); - final ZeroVector vector2 = new ZeroVector("vector")) { - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - assertTrue(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testZeroVectorNotEquals() { - try (final IntVector intVector = new IntVector("int", allocator); - final ZeroVector zeroVector = new ZeroVector("zero"); - final ZeroVector zeroVector1 = new ZeroVector("zero1")) { - - VectorEqualsVisitor zeroVisitor = new VectorEqualsVisitor(); - assertFalse(zeroVisitor.vectorEquals(intVector, zeroVector)); - - VectorEqualsVisitor intVisitor = new VectorEqualsVisitor(); - assertFalse(intVisitor.vectorEquals(zeroVector, intVector)); - - VectorEqualsVisitor twoZeroVisitor = new VectorEqualsVisitor(); - // they are not equal because of distinct names - assertFalse(twoZeroVisitor.vectorEquals(zeroVector, zeroVector1)); - } - } - - @Test - public void testBitVectorEquals() { - try (final BitVector vector1 = new BitVector("bit", allocator); - final BitVector vector2 = new BitVector("bit", allocator)) { - - setVector(vector1, 0, 1, 0); - setVector(vector2, 1, 1, 0); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - - assertFalse(visitor.vectorEquals(vector1, vector2)); - - vector1.set(0, 1); - assertTrue(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testIntVectorEqualsWithNull() { - try (final IntVector vector1 = new IntVector("int", allocator); - final IntVector vector2 = new IntVector("int", allocator)) { - - setVector(vector1, 1, 2); - setVector(vector2, 1, null); - - VectorEqualsVisitor 
visitor = new VectorEqualsVisitor(); - - assertFalse(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testIntVectorEquals() { - try (final IntVector vector1 = new IntVector("int", allocator); - final IntVector vector2 = new IntVector("int", allocator)) { - - setVector(vector1, 1, 2, 3); - setVector(vector2, 1, 2, null); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - - assertFalse(visitor.vectorEquals(vector1, vector2)); - - vector2.setValueCount(3); - vector2.setSafe(2, 2); - assertFalse(vector1.equals(vector2)); - - vector2.setSafe(2, 3); - assertTrue(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testDecimalVectorEquals() { - try (final DecimalVector vector1 = new DecimalVector("decimal", allocator, 3, 3); - final DecimalVector vector2 = new DecimalVector("decimal", allocator, 3, 3); - final DecimalVector vector3 = new DecimalVector("decimal", allocator, 3, 2)) { - - setVector(vector1, 100L, 200L); - setVector(vector2, 100L, 200L); - setVector(vector3, 100L, 200L); - - VectorEqualsVisitor visitor1 = new VectorEqualsVisitor(); - VectorEqualsVisitor visitor2 = new VectorEqualsVisitor(); - - assertTrue(visitor1.vectorEquals(vector1, vector2)); - assertFalse(visitor2.vectorEquals(vector1, vector3)); - } - } - - @Test - public void testVarcharVectorEqualsWithNull() { - try (final VarCharVector vector1 = new VarCharVector("varchar", allocator); - final VarCharVector vector2 = new VarCharVector("varchar", allocator)) { - - setVector(vector1, STR1, STR2); - setVector(vector2, STR1, null); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - assertFalse(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testVarcharVectorEquals() { - try (final VarCharVector vector1 = new VarCharVector("varchar", allocator); - final VarCharVector vector2 = new VarCharVector("varchar", allocator)) { - - setVector(vector1, STR1, STR2, STR3); - setVector(vector2, STR1, STR2); - - 
VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - assertFalse(visitor.vectorEquals(vector1, vector2)); - - vector2.setSafe(2, STR3, 0, STR3.length); - vector2.setValueCount(3); - assertTrue(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testVarBinaryVectorEquals() { - try (final VarBinaryVector vector1 = new VarBinaryVector("binary", allocator); - final VarBinaryVector vector2 = new VarBinaryVector("binary", allocator)) { - - setVector(vector1, STR1, STR2, STR3); - setVector(vector2, STR1, STR2); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - assertFalse(visitor.vectorEquals(vector1, vector2)); - - vector2.setSafe(2, STR3, 0, STR3.length); - vector2.setValueCount(3); - assertTrue(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testListVectorEqualsWithNull() { - try (final ListVector vector1 = ListVector.empty("list", allocator); - final ListVector vector2 = ListVector.empty("list", allocator); ) { - - UnionListWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - // set some values - writeListVector(writer1, new int[] {1, 2}); - writeListVector(writer1, new int[] {3, 4}); - writeListVector(writer1, new int[] {}); - writer1.setValueCount(3); - - UnionListWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - // set some values - writeListVector(writer2, new int[] {1, 2}); - writeListVector(writer2, new int[] {3, 4}); - writer2.setValueCount(3); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - - assertFalse(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testListViewVectorEqualsWithNull() { - try (final ListViewVector vector1 = ListViewVector.empty("listview", allocator); - final ListViewVector vector2 = ListViewVector.empty("listview", allocator); ) { - - UnionListViewWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - // set some values - writeListViewVector(writer1, new int[] {1, 2}); - writeListViewVector(writer1, new 
int[] {3, 4}); - writeListViewVector(writer1, new int[] {}); - writer1.setValueCount(3); - - UnionListViewWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - // set some values - writeListViewVector(writer2, new int[] {1, 2}); - writeListViewVector(writer2, new int[] {3, 4}); - writer2.setValueCount(3); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - - assertFalse(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testLargeListViewVectorEqualsWithNull() { - try (final LargeListViewVector vector1 = LargeListViewVector.empty("largelistview", allocator); - final LargeListViewVector vector2 = - LargeListViewVector.empty("largelistview", allocator); ) { - - UnionLargeListViewWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - // set some values - writeLargeListViewVector(writer1, new int[] {1, 2}); - writeLargeListViewVector(writer1, new int[] {3, 4}); - writeLargeListViewVector(writer1, new int[] {}); - writer1.setValueCount(3); - - UnionLargeListViewWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - // set some values - writeLargeListViewVector(writer2, new int[] {1, 2}); - writeLargeListViewVector(writer2, new int[] {3, 4}); - writer2.setValueCount(3); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - - assertFalse(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testListVectorEquals() { - try (final ListVector vector1 = ListVector.empty("list", allocator); - final ListVector vector2 = ListVector.empty("list", allocator); ) { - - UnionListWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - // set some values - writeListVector(writer1, new int[] {1, 2}); - writeListVector(writer1, new int[] {3, 4}); - writeListVector(writer1, new int[] {5, 6}); - writer1.setValueCount(3); - - UnionListWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - // set some values - writeListVector(writer2, new int[] {1, 2}); - writeListVector(writer2, new int[] 
{3, 4}); - writer2.setValueCount(2); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - assertFalse(visitor.vectorEquals(vector1, vector2)); - - writeListVector(writer2, new int[] {5, 6}); - writer2.setValueCount(3); - - assertTrue(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testListViewVectorEquals() { - try (final ListViewVector vector1 = ListViewVector.empty("listview", allocator); - final ListViewVector vector2 = ListViewVector.empty("listview", allocator); ) { - - UnionListViewWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - // set some values - writeListViewVector(writer1, new int[] {1, 2}); - writeListViewVector(writer1, new int[] {3, 4}); - writeListViewVector(writer1, new int[] {5, 6}); - writer1.setValueCount(3); - - UnionListViewWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - // set some values - writeListViewVector(writer2, new int[] {1, 2}); - writeListViewVector(writer2, new int[] {3, 4}); - writer2.setValueCount(2); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - assertFalse(visitor.vectorEquals(vector1, vector2)); - - writeListViewVector(writer2, new int[] {5, 6}); - writer2.setValueCount(3); - - assertTrue(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testLargeListViewVectorEquals() { - try (final LargeListViewVector vector1 = LargeListViewVector.empty("largelistview", allocator); - final LargeListViewVector vector2 = - LargeListViewVector.empty("largelistview", allocator); ) { - - UnionLargeListViewWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - // set some values - writeLargeListViewVector(writer1, new int[] {1, 2}); - writeLargeListViewVector(writer1, new int[] {3, 4}); - writeLargeListViewVector(writer1, new int[] {5, 6}); - writer1.setValueCount(3); - - UnionLargeListViewWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - // set some values - writeLargeListViewVector(writer2, new int[] {1, 2}); - 
writeLargeListViewVector(writer2, new int[] {3, 4}); - writer2.setValueCount(2); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - assertFalse(visitor.vectorEquals(vector1, vector2)); - - writeLargeListViewVector(writer2, new int[] {5, 6}); - writer2.setValueCount(3); - - assertTrue(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testListVectorSetNull() { - try (final ListVector vector = ListVector.empty("list", allocator)) { - UnionListWriter writer = vector.getWriter(); - writer.allocate(); - - writeListVector(writer, new int[] {1, 2}); - writeListVector(writer, new int[] {3, 4}); - writeListVector(writer, new int[] {5, 6}); - vector.setNull(3); - vector.setNull(4); - vector.setNull(5); - writer.setValueCount(6); - - assertEquals(vector.getObject(0), Arrays.asList(1, 2)); - assertEquals(vector.getObject(1), Arrays.asList(3, 4)); - assertEquals(vector.getObject(2), Arrays.asList(5, 6)); - assertTrue(vector.isNull(3)); - assertTrue(vector.isNull(4)); - assertTrue(vector.isNull(5)); - } - } - - @Test - public void testListViewVectorSetNull() { - try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { - UnionListViewWriter writer = vector.getWriter(); - writer.allocate(); - - writeListViewVector(writer, new int[] {1, 2}); - writeListViewVector(writer, new int[] {3, 4}); - writeListViewVector(writer, new int[] {5, 6}); - vector.setNull(3); - vector.setNull(4); - vector.setNull(5); - writer.setValueCount(6); - - assertEquals(vector.getObject(0), Arrays.asList(1, 2)); - assertEquals(vector.getObject(1), Arrays.asList(3, 4)); - assertEquals(vector.getObject(2), Arrays.asList(5, 6)); - assertTrue(vector.isNull(3)); - assertTrue(vector.isNull(4)); - assertTrue(vector.isNull(5)); - } - } - - @Test - public void testLargeListViewVectorSetNull() { - try (final LargeListViewVector vector = LargeListViewVector.empty("largelistview", allocator)) { - UnionLargeListViewWriter writer = vector.getWriter(); - 
writer.allocate(); - - writeLargeListViewVector(writer, new int[] {1, 2}); - writeLargeListViewVector(writer, new int[] {3, 4}); - writeLargeListViewVector(writer, new int[] {5, 6}); - vector.setNull(3); - vector.setNull(4); - vector.setNull(5); - writer.setValueCount(6); - - assertEquals(vector.getObject(0), Arrays.asList(1, 2)); - assertEquals(vector.getObject(1), Arrays.asList(3, 4)); - assertEquals(vector.getObject(2), Arrays.asList(5, 6)); - assertTrue(vector.isNull(3)); - assertTrue(vector.isNull(4)); - assertTrue(vector.isNull(5)); - } - } - - @Test - public void testStructVectorEqualsWithNull() { - - try (final StructVector vector1 = StructVector.empty("struct", allocator); - final StructVector vector2 = StructVector.empty("struct", allocator); ) { - vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - - NullableStructWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - writeStructVector(writer1, 1, 10L); - writeStructVector(writer1, 2, 20L); - writeStructVector(writer1, 3, 30L); - writer1.setValueCount(3); - - NullableStructWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - writeStructVector(writer2, 1, 10L); - writeStructVector(writer2, 3, 30L); - writer2.setValueCount(3); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - assertFalse(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testStructVectorEquals() { - try (final StructVector vector1 = StructVector.empty("struct", allocator); - final StructVector vector2 = StructVector.empty("struct", allocator); ) { - vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector1.addOrGet("f1", 
FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - - NullableStructWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - writeStructVector(writer1, 1, 10L); - writeStructVector(writer1, 2, 20L); - writeStructVector(writer1, 3, 30L); - writer1.setValueCount(3); - - NullableStructWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - writeStructVector(writer2, 1, 10L); - writeStructVector(writer2, 2, 20L); - writer2.setValueCount(2); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - assertFalse(visitor.vectorEquals(vector1, vector2)); - - writeStructVector(writer2, 3, 30L); - writer2.setValueCount(3); - - assertTrue(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testStructVectorEqualsWithDiffChild() { - try (final StructVector vector1 = StructVector.empty("struct", allocator); - final StructVector vector2 = StructVector.empty("struct", allocator); ) { - vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector2.addOrGet("f10", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - - NullableStructWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - writeStructVector(writer1, 1, 10L); - writeStructVector(writer1, 2, 20L); - writer1.setValueCount(2); - - NullableStructWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - writeStructVector(writer2, 1, 10L); - writeStructVector(writer2, 2, 20L); - writer2.setValueCount(2); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - assertFalse(visitor.vectorEquals(vector1, vector2)); - } - } - - 
@Test - public void testStructVectorAcceptsDenseUnionChild() { - Field childField = - new Field( - "child", - FieldType.notNullable(new ArrowType.Union(UnionMode.Dense, new int[] {})), - Collections.emptyList()); - Field structField = - new Field( - "struct", - FieldType.notNullable(ArrowType.Struct.INSTANCE), - Collections.singletonList(childField)); - - try (FieldVector structVec = structField.createVector(allocator)) { - assertEquals(structField, structVec.getField()); - } - } - - @Test - public void testUnionVectorEquals() { - try (final UnionVector vector1 = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null); - final UnionVector vector2 = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null); ) { - - final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); - uInt4Holder.value = 10; - uInt4Holder.isSet = 1; - - final NullableIntHolder intHolder = new NullableIntHolder(); - uInt4Holder.value = 20; - uInt4Holder.isSet = 1; - - vector1.setType(0, Types.MinorType.UINT4); - vector1.setSafe(0, uInt4Holder); - - vector1.setType(1, Types.MinorType.INT); - vector1.setSafe(1, intHolder); - vector1.setValueCount(2); - - vector2.setType(0, Types.MinorType.UINT4); - vector2.setSafe(0, uInt4Holder); - - vector2.setType(1, Types.MinorType.INT); - vector2.setSafe(1, intHolder); - vector2.setValueCount(2); - - VectorEqualsVisitor visitor = new VectorEqualsVisitor(); - assertTrue(visitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testEqualsWithIndexOutOfRange() { - assertThrows( - IllegalArgumentException.class, - () -> { - try (final IntVector vector1 = new IntVector("int", allocator); - final IntVector vector2 = new IntVector("int", allocator)) { - - setVector(vector1, 1, 2); - setVector(vector2, 1, 2); - - assertTrue(new RangeEqualsVisitor(vector1, vector2).rangeEquals(new Range(2, 3, 1))); - } - }); - } - - @Test - public void testFixedWidthVectorNullHashCode() { - try (IntVector 
intVec = new IntVector("int vector", allocator)) { - intVec.allocateNew(1); - intVec.setValueCount(1); - - intVec.set(0, 100); - intVec.setNull(0); - - assertEquals(0, intVec.hashCode(0)); - } - } - - @Test - public void testVariableWidthVectorNullHashCode() { - try (VarCharVector varChVec = new VarCharVector("var char vector", allocator)) { - varChVec.allocateNew(100, 1); - varChVec.setValueCount(1); - - varChVec.set(0, "abc".getBytes(StandardCharsets.UTF_8)); - varChVec.setNull(0); - - assertEquals(0, varChVec.hashCode(0)); - } - } - - @Test - public void testUnionNullHashCode() { - try (UnionVector srcVector = - new UnionVector( - EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) { - srcVector.allocateNew(); - - final NullableIntHolder holder = new NullableIntHolder(); - holder.isSet = 0; - - // write some data - srcVector.setType(0, MinorType.INT); - srcVector.setSafe(0, holder); - - assertEquals(0, srcVector.hashCode(0)); - } - } - - @Test - public void testToString() { - try (final IntVector intVector = new IntVector("intVector", allocator); - final ListVector listVector = ListVector.empty("listVector", allocator); - final StructVector structVector = StructVector.empty("structVector", allocator)) { - - // validate intVector toString - assertEquals("[]", intVector.toString()); - intVector.setValueCount(3); - intVector.setSafe(0, 1); - intVector.setSafe(1, 2); - intVector.setSafe(2, 3); - assertEquals("[1, 2, 3]", intVector.toString()); - - // validate intVector with plenty values - intVector.setValueCount(100); - for (int i = 0; i < 100; i++) { - intVector.setSafe(i, i); - } - assertEquals( - "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... 
90, 91, 92, 93, 94, 95, 96, 97, 98, 99]", - intVector.toString()); - - // validate listVector toString - listVector.allocateNewSafe(); - listVector.initializeChildrenFromFields( - Collections.singletonList(Field.nullable("child", ArrowType.Utf8.INSTANCE))); - VarCharVector dataVector = (VarCharVector) listVector.getDataVector(); - - listVector.startNewValue(0); - dataVector.setSafe(0, "aaa".getBytes(StandardCharsets.UTF_8)); - dataVector.setSafe(1, "bbb".getBytes(StandardCharsets.UTF_8)); - listVector.endValue(0, 2); - - listVector.startNewValue(1); - dataVector.setSafe(2, "ccc".getBytes(StandardCharsets.UTF_8)); - dataVector.setSafe(3, "ddd".getBytes(StandardCharsets.UTF_8)); - listVector.endValue(1, 2); - listVector.setValueCount(2); - - assertEquals("[[\"aaa\",\"bbb\"], [\"ccc\",\"ddd\"]]", listVector.toString()); - - // validate structVector toString - structVector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - structVector.addOrGet( - "f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - - NullableStructWriter structWriter = structVector.getWriter(); - structWriter.allocate(); - - writeStructVector(structWriter, 1, 10L); - writeStructVector(structWriter, 2, 20L); - structWriter.setValueCount(2); - - assertEquals("[{\"f0\":1,\"f1\":10}, {\"f0\":2,\"f1\":20}]", structVector.toString()); - } - } - - @Test - public void testUInt1VectorToString() { - try (final UInt1Vector uInt1Vector = new UInt1Vector("uInt1Vector", allocator)) { - setVector(uInt1Vector, (byte) 0xff); - assertEquals("[255]", uInt1Vector.toString()); - } - } - - @Test - public void testUInt2VectorToString() { - try (final UInt2Vector uInt2Vector = new UInt2Vector("uInt2Vector", allocator)) { - setVector(uInt2Vector, (char) 0xffff); - assertEquals("[65535]", uInt2Vector.toString()); - } - } - - @Test - public void testUInt4VectorToString() { - try (final UInt4Vector uInt4Vector = new UInt4Vector("uInt4Vector", allocator)) { - 
setVector(uInt4Vector, 0xffffffff); - assertEquals("[4294967295]", uInt4Vector.toString()); - } - } - - @Test - public void testUInt8VectorToString() { - try (final UInt8Vector uInt8Vector = new UInt8Vector("uInt8Vector", allocator)) { - setVector(uInt8Vector, 0xffffffffffffffffL); - assertEquals("[18446744073709551615]", uInt8Vector.toString()); - } - } - - @Test - public void testUnloadVariableWidthVector() { - try (final VarCharVector varCharVector = new VarCharVector("var char", allocator)) { - varCharVector.allocateNew(5, 2); - varCharVector.setValueCount(2); - - varCharVector.set(0, "abcd".getBytes(StandardCharsets.UTF_8)); - - List bufs = varCharVector.getFieldBuffers(); - assertEquals(3, bufs.size()); - - ArrowBuf offsetBuf = bufs.get(1); - ArrowBuf dataBuf = bufs.get(2); - - assertEquals(12, offsetBuf.writerIndex()); - assertEquals(4, offsetBuf.getInt(4)); - assertEquals(4, offsetBuf.getInt(8)); - - assertEquals(4, dataBuf.writerIndex()); - } - } - - private void writeStructVector(NullableStructWriter writer, int value1, long value2) { - writer.start(); - writer.integer("f0").writeInt(value1); - writer.bigInt("f1").writeBigInt(value2); - writer.end(); - } - - private void writeListVector(UnionListWriter writer, int[] values) { - writer.startList(); - for (int v : values) { - writer.integer().writeInt(v); - } - writer.endList(); - } - - private void writeListViewVector(UnionListViewWriter writer, int[] values) { - writer.startListView(); - for (int v : values) { - writer.integer().writeInt(v); - } - writer.endListView(); - } - - private void writeLargeListViewVector(UnionLargeListViewWriter writer, int[] values) { - writer.startListView(); - for (int v : values) { - writer.integer().writeInt(v); - } - writer.endListView(); - } - - @Test - public void testVariableVectorGetEndOffset() { - try (final VarCharVector vector1 = new VarCharVector("v1", allocator); - final VarBinaryVector vector2 = new VarBinaryVector("v2", allocator)) { - - setVector(vector1, STR1, 
null, STR2); - setVector(vector2, STR1, STR2, STR3); - - assertEquals(0, vector1.getStartOffset(0)); - assertEquals(STR1.length, vector1.getEndOffset(0)); - assertEquals(STR1.length, vector1.getStartOffset(1)); - assertEquals(STR1.length, vector1.getEndOffset(1)); - assertEquals(STR1.length, vector1.getStartOffset(2)); - assertEquals(STR1.length + STR2.length, vector1.getEndOffset(2)); - - assertEquals(0, vector2.getStartOffset(0)); - assertEquals(STR1.length, vector2.getEndOffset(0)); - assertEquals(STR1.length, vector2.getStartOffset(1)); - assertEquals(STR1.length + STR2.length, vector2.getEndOffset(1)); - assertEquals(STR1.length + STR2.length, vector2.getStartOffset(2)); - assertEquals(STR1.length + STR2.length + STR3.length, vector2.getEndOffset(2)); - } - } - - @Test - public void testEmptyBufBehavior() { - final int valueCount = 10; - - try (final IntVector vector = new IntVector("v", allocator)) { - assertEquals(1, vector.getDataBuffer().refCnt()); - assertEquals(1, vector.getValidityBuffer().refCnt()); - assertEquals(0, vector.getDataBuffer().capacity()); - assertEquals(0, vector.getValidityBuffer().capacity()); - - vector.allocateNew(valueCount); - assertEquals(2, vector.getDataBuffer().refCnt()); - assertEquals(2, vector.getValidityBuffer().refCnt()); - assertEquals(56, vector.getDataBuffer().capacity()); - assertEquals(8, vector.getValidityBuffer().capacity()); - - vector.close(); - assertEquals(1, vector.getDataBuffer().refCnt()); - assertEquals(1, vector.getValidityBuffer().refCnt()); - assertEquals(0, vector.getDataBuffer().capacity()); - assertEquals(0, vector.getValidityBuffer().capacity()); - } - - try (final VarCharVector vector = new VarCharVector("v", allocator)) { - assertEquals(1, vector.getDataBuffer().refCnt()); - assertEquals(1, vector.getValidityBuffer().refCnt()); - assertEquals(1, vector.getOffsetBuffer().refCnt()); - assertEquals(0, vector.getDataBuffer().capacity()); - assertEquals(0, vector.getValidityBuffer().capacity()); - 
assertEquals(0, vector.getOffsetBuffer().capacity()); - - vector.allocateNew(valueCount); - assertEquals(1, vector.getDataBuffer().refCnt()); - assertEquals(2, vector.getValidityBuffer().refCnt()); - assertEquals(2, vector.getOffsetBuffer().refCnt()); - assertEquals(32768, vector.getDataBuffer().capacity()); - assertEquals(8, vector.getValidityBuffer().capacity()); - assertEquals(56, vector.getOffsetBuffer().capacity()); - - vector.close(); - assertEquals(1, vector.getDataBuffer().refCnt()); - assertEquals(1, vector.getValidityBuffer().refCnt()); - assertEquals(1, vector.getOffsetBuffer().refCnt()); - assertEquals(0, vector.getDataBuffer().capacity()); - assertEquals(0, vector.getValidityBuffer().capacity()); - assertEquals(0, vector.getOffsetBuffer().capacity()); - } - - try (final ListVector vector = ListVector.empty("v", allocator)) { - assertEquals(1, vector.getValidityBuffer().refCnt()); - assertEquals(1, vector.getOffsetBuffer().refCnt()); - assertEquals(0, vector.getValidityBuffer().capacity()); - assertEquals(0, vector.getOffsetBuffer().capacity()); - - vector.setValueCount(valueCount); - vector.allocateNewSafe(); - assertEquals(1, vector.getValidityBuffer().refCnt()); - assertEquals(1, vector.getOffsetBuffer().refCnt()); - assertEquals(512, vector.getValidityBuffer().capacity()); - assertEquals(16384, vector.getOffsetBuffer().capacity()); - - vector.close(); - assertEquals(1, vector.getValidityBuffer().refCnt()); - assertEquals(1, vector.getOffsetBuffer().refCnt()); - assertEquals(0, vector.getValidityBuffer().capacity()); - assertEquals(0, vector.getOffsetBuffer().capacity()); - } - - try (final FixedSizeListVector vector = FixedSizeListVector.empty("v", 2, allocator)) { - assertEquals(1, vector.getValidityBuffer().refCnt()); - assertEquals(0, vector.getValidityBuffer().capacity()); - - vector.setValueCount(10); - vector.allocateNewSafe(); - assertEquals(1, vector.getValidityBuffer().refCnt()); - assertEquals(512, vector.getValidityBuffer().capacity()); - 
- vector.close(); - assertEquals(1, vector.getValidityBuffer().refCnt()); - assertEquals(0, vector.getValidityBuffer().capacity()); - } - - try (final StructVector vector = StructVector.empty("v", allocator)) { - assertEquals(1, vector.getValidityBuffer().refCnt()); - assertEquals(0, vector.getValidityBuffer().capacity()); - - vector.setValueCount(valueCount); - vector.allocateNewSafe(); - assertEquals(1, vector.getValidityBuffer().refCnt()); - assertEquals(512, vector.getValidityBuffer().capacity()); - - vector.close(); - assertEquals(1, vector.getValidityBuffer().refCnt()); - assertEquals(0, vector.getValidityBuffer().capacity()); - } - - try (final UnionVector vector = UnionVector.empty("v", allocator)) { - assertEquals(1, vector.getTypeBuffer().refCnt()); - assertEquals(0, vector.getTypeBuffer().capacity()); - - vector.setValueCount(10); - vector.allocateNewSafe(); - assertEquals(1, vector.getTypeBuffer().refCnt()); - assertEquals(4096, vector.getTypeBuffer().capacity()); - - vector.close(); - assertEquals(1, vector.getTypeBuffer().refCnt()); - assertEquals(0, vector.getTypeBuffer().capacity()); - } - - try (final DenseUnionVector vector = DenseUnionVector.empty("v", allocator)) { - assertEquals(1, vector.getTypeBuffer().refCnt()); - assertEquals(1, vector.getOffsetBuffer().refCnt()); - assertEquals(0, vector.getTypeBuffer().capacity()); - assertEquals(0, vector.getOffsetBuffer().capacity()); - - vector.setValueCount(valueCount); - vector.allocateNew(); - assertEquals(1, vector.getTypeBuffer().refCnt()); - assertEquals(1, vector.getOffsetBuffer().refCnt()); - assertEquals(4096, vector.getTypeBuffer().capacity()); - assertEquals(16384, vector.getOffsetBuffer().capacity()); - - vector.close(); - assertEquals(1, vector.getTypeBuffer().refCnt()); - assertEquals(1, vector.getOffsetBuffer().refCnt()); - assertEquals(0, vector.getTypeBuffer().capacity()); - assertEquals(0, vector.getOffsetBuffer().capacity()); - } - } - - @Test - public void testSetGetUInt1() { - try 
(UInt1Vector vector = new UInt1Vector("vector", allocator)) { - vector.allocateNew(2); - - vector.setWithPossibleTruncate(0, UInt1Vector.MAX_UINT1); - vector.setUnsafeWithPossibleTruncate(1, UInt1Vector.MAX_UINT1); - vector.setValueCount(2); - - assertEquals(UInt1Vector.MAX_UINT1 & UInt1Vector.PROMOTION_MASK, vector.getValueAsLong(0)); - assertEquals(UInt1Vector.MAX_UINT1 & UInt1Vector.PROMOTION_MASK, vector.getValueAsLong(1)); - } - } - - @Test - public void testSetGetUInt2() { - try (UInt2Vector vector = new UInt2Vector("vector", allocator)) { - vector.allocateNew(2); - - vector.setWithPossibleTruncate(0, UInt2Vector.MAX_UINT2); - vector.setUnsafeWithPossibleTruncate(1, UInt2Vector.MAX_UINT2); - vector.setValueCount(2); - - assertEquals(UInt2Vector.MAX_UINT2, vector.getValueAsLong(0)); - assertEquals(UInt2Vector.MAX_UINT2, vector.getValueAsLong(1)); - } - } - - @Test - public void testSetGetUInt4() { - try (UInt4Vector vector = new UInt4Vector("vector", allocator)) { - vector.allocateNew(2); - - vector.setWithPossibleTruncate(0, UInt4Vector.MAX_UINT4); - vector.setUnsafeWithPossibleTruncate(1, UInt4Vector.MAX_UINT4); - vector.setValueCount(2); - - long expected = UInt4Vector.MAX_UINT4 & UInt4Vector.PROMOTION_MASK; - assertEquals(expected, vector.getValueAsLong(0)); - assertEquals(expected, vector.getValueAsLong(1)); - } - } - - @Test - public void testSplitAndTransferFixedWithVector1() { - RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); - try (BufferAllocator child = allocator.newChildAllocator("child", 0, Long.MAX_VALUE)) { - try (IntVector vector = new IntVector("vector", child)) { - vector.setSafe(0, 1); - vector.setSafe(1, 2); - vector.setSafe(2, 3); - vector.setValueCount(3); - - TransferPair transferPair = vector.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 1); - try (IntVector target = (IntVector) transferPair.getTo()) { - // no-op try-with-resource - assertEquals(1, target.get(0)); - } - } - } - } - - @Test - public void 
testSplitAndTransferFixedWithVector2() { - IntVector target; - try (BufferAllocator child = allocator.newChildAllocator("child", 0, Long.MAX_VALUE)) { - try (IntVector vector = new IntVector("source", child)) { - vector.setSafe(0, 1); - vector.setSafe(1, 2); - vector.setSafe(2, 3); - vector.setValueCount(3); - - TransferPair transferPair = vector.getTransferPair(allocator); - transferPair.splitAndTransfer(0, 1); - target = (IntVector) transferPair.getTo(); - assertEquals(1, target.get(0)); - } - } - target.close(); - } - - @Test - public void testVectorLoadUnloadOnNonVariadicVectors() { - - try (final IntVector vector1 = new IntVector("myvector", allocator)) { - - setVector(vector1, 1, 2, 3, 4, 5, 6); - vector1.setValueCount(15); - - /* Check the vector output */ - assertEquals(1, vector1.get(0)); - assertEquals(2, vector1.get(1)); - assertEquals(3, vector1.get(2)); - assertEquals(4, vector1.get(3)); - assertEquals(5, vector1.get(4)); - assertEquals(6, vector1.get(5)); - - Field field = vector1.getField(); - String fieldName = field.getName(); - - List fields = new ArrayList<>(); - List fieldVectors = new ArrayList<>(); - - fields.add(field); - fieldVectors.add(vector1); - - Schema schema = new Schema(fields); - - VectorSchemaRoot schemaRoot1 = - new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); - VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); - - try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - BufferAllocator finalVectorsAllocator = - allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); - VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); ) { - - // validating recordBatch doesn't contain an output for variadicBufferCounts - assertTrue(recordBatch.getVariadicBufferCounts().isEmpty()); - - VectorLoader vectorLoader = new VectorLoader(schemaRoot2); - vectorLoader.load(recordBatch); - - IntVector vector2 = (IntVector) schemaRoot2.getVector(fieldName); - 
vector2.setValueCount(25); - - /* Check the vector output */ - assertEquals(1, vector2.get(0)); - assertEquals(2, vector2.get(1)); - assertEquals(3, vector2.get(2)); - assertEquals(4, vector2.get(3)); - assertEquals(5, vector2.get(4)); - assertEquals(6, vector2.get(5)); - } - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVectorIterable.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVectorIterable.java deleted file mode 100644 index e6b79e89b9942..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVectorIterable.java +++ /dev/null @@ -1,909 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.hamcrest.MatcherAssert.assertThat; - -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.time.Duration; -import java.time.LocalDateTime; -import java.time.Period; -import java.time.ZoneOffset; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.impl.NullableStructWriter; -import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; -import org.apache.arrow.vector.complex.impl.UnionListViewWriter; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.Text; -import org.hamcrest.collection.IsIterableContainingInOrder; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestValueVectorIterable { - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testBigIntVectorIterable() { - try (final BigIntVector bigIntVector = new BigIntVector("bigInt", allocator)) { - bigIntVector.allocateNew(3); - bigIntVector.setSafe(0, 6); - 
bigIntVector.setSafe(1, 2); - bigIntVector.setSafe(2, 19); - bigIntVector.setValueCount(3); - - assertThat( - bigIntVector.getValueIterable(), IsIterableContainingInOrder.contains(6L, 2L, 19L)); - } - } - - @Test - public void testBitVectorIterable() { - try (final BitVector bitVector = new BitVector("bit", allocator)) { - bitVector.allocateNew(3); - bitVector.setSafe(0, 0); - bitVector.setNull(1); - bitVector.setSafe(2, 1); - bitVector.setValueCount(3); - - assertThat( - bitVector.getValueIterable(), IsIterableContainingInOrder.contains(false, null, true)); - } - } - - @Test - public void testDateDayVectorIterable() { - try (final DateDayVector dateDayVector = new DateDayVector("dateDay", allocator)) { - dateDayVector.allocateNew(3); - dateDayVector.setSafe(0, 30000); - dateDayVector.setNull(1); - dateDayVector.setSafe(2, 555); - dateDayVector.setValueCount(3); - - assertThat( - dateDayVector.getValueIterable(), IsIterableContainingInOrder.contains(30000, null, 555)); - } - } - - @Test - public void testDateMilliVectorIterable() { - try (final DateMilliVector dateMilliVector = new DateMilliVector("dateMilli", allocator)) { - dateMilliVector.allocateNew(3); - dateMilliVector.setSafe(0, 30000L); - dateMilliVector.setNull(1); - dateMilliVector.setSafe(2, 555L); - dateMilliVector.setValueCount(3); - - final LocalDateTime value1 = LocalDateTime.ofEpochSecond(30L, 0, ZoneOffset.ofHours(0)); - final LocalDateTime value3 = - LocalDateTime.ofEpochSecond(0L, 555000000, ZoneOffset.ofHours(0)); - assertThat( - dateMilliVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testDecimal256VectorIterable() { - try (final Decimal256Vector decimal256Vector = - new Decimal256Vector("decimal256", allocator, 8, 2)) { - decimal256Vector.allocateNew(3); - decimal256Vector.setSafe(0, 30000L); - decimal256Vector.setNull(1); - decimal256Vector.setSafe(2, 555L); - decimal256Vector.setValueCount(3); - - final BigDecimal 
value1 = new BigDecimal(300L).setScale(2, RoundingMode.HALF_UP); - final BigDecimal value3 = - new BigDecimal(555L).scaleByPowerOfTen(-2).setScale(2, RoundingMode.HALF_UP); - assertThat( - decimal256Vector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testDecimalVectorIterable() { - try (final DecimalVector decimalVector = new DecimalVector("decimalDay", allocator, 8, 2)) { - decimalVector.allocateNew(3); - decimalVector.setSafe(0, 30000); - decimalVector.setNull(1); - decimalVector.setSafe(2, 555); - decimalVector.setValueCount(3); - - final BigDecimal value1 = new BigDecimal(300L).setScale(2, RoundingMode.HALF_UP); - final BigDecimal value3 = - new BigDecimal(555L).scaleByPowerOfTen(-2).setScale(2, RoundingMode.HALF_UP); - assertThat( - decimalVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testDurationVectorIterable() { - try (final DurationVector durationVector = - new DurationVector( - Field.nullablePrimitive("duration", new ArrowType.Duration(TimeUnit.MILLISECOND)), - allocator)) { - durationVector.allocateNew(3); - durationVector.setSafe(0, 30000); - durationVector.setNull(1); - durationVector.setSafe(2, 555); - durationVector.setValueCount(3); - - final Duration value1 = Duration.ofMillis(30000); - final Duration value3 = Duration.ofMillis(555); - assertThat( - durationVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testFixedSizeBinaryVectorIterable() { - try (final FixedSizeBinaryVector fixedSizeBinaryVector = - new FixedSizeBinaryVector("binary", allocator, 4)) { - final byte[] value1 = new byte[] {0, 0, 0, 1}; - final byte[] value3 = new byte[] {1, 0, 0, 0}; - - fixedSizeBinaryVector.allocateNew(3); - fixedSizeBinaryVector.setSafe(0, value1); - fixedSizeBinaryVector.setNull(1); - fixedSizeBinaryVector.setSafe(2, value3); - 
fixedSizeBinaryVector.setValueCount(3); - - assertThat( - fixedSizeBinaryVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testFloat2VectorIterable() { - try (final Float2Vector float2Vector = new Float2Vector("float2", allocator)) { - float2Vector.allocateNew(3); - float2Vector.setSafe(0, (short) 7777); - float2Vector.setNull(1); - float2Vector.setSafe(2, (short) 5); - float2Vector.setValueCount(3); - - assertThat( - float2Vector.getValueIterable(), - IsIterableContainingInOrder.contains((short) 7777, null, (short) 5)); - } - } - - @Test - public void testFloat4VectorIterable() { - try (final Float4Vector float4Vector = new Float4Vector("float4", allocator)) { - float4Vector.allocateNew(3); - float4Vector.setSafe(0, 16.32f); - float4Vector.setNull(1); - float4Vector.setSafe(2, -10.75f); - float4Vector.setValueCount(3); - - assertThat( - float4Vector.getValueIterable(), - IsIterableContainingInOrder.contains(16.32f, null, -10.75f)); - } - } - - @Test - public void testFloat8VectorIterable() { - try (final Float8Vector float8Vector = new Float8Vector("float8", allocator)) { - float8Vector.allocateNew(3); - float8Vector.setSafe(0, 16.32); - float8Vector.setNull(1); - float8Vector.setSafe(2, -10.75); - float8Vector.setValueCount(3); - - assertThat( - float8Vector.getValueIterable(), - IsIterableContainingInOrder.contains(16.32, null, -10.75)); - } - } - - @Test - public void testIntVectorIterable() { - try (final IntVector intVector = new IntVector("int", allocator)) { - intVector.allocateNew(3); - intVector.setSafe(0, 78); - intVector.setNull(1); - intVector.setSafe(2, -93); - intVector.setValueCount(3); - - assertThat(intVector.getValueIterable(), IsIterableContainingInOrder.contains(78, null, -93)); - } - } - - @Test - public void testIntervalDayVectorIterable() { - try (final IntervalDayVector intervalDayVector = - new IntervalDayVector("intervalDay", allocator)) { - 
intervalDayVector.allocateNew(3); - intervalDayVector.setSafe(0, 63, 0); - intervalDayVector.setNull(1); - intervalDayVector.setSafe(2, 555, 0); - intervalDayVector.setValueCount(3); - - final Duration value1 = Duration.ofDays(63); - final Duration value3 = Duration.ofDays(555); - assertThat( - intervalDayVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testIntervalMonthDayNanoVectorIterable() { - try (final IntervalMonthDayNanoVector intervalMonthDayNanoVector = - new IntervalMonthDayNanoVector("intervalMonthDayNano", allocator)) { - intervalMonthDayNanoVector.allocateNew(3); - intervalMonthDayNanoVector.setSafe(0, 3, 4, 0); - intervalMonthDayNanoVector.setNull(1); - intervalMonthDayNanoVector.setSafe(2, 7, 18, 0); - intervalMonthDayNanoVector.setValueCount(3); - - final PeriodDuration value1 = new PeriodDuration(Period.of(0, 3, 4), Duration.ofSeconds(0)); - final PeriodDuration value3 = new PeriodDuration(Period.of(0, 7, 18), Duration.ofSeconds(0)); - assertThat( - intervalMonthDayNanoVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testIntervalYearVectorIterable() { - try (final IntervalYearVector intervalYearVector = - new IntervalYearVector("intervalYear", allocator)) { - intervalYearVector.allocateNew(3); - intervalYearVector.setSafe(0, 3); - intervalYearVector.setNull(1); - intervalYearVector.setSafe(2, 17); - intervalYearVector.setValueCount(3); - - final Period value1 = Period.ofMonths(3); - final Period value3 = Period.ofMonths(17); - assertThat( - intervalYearVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testLargeVarBinaryVectorIterable() { - try (final LargeVarBinaryVector largeVarBinaryVector = - new LargeVarBinaryVector("largeVarBinary", allocator)) { - final byte[] value1 = new byte[] {0, 0, 0, 1}; - final byte[] value3 = new byte[] 
{1, 0, 0}; - - largeVarBinaryVector.allocateNew(3); - largeVarBinaryVector.setSafe(0, value1); - largeVarBinaryVector.setNull(1); - largeVarBinaryVector.setSafe(2, value3); - largeVarBinaryVector.setValueCount(3); - - assertThat( - largeVarBinaryVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testLargeVarCharVectorIterable() { - try (final LargeVarCharVector largeVarCharVector = - new LargeVarCharVector("largeVarChar", allocator)) { - final Text value1 = new Text("hello"); - final Text value3 = new Text("worlds"); - - largeVarCharVector.allocateNew(3); - largeVarCharVector.setSafe(0, value1); - largeVarCharVector.setNull(1); - largeVarCharVector.setSafe(2, value3); - largeVarCharVector.setValueCount(3); - - assertThat( - largeVarCharVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testNullVectorIterable() { - try (final NullVector nullVector = new NullVector("null", 3)) { - assertThat( - nullVector.getValueIterable(), IsIterableContainingInOrder.contains(null, null, null)); - } - } - - @Test - public void testSmallIntVectorIterable() { - try (final SmallIntVector smallIntVector = new SmallIntVector("smallInt", allocator)) { - smallIntVector.allocateNew(3); - smallIntVector.setSafe(0, 78); - smallIntVector.setNull(1); - smallIntVector.setSafe(2, -93); - smallIntVector.setValueCount(3); - - assertThat( - smallIntVector.getValueIterable(), - IsIterableContainingInOrder.contains((short) 78, null, (short) -93)); - } - } - - @Test - public void testTimeMicroVectorIterable() { - try (final TimeMicroVector timeMicroVector = new TimeMicroVector("timeMicro", allocator)) { - timeMicroVector.allocateNew(3); - timeMicroVector.setSafe(0, 70000); - timeMicroVector.setNull(1); - timeMicroVector.setSafe(2, 555); - timeMicroVector.setValueCount(3); - - assertThat( - timeMicroVector.getValueIterable(), - 
IsIterableContainingInOrder.contains(70000L, null, 555L)); - } - } - - @Test - public void testTimeMilliVectorIterable() { - try (final TimeMilliVector timeMilliVector = new TimeMilliVector("timeMilli", allocator)) { - timeMilliVector.allocateNew(3); - timeMilliVector.setSafe(0, 70000); - timeMilliVector.setNull(1); - timeMilliVector.setSafe(2, 555); - timeMilliVector.setValueCount(3); - - final LocalDateTime value1 = LocalDateTime.ofEpochSecond(70L, 0, ZoneOffset.ofHours(0)); - final LocalDateTime value3 = - LocalDateTime.ofEpochSecond(0L, 555000000, ZoneOffset.ofHours(0)); - assertThat( - timeMilliVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testTimeNanoVectorIterable() { - try (final TimeNanoVector timeNanoVector = new TimeNanoVector("timeNano", allocator)) { - timeNanoVector.allocateNew(3); - timeNanoVector.setSafe(0, 70000); - timeNanoVector.setNull(1); - timeNanoVector.setSafe(2, 555); - timeNanoVector.setValueCount(3); - - assertThat( - timeNanoVector.getValueIterable(), - IsIterableContainingInOrder.contains(70000L, null, 555L)); - } - } - - @Test - public void testTimeSecVectorIterable() { - try (final TimeSecVector timeSecVector = new TimeSecVector("timeSec", allocator)) { - timeSecVector.allocateNew(3); - timeSecVector.setSafe(0, 70000); - timeSecVector.setNull(1); - timeSecVector.setSafe(2, 555); - timeSecVector.setValueCount(3); - - assertThat( - timeSecVector.getValueIterable(), IsIterableContainingInOrder.contains(70000, null, 555)); - } - } - - @Test - public void testTimeStampMicroTZVectorIterable() { - try (final TimeStampMicroTZVector timeStampMicroTzVector = - new TimeStampMicroTZVector("timeStampMicroTZ", allocator, "UTC")) { - timeStampMicroTzVector.allocateNew(3); - timeStampMicroTzVector.setSafe(0, 70000); - timeStampMicroTzVector.setNull(1); - timeStampMicroTzVector.setSafe(2, 555); - timeStampMicroTzVector.setValueCount(3); - - assertThat( - 
timeStampMicroTzVector.getValueIterable(), - IsIterableContainingInOrder.contains(70000L, null, 555L)); - } - } - - @Test - public void testTimeStampMicroVectorIterable() { - try (final TimeStampMicroVector timeStampMicroVector = - new TimeStampMicroVector("timeStampMicro", allocator)) { - timeStampMicroVector.allocateNew(3); - timeStampMicroVector.setSafe(0, 70000); - timeStampMicroVector.setNull(1); - timeStampMicroVector.setSafe(2, 555); - timeStampMicroVector.setValueCount(3); - - final LocalDateTime value1 = LocalDateTime.ofEpochSecond(0L, 70000000, ZoneOffset.ofHours(0)); - final LocalDateTime value3 = LocalDateTime.ofEpochSecond(0L, 555000, ZoneOffset.ofHours(0)); - assertThat( - timeStampMicroVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testTimeStampMilliTZVectorIterable() { - try (final TimeStampMilliTZVector timeStampMilliTzVector = - new TimeStampMilliTZVector("timeStampMilliTZ", allocator, "UTC")) { - timeStampMilliTzVector.allocateNew(3); - timeStampMilliTzVector.setSafe(0, 70000); - timeStampMilliTzVector.setNull(1); - timeStampMilliTzVector.setSafe(2, 555); - timeStampMilliTzVector.setValueCount(3); - - assertThat( - timeStampMilliTzVector.getValueIterable(), - IsIterableContainingInOrder.contains(70000L, null, 555L)); - } - } - - @Test - public void testTimeStampMilliVectorIterable() { - try (final TimeStampMilliVector timeStampMilliVector = - new TimeStampMilliVector("timeStampMilli", allocator)) { - timeStampMilliVector.allocateNew(3); - timeStampMilliVector.setSafe(0, 70000); - timeStampMilliVector.setNull(1); - timeStampMilliVector.setSafe(2, 555); - timeStampMilliVector.setValueCount(3); - - final LocalDateTime value1 = LocalDateTime.ofEpochSecond(70L, 0, ZoneOffset.ofHours(0)); - final LocalDateTime value3 = - LocalDateTime.ofEpochSecond(0L, 555000000, ZoneOffset.ofHours(0)); - assertThat( - timeStampMilliVector.getValueIterable(), - 
IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testTimeStampNanoTZVectorIterable() { - try (final TimeStampNanoTZVector timeStampNanoTzVector = - new TimeStampNanoTZVector("timeStampNanoTZ", allocator, "UTC")) { - timeStampNanoTzVector.allocateNew(3); - timeStampNanoTzVector.setSafe(0, 70000); - timeStampNanoTzVector.setNull(1); - timeStampNanoTzVector.setSafe(2, 555); - timeStampNanoTzVector.setValueCount(3); - - assertThat( - timeStampNanoTzVector.getValueIterable(), - IsIterableContainingInOrder.contains(70000L, null, 555L)); - } - } - - @Test - public void testTimeStampNanoVectorIterable() { - try (final TimeStampNanoVector timeStampNanoVector = - new TimeStampNanoVector("timeStampNano", allocator)) { - timeStampNanoVector.allocateNew(3); - timeStampNanoVector.setSafe(0, 70000); - timeStampNanoVector.setNull(1); - timeStampNanoVector.setSafe(2, 555); - timeStampNanoVector.setValueCount(3); - - final LocalDateTime value1 = LocalDateTime.ofEpochSecond(0L, 70000, ZoneOffset.ofHours(0)); - final LocalDateTime value3 = LocalDateTime.ofEpochSecond(0L, 555, ZoneOffset.ofHours(0)); - assertThat( - timeStampNanoVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testTimeStampSecTZVectorIterable() { - try (final TimeStampSecTZVector timeStampSecTzVector = - new TimeStampSecTZVector("timeStampSecTZ", allocator, "UTC")) { - timeStampSecTzVector.allocateNew(3); - timeStampSecTzVector.setSafe(0, 70000); - timeStampSecTzVector.setNull(1); - timeStampSecTzVector.setSafe(2, 555); - timeStampSecTzVector.setValueCount(3); - - assertThat( - timeStampSecTzVector.getValueIterable(), - IsIterableContainingInOrder.contains(70000L, null, 555L)); - } - } - - @Test - public void testTimeStampSecVectorIterable() { - try (final TimeStampSecVector timeStampSecVector = - new TimeStampSecVector("timeStampSec", allocator)) { - timeStampSecVector.allocateNew(3); - 
timeStampSecVector.setSafe(0, 70000); - timeStampSecVector.setNull(1); - timeStampSecVector.setSafe(2, 555); - timeStampSecVector.setValueCount(3); - - final LocalDateTime value1 = LocalDateTime.ofEpochSecond(70000L, 0, ZoneOffset.ofHours(0)); - final LocalDateTime value3 = LocalDateTime.ofEpochSecond(555L, 0, ZoneOffset.ofHours(0)); - assertThat( - timeStampSecVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testTinyIntVectorIterable() { - try (final TinyIntVector tinyIntVector = new TinyIntVector("tinyInt", allocator)) { - tinyIntVector.allocateNew(3); - tinyIntVector.setSafe(0, 8); - tinyIntVector.setNull(1); - tinyIntVector.setSafe(2, -17); - tinyIntVector.setValueCount(3); - - assertThat( - tinyIntVector.getValueIterable(), - IsIterableContainingInOrder.contains((byte) 8, null, (byte) -17)); - } - } - - @Test - public void testUInt1VectorIterable() { - try (final UInt1Vector uint1Vector = new UInt1Vector("uint1", allocator)) { - uint1Vector.allocateNew(3); - uint1Vector.setSafe(0, 8); - uint1Vector.setNull(1); - uint1Vector.setSafe(2, 101); - uint1Vector.setValueCount(3); - - assertThat( - uint1Vector.getValueIterable(), - IsIterableContainingInOrder.contains((byte) 8, null, (byte) 101)); - } - } - - @Test - public void testUInt2VectorIterable() { - try (final UInt2Vector uint2Vector = new UInt2Vector("uint2", allocator)) { - uint2Vector.allocateNew(3); - uint2Vector.setSafe(0, 78); - uint2Vector.setNull(1); - uint2Vector.setSafe(2, 3456); - uint2Vector.setValueCount(3); - - assertThat( - uint2Vector.getValueIterable(), - IsIterableContainingInOrder.contains((char) 78, null, (char) 3456)); - } - } - - @Test - public void testUInt4VectorIterable() { - try (final UInt4Vector uint4Vector = new UInt4Vector("uint4", allocator)) { - uint4Vector.allocateNew(3); - uint4Vector.setSafe(0, 78); - uint4Vector.setNull(1); - uint4Vector.setSafe(2, 3456); - uint4Vector.setValueCount(3); - - assertThat( 
- uint4Vector.getValueIterable(), IsIterableContainingInOrder.contains(78, null, 3456)); - } - } - - @Test - public void testUInt8VectorIterable() { - try (final UInt8Vector uint8Vector = new UInt8Vector("uint8", allocator)) { - uint8Vector.allocateNew(3); - uint8Vector.setSafe(0, 6); - uint8Vector.setSafe(1, 2); - uint8Vector.setSafe(2, 19); - uint8Vector.setValueCount(3); - - assertThat(uint8Vector.getValueIterable(), IsIterableContainingInOrder.contains(6L, 2L, 19L)); - } - } - - @Test - public void testVarBinaryVectorIterable() { - try (final VarBinaryVector varBinaryVector = new VarBinaryVector("varBinary", allocator)) { - final byte[] value1 = new byte[] {0, 0, 0, 1}; - final byte[] value3 = new byte[] {1, 0, 0}; - - varBinaryVector.allocateNew(3); - varBinaryVector.setSafe(0, value1); - varBinaryVector.setNull(1); - varBinaryVector.setSafe(2, value3); - varBinaryVector.setValueCount(3); - - assertThat( - varBinaryVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testVarCharVectorIterable() { - try (final VarCharVector varCharVector = new VarCharVector("varChar", allocator)) { - final Text value1 = new Text("hello"); - final Text value3 = new Text("worlds"); - - varCharVector.allocateNew(3); - varCharVector.setSafe(0, value1); - varCharVector.setNull(1); - varCharVector.setSafe(2, value3); - varCharVector.setValueCount(3); - - assertThat( - varCharVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testViewVarBinaryVectorIterable() { - try (final ViewVarBinaryVector viewVarBinaryVector = - new ViewVarBinaryVector("viewVarBinary", allocator)) { - final byte[] value1 = new byte[] {0, 0, 0, 1}; - final byte[] value3 = new byte[] {1, 0, 0}; - - viewVarBinaryVector.allocateNew(3); - viewVarBinaryVector.setSafe(0, value1); - viewVarBinaryVector.setNull(1); - viewVarBinaryVector.setSafe(2, value3); - 
viewVarBinaryVector.setValueCount(3); - - assertThat( - viewVarBinaryVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testViewVarCharVectorIterable() { - try (final ViewVarCharVector viewVarCharVector = - new ViewVarCharVector("viewVarChar", allocator)) { - final Text value1 = new Text("hello"); - final Text value3 = new Text("worlds"); - - viewVarCharVector.allocateNew(3); - viewVarCharVector.setSafe(0, value1); - viewVarCharVector.setNull(1); - viewVarCharVector.setSafe(2, value3); - viewVarCharVector.setValueCount(3); - - assertThat( - viewVarCharVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } - - @Test - public void testFIxedSizeListVectorIterable() { - try (final FixedSizeListVector fixedSizeListVector = - new FixedSizeListVector( - "fixedSizeList", allocator, FieldType.nullable(new ArrowType.FixedSizeList(3)), null)) { - final IntVector listVector = - (IntVector) - fixedSizeListVector - .addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())) - .getVector(); - listVector.setSafe(0, 1); - listVector.setSafe(1, 2); - listVector.setSafe(2, 3); - listVector.setSafe(3, 4); - listVector.setSafe(4, 5); - listVector.setSafe(5, 6); - listVector.setValueCount(6); - fixedSizeListVector.setValueCount(2); - fixedSizeListVector.setNotNull(0); - fixedSizeListVector.setNotNull(1); - - final List list1 = new ArrayList<>(); - list1.add(1); - list1.add(2); - list1.add(3); - final List list2 = new ArrayList<>(); - list2.add(4); - list2.add(5); - list2.add(6); - - assertThat( - fixedSizeListVector.getValueIterable(), - IsIterableContainingInOrder.contains(list1, list2)); - } - } - - @Test - public void testLargeListVectorIterable() { - try (final LargeListVector largeListVector = LargeListVector.empty("largeList", allocator)) { - UnionLargeListWriter writer = largeListVector.getWriter(); - writer.allocate(); - - writer.startList(); - 
writer.integer().writeInt(1); - writer.integer().writeInt(2); - writer.integer().writeInt(3); - writer.endList(); - writer.startList(); - writer.integer().writeInt(4); - writer.integer().writeInt(5); - writer.endList(); - - largeListVector.setValueCount(2); - - final List list1 = new ArrayList<>(); - list1.add(1); - list1.add(2); - list1.add(3); - final List list2 = new ArrayList<>(); - list2.add(4); - list2.add(5); - - assertThat( - largeListVector.getValueIterable(), IsIterableContainingInOrder.contains(list1, list2)); - } - } - - @Test - public void testListVectorIterable() { - try (final ListVector listVector = ListVector.empty("largeList", allocator)) { - UnionListWriter writer = listVector.getWriter(); - writer.allocate(); - - writer.startList(); - writer.integer().writeInt(1); - writer.integer().writeInt(2); - writer.integer().writeInt(3); - writer.endList(); - writer.startList(); - writer.integer().writeInt(4); - writer.integer().writeInt(5); - writer.endList(); - - listVector.setValueCount(2); - - final List list1 = new ArrayList<>(); - list1.add(1); - list1.add(2); - list1.add(3); - final List list2 = new ArrayList<>(); - list2.add(4); - list2.add(5); - - assertThat(listVector.getValueIterable(), IsIterableContainingInOrder.contains(list1, list2)); - } - } - - @Test - public void testListViewVectorIterable() { - try (final ListViewVector listViewVector = ListViewVector.empty("largeList", allocator)) { - UnionListViewWriter writer = listViewVector.getWriter(); - writer.allocate(); - - writer.startList(); - writer.integer().writeInt(1); - writer.integer().writeInt(2); - writer.integer().writeInt(3); - writer.endList(); - writer.startList(); - writer.integer().writeInt(4); - writer.integer().writeInt(5); - writer.endList(); - - listViewVector.setValueCount(2); - - final List list1 = new ArrayList<>(); - list1.add(1); - list1.add(2); - list1.add(3); - final List list2 = new ArrayList<>(); - list2.add(4); - list2.add(5); - - assertThat( - 
listViewVector.getValueIterable(), IsIterableContainingInOrder.contains(list1, list2)); - } - } - - @Test - public void testNonNullableStructVectorIterable() { - try (final NonNullableStructVector nonNullableStructVector = - NonNullableStructVector.empty("nonNullableStruct", allocator)) { - nonNullableStructVector.setValueCount(2); - - IntVector key1Vector = - nonNullableStructVector.addOrGet( - "key1", FieldType.notNullable(new ArrowType.Int(32, true)), IntVector.class); - IntVector key2Vector = - nonNullableStructVector.addOrGet( - "key2", FieldType.notNullable(new ArrowType.Int(32, true)), IntVector.class); - key1Vector.setSafe(0, 1); - key1Vector.setSafe(1, 3); - key2Vector.setSafe(0, 2); - key2Vector.setSafe(1, 4); - key1Vector.setValueCount(2); - key2Vector.setValueCount(2); - - final Map value1 = new HashMap<>(); - value1.put("key1", 1); - value1.put("key2", 2); - final Map value2 = new HashMap<>(); - value2.put("key1", 3); - value2.put("key2", 4); - - assertThat( - nonNullableStructVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, value2)); - } - } - - @Test - public void testStructVectorIterable() { - try (final StructVector structVector = StructVector.empty("struct", allocator)) { - structVector.addOrGetList("struct"); - NullableStructWriter structWriter = structVector.getWriter(); - structWriter.setPosition(0); - structWriter.start(); - structWriter.integer("key1").writeInt(1); - structWriter.integer("key2").writeInt(2); - structWriter.end(); - structWriter.setPosition(2); - structWriter.start(); - structWriter.integer("key1").writeInt(3); - structWriter.integer("key2").writeInt(4); - structWriter.end(); - structWriter.setValueCount(3); - - final Map value1 = new HashMap<>(); - value1.put("key1", 1); - value1.put("key2", 2); - final Map value3 = new HashMap<>(); - value3.put("key1", 3); - value3.put("key2", 4); - - assertThat( - structVector.getValueIterable(), - IsIterableContainingInOrder.contains(value1, null, value3)); - } - } 
-} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java deleted file mode 100644 index 0ad3e14ac4973..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.nio.charset.StandardCharsets; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestVarCharListVector { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testVarCharListWithNulls() { - byte[] bytes = "a".getBytes(StandardCharsets.UTF_8); - try (ListVector vector = - new ListVector( - "VarList", allocator, FieldType.nullable(Types.MinorType.VARCHAR.getType()), null); - ArrowBuf tempBuf = allocator.buffer(bytes.length)) { - UnionListWriter writer = vector.getWriter(); - writer.allocate(); - - // populate input vector with the following records - // ["a"] - // null - // ["b"] - writer.setPosition(0); // optional - writer.startList(); - tempBuf.setBytes(0, bytes); - writer.writeVarChar(0, bytes.length, tempBuf); - writer.endList(); - - writer.setPosition(2); - writer.startList(); - bytes = "b".getBytes(StandardCharsets.UTF_8); - tempBuf.setBytes(0, bytes); - writer.writeVarChar(0, bytes.length, tempBuf); - writer.endList(); - - writer.setValueCount(2); - - assertEquals(2, vector.getValueCount()); - assertEquals(2, vector.getDataVector().getValueCount()); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java deleted file mode 100644 
index a4533dba3bd72..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVariableWidthViewVector.java +++ /dev/null @@ -1,2863 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.apache.arrow.vector.TestUtils.newVector; -import static org.apache.arrow.vector.TestUtils.newViewVarBinaryVector; -import static org.apache.arrow.vector.TestUtils.newViewVarCharVector; -import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import 
java.util.Objects; -import java.util.Random; -import java.util.function.BiConsumer; -import java.util.function.Function; -import java.util.stream.IntStream; -import java.util.stream.Stream; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.memory.rounding.DefaultRoundingPolicy; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.memory.util.CommonUtil; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.holders.NullableViewVarBinaryHolder; -import org.apache.arrow.vector.holders.NullableViewVarCharHolder; -import org.apache.arrow.vector.holders.ValueHolder; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.ReusableByteArray; -import org.apache.arrow.vector.util.Text; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -public class TestVariableWidthViewVector { - - // short string (length <= 12) - private static final byte[] STR0 = "0123456".getBytes(StandardCharsets.UTF_8); - // short string (length <= 12) - private static final byte[] STR1 = "012345678912".getBytes(StandardCharsets.UTF_8); - // long string (length > 12) - private static final byte[] STR2 = "0123456789123".getBytes(StandardCharsets.UTF_8); - // long string (length > 12) - private static final byte[] STR3 = "01234567891234567".getBytes(StandardCharsets.UTF_8); - // short string (length <= 12) - private 
static final byte[] STR4 = "01234567".getBytes(StandardCharsets.UTF_8); - // short string (length <= 12) - private static final byte[] STR5 = "A1234A".getBytes(StandardCharsets.UTF_8); - // short string (length <= 12) - private static final byte[] STR6 = "B1234567B".getBytes(StandardCharsets.UTF_8); - // long string (length > 12) - private static final byte[] STR7 = "K01234567891234567K".getBytes(StandardCharsets.UTF_8); - // long string (length > 12) - private static final byte[] STR8 = "M012345678912345678M".getBytes(StandardCharsets.UTF_8); - private static final String EMPTY_SCHEMA_PATH = ""; - - private BufferAllocator allocator; - - private Random random; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(Integer.MAX_VALUE); - random = new Random(); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - public static void setBytes(int index, byte[] bytes, ViewVarCharVector vector) { - BitVectorHelper.setBit(vector.validityBuffer, index); - vector.setBytes(index, bytes, 0, bytes.length); - } - - @Test - public void testInlineAllocation() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - viewVarCharVector.allocateNew(48, 3); - final int valueCount = 3; - viewVarCharVector.set(0, STR0); - viewVarCharVector.set(1, STR1); - viewVarCharVector.set(2, STR4); - viewVarCharVector.setValueCount(valueCount); - - byte[] view1 = viewVarCharVector.get(0); - byte[] view2 = viewVarCharVector.get(1); - byte[] view3 = viewVarCharVector.get(2); - - assertNotNull(view1); - assertNotNull(view2); - assertNotNull(view3); - - String str1 = new String(STR0, StandardCharsets.UTF_8); - String str2 = new String(STR1, StandardCharsets.UTF_8); - String str3 = new String(STR4, StandardCharsets.UTF_8); - - assertEquals(new String(view1, StandardCharsets.UTF_8), str1); - assertEquals(new String(view2, StandardCharsets.UTF_8), str2); - assertEquals(new String(view3, StandardCharsets.UTF_8), str3); - 
- assertTrue(viewVarCharVector.dataBuffers.isEmpty()); - - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(0)).getBuffer(), - StandardCharsets.UTF_8), - str1); - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(1)).getBuffer(), - StandardCharsets.UTF_8), - str2); - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(2)).getBuffer(), - StandardCharsets.UTF_8), - str3); - } - } - - @Test - public void testDataBufferBasedAllocationInSameBuffer() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - viewVarCharVector.allocateNew(48, 4); - final int valueCount = 4; - String str4 = generateRandomString(34); - viewVarCharVector.set(0, STR1); - viewVarCharVector.set(1, STR2); - viewVarCharVector.set(2, STR3); - viewVarCharVector.set(3, str4.getBytes(StandardCharsets.UTF_8)); - viewVarCharVector.setValueCount(valueCount); - - byte[] view1 = viewVarCharVector.get(0); - byte[] view2 = viewVarCharVector.get(1); - byte[] view3 = viewVarCharVector.get(2); - byte[] view4 = viewVarCharVector.get(3); - - assertNotNull(view1); - assertNotNull(view2); - assertNotNull(view3); - assertNotNull(view4); - - String str1 = new String(STR1, StandardCharsets.UTF_8); - String str2 = new String(STR2, StandardCharsets.UTF_8); - String str3 = new String(STR3, StandardCharsets.UTF_8); - - assertEquals(new String(view1, StandardCharsets.UTF_8), str1); - assertEquals(new String(view2, StandardCharsets.UTF_8), str2); - assertEquals(new String(view3, StandardCharsets.UTF_8), str3); - assertEquals(new String(view4, StandardCharsets.UTF_8), str4); - - assertEquals(1, viewVarCharVector.dataBuffers.size()); - - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(0)).getBuffer(), - StandardCharsets.UTF_8), - str1); - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(1)).getBuffer(), - 
StandardCharsets.UTF_8), - str2); - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(2)).getBuffer(), - StandardCharsets.UTF_8), - str3); - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(3)).getBuffer(), - StandardCharsets.UTF_8), - str4); - } - } - - @Test - public void testDataBufferBasedAllocationInOtherBuffer() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - viewVarCharVector.allocateNew(48, 4); - final int valueCount = 4; - String str4 = generateRandomString(35); - viewVarCharVector.set(0, STR1); - viewVarCharVector.set(1, STR2); - viewVarCharVector.set(2, STR3); - viewVarCharVector.set(3, str4.getBytes(StandardCharsets.UTF_8)); - viewVarCharVector.setValueCount(valueCount); - - byte[] view1 = viewVarCharVector.get(0); - byte[] view2 = viewVarCharVector.get(1); - byte[] view3 = viewVarCharVector.get(2); - byte[] view4 = viewVarCharVector.get(3); - - assertNotNull(view1); - assertNotNull(view2); - assertNotNull(view3); - assertNotNull(view4); - - String str1 = new String(STR1, StandardCharsets.UTF_8); - String str2 = new String(STR2, StandardCharsets.UTF_8); - String str3 = new String(STR3, StandardCharsets.UTF_8); - - assertEquals(new String(view1, StandardCharsets.UTF_8), str1); - assertEquals(new String(view2, StandardCharsets.UTF_8), str2); - assertEquals(new String(view3, StandardCharsets.UTF_8), str3); - assertEquals(new String(view4, StandardCharsets.UTF_8), str4); - - assertEquals(2, viewVarCharVector.dataBuffers.size()); - - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(0)).getBuffer(), - StandardCharsets.UTF_8), - str1); - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(1)).getBuffer(), - StandardCharsets.UTF_8), - str2); - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(2)).getBuffer(), - StandardCharsets.UTF_8), - str3); - 
assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(3)).getBuffer(), - StandardCharsets.UTF_8), - str4); - } - } - - @Test - public void testSetSafe() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - viewVarCharVector.allocateNew(1, 1); - byte[] str6 = generateRandomString(40).getBytes(); - final List strings = List.of(STR0, STR1, STR2, STR3, STR4, STR5, str6); - - // set data to a position out of capacity index - Map expected = new HashMap<>(); - for (byte[] string : strings) { - int cap = viewVarCharVector.getValueCapacity(); - expected.put(cap, string); - viewVarCharVector.setSafe(cap, string); - } - int nullIndex = viewVarCharVector.getValueCapacity(); - viewVarCharVector.setNull(nullIndex); - int valueCount = nullIndex + 1; - viewVarCharVector.setValueCount(valueCount); - assertEquals(viewVarCharVector.getNullCount(), valueCount - strings.size()); - - assertEquals(128, viewVarCharVector.getValueCapacity()); - assertEquals(2, viewVarCharVector.dataBuffers.size()); - - for (int i = 0; i < viewVarCharVector.getValueCapacity(); i++) { - if (expected.containsKey(i)) { - assertArrayEquals(expected.get(i), viewVarCharVector.get(i)); - } else { - assertNull(viewVarCharVector.get(i)); - } - } - } - } - - @Test - public void testMixedAllocation() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - viewVarCharVector.allocateNew(128, 6); - final int valueCount = 6; - String str4 = generateRandomString(35); - String str6 = generateRandomString(40); - viewVarCharVector.set(0, STR1); - viewVarCharVector.set(1, STR2); - viewVarCharVector.set(2, STR3); - viewVarCharVector.set(3, str4.getBytes(StandardCharsets.UTF_8)); - viewVarCharVector.set(4, STR1); - viewVarCharVector.set(5, str6.getBytes(StandardCharsets.UTF_8)); - viewVarCharVector.setValueCount(valueCount); - - byte[] view1 = viewVarCharVector.get(0); - byte[] view2 = 
viewVarCharVector.get(1); - byte[] view3 = viewVarCharVector.get(2); - byte[] view4 = viewVarCharVector.get(3); - byte[] view5 = viewVarCharVector.get(4); - byte[] view6 = viewVarCharVector.get(5); - - assertNotNull(view1); - assertNotNull(view2); - assertNotNull(view3); - assertNotNull(view4); - assertNotNull(view5); - assertNotNull(view6); - - String str1 = new String(STR1, StandardCharsets.UTF_8); - String str2 = new String(STR2, StandardCharsets.UTF_8); - String str3 = new String(STR3, StandardCharsets.UTF_8); - - assertEquals(new String(view1, StandardCharsets.UTF_8), str1); - assertEquals(new String(view2, StandardCharsets.UTF_8), str2); - assertEquals(new String(view3, StandardCharsets.UTF_8), str3); - assertEquals(new String(view4, StandardCharsets.UTF_8), str4); - assertEquals(new String(view5, StandardCharsets.UTF_8), str1); - assertEquals(new String(view6, StandardCharsets.UTF_8), str6); - - assertEquals(1, viewVarCharVector.dataBuffers.size()); - - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(0)).getBuffer(), - StandardCharsets.UTF_8), - str1); - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(1)).getBuffer(), - StandardCharsets.UTF_8), - str2); - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(2)).getBuffer(), - StandardCharsets.UTF_8), - str3); - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(3)).getBuffer(), - StandardCharsets.UTF_8), - str4); - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(4)).getBuffer(), - StandardCharsets.UTF_8), - str1); - assertEquals( - new String( - Objects.requireNonNull(viewVarCharVector.getObject(5)).getBuffer(), - StandardCharsets.UTF_8), - str6); - } - } - - @Test - public void testSetNullableViewVarCharHolder() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - viewVarCharVector.allocateNew(0, 0); - 
final List strings = List.of(STR0, STR1, STR2, STR3, STR4, STR5); - - NullableViewVarCharHolder stringHolder = new NullableViewVarCharHolder(); - - // set not null - int size = strings.size(); - for (int i = 0; i < size; i++) { - setAndCheck(viewVarCharVector, i, strings.get(i), stringHolder); - } - - // set null - setAndCheck(viewVarCharVector, 6, null, stringHolder); - - // copy by holder - // len < 12 - copyAndCheck(viewVarCharVector, stringHolder, 0, 7); - // len > 12 - copyAndCheck(viewVarCharVector, stringHolder, 2, 8); - // null - copyAndCheck(viewVarCharVector, stringHolder, 6, 9); - - // test overwrite - for (int i = 0; i < size; i++) { - setAndCheck(viewVarCharVector, i, strings.get(size - i - 1), stringHolder); - } - - String longString = generateRandomString(128); - setAndCheck(viewVarCharVector, 6, longString.getBytes(), stringHolder); - } - } - - @Test - public void testSetNullableViewVarBinaryHolder() { - try (final ViewVarBinaryVector viewVarBinaryVector = - new ViewVarBinaryVector("myvector", allocator)) { - viewVarBinaryVector.allocateNew(0, 0); - final List strings = List.of(STR0, STR1, STR2, STR3, STR4, STR5); - - NullableViewVarBinaryHolder holder = new NullableViewVarBinaryHolder(); - - // set not null - int size = strings.size(); - for (int i = 0; i < size; i++) { - setAndCheck(viewVarBinaryVector, i, strings.get(i), holder); - } - - // set null - setAndCheck(viewVarBinaryVector, 6, null, holder); - - // copy by holder - // len < 12 - copyAndCheck(viewVarBinaryVector, holder, 0, 7); - // len > 12 - copyAndCheck(viewVarBinaryVector, holder, 2, 8); - // null - copyAndCheck(viewVarBinaryVector, holder, 6, 9); - - // test overwrite - for (int i = 0; i < size; i++) { - setAndCheck(viewVarBinaryVector, i, strings.get(size - i - 1), holder); - } - - String longString = generateRandomString(128); - setAndCheck(viewVarBinaryVector, 6, longString.getBytes(), holder); - } - } - - private static void copyAndCheck( - BaseVariableWidthViewVector vector, 
ValueHolder holder, int fromIndex, int toIndex) { - if (vector instanceof ViewVarCharVector) { - ViewVarCharVector viewVarCharVector = (ViewVarCharVector) vector; - NullableViewVarCharHolder stringHolder = (NullableViewVarCharHolder) holder; - viewVarCharVector.get(fromIndex, stringHolder); - viewVarCharVector.setSafe(toIndex, stringHolder); - } - - if (vector instanceof ViewVarBinaryVector) { - ViewVarBinaryVector viewVarBinaryVector = (ViewVarBinaryVector) vector; - NullableViewVarBinaryHolder binaryHolder = (NullableViewVarBinaryHolder) holder; - viewVarBinaryVector.get(fromIndex, binaryHolder); - viewVarBinaryVector.setSafe(toIndex, binaryHolder); - } - - assertArrayEquals(vector.get(fromIndex), vector.get(toIndex)); - } - - private void setAndCheck( - ViewVarCharVector vector, int index, byte[] str, NullableViewVarCharHolder stringHolder) { - ArrowBuf buf = null; - if (null == str) { - stringHolder.isSet = 0; - } else { - buf = allocator.buffer(str.length); - buf.setBytes(0, str); - stringHolder.isSet = 1; - stringHolder.start = 0; - stringHolder.end = str.length; - stringHolder.buffer = buf; - } - vector.setSafe(index, stringHolder); - - // verify results - assertArrayEquals(str, vector.get(index)); - AutoCloseables.closeNoChecked(buf); - } - - private void setAndCheck( - ViewVarBinaryVector vector, int index, byte[] str, NullableViewVarBinaryHolder binaryHolder) { - ArrowBuf buf = null; - if (null == str) { - binaryHolder.isSet = 0; - } else { - buf = allocator.buffer(str.length); - buf.setBytes(0, str); - binaryHolder.isSet = 1; - binaryHolder.start = 0; - binaryHolder.end = str.length; - binaryHolder.buffer = buf; - } - vector.setSafe(index, binaryHolder); - - // verify results - assertArrayEquals(str, vector.get(index)); - AutoCloseables.closeNoChecked(buf); - } - - @Test - public void testAllocationIndexOutOfBounds() { - assertThrows( - IndexOutOfBoundsException.class, - () -> { - try (final ViewVarCharVector viewVarCharVector = - new 
ViewVarCharVector("myvector", allocator)) { - viewVarCharVector.allocateNew(32, 3); - final int valueCount = 3; - viewVarCharVector.set(0, STR1); - viewVarCharVector.set(1, STR2); - viewVarCharVector.set(2, STR2); - viewVarCharVector.setValueCount(valueCount); - } - }); - } - - @Test - public void testSizeOfViewBufferElements() { - try (final ViewVarCharVector vector = new ViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - int valueCount = 100; - int currentSize = 0; - vector.setInitialCapacity(valueCount); - vector.allocateNew(); - vector.setValueCount(valueCount); - for (int i = 0; i < valueCount; i++) { - currentSize += i; - vector.setSafe(i, new byte[i]); - } - assertEquals(currentSize, vector.sizeOfViewBufferElements()); - } - } - - @Test - public void testNullableVarType1() { - - // Create a new value vector for 1024 integers. - try (final ViewVarCharVector vector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(1024 * 10, 1024); - - vector.set(0, STR1); - vector.set(1, STR2); - vector.set(2, STR3); - vector.setSafe(3, STR3, 1, STR3.length - 1); - vector.setSafe(4, STR3, 2, STR3.length - 2); - ByteBuffer str3ByteBuffer = ByteBuffer.wrap(STR3); - vector.setSafe(5, str3ByteBuffer, 1, STR3.length - 1); - vector.setSafe(6, str3ByteBuffer, 2, STR3.length - 2); - - // Set with convenience function - Text txt = new Text("foo"); - vector.setSafe(7, txt.getBytes(), 0, (int) txt.getLength()); - - // Check the sample strings. 
- assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(3)); - assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(4)); - assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(5)); - assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(6)); - - // Check returning a Text object - assertEquals(txt, vector.getObject(7)); - - // Ensure null value throws. - assertNull(vector.get(8)); - } - } - - @Test - public void testGetTextRepeatedly() { - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - ValueVectorDataPopulator.setVector(vector, STR1, STR2); - vector.setValueCount(2); - - /* check the vector output */ - Text text = new Text(); - vector.read(0, text); - assertArrayEquals(STR1, text.getBytes()); - vector.read(1, text); - assertArrayEquals(STR2, text.getBytes()); - } - } - - @Test - public void testNullableVarType2() { - try (final ViewVarBinaryVector vector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(1024 * 10, 1024); - vector.set(0, STR1); - vector.set(1, STR2); - vector.set(2, STR3); - vector.setSafe(3, STR3, 1, STR3.length - 1); - vector.setSafe(4, STR3, 2, STR3.length - 2); - ByteBuffer str3ByteBuffer = ByteBuffer.wrap(STR3); - vector.setSafe(5, str3ByteBuffer, 1, STR3.length - 1); - vector.setSafe(6, str3ByteBuffer, 2, STR3.length - 2); - - // Check the sample strings. 
- assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(3)); - assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(4)); - assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(5)); - assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(6)); - - // Ensure null value throws. - assertNull(vector.get(7)); - } - } - - @Test - public void testGetBytesRepeatedly() { - try (ViewVarBinaryVector vector = new ViewVarBinaryVector("", allocator)) { - vector.allocateNew(5, 1); - - final String str = "hello world!!!"; - final String str2 = "foo"; - vector.setSafe(0, str.getBytes(StandardCharsets.UTF_8)); - vector.setSafe(1, str2.getBytes(StandardCharsets.UTF_8)); - - // verify results - ReusableByteArray reusableByteArray = new ReusableByteArray(); - vector.read(0, reusableByteArray); - assertArrayEquals( - str.getBytes(StandardCharsets.UTF_8), - Arrays.copyOfRange( - reusableByteArray.getBuffer(), 0, (int) reusableByteArray.getLength())); - byte[] oldBuffer = reusableByteArray.getBuffer(); - - vector.read(1, reusableByteArray); - assertArrayEquals( - str2.getBytes(StandardCharsets.UTF_8), - Arrays.copyOfRange( - reusableByteArray.getBuffer(), 0, (int) reusableByteArray.getLength())); - - // There should not have been any reallocation since the newer value is smaller in length. 
- assertSame(oldBuffer, reusableByteArray.getBuffer()); - } - } - - @Test - public void testReAllocVariableWidthViewVector() { - try (final ViewVarCharVector vector = - newVector( - ViewVarCharVector.class, EMPTY_SCHEMA_PATH, Types.MinorType.VIEWVARCHAR, allocator)) { - final int capacityLimit = 4095; - final int overLimitIndex = 200; - vector.setInitialCapacity(capacityLimit); - vector.allocateNew(); - - int initialCapacity = vector.getValueCapacity(); - assertTrue(initialCapacity >= capacityLimit); - - /* Put values in indexes that fall within the initial allocation */ - vector.setSafe(0, STR1, 0, STR1.length); - vector.setSafe(initialCapacity - 1, STR2, 0, STR2.length); - - /* the set calls above should NOT have triggered a realloc */ - assertEquals(initialCapacity, vector.getValueCapacity()); - - /* Now try to put values in space that falls beyond the initial allocation */ - vector.setSafe(initialCapacity + overLimitIndex, STR3, 0, STR3.length); - - /* Check valueCapacity is more than initial allocation */ - assertTrue(initialCapacity * 2 <= vector.getValueCapacity()); - - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(initialCapacity - 1)); - assertArrayEquals(STR3, vector.get(initialCapacity + overLimitIndex)); - - // Set the valueCount to be more than valueCapacity of current allocation. This is possible - // for ValueVectors - // as we don't call setSafe for null values, but we do call setValueCount when the current - // batch is processed. 
- vector.setValueCount(vector.getValueCapacity() + overLimitIndex); - } - } - - @Test - public void testSetSafeWithArrowBufNoExcessAllocs() { - final int numValues = BaseVariableWidthViewVector.INITIAL_VALUE_ALLOCATION * 2; - final byte[] valueBytes = "hello world!!!".getBytes(StandardCharsets.UTF_8); - final int valueBytesLength = valueBytes.length; - final int isSet = 1; - try (final ViewVarCharVector fromVector = - newVector( - ViewVarCharVector.class, - EMPTY_SCHEMA_PATH, - Types.MinorType.VIEWVARCHAR, - allocator); - final ViewVarCharVector toVector = - newVector( - ViewVarCharVector.class, - EMPTY_SCHEMA_PATH, - Types.MinorType.VIEWVARCHAR, - allocator)) { - /* - * Populate the `fromVector` with `numValues` with byte-arrays, each of size `valueBytesLength`. - */ - fromVector.setInitialCapacity(numValues); - fromVector.allocateNew(); - for (int i = 0; i < numValues; ++i) { - fromVector.setSafe(i, valueBytes, 0 /*start*/, valueBytesLength); - } - fromVector.setValueCount(numValues); - ArrowBuf fromDataBuffer = fromVector.getDataBuffer(); - assertTrue(numValues * valueBytesLength <= fromDataBuffer.capacity()); - - /* - * Copy the entries one-by-one from 'fromVector' to 'toVector', but use the setSafe with - * ArrowBuf API (instead of setSafe with byte-array). - */ - toVector.setInitialCapacity(numValues); - toVector.allocateNew(); - for (int i = 0; i < numValues; i++) { - int start = fromVector.getTotalValueLengthUpToIndex(i); - // across variable - // width implementations - int end = fromVector.getTotalValueLengthUpToIndex(i + 1); - toVector.setSafe(i, isSet, start, end, fromDataBuffer); - } - - /* - * Since the 'fromVector' and 'toVector' have the same initial capacity, and were populated - * with the same varchar elements, the allocations and hence, the final capacity should be - * the same. 
- */ - assertEquals(fromDataBuffer.capacity(), toVector.getDataBuffer().capacity()); - } - } - - @Test - public void testSetLastSetUsage() { - try (final ViewVarCharVector vector = new ViewVarCharVector("myvector", allocator)) { - vector.allocateNew(1024 * 10, 1024); - - setBytes(0, STR1, vector); - setBytes(1, STR2, vector); - setBytes(2, STR3, vector); - setBytes(3, STR4, vector); - - /* Check current lastSet */ - assertEquals(-1, vector.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - - /* - * If we don't do setLastSe(3) before setValueCount(), then the latter will corrupt - * the value vector by filling in all positions [0,valuecount-1] will empty byte arrays. - * Run the test by commenting on the next line, and we should see incorrect vector output. - */ - vector.setLastSet(3); - vector.setValueCount(20); - - /* Check current lastSet */ - assertEquals(19, vector.getLastSet()); - - /* Check the vector output again */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - - assertEquals(0, vector.getValueLength(4)); - assertEquals(0, vector.getValueLength(5)); - assertEquals(0, vector.getValueLength(6)); - assertEquals(0, vector.getValueLength(7)); - assertEquals(0, vector.getValueLength(8)); - assertEquals(0, vector.getValueLength(9)); - assertEquals(0, vector.getValueLength(10)); - assertEquals(0, vector.getValueLength(11)); - assertEquals(0, vector.getValueLength(12)); - assertEquals(0, vector.getValueLength(13)); - assertEquals(0, vector.getValueLength(14)); - assertEquals(0, vector.getValueLength(15)); - assertEquals(0, vector.getValueLength(16)); - assertEquals(0, vector.getValueLength(17)); - assertEquals(0, vector.getValueLength(18)); - assertEquals(0, 
vector.getValueLength(19)); - } - } - - @Test - public void testFillEmptiesUsage() { - try (final ViewVarCharVector vector = new ViewVarCharVector("myvector", allocator)) { - vector.allocateNew(1024 * 10, 1024); - - setBytes(0, STR1, vector); - setBytes(1, STR2, vector); - setBytes(2, STR3, vector); - setBytes(3, STR4, vector); - - /* Check current lastSet */ - assertEquals(-1, vector.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - - vector.setLastSet(3); - /* fill empty byte arrays from index [4, 9] */ - vector.fillEmpties(10); - - /* Check current lastSet */ - assertEquals(9, vector.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertEquals(0, vector.getValueLength(4)); - assertEquals(0, vector.getValueLength(5)); - assertEquals(0, vector.getValueLength(6)); - assertEquals(0, vector.getValueLength(7)); - assertEquals(0, vector.getValueLength(8)); - assertEquals(0, vector.getValueLength(9)); - - setBytes(10, STR1, vector); - setBytes(11, STR2, vector); - - vector.setLastSet(11); - /* fill empty byte arrays from index [12, 14] */ - vector.setValueCount(15); - - /* Check current lastSet */ - assertEquals(14, vector.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - assertEquals(0, vector.getValueLength(4)); - assertEquals(0, vector.getValueLength(5)); - assertEquals(0, vector.getValueLength(6)); - assertEquals(0, vector.getValueLength(7)); - assertEquals(0, vector.getValueLength(8)); - assertEquals(0, vector.getValueLength(9)); - assertArrayEquals(STR1, 
vector.get(10)); - assertArrayEquals(STR2, vector.get(11)); - assertEquals(0, vector.getValueLength(12)); - assertEquals(0, vector.getValueLength(13)); - assertEquals(0, vector.getValueLength(14)); - } - } - - @Test - public void testGetBufferAddress1() { - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - - setVector(vector, STR1, STR2, STR3, STR4); - vector.setValueCount(15); - - /* check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR3, vector.get(2)); - assertArrayEquals(STR4, vector.get(3)); - - List buffers = vector.getFieldBuffers(); - long bitAddress = vector.getValidityBufferAddress(); - long dataAddress = vector.getDataBufferAddress(); - - assertEquals(3, buffers.size()); - assertEquals(bitAddress, buffers.get(0).memoryAddress()); - assertEquals(dataAddress, buffers.get(1).memoryAddress()); - } - } - - @Test - public void testSetInitialCapacityInViews() { - try (final ViewVarCharVector vector = new ViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - - /* use the default 16 data bytes on average per element */ - final int viewSize = BaseVariableWidthViewVector.ELEMENT_SIZE; - int defaultCapacity = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION / viewSize; - vector.setInitialCapacity(defaultCapacity); - vector.allocateNew(); - assertEquals(defaultCapacity, vector.getValueCapacity()); - assertEquals( - CommonUtil.nextPowerOfTwo(defaultCapacity * viewSize), vector.getDataBuffer().capacity()); - - double density = 4.0; - final int valueCount = 5; - vector.setInitialCapacity(valueCount, density); - vector.allocateNew(); - assertEquals(8, vector.getValueCapacity()); - assertEquals(128, vector.getDataBuffer().capacity()); - int initialDataBufferSize = (int) (valueCount * density); - // making sure a databuffer is allocated - vector.set(4, "01234567890123456".getBytes(StandardCharsets.UTF_8)); - 
assertEquals(vector.dataBuffers.size(), 1); - ArrowBuf dataBuf = vector.dataBuffers.get(0); - try (ArrowBuf tempBuf = vector.allocator.buffer(initialDataBufferSize)) { - // replicating a new buffer allocation process when a new buffer is added to the - // data buffer when inserting an element with length > 12 - assertEquals(tempBuf.capacity(), dataBuf.capacity()); - } - } - } - - @Test - public void testGetPointerVariableWidthViews() { - final String[] sampleData = - new String[] { - "abc", - "1234567890123", - "def", - null, - "hello world java", - "aaaaa", - "world", - "2019", - null, - "0717" - }; - - try (ViewVarCharVector vec1 = new ViewVarCharVector("vec1", allocator); - ViewVarCharVector vec2 = new ViewVarCharVector("vec2", allocator)) { - - vec1.allocateNew((long) sampleData.length * 16, sampleData.length); - vec2.allocateNew((long) sampleData.length * 16, sampleData.length); - - for (int i = 0; i < sampleData.length; i++) { - String str = sampleData[i]; - if (str != null) { - vec1.set(i, sampleData[i].getBytes(StandardCharsets.UTF_8)); - vec2.set(i, sampleData[i].getBytes(StandardCharsets.UTF_8)); - } else { - vec1.setNull(i); - - vec2.setNull(i); - } - } - - ArrowBufPointer ptr1 = new ArrowBufPointer(); - ArrowBufPointer ptr2 = new ArrowBufPointer(); - - for (int i = 0; i < sampleData.length; i++) { - vec1.getDataPointer(i, ptr1); - vec2.getDataPointer(i, ptr2); - - assertTrue(ptr1.equals(ptr2)); - assertTrue(ptr2.equals(ptr2)); - } - } - } - - @Test - public void testGetNullFromVariableWidthViewVector() { - try (final ViewVarCharVector varCharViewVector = - new ViewVarCharVector("viewvarcharvec", allocator); - final ViewVarBinaryVector varBinaryViewVector = - new ViewVarBinaryVector("viewvarbinary", allocator)) { - varCharViewVector.allocateNew(16, 1); - varBinaryViewVector.allocateNew(16, 1); - - varCharViewVector.setNull(0); - varBinaryViewVector.setNull(0); - - assertNull(varCharViewVector.get(0)); - assertNull(varBinaryViewVector.get(0)); - } - } - - 
@Test - public void testVariableWidthViewVectorNullHashCode() { - try (ViewVarCharVector viewVarChar = new ViewVarCharVector("view var char vector", allocator)) { - viewVarChar.allocateNew(100, 1); - viewVarChar.setValueCount(1); - - viewVarChar.set(0, "abc".getBytes(StandardCharsets.UTF_8)); - viewVarChar.setNull(0); - - assertEquals(0, viewVarChar.hashCode(0)); - } - } - - @Test - public void testUnloadVariableWidthViewVector() { - try (final ViewVarCharVector viewVarCharVector = - new ViewVarCharVector("view var char", allocator)) { - viewVarCharVector.allocateNew(16, 2); - viewVarCharVector.setValueCount(2); - viewVarCharVector.set(0, "abcd".getBytes(StandardCharsets.UTF_8)); - - List bufs = viewVarCharVector.getFieldBuffers(); - assertEquals(2, bufs.size()); - - ArrowBuf viewBuf = bufs.get(1); - - assertEquals(32, viewBuf.writerIndex()); - final String longString = "012345678901234"; - viewVarCharVector.set(1, longString.getBytes(StandardCharsets.UTF_8)); - - bufs = viewVarCharVector.getFieldBuffers(); - assertEquals(3, bufs.size()); - - ArrowBuf referenceBuf = bufs.get(2); - assertEquals(longString.length(), referenceBuf.writerIndex()); - } - } - - @Test - public void testUnSupportedOffSet() { - // offset is not a feature required in ViewVarCharVector - assertThrows( - UnsupportedOperationException.class, - () -> { - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - - setVector(vector, STR1, STR2); - vector.setValueCount(2); - - /* check the vector output */ - assertArrayEquals(STR1, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - - vector.getOffsetBuffer(); - } - }); - } - - private void validateViewBuffer( - int index, - ViewVarCharVector vector, - byte[] expectedData, - int expectedBufId, - int expectedOffSet) { - final ArrowBuf viewBuffer = vector.viewBuffer; - int writePosition = index * BaseVariableWidthViewVector.ELEMENT_SIZE; - final int prefixBufWidth = BaseVariableWidthViewVector.PREFIX_WIDTH; - 
final int lengthBufWidth = BaseVariableWidthViewVector.LENGTH_WIDTH; - int length = viewBuffer.getInt(writePosition); - - // validate length of the view - assertEquals(expectedData.length, length); - - byte[] prefixBytes = new byte[prefixBufWidth]; - viewBuffer.getBytes(writePosition + lengthBufWidth, prefixBytes); - - // validate the prefix - byte[] expectedPrefixBytes = new byte[prefixBufWidth]; - System.arraycopy(expectedData, 0, expectedPrefixBytes, 0, prefixBufWidth); - assertArrayEquals(expectedPrefixBytes, prefixBytes); - - if (length > 12) { - /// validate bufId - int bufId = viewBuffer.getInt(writePosition + lengthBufWidth + prefixBufWidth); - assertEquals(expectedBufId, bufId); - // validate offset - int offset = - viewBuffer.getInt( - writePosition - + lengthBufWidth - + prefixBufWidth - + BaseVariableWidthViewVector.BUF_INDEX_WIDTH); - assertEquals(expectedOffSet, offset); - } - // validate retrieved data - assertArrayEquals(expectedData, vector.get(index)); - } - - @Test - public void testOverwriteShortFromLongString() { - /*NA: not applicable */ - // Overwriting at the beginning of the buffer. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 1); - // set short string - vector.set(0, STR0); - vector.setValueCount(1); - assertEquals(0, vector.dataBuffers.size()); - assertArrayEquals(STR0, vector.get(0)); - - validateViewBuffer(0, vector, STR0, /*NA*/ -1, /*NA*/ -1); - - // set long string - vector.set(0, STR3); - vector.setValueCount(1); - assertEquals(1, vector.dataBuffers.size()); - assertArrayEquals(STR3, vector.get(0)); - - validateViewBuffer(0, vector, STR3, 0, 0); - } - - // Overwriting in the middle of the buffer when existing buffers are all shorts. 
- try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(48, 3); - // set short string 1 - vector.set(0, STR0); - // set short string 2 - vector.set(1, STR5); - // set short string 3 - vector.set(2, STR6); - vector.setValueCount(3); - - // overwrite index 1 with a long string - vector.set(1, STR7); - vector.setValueCount(3); - - validateViewBuffer(0, vector, STR0, /*NA*/ -1, /*NA*/ -1); - validateViewBuffer(1, vector, STR7, 0, 0); - validateViewBuffer(2, vector, STR6, /*NA*/ -1, /*NA*/ -1); - } - - // Overwriting in the middle of the buffer with a mix of short and long strings. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(80, 5); - // set short string 1 - vector.set(0, STR0); - // set long string 1 - vector.set(1, STR3); - // set short string 2 - vector.set(2, STR5); - // set short string 3 - vector.set(3, STR6); - // set long string 2 - vector.set(4, STR7); - vector.setValueCount(5); - - // overwrite index 2 with a long string - vector.set(2, STR8); - vector.setValueCount(5); - - validateViewBuffer(0, vector, STR0, /*NA*/ -1, /*NA*/ -1); - validateViewBuffer(1, vector, STR3, 0, 0); - // Since we did overwrite index 2 with STR8, and as we are using append-only approach, - // it will be appended to the data buffer. - // Thus, it will be stored in the dataBuffer in order i.e. [STR3, STR7, STR8]. - validateViewBuffer(2, vector, STR8, 0, STR3.length + STR7.length); - validateViewBuffer(3, vector, STR6, /*NA*/ -1, /*NA*/ -1); - validateViewBuffer(4, vector, STR7, 0, STR3.length); - } - - // Overwriting in the middle of the buffer with a mix of short and long strings. - // Here the short string is overwritten with a long string, and its length is larger than - // the remaining capacity of the existing data buffer. - // This would allocate a new buffer in the data buffers. 
- try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(80, 5); - // set short string 1 - vector.set(0, STR0); - // set long string 1 - vector.set(1, STR3); - // set short string 2 - vector.set(2, STR5); - // set short string 3 - vector.set(3, STR6); - // set long string 2 - vector.set(4, STR7); - - vector.setValueCount(5); - - // overwrite index 2 with a long string - String longString = generateRandomString(128); - byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8); - // since the append-only approach is used and the remaining capacity - // is not enough to store the new string; a new buffer will be allocated. - final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); - final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); - assertTrue(remainingCapacity < longStringBytes.length); - vector.set(2, longStringBytes); - vector.setValueCount(5); - - validateViewBuffer(0, vector, STR0, /*NA*/ -1, /*NA*/ -1); - validateViewBuffer(1, vector, STR3, 0, 0); - // overwritten long string will be stored in the new data buffer. - validateViewBuffer(2, vector, longStringBytes, 1, 0); - validateViewBuffer(3, vector, STR6, /*NA*/ -1, /*NA*/ -1); - validateViewBuffer(4, vector, STR7, 0, STR3.length); - } - } - - @Test - public void testOverwriteLongFromShortString() { - // Overwriting at the beginning of the buffer. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 1); - // set short string - vector.set(0, STR3); - vector.setValueCount(1); - // set long string - vector.set(0, STR0); - vector.setValueCount(1); - - validateViewBuffer(0, vector, STR0, /*NA*/ -1, /*NA*/ -1); - } - - // Overwriting in the middle of the buffer when existing buffers are all longs. 
- try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(48, 3); - // set long string 1 - vector.set(0, STR3); - // set long string 2 - vector.set(1, STR8); - // set long string 3 - vector.set(2, STR7); - vector.setValueCount(3); - - // overwrite index 1 with a short string - vector.set(1, STR6); - vector.setValueCount(3); - - validateViewBuffer(0, vector, STR3, 0, 0); - validateViewBuffer(1, vector, STR6, /*NA*/ -1, /*NA*/ -1); - // since the append-only approach is used, - // STR8 will still be in the first data buffer in dataBuffers. - validateViewBuffer(2, vector, STR7, 0, STR3.length + STR8.length); - } - - // Overwriting in the middle of the buffer with a mix of short and long strings. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(80, 5); - // set long string 1 - vector.set(0, STR3); - // set short string 1 - vector.set(1, STR5); - // set long string 2 - vector.set(2, STR7); - // set long string 3 - vector.set(3, STR8); - // set short string 2 - vector.set(4, STR6); - vector.setValueCount(5); - - // overwrite index 2 with a short string - vector.set(2, STR0); - vector.setValueCount(5); - - validateViewBuffer(0, vector, STR3, 0, 0); - validateViewBuffer(1, vector, STR5, /*NA*/ -1, /*NA*/ -1); - validateViewBuffer(2, vector, STR0, /*NA*/ -1, /*NA*/ -1); - // since the append-only approach is used, - // STR7 will still be in the first data buffer in dataBuffers. - validateViewBuffer(3, vector, STR8, 0, STR3.length + STR7.length); - validateViewBuffer(4, vector, STR6, /*NA*/ -1, /*NA*/ -1); - } - } - - @Test - public void testOverwriteLongFromAShorterLongString() { - // Overwriting at the beginning of the buffer. 
- try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 1); - // set long string - vector.set(0, STR7); - vector.setValueCount(1); - // set shorter long string, since append-only approach is used and the remaining capacity - // is not enough to store the new string; a new buffer will be allocated. - final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); - final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); - assertTrue(remainingCapacity < STR3.length); - // set shorter long string - vector.set(0, STR3); - vector.setValueCount(1); - - validateViewBuffer(0, vector, STR3, 1, 0); - } - - // Overwriting in the middle of the buffer when existing buffers are all longs. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - // extra memory is allocated - vector.allocateNew(128, 3); - // set long string 1 - vector.set(0, STR3); - // set long string 2 - vector.set(1, STR8); - // set long string 3 - vector.set(2, STR7); - vector.setValueCount(3); - - // overwrite index 1 with a shorter long string - // Since append-only approach is used - // and the remaining capacity is enough to store in the same data buffer.; - final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); - final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); - assertTrue(remainingCapacity > STR2.length); - vector.set(1, STR2); - vector.setValueCount(3); - - validateViewBuffer(0, vector, STR3, 0, 0); - // since the append-only approach is used, - // STR8 will still be in the first data buffer in dataBuffers. - validateViewBuffer(1, vector, STR2, 0, STR3.length + STR8.length + STR7.length); - validateViewBuffer(2, vector, STR7, 0, STR3.length + STR8.length); - } - - // Overwriting in the middle of the buffer with a mix of short and long strings. 
- try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(128, 5); - // set long string 1 - vector.set(0, STR3); - // set short string 1 - vector.set(1, STR5); - // set long string 2 - vector.set(2, STR7); - // set long string 3 - vector.set(3, STR8); - // set short string 2 - vector.set(4, STR6); - vector.setValueCount(5); - - // overwrite index 2 with a shorter long string - // Since append-only approach is used - // and the remaining capacity is enough to store in the same data buffer.; - final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); - final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); - assertTrue(remainingCapacity > STR2.length); - vector.set(2, STR2); - vector.setValueCount(5); - - validateViewBuffer(0, vector, STR3, 0, 0); - validateViewBuffer(1, vector, STR5, /*NA*/ -1, /*NA*/ -1); - // since the append-only approach is used, - // STR7 will still be in the first data buffer in dataBuffers. - validateViewBuffer(2, vector, STR2, 0, STR3.length + STR7.length + STR8.length); - validateViewBuffer(3, vector, STR8, 0, STR3.length + STR7.length); - validateViewBuffer(4, vector, STR6, /*NA*/ -1, /*NA*/ -1); - } - } - - @Test - public void testOverwriteLongFromALongerLongString() { - // Overwriting at the beginning of the buffer. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 1); - // set long string - vector.set(0, STR3); - vector.setValueCount(1); - // set longer long string, since append-only approach is used and the remaining capacity - // is not enough to store the new string; a new buffer will be allocated. 
- final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); - final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); - assertTrue(remainingCapacity < STR7.length); - // set longer long string - vector.set(0, STR7); - vector.setValueCount(1); - - validateViewBuffer(0, vector, STR7, 1, 0); - } - - // Overwriting in the middle of the buffer when existing buffers are all longs. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - // extra memory is allocated - vector.allocateNew(48, 3); - // set long string 1 - vector.set(0, STR3); - // set long string 2 - vector.set(1, STR8); - // set long string 3 - vector.set(2, STR7); - vector.setValueCount(3); - - // overwrite index 1 with a longer long string - // the remaining capacity is not enough to store in the same data buffer - // since a new buffer is added to the dataBuffers - final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); - final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); - String longerString = generateRandomString(35); - byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8); - assertTrue(remainingCapacity < longerStringBytes.length); - - vector.set(1, longerStringBytes); - vector.setValueCount(3); - - validateViewBuffer(0, vector, STR3, 0, 0); - validateViewBuffer(1, vector, longerStringBytes, 1, 0); - // since the append-only approach is used, - // STR8 will still be in the first data buffer in dataBuffers. - validateViewBuffer(2, vector, STR7, 0, STR3.length + STR8.length); - } - - // Overwriting in the middle of the buffer with a mix of short and long strings. 
- try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(128, 5); - // set long string 1 - vector.set(0, STR3); - // set short string 1 - vector.set(1, STR5); - // set long string 2 - vector.set(2, STR7); - // set long string 3 - vector.set(3, STR2); - // set short string 2 - vector.set(4, STR6); - vector.setValueCount(5); - - // overwrite index 2 with a longer long string - // the remaining capacity is enough to store in the same data buffer - final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); - final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); - String longerString = generateRandomString(24); - byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8); - assertTrue(remainingCapacity > longerStringBytes.length); - - vector.set(2, longerStringBytes); - vector.setValueCount(5); - - validateViewBuffer(0, vector, STR3, 0, 0); - validateViewBuffer(1, vector, STR5, /*NA*/ -1, /*NA*/ -1); - // since the append-only approach is used, - // STR7 will still be in the first data buffer in dataBuffers. - validateViewBuffer(2, vector, longerStringBytes, 0, STR3.length + STR7.length + STR2.length); - validateViewBuffer(3, vector, STR2, 0, STR3.length + STR7.length); - validateViewBuffer(4, vector, STR6, /*NA*/ -1, /*NA*/ -1); - } - } - - @Test - public void testSafeOverwriteShortFromLongString() { - /*NA: not applicable */ - // Overwriting at the beginning of the buffer. 
- try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 1); - // set short string - vector.setSafe(0, STR0); - vector.setValueCount(1); - assertEquals(0, vector.dataBuffers.size()); - assertArrayEquals(STR0, vector.get(0)); - - // set long string - vector.setSafe(0, STR3); - vector.setValueCount(1); - assertEquals(1, vector.dataBuffers.size()); - assertArrayEquals(STR3, vector.get(0)); - } - - // Overwriting in the middle of the buffer when existing buffers are all shorts. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 3); - // set short string 1 - vector.setSafe(0, STR0); - // set short string 2 - vector.setSafe(1, STR5); - // set short string 3 - vector.setSafe(2, STR6); - vector.setValueCount(3); - - // overwrite index 1 with a long string - vector.setSafe(1, STR7); - vector.setValueCount(3); - - assertArrayEquals(STR0, vector.get(0)); - assertArrayEquals(STR7, vector.get(1)); - assertArrayEquals(STR6, vector.get(2)); - } - - // Overwriting in the middle of the buffer with a mix of short and long strings. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 5); - // set short string 1 - vector.setSafe(0, STR0); - // set long string 1 - vector.setSafe(1, STR3); - // set short string 2 - vector.setSafe(2, STR5); - // set short string 3 - vector.setSafe(3, STR6); - // set long string 2 - vector.setSafe(4, STR7); - vector.setValueCount(5); - - // overwrite index 2 with a long string - vector.setSafe(2, STR8); - vector.setValueCount(5); - - assertArrayEquals(STR0, vector.get(0)); - assertArrayEquals(STR3, vector.get(1)); - assertArrayEquals(STR8, vector.get(2)); - assertArrayEquals(STR6, vector.get(3)); - assertArrayEquals(STR7, vector.get(4)); - } - - // Overwriting in the middle of the buffer with a mix of short and long strings. 
- try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 5); - // set short string 1 - vector.setSafe(0, STR0); - // set long string 1 - vector.setSafe(1, STR3); - // set short string 2 - vector.setSafe(2, STR5); - // set short string 3 - vector.setSafe(3, STR6); - // set long string 2 - vector.setSafe(4, STR7); - - vector.setValueCount(5); - - // overwrite index 2 with a long string - String longString = generateRandomString(128); - byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8); - - vector.setSafe(2, longStringBytes); - vector.setValueCount(5); - - assertArrayEquals(STR0, vector.get(0)); - assertArrayEquals(STR3, vector.get(1)); - assertArrayEquals(longStringBytes, vector.get(2)); - assertArrayEquals(STR6, vector.get(3)); - assertArrayEquals(STR7, vector.get(4)); - } - } - - @Test - public void testSafeOverwriteLongFromShortString() { - // Overwriting at the beginning of the buffer. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 1); - // set short string - vector.setSafe(0, STR3); - vector.setValueCount(1); - // set long string - vector.setSafe(0, STR0); - vector.setValueCount(1); - - assertArrayEquals(STR0, vector.get(0)); - } - - // Overwriting in the middle of the buffer when existing buffers are all longs. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 3); - // set long string 1 - vector.setSafe(0, STR3); - // set long string 2 - vector.setSafe(1, STR8); - // set long string 3 - vector.setSafe(2, STR7); - vector.setValueCount(3); - - // overwrite index 1 with a short string - vector.setSafe(1, STR6); - vector.setValueCount(3); - - assertArrayEquals(STR3, vector.get(0)); - assertArrayEquals(STR6, vector.get(1)); - assertArrayEquals(STR7, vector.get(2)); - } - - // Overwriting in the middle of the buffer with a mix of short and long strings. 
- try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 5); - // set long string 1 - vector.setSafe(0, STR3); - // set short string 1 - vector.setSafe(1, STR5); - // set long string 2 - vector.setSafe(2, STR7); - // set long string 3 - vector.setSafe(3, STR8); - // set short string 2 - vector.setSafe(4, STR6); - vector.setValueCount(5); - - // overwrite index 2 with a short string - vector.setSafe(2, STR0); - vector.setValueCount(5); - - assertArrayEquals(STR3, vector.get(0)); - assertArrayEquals(STR5, vector.get(1)); - assertArrayEquals(STR0, vector.get(2)); - assertArrayEquals(STR8, vector.get(3)); - assertArrayEquals(STR6, vector.get(4)); - } - } - - @Test - public void testSafeOverwriteLongFromAShorterLongString() { - // Overwriting at the beginning of the buffer. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 1); - // set long string - vector.setSafe(0, STR7); - vector.setValueCount(1); - // set shorter long string - vector.setSafe(0, STR3); - vector.setValueCount(1); - - assertArrayEquals(STR3, vector.get(0)); - } - - // Overwriting in the middle of the buffer when existing buffers are all longs. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - // extra memory is allocated - vector.allocateNew(16, 3); - // set long string 1 - vector.setSafe(0, STR3); - // set long string 2 - vector.setSafe(1, STR8); - // set long string 3 - vector.setSafe(2, STR7); - vector.setValueCount(3); - - // overwrite index 1 with a shorter long string - vector.setSafe(1, STR2); - vector.setValueCount(3); - - assertArrayEquals(STR3, vector.get(0)); - assertArrayEquals(STR2, vector.get(1)); - assertArrayEquals(STR7, vector.get(2)); - } - - // Overwriting in the middle of the buffer with a mix of short and long strings. 
- try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 5); - // set long string 1 - vector.setSafe(0, STR3); - // set short string 1 - vector.setSafe(1, STR5); - // set long string 2 - vector.setSafe(2, STR7); - // set long string 3 - vector.setSafe(3, STR8); - // set short string 2 - vector.setSafe(4, STR6); - vector.setValueCount(5); - - // overwrite index 2 with a shorter long string - vector.setSafe(2, STR2); - vector.setValueCount(5); - - assertArrayEquals(STR3, vector.get(0)); - assertArrayEquals(STR5, vector.get(1)); - assertArrayEquals(STR2, vector.get(2)); - assertArrayEquals(STR8, vector.get(3)); - assertArrayEquals(STR6, vector.get(4)); - } - } - - @Test - public void testSafeOverwriteLongFromALongerLongString() { - // Overwriting at the beginning of the buffer. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 1); - // set long string - vector.setSafe(0, STR3); - vector.setValueCount(1); - // set longer long string - vector.setSafe(0, STR7); - vector.setValueCount(1); - - assertArrayEquals(STR7, vector.get(0)); - } - - // Overwriting in the middle of the buffer when existing buffers are all longs. 
- try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - // extra memory is allocated - vector.allocateNew(16, 3); - // set long string 1 - vector.setSafe(0, STR3); - // set long string 2 - vector.setSafe(1, STR8); - // set long string 3 - vector.setSafe(2, STR7); - vector.setValueCount(3); - - String longerString = generateRandomString(35); - byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8); - - vector.setSafe(1, longerStringBytes); - vector.setValueCount(3); - - assertArrayEquals(STR3, vector.get(0)); - assertArrayEquals(longerStringBytes, vector.get(1)); - assertArrayEquals(STR7, vector.get(2)); - } - - // Overwriting in the middle of the buffer with a mix of short and long strings. - try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { - vector.allocateNew(16, 5); - // set long string 1 - vector.setSafe(0, STR3); - // set short string 1 - vector.setSafe(1, STR5); - // set long string 2 - vector.setSafe(2, STR7); - // set long string 3 - vector.setSafe(3, STR2); - // set short string 2 - vector.setSafe(4, STR6); - vector.setValueCount(5); - - String longerString = generateRandomString(24); - byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8); - - vector.setSafe(2, longerStringBytes); - vector.setValueCount(5); - - assertArrayEquals(STR3, vector.get(0)); - assertArrayEquals(STR5, vector.get(1)); - assertArrayEquals(longerStringBytes, vector.get(2)); - assertArrayEquals(STR2, vector.get(3)); - assertArrayEquals(STR6, vector.get(4)); - } - } - - @Test - public void testVectorLoadUnloadInLine() { - - try (final ViewVarCharVector vector1 = new ViewVarCharVector("myvector", allocator)) { - - setVector(vector1, STR0, STR1, STR4, STR5, STR6); - - assertEquals(4, vector1.getLastSet()); - vector1.setValueCount(15); - assertEquals(14, vector1.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR0, vector1.get(0)); - 
assertArrayEquals(STR1, vector1.get(1)); - assertArrayEquals(STR4, vector1.get(2)); - assertArrayEquals(STR5, vector1.get(3)); - assertArrayEquals(STR6, vector1.get(4)); - - Field field = vector1.getField(); - String fieldName = field.getName(); - - List fields = new ArrayList<>(); - List fieldVectors = new ArrayList<>(); - - fields.add(field); - fieldVectors.add(vector1); - - Schema schema = new Schema(fields); - - VectorSchemaRoot schemaRoot1 = - new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); - VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); - - try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - BufferAllocator finalVectorsAllocator = - allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); - VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); ) { - - VectorLoader vectorLoader = new VectorLoader(schemaRoot2); - vectorLoader.load(recordBatch); - - ViewVarCharVector vector2 = (ViewVarCharVector) schemaRoot2.getVector(fieldName); - /* - * lastSet would have internally been set by VectorLoader.load() when it invokes - * loadFieldBuffers. 
- */ - assertEquals(14, vector2.getLastSet()); - vector2.setValueCount(25); - assertEquals(24, vector2.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR0, vector2.get(0)); - assertArrayEquals(STR1, vector2.get(1)); - assertArrayEquals(STR4, vector2.get(2)); - assertArrayEquals(STR5, vector2.get(3)); - assertArrayEquals(STR6, vector2.get(4)); - } - } - } - - @Test - public void testVectorLoadUnload() { - - try (final ViewVarCharVector vector1 = new ViewVarCharVector("myvector", allocator)) { - - setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6); - - assertEquals(5, vector1.getLastSet()); - vector1.setValueCount(15); - assertEquals(14, vector1.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector1.get(0)); - assertArrayEquals(STR2, vector1.get(1)); - assertArrayEquals(STR3, vector1.get(2)); - assertArrayEquals(STR4, vector1.get(3)); - assertArrayEquals(STR5, vector1.get(4)); - assertArrayEquals(STR6, vector1.get(5)); - - Field field = vector1.getField(); - String fieldName = field.getName(); - - List fields = new ArrayList<>(); - List fieldVectors = new ArrayList<>(); - - fields.add(field); - fieldVectors.add(vector1); - - Schema schema = new Schema(fields); - - VectorSchemaRoot schemaRoot1 = - new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); - VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); - - try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - BufferAllocator finalVectorsAllocator = - allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); - VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); ) { - - VectorLoader vectorLoader = new VectorLoader(schemaRoot2); - vectorLoader.load(recordBatch); - - ViewVarCharVector vector2 = (ViewVarCharVector) schemaRoot2.getVector(fieldName); - /* - * lastSet would have internally been set by VectorLoader.load() when it invokes - * loadFieldBuffers. 
- */ - assertEquals(14, vector2.getLastSet()); - vector2.setValueCount(25); - assertEquals(24, vector2.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector2.get(0)); - assertArrayEquals(STR2, vector2.get(1)); - assertArrayEquals(STR3, vector2.get(2)); - assertArrayEquals(STR4, vector2.get(3)); - assertArrayEquals(STR5, vector2.get(4)); - assertArrayEquals(STR6, vector2.get(5)); - } - } - } - - static Stream vectorCreatorProvider() { - return Stream.of( - Arguments.of( - (Function) - (allocator -> - newVector( - ViewVarBinaryVector.class, - EMPTY_SCHEMA_PATH, - Types.MinorType.VIEWVARBINARY, - allocator))), - Arguments.of( - (Function) - (allocator -> - newVector( - ViewVarCharVector.class, - EMPTY_SCHEMA_PATH, - Types.MinorType.VIEWVARCHAR, - allocator)))); - } - - @ParameterizedTest - @MethodSource({"vectorCreatorProvider"}) - public void testCopyFromWithNulls( - Function vectorCreator) { - try (final BaseVariableWidthViewVector vector = vectorCreator.apply(allocator); - final BaseVariableWidthViewVector vector2 = vectorCreator.apply(allocator)) { - final int initialCapacity = 1024; - vector.setInitialCapacity(initialCapacity); - vector.allocateNew(); - int capacity = vector.getValueCapacity(); - assertTrue(capacity >= initialCapacity); - - // setting number of values such that we have enough space in the initial allocation - // to avoid re-allocation. This is to test copyFrom() without re-allocation. 
- final int numberOfValues = initialCapacity / 2 / ViewVarCharVector.ELEMENT_SIZE; - - final String prefixString = generateRandomString(12); - - for (int i = 0; i < numberOfValues; i++) { - if (i % 3 == 0) { - // null values - vector.setNull(i); - } else if (i % 3 == 1) { - // short strings - byte[] b = Integer.toString(i).getBytes(StandardCharsets.UTF_8); - vector.set(i, b, 0, b.length); - } else { - // long strings - byte[] b = (i + prefixString).getBytes(StandardCharsets.UTF_8); - vector.set(i, b, 0, b.length); - } - } - - assertEquals(capacity, vector.getValueCapacity()); - - vector.setValueCount(numberOfValues); - - for (int i = 0; i < numberOfValues; i++) { - if (i % 3 == 0) { - assertNull(vector.getObject(i)); - } else if (i % 3 == 1) { - assertArrayEquals( - Integer.toString(i).getBytes(StandardCharsets.UTF_8), - vector.get(i), - "unexpected value at index: " + i); - } else { - assertArrayEquals( - (i + prefixString).getBytes(StandardCharsets.UTF_8), - vector.get(i), - "unexpected value at index: " + i); - } - } - - vector2.setInitialCapacity(initialCapacity); - vector2.allocateNew(); - int capacity2 = vector2.getValueCapacity(); - assertEquals(capacity2, capacity); - - for (int i = 0; i < numberOfValues; i++) { - vector2.copyFrom(i, i, vector); - if (i % 3 == 0) { - assertNull(vector2.getObject(i)); - } else if (i % 3 == 1) { - assertArrayEquals( - Integer.toString(i).getBytes(StandardCharsets.UTF_8), - vector2.get(i), - "unexpected value at index: " + i); - } else { - assertArrayEquals( - (i + prefixString).getBytes(StandardCharsets.UTF_8), - vector2.get(i), - "unexpected value at index: " + i); - } - } - - assertEquals(capacity, vector2.getValueCapacity()); - - vector2.setValueCount(numberOfValues); - - for (int i = 0; i < numberOfValues; i++) { - if (i % 3 == 0) { - assertNull(vector2.getObject(i)); - } else if (i % 3 == 1) { - assertArrayEquals( - Integer.toString(i).getBytes(StandardCharsets.UTF_8), - vector2.get(i), - "unexpected value at index: " + 
i); - } else { - assertArrayEquals( - (i + prefixString).getBytes(StandardCharsets.UTF_8), - vector2.get(i), - "unexpected value at index: " + i); - } - } - } - } - - @ParameterizedTest - @MethodSource("vectorCreatorProvider") - public void testCopyFromSafeWithNulls( - Function vectorCreator) { - try (final BaseVariableWidthViewVector vector = vectorCreator.apply(allocator); - final BaseVariableWidthViewVector vector2 = vectorCreator.apply(allocator)) { - - final int initialCapacity = 4096; - vector.setInitialCapacity(initialCapacity); - vector.allocateNew(); - int capacity = vector.getValueCapacity(); - assertTrue(capacity >= initialCapacity); - - final int numberOfValues = initialCapacity / ViewVarCharVector.ELEMENT_SIZE; - - final String prefixString = generateRandomString(12); - - for (int i = 0; i < numberOfValues; i++) { - if (i % 3 == 0) { - // null values - vector.setNull(i); - } else if (i % 3 == 1) { - // short strings - byte[] b = Integer.toString(i).getBytes(StandardCharsets.UTF_8); - vector.setSafe(i, b, 0, b.length); - } else { - // long strings - byte[] b = (i + prefixString).getBytes(StandardCharsets.UTF_8); - vector.setSafe(i, b, 0, b.length); - } - } - - /* NO reAlloc() should have happened in setSafe() */ - assertEquals(capacity, vector.getValueCapacity()); - - vector.setValueCount(numberOfValues); - - for (int i = 0; i < numberOfValues; i++) { - if (i % 3 == 0) { - assertNull(vector.getObject(i)); - } else if (i % 3 == 1) { - assertArrayEquals( - Integer.toString(i).getBytes(StandardCharsets.UTF_8), - vector.get(i), - "unexpected value at index: " + i); - } else { - assertArrayEquals( - (i + prefixString).getBytes(StandardCharsets.UTF_8), - vector.get(i), - "unexpected value at index: " + i); - } - } - - vector2.setInitialCapacity(initialCapacity); - vector2.allocateNew(); - int capacity2 = vector2.getValueCapacity(); - assertEquals(capacity2, capacity); - - for (int i = 0; i < numberOfValues; i++) { - vector2.copyFromSafe(i, i, vector); - if (i 
% 3 == 0) { - assertNull(vector2.getObject(i)); - } else if (i % 3 == 1) { - assertArrayEquals( - Integer.toString(i).getBytes(StandardCharsets.UTF_8), - vector2.get(i), - "unexpected value at index: " + i); - } else { - assertArrayEquals( - (i + prefixString).getBytes(StandardCharsets.UTF_8), - vector2.get(i), - "unexpected value at index: " + i); - } - } - - /* NO reAlloc() should have happened in setSafe() */ - assertEquals(capacity, vector2.getValueCapacity()); - - vector2.setValueCount(numberOfValues); - - for (int i = 0; i < numberOfValues; i++) { - if (i % 3 == 0) { - assertNull(vector2.getObject(i)); - } else if (i % 3 == 1) { - assertArrayEquals( - Integer.toString(i).getBytes(StandardCharsets.UTF_8), - vector2.get(i), - "unexpected value at index: " + i); - } else { - assertArrayEquals( - (i + prefixString).getBytes(StandardCharsets.UTF_8), - vector2.get(i), - "unexpected value at index: " + i); - } - } - - // make it reallocate - int valueCapacity = vector2.getValueCapacity(); - for (int i = 0; i < numberOfValues; i++) { - int thisIndex = i + valueCapacity; - vector2.copyFromSafe(i, thisIndex, vector); - if (i % 3 == 0) { - assertNull(vector2.getObject(thisIndex)); - } else if (i % 3 == 1) { - assertArrayEquals( - Integer.toString(i).getBytes(StandardCharsets.UTF_8), - vector2.get(thisIndex), - "unexpected value at index: " + i); - } else { - assertArrayEquals( - (i + prefixString).getBytes(StandardCharsets.UTF_8), - vector2.get(thisIndex), - "unexpected value at index: " + i); - } - } - - // test target vector with different initialCapacity - try (final BaseVariableWidthViewVector vector3 = vectorCreator.apply(allocator)) { - vector3.setInitialCapacity(16); - vector3.allocateNew(); - for (int i = 0; i < numberOfValues; i++) { - vector3.copyFromSafe(i, i, vector); - if (i % 3 == 0) { - assertNull(vector3.getObject(i)); - } else { - assertArrayEquals(vector.get(i), vector3.get(i)); - } - } - } - - // test overwrite a used vector by copy - try (final 
BaseVariableWidthViewVector targetVector = vectorCreator.apply(allocator)) { - - targetVector.setInitialCapacity(initialCapacity); - targetVector.allocateNew(); - - // source vector: null, short, long... - // target vector: long, null, short... - for (int i = 0; i < numberOfValues; i++) { - if (i % 3 == 0) { - // long strings - byte[] b = (i + prefixString).getBytes(StandardCharsets.UTF_8); - targetVector.set(i, b, 0, b.length); - } else if (i % 3 == 1) { - // null values - targetVector.setNull(i); - } else { - // short strings - byte[] b = Integer.toString(i).getBytes(StandardCharsets.UTF_8); - targetVector.set(i, b, 0, b.length); - } - } - targetVector.setValueCount(numberOfValues); - - for (int i = 0; i < numberOfValues; i++) { - targetVector.copyFromSafe(i, i, vector); - if (i % 3 == 0) { - assertNull(targetVector.getObject(i)); - } else { - assertArrayEquals(vector.get(i), targetVector.get(i)); - } - } - } - } - } - - public byte[] generateRandomBinaryData(int size) { - byte[] binaryData = new byte[size]; - random.nextBytes(binaryData); - return binaryData; - } - - private byte[][] generateBinaryDataArray(int size, int length) { - byte[][] binaryDataArray = new byte[size][]; - for (int i = 0; i < size; i++) { - binaryDataArray[i] = generateRandomBinaryData(length); - } - return binaryDataArray; - } - - private void testSplitAndTransferOnSlicedBufferHelper( - BaseVariableWidthViewVector targetVector, - BaseVariableWidthViewVector sourceVector, - int startIndex, - int length, - byte[][] data) { - sourceVector.allocateNew(1024 * 10, 1024); - - for (int i = 0; i < data.length; i++) { - sourceVector.set(i, data[i]); - } - sourceVector.setValueCount(data.length); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(startIndex, length, targetVector); - // we allocate view and data buffers 
for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - - // The validity buffer is sliced from the same buffer.See - // BaseFixedWidthViewVector#allocateBytes. - // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. - assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - } - - /** - * ARROW-7831: this checks a slice taken off a buffer is still readable after that buffer's - * allocator is closed. With short strings. - */ - @Test - public void testSplitAndTransferWithShortStringOnSlicedBuffer() { - final byte[][] data = new byte[][] {STR4, STR5, STR6}; - final int startIndex = 0; - final int length = 2; - - BiConsumer validateVector = - (targetVector, expectedData) -> { - IntStream.range(startIndex, length) - .forEach(i -> assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); - }; - - try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { - try (final ViewVarCharVector sourceVector = - newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - testSplitAndTransferOnSlicedBufferHelper( - targetVector, sourceVector, startIndex, length, data); - } - validateVector.accept(targetVector, data); - } - - final byte[][] binaryData = generateBinaryDataArray(3, 10); - - try (final ViewVarBinaryVector targetVector = - newViewVarBinaryVector("split-target", allocator)) { - try (final ViewVarBinaryVector sourceVector = - newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { - testSplitAndTransferOnSlicedBufferHelper( - targetVector, sourceVector, startIndex, length, binaryData); - } - validateVector.accept(targetVector, binaryData); - } - } - - /** - * ARROW-7831: this checks a slice taken off a buffer is still readable after that buffer's - * allocator is closed. 
With a long string included. - */ - @Test - public void testSplitAndTransferWithLongStringsOnSlicedBuffer() { - final byte[][] data = new byte[][] {STR2, STR5, STR6}; - final int startIndex = 0; - final int length = 2; - - BiConsumer validateVector = - (targetVector, expectedData) -> { - IntStream.range(startIndex, length) - .forEach(i -> assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); - }; - - try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { - try (final ViewVarCharVector sourceVector = - newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - testSplitAndTransferOnSlicedBufferHelper( - targetVector, sourceVector, startIndex, length, data); - } - validateVector.accept(targetVector, data); - } - - final byte[][] binaryData = generateBinaryDataArray(3, 18); - try (final ViewVarBinaryVector targetVector = - newViewVarBinaryVector("split-target", allocator)) { - try (final ViewVarBinaryVector sourceVector = - newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { - testSplitAndTransferOnSlicedBufferHelper( - targetVector, sourceVector, startIndex, length, binaryData); - } - validateVector.accept(targetVector, binaryData); - } - } - - private void testSplitAndTransferOnSlicedVectorHelper( - BaseVariableWidthViewVector sourceVector, - BaseVariableWidthViewVector targetVector, - int startIndex, - int length, - byte[][] data) { - sourceVector.allocateNew(1024 * 10, 1024); - - for (int i = 0; i < data.length; i++) { - sourceVector.set(i, data[i]); - } - sourceVector.setValueCount(data.length); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(startIndex, length, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - // The validity buffer is 
sliced from the same buffer.See - // BaseFixedWidthViewVector#allocateBytes. - // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. - assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - for (int i = startIndex; i < length; i++) { - assertArrayEquals(data[i], targetVector.get(i - startIndex)); - } - } - - /** - * ARROW-7831: this checks a vector that got sliced is still readable after the slice's allocator - * got closed. With short strings. - */ - @Test - public void testSplitAndTransferWithShortStringsOnSlicedVector() { - byte[][] data = new byte[][] {STR4, STR5, STR6}; - final int startIndex = 0; - final int length = 2; - - BiConsumer validateVector = - (sourceVector, expectedData) -> { - IntStream.range(startIndex, length) - .forEach(i -> assertArrayEquals(expectedData[i], sourceVector.get(i))); - }; - - try (final ViewVarCharVector sourceVector = - newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { - testSplitAndTransferOnSlicedVectorHelper( - sourceVector, targetVector, startIndex, length, data); - } - validateVector.accept(sourceVector, data); - } - - byte[][] binaryData = generateBinaryDataArray(3, 10); - try (final ViewVarBinaryVector sourceVector = - newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { - try (final ViewVarBinaryVector targetVector = - newViewVarBinaryVector("split-target", allocator)) { - testSplitAndTransferOnSlicedVectorHelper( - sourceVector, targetVector, startIndex, length, binaryData); - } - validateVector.accept(sourceVector, binaryData); - } - } - - /** - * ARROW-7831: this checks a vector that got sliced is still readable after the slice's allocator - * got closed. With a long string included. 
- */ - @Test - public void testSplitAndTransferWithLongStringsOnSlicedVector() { - final byte[][] data = new byte[][] {STR2, STR5, STR6}; - final int startIndex = 0; - final int length = 2; - - BiConsumer validateVector = - (sourceVector, expectedData) -> { - IntStream.range(startIndex, length) - .forEach(i -> assertArrayEquals(expectedData[i], sourceVector.get(i))); - }; - - try (final ViewVarCharVector sourceVector = - newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { - testSplitAndTransferOnSlicedVectorHelper( - sourceVector, targetVector, startIndex, length, data); - } - validateVector.accept(sourceVector, data); - } - - final byte[][] binaryData = generateBinaryDataArray(3, 20); - try (final ViewVarBinaryVector sourceVector = - newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { - try (final ViewVarBinaryVector targetVector = - newViewVarBinaryVector("split-target", allocator)) { - testSplitAndTransferOnSlicedVectorHelper( - sourceVector, targetVector, startIndex, length, binaryData); - } - validateVector.accept(sourceVector, binaryData); - } - } - - private void testSplitAndTransferOnValiditySplitHelper( - BaseVariableWidthViewVector targetVector, - BaseVariableWidthViewVector sourceVector, - int startIndex, - int length, - byte[][] data) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, new byte[0]); - sourceVector.setNull(1); - for (int i = 0; i < data.length; i++) { - if (data[i] == null) { - sourceVector.setNull(i); - } else { - sourceVector.set(i, data[i]); - } - } - sourceVector.setValueCount(data.length); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(startIndex, length, targetVector); - // the allocation only consists in the size needed for the 
validity buffer - final long validitySize = - DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY.getRoundedSize( - BaseValueVector.getValidityBufferSizeFromCount(2)); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem + validitySize < allocator.getAllocatedMemory()); - // The validity is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. - // Since values up to the startIndex are empty/null validity refcnt should not change. - assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - for (int i = startIndex; i < startIndex + length; i++) { - assertArrayEquals(data[i], targetVector.get(i - startIndex)); - } - - for (int i = 0; i < data.length; i++) { - if (data[i] == null) { - assertTrue(sourceVector.isNull(i)); - } else { - assertArrayEquals(data[i], sourceVector.get(i)); - } - } - } - - /** - * ARROW-7831: this checks a validity splitting where the validity buffer is sliced from the same - * buffer. In the case where all the values up to the start of the slice are null/empty. With - * short strings. 
- */ - @Test - public void testSplitAndTransferWithShortStringsOnValiditySplit() { - final byte[][] data = new byte[][] {new byte[0], null, STR4, STR5, STR6}; - final int startIndex = 2; - final int length = 2; - - try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator); - final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - testSplitAndTransferOnValiditySplitHelper( - targetVector, sourceVector, startIndex, length, data); - } - - final byte[][] binaryData = generateBinaryDataArray(5, 10); - binaryData[0] = new byte[0]; - binaryData[1] = null; - try (final ViewVarBinaryVector targetVector = - newViewVarBinaryVector("split-target", allocator); - final ViewVarBinaryVector sourceVector = - newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { - testSplitAndTransferOnValiditySplitHelper( - targetVector, sourceVector, startIndex, length, binaryData); - } - } - - /** - * ARROW-7831: this checks a validity splitting where the validity buffer is sliced from the same - * buffer. In the case where all the values up to the start of the slice are null/empty. With long - * strings. 
- */ - @Test - public void testSplitAndTransferWithLongStringsOnValiditySplit() { - final byte[][] data = new byte[][] {new byte[0], null, STR1, STR2, STR3}; - final int startIndex = 2; - final int length = 2; - - try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator); - final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - testSplitAndTransferOnValiditySplitHelper( - targetVector, sourceVector, startIndex, length, data); - } - - final byte[][] binaryData = generateBinaryDataArray(5, 18); - binaryData[0] = new byte[0]; - binaryData[1] = null; - - try (final ViewVarBinaryVector targetVector = - newViewVarBinaryVector("split-target", allocator); - final ViewVarBinaryVector sourceVector = - newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { - testSplitAndTransferOnValiditySplitHelper( - targetVector, sourceVector, startIndex, length, data); - } - } - - private void testSplitAndTransferOnAllocatorToAllocator( - BaseVariableWidthViewVector targetVector, - BaseVariableWidthViewVector sourceVector, - int startIndex, - int length, - byte[][] data) { - sourceVector.allocateNew(50, data.length); - - for (int i = 0; i < data.length; i++) { - sourceVector.set(i, data[i]); - } - sourceVector.setValueCount(data.length); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(startIndex, length, targetVector); - - if (sourceVector.getDataBuffers().isEmpty()) { - // no extra allocation as strings are all inline - assertEquals(allocatedMem, allocator.getAllocatedMemory()); - } else { - // extra allocation as some strings are not inline - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - } - - // the refcnts of each buffer for this test should be the same as what - // the source allocator ended up with. 
- assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - for (int i = 0; i < data.length; i++) { - assertArrayEquals(data[i], sourceVector.get(i)); - } - } - - /** - * ARROW-7831: ensures that data is transferred from one allocator to another in case of 0-index - * start special cases. With short strings. - */ - @Test - public void testSplitAndTransferWithShortStringsOnAllocatorToAllocator() { - final int maxAllocation = 512; - final byte[][] data = new byte[][] {STR4, STR5, STR6}; - final int startIndex = 0; - final int length = 2; - - BiConsumer validateVector = - (targetVector, expectedData) -> { - IntStream.range(startIndex, length) - .forEach(i -> assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); - }; - - try (final BufferAllocator targetAllocator = - allocator.newChildAllocator("target-alloc", 256, maxAllocation); - final ViewVarCharVector targetVector = - newViewVarCharVector("split-target", targetAllocator)) { - try (final BufferAllocator sourceAllocator = - allocator.newChildAllocator("source-alloc", 256, maxAllocation); - final ViewVarCharVector sourceVector = - newViewVarCharVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { - testSplitAndTransferOnAllocatorToAllocator( - targetVector, sourceVector, startIndex, length, data); - } - validateVector.accept(targetVector, data); - } - - final byte[][] binaryData = generateBinaryDataArray(3, 10); - try (final BufferAllocator targetAllocator = - allocator.newChildAllocator("target-alloc", 256, maxAllocation); - final ViewVarBinaryVector targetVector = - newViewVarBinaryVector("split-target", targetAllocator)) { - try (final BufferAllocator sourceAllocator = - allocator.newChildAllocator("source-alloc", 256, maxAllocation); - final ViewVarBinaryVector sourceVector = - newViewVarBinaryVector(EMPTY_SCHEMA_PATH, 
sourceAllocator)) { - testSplitAndTransferOnAllocatorToAllocator( - targetVector, sourceVector, startIndex, length, binaryData); - } - validateVector.accept(targetVector, binaryData); - } - } - - /** - * ARROW-7831: ensures that data is transferred from one allocator to another in case of 0-index - * start special cases. With long strings. - */ - @Test - public void testSplitAndTransferWithLongStringsOnAllocatorToAllocator() { - final int initialReservation = 1024; - // Here we have the target vector being transferred with a long string - // hence, the data buffer will be allocated. - // The default data buffer allocation takes - // BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * - // BaseVariableWidthViewVector.ELEMENT_SIZE - final byte[][] data = new byte[][] {STR1, STR2, STR3}; - final int startIndex = 0; - final int length = 2; - - BiConsumer validateVector = - (targetVector, expectedData) -> { - IntStream.range(startIndex, length) - .forEach(i -> assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); - }; - - final int maxAllocation = - initialReservation - + BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION - * BaseVariableWidthViewVector.ELEMENT_SIZE; - try (final BufferAllocator targetAllocator = - allocator.newChildAllocator("target-alloc", initialReservation, maxAllocation); - final ViewVarCharVector targetVector = - newViewVarCharVector("split-target", targetAllocator)) { - try (final BufferAllocator sourceAllocator = - allocator.newChildAllocator("source-alloc", initialReservation, maxAllocation); - final ViewVarCharVector sourceVector = - newViewVarCharVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { - testSplitAndTransferOnAllocatorToAllocator( - targetVector, sourceVector, startIndex, length, data); - } - validateVector.accept(targetVector, data); - } - - final byte[][] binaryData = generateBinaryDataArray(3, 18); - - try (final BufferAllocator targetAllocator = - allocator.newChildAllocator("target-alloc", 
initialReservation, maxAllocation); - final ViewVarBinaryVector targetVector = - newViewVarBinaryVector("split-target", targetAllocator)) { - try (final BufferAllocator sourceAllocator = - allocator.newChildAllocator("source-alloc", initialReservation, maxAllocation); - final ViewVarBinaryVector sourceVector = - newViewVarBinaryVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { - testSplitAndTransferOnAllocatorToAllocator( - targetVector, sourceVector, startIndex, length, binaryData); - } - validateVector.accept(targetVector, binaryData); - } - } - - private void testReallocAfterVectorTransferHelper( - BaseVariableWidthViewVector vector, byte[] str1, byte[] str2) { - /* 4096 values with 16 bytes per record */ - final int bytesPerRecord = 32; - vector.allocateNew(4096 * bytesPerRecord, 4096); - int valueCapacity = vector.getValueCapacity(); - assertTrue(valueCapacity >= 4096); - - /* populate the vector */ - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - vector.set(i, str1); - } else { - vector.set(i, str2); - } - } - - /* Check the vector output */ - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(str1, vector.get(i)); - } else { - assertArrayEquals(str2, vector.get(i)); - } - } - - /* trigger first realloc */ - vector.setSafe(valueCapacity, str2, 0, str2.length); - assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); - while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { - vector.reallocViewBuffer(); - vector.reallocViewDataBuffer(); - } - - /* populate the remaining vector */ - for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { - if ((i & 1) == 1) { - vector.set(i, str1); - } else { - vector.set(i, str2); - } - } - - /* Check the vector output */ - valueCapacity = vector.getValueCapacity(); - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(str1, vector.get(i)); - } else { - assertArrayEquals(str2, vector.get(i)); - } - } - - /* 
trigger second realloc */ - vector.setSafe(valueCapacity + bytesPerRecord, str2, 0, str2.length); - assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); - while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { - vector.reallocViewBuffer(); - vector.reallocViewDataBuffer(); - } - - /* populate the remaining vector */ - for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { - if ((i & 1) == 1) { - vector.set(i, str1); - } else { - vector.set(i, str2); - } - } - - /* Check the vector output */ - valueCapacity = vector.getValueCapacity(); - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(str1, vector.get(i)); - } else { - assertArrayEquals(str2, vector.get(i)); - } - } - - /* We are potentially working with 4x the size of vector buffer - * that we initially started with. - * Now let's transfer the vector. - */ - - TransferPair transferPair = vector.getTransferPair(allocator); - transferPair.transfer(); - BaseVariableWidthViewVector toVector = (BaseVariableWidthViewVector) transferPair.getTo(); - valueCapacity = toVector.getValueCapacity(); - - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(str1, toVector.get(i)); - } else { - assertArrayEquals(str2, toVector.get(i)); - } - } - toVector.close(); - } - - @Test - public void testReallocAfterVectorTransfer() { - try (final ViewVarCharVector vector = new ViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - testReallocAfterVectorTransferHelper(vector, STR1, STR2); - } - - try (final ViewVarBinaryVector vector = new ViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { - testReallocAfterVectorTransferHelper( - vector, generateRandomBinaryData(12), generateRandomBinaryData(13)); - } - } - - private void testSplitAndTransferWithMultipleDataBuffersHelper( - BaseVariableWidthViewVector sourceVector, - BaseVariableWidthViewVector targetVector, - int startIndex, - int length, - byte[][] data) { - 
sourceVector.allocateNew(48, 4); - - for (int i = 0; i < data.length; i++) { - sourceVector.set(i, data[i]); - } - sourceVector.setValueCount(data.length); - - // we should have multiple data buffers - assertTrue(sourceVector.getDataBuffers().size() > 1); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - // split and transfer with slice starting at the beginning: - // this should not allocate anything new - sourceVector.splitAndTransferTo(startIndex, length, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - - // the refcnts of each buffer for this test should be the same as what - // the source allocator ended up with. - assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - for (int i = 0; i < data.length; i++) { - assertArrayEquals(data[i], sourceVector.get(i)); - } - } - - /** - * ARROW-7831: ensures that data is transferred from one allocator to another in case of 0-index - * start special cases. With long strings and multiple data buffers. 
Check multi-data buffer - * source copying - */ - @Test - public void testSplitAndTransferWithMultipleDataBuffers() { - final String str4 = generateRandomString(35); - final byte[][] data = new byte[][] {STR1, STR2, STR3, str4.getBytes(StandardCharsets.UTF_8)}; - final int startIndex = 1; - final int length = 3; - - BiConsumer validateVector = - (targetVector, expectedData) -> { - IntStream.range(startIndex, length) - .forEach(i -> assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); - }; - - try (final ViewVarCharVector targetVector = new ViewVarCharVector("target", allocator)) { - try (final ViewVarCharVector sourceVector = new ViewVarCharVector("source", allocator)) { - testSplitAndTransferWithMultipleDataBuffersHelper( - sourceVector, targetVector, startIndex, length, data); - } - validateVector.accept(targetVector, data); - } - - try (final ViewVarBinaryVector targetVector = new ViewVarBinaryVector("target", allocator)) { - try (final ViewVarBinaryVector sourceVector = new ViewVarBinaryVector("source", allocator)) { - testSplitAndTransferWithMultipleDataBuffersHelper( - sourceVector, targetVector, startIndex, length, data); - } - validateVector.accept(targetVector, data); - } - } - - @Test - public void testVectorLoadUnloadOnMixedTypes() { - - try (final IntVector vector1 = new IntVector("myvector", allocator); - final ViewVarCharVector vector2 = new ViewVarCharVector("myviewvector", allocator)) { - - final int valueCount = 15; - - setVector(vector1, 1, 2, 3, 4, 5, 6); - vector1.setValueCount(valueCount); - - setVector(vector2, STR1, STR2, STR3, STR4, STR5, STR6); - vector1.setValueCount(valueCount); - - /* Check the vector output */ - assertEquals(1, vector1.get(0)); - assertEquals(2, vector1.get(1)); - assertEquals(3, vector1.get(2)); - assertEquals(4, vector1.get(3)); - assertEquals(5, vector1.get(4)); - assertEquals(6, vector1.get(5)); - - Field field1 = vector1.getField(); - String fieldName1 = field1.getName(); - - Field field2 = 
vector2.getField(); - String fieldName2 = field2.getName(); - - List fields = new ArrayList<>(2); - List fieldVectors = new ArrayList<>(2); - - fields.add(field1); - fields.add(field2); - fieldVectors.add(vector1); - fieldVectors.add(vector2); - - Schema schema = new Schema(fields); - - VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, valueCount); - VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); - - try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - BufferAllocator finalVectorsAllocator = - allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); - VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); ) { - - // validating recordBatch contains an output for variadicBufferCounts - assertFalse(recordBatch.getVariadicBufferCounts().isEmpty()); - assertEquals(1, recordBatch.getVariadicBufferCounts().size()); - - VectorLoader vectorLoader = new VectorLoader(schemaRoot2); - vectorLoader.load(recordBatch); - - IntVector vector3 = (IntVector) schemaRoot2.getVector(fieldName1); - vector3.setValueCount(25); - - /* Check the vector output */ - assertEquals(1, vector3.get(0)); - assertEquals(2, vector3.get(1)); - assertEquals(3, vector3.get(2)); - assertEquals(4, vector3.get(3)); - assertEquals(5, vector3.get(4)); - assertEquals(6, vector3.get(5)); - - ViewVarCharVector vector4 = (ViewVarCharVector) schemaRoot2.getVector(fieldName2); - vector4.setValueCount(25); - - /* Check the vector output */ - assertArrayEquals(STR1, vector4.get(0)); - assertArrayEquals(STR2, vector4.get(1)); - assertArrayEquals(STR3, vector4.get(2)); - assertArrayEquals(STR4, vector4.get(3)); - assertArrayEquals(STR5, vector4.get(4)); - assertArrayEquals(STR6, vector4.get(5)); - } - } - } - - private String generateRandomString(int length) { - Random random = new Random(); - StringBuilder sb = new StringBuilder(length); - for (int i = 0; i < length; i++) { - sb.append(random.nextInt(10)); // 0-9 - } - 
return sb.toString(); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java deleted file mode 100644 index 75ac1be463172..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.Arrays; -import java.util.Collections; -import org.apache.arrow.memory.AllocationListener; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.memory.rounding.DefaultRoundingPolicy; -import org.apache.arrow.memory.rounding.RoundingPolicy; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; -import org.apache.arrow.vector.types.pojo.ArrowType.Duration; -import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary; -import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestVectorAlloc { - private BufferAllocator rootAllocator; - - private BufferAllocator policyAllocator; - - @BeforeEach - public void init() { - rootAllocator = new RootAllocator(Long.MAX_VALUE); - policyAllocator = - new RootAllocator(AllocationListener.NOOP, Integer.MAX_VALUE, new CustomPolicy()); - } - - @AfterEach - public void terminate() throws Exception { - rootAllocator.close(); - policyAllocator.close(); - } - - private static Field field(String name, ArrowType type) { - return new Field(name, new FieldType(true, type, null), Collections.emptyList()); - } - - @Test - public void testVectorAllocWithField() { - Schema schema = - new Schema( - Arrays.asList( - field("TINYINT", MinorType.TINYINT.getType()), - field("SMALLINT", 
MinorType.SMALLINT.getType()), - field("INT", MinorType.INT.getType()), - field("BIGINT", MinorType.BIGINT.getType()), - field("UINT1", MinorType.UINT1.getType()), - field("UINT2", MinorType.UINT2.getType()), - field("UINT4", MinorType.UINT4.getType()), - field("UINT8", MinorType.UINT8.getType()), - field("FLOAT4", MinorType.FLOAT4.getType()), - field("FLOAT8", MinorType.FLOAT8.getType()), - field("UTF8", MinorType.VARCHAR.getType()), - field("VARBINARY", MinorType.VARBINARY.getType()), - field("BIT", MinorType.BIT.getType()), - field("DECIMAL", new Decimal(38, 5, 128)), - field("FIXEDSIZEBINARY", new FixedSizeBinary(50)), - field("DATEDAY", MinorType.DATEDAY.getType()), - field("DATEMILLI", MinorType.DATEMILLI.getType()), - field("TIMESEC", MinorType.TIMESEC.getType()), - field("TIMEMILLI", MinorType.TIMEMILLI.getType()), - field("TIMEMICRO", MinorType.TIMEMICRO.getType()), - field("TIMENANO", MinorType.TIMENANO.getType()), - field("TIMESTAMPSEC", MinorType.TIMESTAMPSEC.getType()), - field("TIMESTAMPMILLI", MinorType.TIMESTAMPMILLI.getType()), - field("TIMESTAMPMICRO", MinorType.TIMESTAMPMICRO.getType()), - field("TIMESTAMPNANO", MinorType.TIMESTAMPNANO.getType()), - field("TIMESTAMPSECTZ", new Timestamp(TimeUnit.SECOND, "PST")), - field("TIMESTAMPMILLITZ", new Timestamp(TimeUnit.MILLISECOND, "PST")), - field("TIMESTAMPMICROTZ", new Timestamp(TimeUnit.MICROSECOND, "PST")), - field("TIMESTAMPNANOTZ", new Timestamp(TimeUnit.NANOSECOND, "PST")), - field("INTERVALDAY", MinorType.INTERVALDAY.getType()), - field("INTERVALYEAR", MinorType.INTERVALYEAR.getType()), - field("DURATION", new Duration(TimeUnit.MILLISECOND)))); - - try (BufferAllocator allocator = rootAllocator.newChildAllocator("child", 0, Long.MAX_VALUE)) { - for (Field field : schema.getFields()) { - try (FieldVector vector = field.createVector(allocator)) { - assertEquals( - vector.getMinorType(), - Types.getMinorTypeForArrowType(field.getFieldType().getType())); - vector.allocateNew(); - } - } - } - } - - 
private static final int CUSTOM_SEGMENT_SIZE = 200; - - /** A custom rounding policy that rounds the size to the next multiple of 200. */ - private static class CustomPolicy implements RoundingPolicy { - - @Override - public long getRoundedSize(long requestSize) { - return (requestSize + CUSTOM_SEGMENT_SIZE - 1) / CUSTOM_SEGMENT_SIZE * CUSTOM_SEGMENT_SIZE; - } - } - - @Test - public void testFixedWidthVectorAllocation() { - try (IntVector vec1 = new IntVector("vec", policyAllocator); - IntVector vec2 = new IntVector("vec", rootAllocator)) { - assertTrue(vec1.getAllocator().getRoundingPolicy() instanceof CustomPolicy); - vec1.allocateNew(50); - long totalCapacity = vec1.getValidityBuffer().capacity() + vec1.getDataBuffer().capacity(); - - // the total capacity must be a multiple of the segment size - assertTrue(totalCapacity % CUSTOM_SEGMENT_SIZE == 0); - - assertTrue(vec2.getAllocator().getRoundingPolicy() instanceof DefaultRoundingPolicy); - vec2.allocateNew(50); - totalCapacity = vec2.getValidityBuffer().capacity() + vec2.getDataBuffer().capacity(); - - // the total capacity must be a power of two - assertEquals(0, totalCapacity & (totalCapacity - 1)); - } - } - - @Test - public void testVariableWidthVectorAllocation() { - try (VarCharVector vec1 = new VarCharVector("vec", policyAllocator); - VarCharVector vec2 = new VarCharVector("vec", rootAllocator)) { - assertTrue(vec1.getAllocator().getRoundingPolicy() instanceof CustomPolicy); - vec1.allocateNew(50); - long totalCapacity = vec1.getValidityBuffer().capacity() + vec1.getOffsetBuffer().capacity(); - - // the total capacity must be a multiple of the segment size - assertTrue(totalCapacity % CUSTOM_SEGMENT_SIZE == 0); - - assertTrue(vec2.getAllocator().getRoundingPolicy() instanceof DefaultRoundingPolicy); - vec2.allocateNew(50); - totalCapacity = vec2.getValidityBuffer().capacity() + vec2.getOffsetBuffer().capacity(); - - // the total capacity must be a power of two - assertEquals(0, totalCapacity & 
(totalCapacity - 1)); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java deleted file mode 100644 index f5ec42c71c209..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java +++ /dev/null @@ -1,492 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.nio.charset.StandardCharsets; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.impl.NullableStructWriter; -import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.DataSizeRoundingUtil; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestVectorReAlloc { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testFixedType() { - try (final UInt4Vector vector = new UInt4Vector("", allocator)) { - vector.setInitialCapacity(512); - vector.allocateNew(); - - assertTrue(vector.getValueCapacity() >= 512); - int initialCapacity = vector.getValueCapacity(); - - try { - vector.set(initialCapacity, 0); - fail("Expected out of bounds exception"); - } catch (Exception e) { - // ok - } - - 
vector.reAlloc(); - assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); - - vector.set(initialCapacity, 100); - assertEquals(100, vector.get(initialCapacity)); - } - } - - @Test - public void testNullableType() { - try (final VarCharVector vector = new VarCharVector("", allocator)) { - vector.setInitialCapacity(512); - vector.allocateNew(); - - assertTrue(vector.getValueCapacity() >= 512); - int initialCapacity = vector.getValueCapacity(); - - try { - vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); - fail("Expected out of bounds exception"); - } catch (Exception e) { - // ok - } - - vector.reAlloc(); - assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); - - vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); - assertEquals(new String(vector.get(initialCapacity), StandardCharsets.UTF_8), "foo"); - } - } - - @Test - public void testListType() { - try (final ListVector vector = ListVector.empty("", allocator)) { - vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - - vector.setInitialCapacity(512); - vector.allocateNew(); - - assertEquals(512, vector.getValueCapacity()); - - try { - vector.getInnerValueCountAt(2014); - fail("Expected out of bounds exception"); - } catch (Exception e) { - // ok - } - - vector.reAlloc(); - assertEquals(1024, vector.getValueCapacity()); - assertEquals(0, vector.getOffsetBuffer().getInt(2014 * ListVector.OFFSET_WIDTH)); - } - } - - @Test - public void testStructType() { - try (final StructVector vector = StructVector.empty("", allocator)) { - vector.addOrGet("", FieldType.nullable(MinorType.INT.getType()), IntVector.class); - - vector.setInitialCapacity(512); - vector.allocateNew(); - - assertEquals(512, vector.getValueCapacity()); - - try { - vector.getObject(513); - fail("Expected out of bounds exception"); - } catch (Exception e) { - // ok - } - - vector.reAlloc(); - assertEquals(1024, vector.getValueCapacity()); - assertNull(vector.getObject(513)); - } - } - - 
@Test - public void testVariableWidthTypeSetNullValues() { - // Test ARROW-11223 bug is fixed - try (final BaseVariableWidthVector v1 = new VarCharVector("var1", allocator)) { - v1.setInitialCapacity(512); - v1.allocateNew(); - int numNullValues1 = v1.getValueCapacity() + 1; - for (int i = 0; i < numNullValues1; i++) { - v1.setNull(i); - } - assertTrue(v1.getBufferSizeFor(numNullValues1) > 0); - } - - try (final BaseLargeVariableWidthVector v2 = new LargeVarCharVector("var2", allocator)) { - v2.setInitialCapacity(512); - v2.allocateNew(); - int numNullValues2 = v2.getValueCapacity() + 1; - for (int i = 0; i < numNullValues2; i++) { - v2.setNull(i); - } - assertTrue(v2.getBufferSizeFor(numNullValues2) > 0); - } - } - - @Test - public void testFixedAllocateAfterReAlloc() throws Exception { - try (final IntVector vector = new IntVector("", allocator)) { - /* - * Allocate the default size, and then, reAlloc. This should double the allocation. - */ - vector.allocateNewSafe(); // Initial allocation - vector.reAlloc(); // Double the allocation size. - int savedValueCapacity = vector.getValueCapacity(); - - /* - * Clear and allocate again. - */ - vector.clear(); - vector.allocateNewSafe(); - - /* - * Verify that the buffer sizes haven't changed. - */ - assertEquals(vector.getValueCapacity(), savedValueCapacity); - } - } - - @Test - public void testVariableAllocateAfterReAlloc() throws Exception { - try (final VarCharVector vector = new VarCharVector("", allocator)) { - /* - * Allocate the default size, and then, reAlloc. This should double the allocation. - */ - vector.allocateNewSafe(); // Initial allocation - vector.reAlloc(); // Double the allocation size. - int savedValueCapacity = vector.getValueCapacity(); - long savedValueBufferSize = vector.valueBuffer.capacity(); - - /* - * Clear and allocate again. - */ - vector.clear(); - vector.allocateNewSafe(); - - /* - * Verify that the buffer sizes haven't changed. 
- */ - assertEquals(vector.getValueCapacity(), savedValueCapacity); - assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); - } - } - - @Test - public void testLargeVariableAllocateAfterReAlloc() throws Exception { - try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) { - /* - * Allocate the default size, and then, reAlloc. This should double the allocation. - */ - vector.allocateNewSafe(); // Initial allocation - vector.reAlloc(); // Double the allocation size. - int savedValueCapacity = vector.getValueCapacity(); - long savedValueBufferSize = vector.valueBuffer.capacity(); - - /* - * Clear and allocate again. - */ - vector.clear(); - vector.allocateNewSafe(); - - /* - * Verify that the buffer sizes haven't changed. - */ - assertEquals(vector.getValueCapacity(), savedValueCapacity); - assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); - } - } - - @Test - public void testVarCharAllocateNew() throws Exception { - final int count = 6000; - - try (final VarCharVector vector = new VarCharVector("", allocator)) { - vector.allocateNew(count); - - // verify that the validity buffer and value buffer have capacity for at least 'count' - // elements. - assertTrue( - vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - assertTrue( - vector.getOffsetBuffer().capacity() - >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); - } - } - - @Test - public void testLargeVarCharAllocateNew() throws Exception { - final int count = 6000; - - try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) { - vector.allocateNew(count); - - // verify that the validity buffer and value buffer have capacity for at least 'count' - // elements. 
- assertTrue( - vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - assertTrue( - vector.getOffsetBuffer().capacity() - >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); - } - } - - @Test - public void testVarCharAllocateNewUsingHelper() throws Exception { - final int count = 6000; - - try (final VarCharVector vector = new VarCharVector("", allocator)) { - AllocationHelper.allocateNew(vector, count); - - // verify that the validity buffer and value buffer have capacity for at least 'count' - // elements. - assertTrue( - vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - assertTrue( - vector.getOffsetBuffer().capacity() - >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); - } - } - - @Test - public void testLargeVarCharAllocateNewUsingHelper() throws Exception { - final int count = 6000; - - try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) { - AllocationHelper.allocateNew(vector, count); - - // verify that the validity buffer and value buffer have capacity for at least 'count' - // elements. - assertTrue( - vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - assertTrue( - vector.getOffsetBuffer().capacity() - >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); - } - } - - @Test - public void testFixedRepeatedClearAndSet() throws Exception { - try (final IntVector vector = new IntVector("", allocator)) { - vector.allocateNewSafe(); // Initial allocation - vector.clear(); // clear vector. - vector.setSafe(0, 10); - int savedValueCapacity = vector.getValueCapacity(); - - for (int i = 0; i < 1024; ++i) { - vector.clear(); // clear vector. - vector.setSafe(0, 10); - } - - // should be deterministic, and not cause a run-away increase in capacity. 
- assertEquals(vector.getValueCapacity(), savedValueCapacity); - } - } - - @Test - public void testVariableRepeatedClearAndSet() throws Exception { - try (final VarCharVector vector = new VarCharVector("", allocator)) { - vector.allocateNewSafe(); // Initial allocation - - vector.clear(); // clear vector. - vector.setSafe(0, "hello world".getBytes(StandardCharsets.UTF_8)); - int savedValueCapacity = vector.getValueCapacity(); - - for (int i = 0; i < 1024; ++i) { - vector.clear(); // clear vector. - vector.setSafe(0, "hello world".getBytes(StandardCharsets.UTF_8)); - } - - // should be deterministic, and not cause a run-away increase in capacity. - assertEquals(vector.getValueCapacity(), savedValueCapacity); - } - } - - @Test - public void testRepeatedValueVectorClearAndSet() throws Exception { - try (final ListVector vector = - new ListVector("", allocator, FieldType.nullable(MinorType.INT.getType()), null)) { - vector.allocateNewSafe(); // Initial allocation - UnionListWriter writer = vector.getWriter(); - - vector.clear(); // clear vector. - writer.setPosition(0); // optional - writer.startList(); - writer.writeInt(0); - writer.endList(); - int savedValueCapacity = vector.getValueCapacity(); - - for (int i = 0; i < 1024; ++i) { - vector.clear(); // clear vector. - writer.setPosition(0); // optional - writer.startList(); - writer.writeInt(i); - writer.endList(); - } - - // should be deterministic, and not cause a run-away increase in capacity. - assertEquals(vector.getValueCapacity(), savedValueCapacity); - } - } - - @Test - public void testStructVectorClearAndSet() throws Exception { - try (final StructVector vector = StructVector.empty("v", allocator)) { - vector.allocateNewSafe(); // Initial allocation - - NullableStructWriter writer = vector.getWriter(); - - vector.clear(); // clear vector. 
- writer.setPosition(0); // optional - writer.start(); - writer.integer("int").writeInt(0); - writer.end(); - int savedValueCapacity = vector.getValueCapacity(); - - for (int i = 0; i < 1024; ++i) { - vector.clear(); // clear vector. - writer.setPosition(0); // optional - writer.start(); - writer.integer("int").writeInt(i); - writer.end(); - } - - // should be deterministic, and not cause a run-away increase in capacity. - assertEquals(vector.getValueCapacity(), savedValueCapacity); - } - } - - @Test - public void testFixedSizeListVectorClearAndSet() { - try (final FixedSizeListVector vector = - new FixedSizeListVector( - "", allocator, FieldType.nullable(new ArrowType.FixedSizeList(2)), null)) { - vector.allocateNewSafe(); // Initial allocation - UnionFixedSizeListWriter writer = vector.getWriter(); - - vector.clear(); // clear vector. - writer.setPosition(0); // optional - writer.startList(); - writer.writeInt(0); - writer.writeInt(1); - writer.endList(); - int savedValueCapacity = vector.getValueCapacity(); - - for (int i = 0; i < 1024; ++i) { - vector.clear(); // clear vector. - writer.setPosition(0); // optional - writer.startList(); - writer.writeInt(i); - writer.writeInt(i + 1); - writer.endList(); - } - - // should be deterministic, and not cause a run-away increase in capacity. - assertEquals(vector.getValueCapacity(), savedValueCapacity); - } - } - - @Test - public void testUnionVectorClearAndSet() { - try (final UnionVector vector = - new UnionVector("", allocator, /* field type */ null, /* call-back */ null)) { - vector.allocateNewSafe(); // Initial allocation - - NullableIntHolder holder = new NullableIntHolder(); - holder.isSet = 1; - holder.value = 1; - - vector.clear(); // clear vector. - vector.setType(0, MinorType.INT); - vector.setSafe(0, holder); - int savedValueCapacity = vector.getValueCapacity(); - - for (int i = 0; i < 1024; ++i) { - vector.clear(); // clear vector. 
- vector.setType(0, MinorType.INT); - vector.setSafe(0, holder); - } - - // should be deterministic, and not cause a run-away increase in capacity. - assertEquals(vector.getValueCapacity(), savedValueCapacity); - } - } - - @Test - public void testDenseUnionVectorClearAndSet() { - try (final DenseUnionVector vector = new DenseUnionVector("", allocator, null, null)) { - vector.allocateNewSafe(); // Initial allocation - - NullableIntHolder holder = new NullableIntHolder(); - holder.isSet = 1; - holder.value = 1; - - byte intTypeId = vector.registerNewTypeId(Field.nullable("", MinorType.INT.getType())); - - vector.clear(); - vector.setTypeId(0, intTypeId); - vector.setSafe(0, holder); - - int savedValueCapacity = vector.getValueCapacity(); - - for (int i = 0; i < 1024; ++i) { - vector.clear(); - vector.setTypeId(0, intTypeId); - vector.setSafe(0, holder); - } - - // should be deterministic, and not cause a run-away increase in capacity. - assertEquals(vector.getValueCapacity(), savedValueCapacity); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java deleted file mode 100644 index 28d73a8fdfff9..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; -import org.apache.arrow.vector.types.pojo.ArrowType.Int; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestVectorReset { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - private void resetVectorAndVerify(ValueVector vector, ArrowBuf[] bufs) { - long[] sizeBefore = new long[bufs.length]; - for (int i = 0; i < bufs.length; i++) { - sizeBefore[i] = bufs[i].capacity(); - } - vector.reset(); - for (int i = 0; i < bufs.length; i++) { - 
assertEquals(sizeBefore[i], bufs[i].capacity()); - verifyBufferZeroed(bufs[i]); - } - assertEquals(0, vector.getValueCount()); - } - - private void verifyBufferZeroed(ArrowBuf buf) { - for (int i = 0; i < buf.capacity(); i++) { - assertTrue((byte) 0 == buf.getByte(i)); - } - } - - @Test - public void testFixedTypeReset() { - try (final UInt4Vector vector = new UInt4Vector("UInt4", allocator)) { - vector.allocateNewSafe(); - vector.setNull(0); - vector.setValueCount(1); - resetVectorAndVerify(vector, vector.getBuffers(false)); - } - } - - @Test - public void testVariableTypeReset() { - try (final VarCharVector vector = new VarCharVector("VarChar", allocator)) { - vector.allocateNewSafe(); - vector.set(0, "a".getBytes(StandardCharsets.UTF_8)); - vector.setLastSet(0); - vector.setValueCount(1); - resetVectorAndVerify(vector, vector.getBuffers(false)); - assertEquals(-1, vector.getLastSet()); - } - } - - @Test - public void testVariableViewTypeReset() { - try (final ViewVarCharVector vector = new ViewVarCharVector("ViewVarChar", allocator)) { - vector.allocateNewSafe(); - vector.set(0, "a".getBytes(StandardCharsets.UTF_8)); - vector.setLastSet(0); - vector.setValueCount(1); - resetVectorAndVerify(vector, vector.getBuffers(false)); - assertEquals(-1, vector.getLastSet()); - } - } - - @Test - public void testLargeVariableTypeReset() { - try (final LargeVarCharVector vector = new LargeVarCharVector("LargeVarChar", allocator)) { - vector.allocateNewSafe(); - vector.set(0, "a".getBytes(StandardCharsets.UTF_8)); - vector.setLastSet(0); - vector.setValueCount(1); - resetVectorAndVerify(vector, vector.getBuffers(false)); - assertEquals(-1, vector.getLastSet()); - } - } - - @Test - public void testListTypeReset() { - try (final ListVector variableList = - new ListVector( - "VarList", allocator, FieldType.nullable(MinorType.INT.getType()), null); - final FixedSizeListVector fixedList = - new FixedSizeListVector( - "FixedList", allocator, FieldType.nullable(new FixedSizeList(2)), 
null); - final ListViewVector variableViewList = - new ListViewVector( - "VarListView", allocator, FieldType.nullable(MinorType.INT.getType()), null)) { - // ListVector - variableList.allocateNewSafe(); - variableList.startNewValue(0); - variableList.endValue(0, 0); - variableList.setValueCount(1); - resetVectorAndVerify(variableList, variableList.getBuffers(false)); - assertEquals(-1, variableList.getLastSet()); - - // FixedSizeListVector - fixedList.allocateNewSafe(); - fixedList.setNull(0); - fixedList.setValueCount(1); - resetVectorAndVerify(fixedList, fixedList.getBuffers(false)); - - // ListViewVector - variableViewList.allocateNewSafe(); - variableViewList.startNewValue(0); - variableViewList.endValue(0, 0); - variableViewList.setValueCount(1); - resetVectorAndVerify(variableViewList, variableViewList.getBuffers(false)); - } - } - - @Test - public void testStructTypeReset() { - try (final NonNullableStructVector nonNullableStructVector = - new NonNullableStructVector( - "Struct", allocator, FieldType.nullable(MinorType.INT.getType()), null); - final StructVector structVector = - new StructVector( - "NullableStruct", allocator, FieldType.nullable(MinorType.INT.getType()), null)) { - // NonNullableStructVector - nonNullableStructVector.allocateNewSafe(); - IntVector structChild = - nonNullableStructVector.addOrGet( - "child", FieldType.nullable(new Int(32, true)), IntVector.class); - structChild.setNull(0); - nonNullableStructVector.setValueCount(1); - resetVectorAndVerify(nonNullableStructVector, nonNullableStructVector.getBuffers(false)); - - // StructVector - structVector.allocateNewSafe(); - structVector.setNull(0); - structVector.setValueCount(1); - resetVectorAndVerify(structVector, structVector.getBuffers(false)); - } - } - - @Test - public void testUnionTypeReset() { - try (final UnionVector vector = - new UnionVector("Union", allocator, /* field type */ null, /* call-back */ null); - final IntVector dataVector = new IntVector("Int", allocator)) { - 
vector.getBufferSize(); - vector.allocateNewSafe(); - dataVector.allocateNewSafe(); - vector.addVector(dataVector); - dataVector.setNull(0); - vector.setValueCount(1); - resetVectorAndVerify(vector, vector.getBuffers(false)); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java deleted file mode 100644 index 50f61d311e60e..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestVectorSchemaRoot { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() { - allocator.close(); - } - - @Test - public void testResetRowCount() { - final int size = 20; - try (final BitVector vec1 = new BitVector("bit", allocator); - final IntVector vec2 = new IntVector("int", allocator)) { - VectorSchemaRoot vsr = VectorSchemaRoot.of(vec1, vec2); - - vsr.allocateNew(); - assertEquals(0, vsr.getRowCount()); - - for (int i = 0; i < size; i++) { - vec1.setSafe(i, i % 2); - vec2.setSafe(i, i); - } - vsr.setRowCount(size); - checkCount(vec1, vec2, vsr, size); - - vsr.allocateNew(); - checkCount(vec1, vec2, vsr, 0); - - for (int i = 0; i < size; i++) { - vec1.setSafe(i, i % 2); - vec2.setSafe(i, i); - } - vsr.setRowCount(size); - checkCount(vec1, vec2, vsr, size); - - vsr.clear(); - checkCount(vec1, vec2, vsr, 0); - } - } - - private void 
checkCount(BitVector vec1, IntVector vec2, VectorSchemaRoot vsr, int count) { - assertEquals(vec1.getValueCount(), count); - assertEquals(vec2.getValueCount(), count); - assertEquals(vsr.getRowCount(), count); - } - - private VectorSchemaRoot createBatch() { - FieldType varCharType = new FieldType(true, new ArrowType.Utf8(), /*dictionary=*/ null); - FieldType listType = new FieldType(true, new ArrowType.List(), /*dictionary=*/ null); - - // create the schema - List schemaFields = new ArrayList<>(); - Field childField = new Field("varCharCol", varCharType, null); - List childFields = new ArrayList<>(); - childFields.add(childField); - schemaFields.add(new Field("listCol", listType, childFields)); - Schema schema = new Schema(schemaFields); - - VectorSchemaRoot schemaRoot = VectorSchemaRoot.create(schema, allocator); - // get and allocate the vector - ListVector vector = (ListVector) schemaRoot.getVector("listCol"); - vector.allocateNew(); - - // write data to the vector - UnionListWriter writer = vector.getWriter(); - - writer.setPosition(0); - - // write data vector(0) - writer.startList(); - - // write data vector(0)(0) - writer.list().startList(); - - // According to the schema above, the list element should have varchar type. - // When we write a big int, the original writer cannot handle this, so the writer will - // be promoted, and the vector structure will be different from the schema. 
- writer.list().bigInt().writeBigInt(0); - writer.list().bigInt().writeBigInt(1); - writer.list().endList(); - - // write data vector(0)(1) - writer.list().startList(); - writer.list().float8().writeFloat8(3.0D); - writer.list().float8().writeFloat8(7.0D); - writer.list().endList(); - - // finish data vector(0) - writer.endList(); - - writer.setPosition(1); - - // write data vector(1) - writer.startList(); - - // write data vector(1)(0) - writer.list().startList(); - writer.list().integer().writeInt(3); - writer.list().integer().writeInt(2); - writer.list().endList(); - - // finish data vector(1) - writer.endList(); - - vector.setValueCount(2); - - return schemaRoot; - } - - @Test - public void testAddVector() { - try (final IntVector intVector1 = new IntVector("intVector1", allocator); - final IntVector intVector2 = new IntVector("intVector2", allocator); - final IntVector intVector3 = new IntVector("intVector3", allocator); ) { - - VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector1, intVector2)); - assertEquals(2, original.getFieldVectors().size()); - - VectorSchemaRoot newRecordBatch = original.addVector(1, intVector3); - assertEquals(3, newRecordBatch.getFieldVectors().size()); - assertEquals(intVector3, newRecordBatch.getFieldVectors().get(1)); - - original.close(); - newRecordBatch.close(); - } - } - - @Test - public void testRemoveVector() { - try (final IntVector intVector1 = new IntVector("intVector1", allocator); - final IntVector intVector2 = new IntVector("intVector2", allocator); - final IntVector intVector3 = new IntVector("intVector3", allocator); ) { - - VectorSchemaRoot original = - new VectorSchemaRoot(Arrays.asList(intVector1, intVector2, intVector3)); - assertEquals(3, original.getFieldVectors().size()); - - VectorSchemaRoot newRecordBatch = original.removeVector(0); - assertEquals(2, newRecordBatch.getFieldVectors().size()); - assertEquals(intVector2, newRecordBatch.getFieldVectors().get(0)); - assertEquals(intVector3, 
newRecordBatch.getFieldVectors().get(1)); - - original.close(); - newRecordBatch.close(); - } - } - - @Test - public void testSlice() { - try (final IntVector intVector = new IntVector("intVector", allocator); - final Float4Vector float4Vector = new Float4Vector("float4Vector", allocator)) { - final int numRows = 10; - intVector.setValueCount(numRows); - float4Vector.setValueCount(numRows); - for (int i = 0; i < numRows; i++) { - intVector.setSafe(i, i); - float4Vector.setSafe(i, i + 0.1f); - } - - final VectorSchemaRoot original = - new VectorSchemaRoot(Arrays.asList(intVector, float4Vector)); - - for (int sliceIndex = 0; sliceIndex < numRows; sliceIndex++) { - for (int sliceLength = 0; sliceIndex + sliceLength <= numRows; sliceLength++) { - try (VectorSchemaRoot slice = original.slice(sliceIndex, sliceLength)) { - assertEquals(sliceLength, slice.getRowCount()); - // validate data - final IntVector childIntVector = (IntVector) slice.getFieldVectors().get(0); - final Float4Vector childFloatVector = (Float4Vector) slice.getFieldVectors().get(1); - for (int i = 0; i < sliceLength; i++) { - final int originalIndex = i + sliceIndex; - assertEquals(originalIndex, childIntVector.get(i)); - assertEquals(originalIndex + 0.1f, childFloatVector.get(i), 0); - } - } - } - } - - original.close(); - } - } - - @Test - public void testSliceWithInvalidParam() { - assertThrows( - IllegalArgumentException.class, - () -> { - try (final IntVector intVector = new IntVector("intVector", allocator); - final Float4Vector float4Vector = new Float4Vector("float4Vector", allocator)) { - intVector.setValueCount(10); - float4Vector.setValueCount(10); - for (int i = 0; i < 10; i++) { - intVector.setSafe(i, i); - float4Vector.setSafe(i, i + 0.1f); - } - final VectorSchemaRoot original = - new VectorSchemaRoot(Arrays.asList(intVector, float4Vector)); - - original.slice(0, 20); - } - }); - } - - @Test - public void testEquals() { - try (final IntVector intVector1 = new IntVector("intVector1", 
allocator); - final IntVector intVector2 = new IntVector("intVector2", allocator); - final IntVector intVector3 = new IntVector("intVector3", allocator); ) { - - intVector1.setValueCount(5); - for (int i = 0; i < 5; i++) { - intVector1.set(i, i); - } - - VectorSchemaRoot root1 = - new VectorSchemaRoot(Arrays.asList(intVector1, intVector2, intVector3)); - - VectorSchemaRoot root2 = new VectorSchemaRoot(Arrays.asList(intVector1, intVector2)); - - VectorSchemaRoot root3 = - new VectorSchemaRoot(Arrays.asList(intVector1, intVector2, intVector3)); - - assertFalse(root1.equals(root2)); - assertTrue(root1.equals(root3)); - - root1.close(); - root2.close(); - root3.close(); - } - } - - @Test - public void testApproxEquals() { - try (final Float4Vector float4Vector1 = new Float4Vector("floatVector", allocator); - final Float4Vector float4Vector2 = new Float4Vector("floatVector", allocator); - final Float4Vector float4Vector3 = new Float4Vector("floatVector", allocator); ) { - - float4Vector1.setValueCount(5); - float4Vector2.setValueCount(5); - float4Vector3.setValueCount(5); - final float epsilon = 1.0E-6f; - for (int i = 0; i < 5; i++) { - float4Vector1.set(i, i); - float4Vector2.set(i, i + epsilon * 2); - float4Vector3.set(i, i + epsilon / 2); - } - - VectorSchemaRoot root1 = new VectorSchemaRoot(Arrays.asList(float4Vector1)); - - VectorSchemaRoot root2 = new VectorSchemaRoot(Arrays.asList(float4Vector2)); - - VectorSchemaRoot root3 = new VectorSchemaRoot(Arrays.asList(float4Vector3)); - - assertFalse(root1.approxEquals(root2)); - assertTrue(root1.approxEquals(root3)); - - root1.close(); - root2.close(); - root3.close(); - } - } - - @Test - public void testSchemaSync() { - // create vector schema root - try (VectorSchemaRoot schemaRoot = createBatch()) { - Schema newSchema = - new Schema( - schemaRoot.getFieldVectors().stream() - .map(vec -> vec.getField()) - .collect(Collectors.toList())); - - assertNotEquals(newSchema, schemaRoot.getSchema()); - 
assertTrue(schemaRoot.syncSchema()); - assertEquals(newSchema, schemaRoot.getSchema()); - - // no schema update this time. - assertFalse(schemaRoot.syncSchema()); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java deleted file mode 100644 index 6121fb67fefb7..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java +++ /dev/null @@ -1,350 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector; - -import static java.util.Arrays.asList; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.complex.writer.BigIntWriter; -import org.apache.arrow.vector.complex.writer.IntWriter; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestVectorUnloadLoad { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testNullCodec() { - final Schema schema = new Schema(Collections.emptyList()); - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - root.setRowCount(1); - final VectorUnloader unloader = - new 
VectorUnloader(root, /*includeNulls*/ true, /*codec*/ null, /*alignBuffers*/ true); - unloader.getRecordBatch().close(); - } - } - - @Test - public void testUnloadLoad() throws IOException { - int count = 10000; - Schema schema; - - try (BufferAllocator originalVectorsAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - NonNullableStructVector parent = - NonNullableStructVector.empty("parent", originalVectorsAllocator)) { - - // write some data - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - IntWriter intWriter = rootWriter.integer("int"); - BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); - for (int i = 0; i < count; i++) { - intWriter.setPosition(i); - intWriter.writeInt(i); - bigIntWriter.setPosition(i); - bigIntWriter.writeBigInt(i); - } - writer.setValueCount(count); - - // unload it - FieldVector root = parent.getChild("root"); - schema = new Schema(root.getField().getChildren()); - VectorUnloader vectorUnloader = newVectorUnloader(root); - try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - BufferAllocator finalVectorsAllocator = - allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); - VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator); ) { - - // load it - VectorLoader vectorLoader = new VectorLoader(newRoot); - - vectorLoader.load(recordBatch); - - FieldReader intReader = newRoot.getVector("int").getReader(); - FieldReader bigIntReader = newRoot.getVector("bigInt").getReader(); - for (int i = 0; i < count; i++) { - intReader.setPosition(i); - assertEquals(i, intReader.readInteger().intValue()); - bigIntReader.setPosition(i); - assertEquals(i, bigIntReader.readLong().longValue()); - } - } - } - } - - @Test - public void testUnloadLoadAddPadding() throws IOException { - int count = 10000; - Schema schema; - try (BufferAllocator originalVectorsAllocator = - 
allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - NonNullableStructVector parent = - NonNullableStructVector.empty("parent", originalVectorsAllocator)) { - - // write some data - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - ListWriter list = rootWriter.list("list"); - IntWriter intWriter = list.integer(); - for (int i = 0; i < count; i++) { - list.setPosition(i); - list.startList(); - for (int j = 0; j < i % 4 + 1; j++) { - intWriter.writeInt(i); - } - list.endList(); - } - writer.setValueCount(count); - - // unload it - FieldVector root = parent.getChild("root"); - schema = new Schema(root.getField().getChildren()); - VectorUnloader vectorUnloader = newVectorUnloader(root); - try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - BufferAllocator finalVectorsAllocator = - allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); - VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator); ) { - List oldBuffers = recordBatch.getBuffers(); - List newBuffers = new ArrayList<>(); - for (ArrowBuf oldBuffer : oldBuffers) { - long l = oldBuffer.readableBytes(); - if (l % 64 != 0) { - // pad - l = l + 64 - l % 64; - } - ArrowBuf newBuffer = allocator.buffer(l); - for (long i = oldBuffer.readerIndex(); i < oldBuffer.writerIndex(); i++) { - newBuffer.setByte(i - oldBuffer.readerIndex(), oldBuffer.getByte(i)); - } - newBuffer.readerIndex(0); - newBuffer.writerIndex(l); - newBuffers.add(newBuffer); - } - - try (ArrowRecordBatch newBatch = - new ArrowRecordBatch(recordBatch.getLength(), recordBatch.getNodes(), newBuffers); ) { - // load it - VectorLoader vectorLoader = new VectorLoader(newRoot); - - vectorLoader.load(newBatch); - - FieldReader reader = newRoot.getVector("list").getReader(); - for (int i = 0; i < count; i++) { - reader.setPosition(i); - List expected = new ArrayList<>(); - for (int j = 0; j < i % 4 + 1; j++) { - 
expected.add(i); - } - assertEquals(expected, reader.readObject()); - } - } - - for (ArrowBuf newBuf : newBuffers) { - newBuf.getReferenceManager().release(); - } - } - } - } - - /** - * The validity buffer can be empty if: - all values are defined. - all values are null. - * - * @throws IOException on error - */ - @Test - public void testLoadValidityBuffer() throws IOException { - Schema schema = - new Schema( - asList( - new Field( - "intDefined", - FieldType.nullable(new ArrowType.Int(32, true)), - Collections.emptyList()), - new Field( - "intNull", - FieldType.nullable(new ArrowType.Int(32, true)), - Collections.emptyList()))); - int count = 10; - ArrowBuf[] values = new ArrowBuf[4]; - for (int i = 0; i < 4; i += 2) { - ArrowBuf buf1 = allocator.buffer(BitVectorHelper.getValidityBufferSize(count)); - ArrowBuf buf2 = allocator.buffer(count * 4); // integers - buf1.setZero(0, buf1.capacity()); - buf2.setZero(0, buf2.capacity()); - values[i] = buf1; - values[i + 1] = buf2; - for (int j = 0; j < count; j++) { - if (i == 2) { - BitVectorHelper.unsetBit(buf1, j); - } else { - BitVectorHelper.setBit(buf1, j); - } - - buf2.setInt(j * 4, j); - } - buf1.writerIndex((int) Math.ceil(count / 8)); - buf2.writerIndex(count * 4); - } - - /* - * values[0] - validity buffer for first vector - * values[1] - data buffer for first vector - * values[2] - validity buffer for second vector - * values[3] - data buffer for second vector - */ - - try (ArrowRecordBatch recordBatch = - new ArrowRecordBatch( - count, - asList(new ArrowFieldNode(count, 0), new ArrowFieldNode(count, count)), - asList(values[0], values[1], values[2], values[3])); - BufferAllocator finalVectorsAllocator = - allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); - VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator); ) { - - // load it - VectorLoader vectorLoader = new VectorLoader(newRoot); - - vectorLoader.load(recordBatch); - - IntVector intDefinedVector = (IntVector) 
newRoot.getVector("intDefined"); - IntVector intNullVector = (IntVector) newRoot.getVector("intNull"); - for (int i = 0; i < count; i++) { - assertFalse(intDefinedVector.isNull(i), "#" + i); - assertEquals(i, intDefinedVector.get(i), "#" + i); - assertTrue(intNullVector.isNull(i), "#" + i); - } - intDefinedVector.setSafe(count + 10, 1234); - assertTrue(intDefinedVector.isNull(count + 1)); - // empty slots should still default to unset - intDefinedVector.setSafe(count + 1, 789); - assertFalse(intDefinedVector.isNull(count + 1)); - assertEquals(789, intDefinedVector.get(count + 1)); - assertTrue(intDefinedVector.isNull(count)); - assertTrue(intDefinedVector.isNull(count + 2)); - assertTrue(intDefinedVector.isNull(count + 3)); - assertTrue(intDefinedVector.isNull(count + 4)); - assertTrue(intDefinedVector.isNull(count + 5)); - assertTrue(intDefinedVector.isNull(count + 6)); - assertTrue(intDefinedVector.isNull(count + 7)); - assertTrue(intDefinedVector.isNull(count + 8)); - assertTrue(intDefinedVector.isNull(count + 9)); - assertFalse(intDefinedVector.isNull(count + 10)); - assertEquals(1234, intDefinedVector.get(count + 10)); - } finally { - for (ArrowBuf arrowBuf : values) { - arrowBuf.getReferenceManager().release(); - } - } - } - - @Test - public void testUnloadLoadDuplicates() throws IOException { - int count = 10; - Schema schema = - new Schema( - asList( - new Field( - "duplicate", - FieldType.nullable(new ArrowType.Int(32, true)), - Collections.emptyList()), - new Field( - "duplicate", - FieldType.nullable(new ArrowType.Int(32, true)), - Collections.emptyList()))); - - try (BufferAllocator originalVectorsAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); ) { - List sources = new ArrayList<>(); - for (Field field : schema.getFields()) { - FieldVector vector = field.createVector(originalVectorsAllocator); - vector.allocateNew(); - sources.add(vector); - IntVector intVector = (IntVector) vector; - for (int i = 0; i < count; i++) 
{ - intVector.set(i, i); - } - intVector.setValueCount(count); - } - - try (VectorSchemaRoot root = new VectorSchemaRoot(schema.getFields(), sources, count)) { - VectorUnloader vectorUnloader = new VectorUnloader(root); - try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - BufferAllocator finalVectorsAllocator = - allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); - VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator); ) { - // load it - VectorLoader vectorLoader = new VectorLoader(newRoot); - vectorLoader.load(recordBatch); - - List targets = newRoot.getFieldVectors(); - assertEquals(sources.size(), targets.size()); - for (int k = 0; k < sources.size(); k++) { - IntVector src = (IntVector) sources.get(k); - IntVector tgt = (IntVector) targets.get(k); - assertEquals(src.getValueCount(), tgt.getValueCount()); - for (int i = 0; i < count; i++) { - assertEquals(src.get(i), tgt.get(i)); - } - } - } - } - } - } - - public static VectorUnloader newVectorUnloader(FieldVector root) { - Schema schema = new Schema(root.getField().getChildren()); - int valueCount = root.getValueCount(); - List fields = root.getChildrenFromFields(); - VectorSchemaRoot vsr = new VectorSchemaRoot(schema.getFields(), fields, valueCount); - return new VectorUnloader(vsr); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java deleted file mode 100644 index 08da786eb272c..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java +++ /dev/null @@ -1,1075 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.compare; - -import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.Charset; -import java.util.Arrays; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.ViewVarCharVector; -import org.apache.arrow.vector.ZeroVector; -import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListViewVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.impl.NullableStructWriter; -import 
org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter; -import org.apache.arrow.vector.complex.impl.UnionLargeListViewWriter; -import org.apache.arrow.vector.complex.impl.UnionListViewWriter; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.holders.NullableBigIntHolder; -import org.apache.arrow.vector.holders.NullableFloat4Holder; -import org.apache.arrow.vector.holders.NullableFloat8Holder; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.holders.NullableUInt4Holder; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -public class TestRangeEqualsVisitor { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - private static final Charset utf8Charset = Charset.forName("UTF-8"); - private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset); - private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset); - private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset); - private static final byte[] STR4 = "12345678901234A".getBytes(utf8Charset); - private static final byte[] STR5 = "A2345678901234ABC".getBytes(utf8Charset); - private static final byte[] STR6 = "AB45678901234ABCD".getBytes(utf8Charset); - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testIntVectorEqualsWithNull() { - try (final IntVector vector1 = new IntVector("int", allocator); - final IntVector vector2 = new IntVector("int", allocator)) { - - setVector(vector1, 1, 2); - 
setVector(vector2, 1, null); - - assertFalse(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testEqualsWithTypeChange() { - try (final IntVector vector1 = new IntVector("vector", allocator); - final IntVector vector2 = new IntVector("vector", allocator); - final BigIntVector vector3 = new BigIntVector("vector", allocator)) { - - setVector(vector1, 1, 2); - setVector(vector2, 1, 2); - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - Range range = new Range(0, 0, 2); - assertTrue(vector1.accept(visitor, range)); - // visitor left vector changed, will reset and check type again - assertFalse(vector3.accept(visitor, range)); - } - } - - @Test - public void testBaseFixedWidthVectorRangeEqual() { - try (final IntVector vector1 = new IntVector("int", allocator); - final IntVector vector2 = new IntVector("int", allocator)) { - - setVector(vector1, 1, 2, 3, 4, 5); - setVector(vector2, 11, 2, 3, 4, 55); - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); - } - } - - @Test - public void testBaseVariableVectorRangeEquals() { - try (final VarCharVector vector1 = new VarCharVector("varchar", allocator); - final VarCharVector vector2 = new VarCharVector("varchar", allocator)) { - - setVector(vector1, STR1, STR2, STR3, STR2, STR1); - setVector(vector2, STR1, STR2, STR3, STR2, STR1); - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); - } - } - - @Test - public void testBaseVariableViewVectorRangeEquals() { - try (final ViewVarCharVector vector1 = new ViewVarCharVector("varchar", allocator); - final ViewVarCharVector vector2 = new ViewVarCharVector("varchar", allocator)) { - - setVector(vector1, STR1, STR2, STR4, STR3, STR2, STR5, STR1, STR6, STR1, STR2, STR4); - setVector(vector2, STR1, STR2, STR4, STR3, STR2, STR5, STR1, STR6, STR1, STR2, STR4); - - 
RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - // inclusion of long string in the middle - assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); - assertFalse(visitor.rangeEquals(new Range(0, 1, 4))); - // inclusion of long string at the start - assertTrue(visitor.rangeEquals(new Range(2, 2, 4))); - assertFalse(visitor.rangeEquals(new Range(2, 5, 4))); - // inclusion of long string at the end - assertTrue(visitor.rangeEquals(new Range(4, 4, 4))); - // unequal range - assertTrue(visitor.rangeEquals(new Range(8, 0, 3))); - assertFalse(visitor.rangeEquals(new Range(4, 5, 3))); - - // checking the same ranges when nulls are set - - vector1.setNull(1); - vector2.setNull(1); - - vector1.setNull(3); - vector2.setNull(3); - - vector1.setNull(5); - vector2.setNull(5); - - vector1.setNull(9); - vector2.setNull(9); - - // inclusion of long string in the middle - assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); - assertFalse(visitor.rangeEquals(new Range(0, 1, 4))); - // inclusion of long string at the start - assertTrue(visitor.rangeEquals(new Range(2, 2, 4))); - assertFalse(visitor.rangeEquals(new Range(2, 5, 4))); - // inclusion of long string at the end - assertTrue(visitor.rangeEquals(new Range(4, 4, 4))); - // unequal range - assertTrue(visitor.rangeEquals(new Range(8, 0, 3))); - assertFalse(visitor.rangeEquals(new Range(4, 5, 3))); - } - } - - @Test - public void testListVectorWithDifferentChild() { - try (final ListVector vector1 = ListVector.empty("list", allocator); - final ListVector vector2 = ListVector.empty("list", allocator); ) { - - vector1.allocateNew(); - vector1.initializeChildrenFromFields( - Arrays.asList(Field.nullable("child", new ArrowType.Int(32, true)))); - - vector2.allocateNew(); - vector2.initializeChildrenFromFields( - Arrays.asList(Field.nullable("child", new ArrowType.Int(64, true)))); - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - assertFalse(visitor.rangeEquals(new Range(0, 0, 0))); - 
} - } - - @Test - public void testListViewVectorWithDifferentChild() { - try (final ListViewVector vector1 = ListViewVector.empty("listview", allocator); - final ListViewVector vector2 = ListViewVector.empty("listview", allocator); ) { - - vector1.allocateNew(); - vector1.initializeChildrenFromFields( - Arrays.asList(Field.nullable("child", new ArrowType.Int(32, true)))); - - vector2.allocateNew(); - vector2.initializeChildrenFromFields( - Arrays.asList(Field.nullable("child", new ArrowType.Int(64, true)))); - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - assertFalse(visitor.rangeEquals(new Range(0, 0, 0))); - } - } - - @Test - public void testLargeListViewVectorWithDifferentChild() { - try (final LargeListViewVector vector1 = LargeListViewVector.empty("largelistview", allocator); - final LargeListViewVector vector2 = - LargeListViewVector.empty("largelistview", allocator); ) { - - vector1.allocateNew(); - vector1.initializeChildrenFromFields( - Arrays.asList(Field.nullable("child", new ArrowType.Int(32, true)))); - - vector2.allocateNew(); - vector2.initializeChildrenFromFields( - Arrays.asList(Field.nullable("child", new ArrowType.Int(64, true)))); - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - assertFalse(visitor.rangeEquals(new Range(0, 0, 0))); - } - } - - @Test - public void testListVectorRangeEquals() { - try (final ListVector vector1 = ListVector.empty("list", allocator); - final ListVector vector2 = ListVector.empty("list", allocator); ) { - - UnionListWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - // set some values - writeListVector(writer1, new int[] {1, 2}); - writeListVector(writer1, new int[] {3, 4}); - writeListVector(writer1, new int[] {5, 6}); - writeListVector(writer1, new int[] {7, 8}); - writeListVector(writer1, new int[] {9, 10}); - writer1.setValueCount(5); - - UnionListWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - // set some values - 
writeListVector(writer2, new int[] {0, 0}); - writeListVector(writer2, new int[] {3, 4}); - writeListVector(writer2, new int[] {5, 6}); - writeListVector(writer2, new int[] {7, 8}); - writeListVector(writer2, new int[] {0, 0}); - writer2.setValueCount(5); - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); - } - } - - @Test - public void testListViewVectorRangeEquals() { - try (final ListViewVector vector1 = ListViewVector.empty("listview", allocator); - final ListViewVector vector2 = ListViewVector.empty("listview", allocator); ) { - - UnionListViewWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - // set some values - writeListViewVector(writer1, new int[] {1, 2}); - writeListViewVector(writer1, new int[] {3, 4}); - writeListViewVector(writer1, new int[] {5, 6}); - writeListViewVector(writer1, new int[] {7, 8}); - writeListViewVector(writer1, new int[] {9, 10}); - writer1.setValueCount(5); - - UnionListViewWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - // set some values - writeListViewVector(writer2, new int[] {0, 0}); - writeListViewVector(writer2, new int[] {3, 4}); - writeListViewVector(writer2, new int[] {5, 6}); - writeListViewVector(writer2, new int[] {7, 8}); - writeListViewVector(writer2, new int[] {0, 0}); - writer2.setValueCount(5); - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); - } - } - - @Test - public void testLargeListViewVectorRangeEquals() { - try (final LargeListViewVector vector1 = LargeListViewVector.empty("largelistview", allocator); - final LargeListViewVector vector2 = - LargeListViewVector.empty("largelistview", allocator); ) { - - UnionLargeListViewWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - // set some values - writeLargeListViewVector(writer1, new int[] {1, 2}); - writeLargeListViewVector(writer1, new int[] {3, 4}); - 
writeLargeListViewVector(writer1, new int[] {5, 6}); - writeLargeListViewVector(writer1, new int[] {7, 8}); - writeLargeListViewVector(writer1, new int[] {9, 10}); - writer1.setValueCount(5); - - UnionLargeListViewWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - // set some values - writeLargeListViewVector(writer2, new int[] {0, 0}); - writeLargeListViewVector(writer2, new int[] {3, 4}); - writeLargeListViewVector(writer2, new int[] {5, 6}); - writeLargeListViewVector(writer2, new int[] {7, 8}); - writeLargeListViewVector(writer2, new int[] {0, 0}); - writer2.setValueCount(5); - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); - } - } - - @Test - public void testBitVectorRangeEquals() { - try (final BitVector vector1 = new BitVector("v1", allocator); - final BitVector vector2 = new BitVector("v2", allocator); ) { - - boolean[] v1 = new boolean[] {true, false, true, true, true}; - boolean[] v2 = new boolean[] {false, true, true, true, false}; - vector1.setValueCount(5); - for (int i = 0; i < 5; i++) { - vector1.set(i, v1[i] ? 1 : 0); - } - vector2.setValueCount(5); - for (int i = 0; i < 5; i++) { - vector2.set(i, v2[i] ? 
1 : 0); - } - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - assertTrue(visitor.compareBaseFixedWidthVectors(new Range(1, 0, 4))); - assertFalse(visitor.compareBaseFixedWidthVectors(new Range(0, 0, 5))); - } - } - - @Test - public void testFixedSizeListVectorRangeEquals() { - try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("list", 2, allocator); - final FixedSizeListVector vector2 = FixedSizeListVector.empty("list", 2, allocator); ) { - - UnionFixedSizeListWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - // set some values - writeFixedSizeListVector(writer1, new int[] {1, 2}); - writeFixedSizeListVector(writer1, new int[] {3, 4}); - writeFixedSizeListVector(writer1, new int[] {5, 6}); - writeFixedSizeListVector(writer1, new int[] {7, 8}); - writeFixedSizeListVector(writer1, new int[] {9, 10}); - writer1.setValueCount(5); - - UnionFixedSizeListWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - // set some values - writeFixedSizeListVector(writer2, new int[] {0, 0}); - writeFixedSizeListVector(writer2, new int[] {3, 4}); - writeFixedSizeListVector(writer2, new int[] {5, 6}); - writeFixedSizeListVector(writer2, new int[] {7, 8}); - writeFixedSizeListVector(writer2, new int[] {0, 0}); - writer2.setValueCount(5); - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); - assertFalse(visitor.rangeEquals(new Range(0, 0, 5))); - } - } - - @Test - public void testLargeVariableWidthVectorRangeEquals() { - try (final LargeVarCharVector vector1 = new LargeVarCharVector("vector1", allocator); - final LargeVarCharVector vector2 = new LargeVarCharVector("vector2", allocator)) { - setVector(vector1, "aaa", "bbb", "ccc", null, "ddd"); - setVector(vector2, "ccc", "aaa", "bbb", null, "ddd"); - - RangeEqualsVisitor visitor = - new RangeEqualsVisitor( - vector1, - vector2, - (v1, v2) -> - new TypeEqualsVisitor(v2, /*check name*/ false, 
/*check metadata*/ false) - .equals(v1)); - - assertFalse( - visitor.rangeEquals(new Range(/*left start*/ 0, /*right start*/ 0, /*length*/ 1))); - assertTrue(visitor.rangeEquals(new Range(/*left start*/ 0, /*right start*/ 1, /*length*/ 1))); - assertFalse( - visitor.rangeEquals(new Range(/*left start*/ 0, /*right start*/ 0, /*length*/ 3))); - assertTrue(visitor.rangeEquals(new Range(/*left start*/ 0, /*right start*/ 1, /*length*/ 2))); - assertTrue(visitor.rangeEquals(new Range(/*left start*/ 3, /*right start*/ 3, /*length*/ 1))); - assertTrue(visitor.rangeEquals(new Range(/*left start*/ 3, /*right start*/ 3, /*length*/ 2))); - assertFalse( - visitor.rangeEquals(new Range(/*left start*/ 2, /*right start*/ 2, /*length*/ 2))); - } - } - - @Test - public void testStructVectorRangeEquals() { - try (final StructVector vector1 = StructVector.empty("struct", allocator); - final StructVector vector2 = StructVector.empty("struct", allocator); ) { - vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - - NullableStructWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - writeStructVector(writer1, 0, 0L); - writeStructVector(writer1, 1, 10L); - writeStructVector(writer1, 2, 20L); - writeStructVector(writer1, 3, 30L); - writeStructVector(writer1, 4, 40L); - writeStructVector(writer1, 5, 50L); - writer1.setValueCount(6); - - NullableStructWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - writeStructVector(writer2, 0, 0L); - writeStructVector(writer2, 2, 20L); - writeStructVector(writer2, 3, 30L); - writeStructVector(writer2, 4, 40L); - writeStructVector(writer2, 0, 0L); - writer2.setValueCount(5); - - RangeEqualsVisitor visitor = new 
RangeEqualsVisitor(vector1, vector2); - assertTrue(visitor.rangeEquals(new Range(2, 1, 3))); - - // different nullability but same values - vector1.setNull(3); - assertFalse(visitor.rangeEquals(new Range(2, 1, 3))); - // both null and same values - vector2.setNull(2); - assertTrue(visitor.rangeEquals(new Range(2, 1, 3))); - // both not null but different values - assertFalse(visitor.rangeEquals(new Range(2, 1, 4))); - // both null but different values - vector1.setNull(5); - vector2.setNull(4); - assertTrue(visitor.rangeEquals(new Range(2, 1, 4))); - } - } - - @Test - public void testUnionVectorRangeEquals() { - try (final UnionVector vector1 = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null); - final UnionVector vector2 = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null); ) { - - final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); - uInt4Holder.value = 10; - uInt4Holder.isSet = 1; - - final NullableIntHolder intHolder = new NullableIntHolder(); - uInt4Holder.value = 20; - uInt4Holder.isSet = 1; - - vector1.setType(0, Types.MinorType.UINT4); - vector1.setSafe(0, uInt4Holder); - - vector1.setType(1, Types.MinorType.INT); - vector1.setSafe(1, intHolder); - - vector1.setType(2, Types.MinorType.INT); - vector1.setSafe(2, intHolder); - vector1.setValueCount(3); - - vector2.setType(0, Types.MinorType.UINT4); - vector2.setSafe(0, uInt4Holder); - - vector2.setType(1, Types.MinorType.INT); - vector2.setSafe(1, intHolder); - - vector2.setType(2, Types.MinorType.INT); - vector2.setSafe(2, intHolder); - vector2.setValueCount(3); - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - assertTrue(visitor.rangeEquals(new Range(1, 1, 2))); - } - } - - /** - * Test comparing two union vectors. The two vectors are different in total, but have a range with - * equal values. 
- */ - @Test - public void testUnionVectorSubRangeEquals() { - try (final UnionVector vector1 = new UnionVector("union", allocator, null, null); - final UnionVector vector2 = new UnionVector("union", allocator, null, null); ) { - - final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); - uInt4Holder.value = 10; - uInt4Holder.isSet = 1; - - final NullableIntHolder intHolder = new NullableIntHolder(); - intHolder.value = 20; - intHolder.isSet = 1; - - vector1.setType(0, Types.MinorType.UINT4); - vector1.setSafe(0, uInt4Holder); - - vector1.setType(1, Types.MinorType.INT); - vector1.setSafe(1, intHolder); - - vector1.setType(2, Types.MinorType.INT); - vector1.setSafe(2, intHolder); - - vector1.setType(3, Types.MinorType.INT); - vector1.setSafe(3, intHolder); - - vector1.setValueCount(4); - - vector2.setType(0, Types.MinorType.UINT4); - vector2.setSafe(0, uInt4Holder); - - vector2.setType(1, Types.MinorType.INT); - vector2.setSafe(1, intHolder); - - vector2.setType(2, Types.MinorType.INT); - vector2.setSafe(2, intHolder); - - vector2.setType(3, Types.MinorType.UINT4); - vector2.setSafe(3, uInt4Holder); - - vector2.setValueCount(4); - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); - assertFalse(visitor.rangeEquals(new Range(0, 0, 4))); - assertTrue(visitor.rangeEquals(new Range(1, 1, 2))); - } - } - - @Test - public void testDenseUnionVectorEquals() { - final NullableIntHolder intHolder = new NullableIntHolder(); - intHolder.isSet = 1; - intHolder.value = 100; - - final NullableBigIntHolder bigIntHolder = new NullableBigIntHolder(); - bigIntHolder.isSet = 1; - bigIntHolder.value = 200L; - - final NullableFloat4Holder float4Holder = new NullableFloat4Holder(); - float4Holder.isSet = 1; - float4Holder.value = 400F; - - final NullableFloat8Holder float8Holder = new NullableFloat8Holder(); - float8Holder.isSet = 1; - float8Holder.value = 800D; - - try (DenseUnionVector vector1 = new DenseUnionVector("vector1", allocator, null, null); - 
DenseUnionVector vector2 = new DenseUnionVector("vector2", allocator, null, null)) { - vector1.allocateNew(); - vector2.allocateNew(); - - // populate vector1: {100, 200L, null, 400F, 800D} - byte intTypeId = - vector1.registerNewTypeId(Field.nullable("int", Types.MinorType.INT.getType())); - byte longTypeId = - vector1.registerNewTypeId(Field.nullable("long", Types.MinorType.BIGINT.getType())); - byte floatTypeId = - vector1.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType())); - byte doubleTypeId = - vector1.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType())); - - vector1.setTypeId(0, intTypeId); - vector1.setSafe(0, intHolder); - - vector1.setTypeId(1, longTypeId); - vector1.setSafe(1, bigIntHolder); - - vector1.setTypeId(3, floatTypeId); - vector1.setSafe(3, float4Holder); - - vector1.setTypeId(4, doubleTypeId); - vector1.setSafe(4, float8Holder); - - vector1.setValueCount(5); - - // populate vector2: {400F, null, 200L, null, 400F, 800D, 100} - intTypeId = vector2.registerNewTypeId(Field.nullable("int", Types.MinorType.INT.getType())); - longTypeId = - vector2.registerNewTypeId(Field.nullable("long", Types.MinorType.BIGINT.getType())); - floatTypeId = - vector2.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType())); - doubleTypeId = - vector2.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType())); - - vector2.setTypeId(0, floatTypeId); - vector2.setSafe(0, float4Holder); - - vector2.setTypeId(2, longTypeId); - vector2.setSafe(2, bigIntHolder); - - vector2.setTypeId(4, floatTypeId); - vector2.setSafe(4, float4Holder); - - vector2.setTypeId(5, doubleTypeId); - vector2.setSafe(5, float8Holder); - - vector2.setTypeId(6, intTypeId); - vector2.setSafe(6, intHolder); - - vector2.setValueCount(7); - - // compare ranges - TypeEqualsVisitor typeVisitor = - new TypeEqualsVisitor(vector2, /* check name */ false, /* check meta data */ true); - RangeEqualsVisitor equalsVisitor = - 
new RangeEqualsVisitor(vector1, vector2, (left, right) -> typeVisitor.equals(left)); - - // different ranges {100, 200L} != {400F, null} - assertFalse(equalsVisitor.rangeEquals(new Range(0, 0, 2))); - - // different ranges without null {100, 200L} != {400F, null} - assertFalse(equalsVisitor.rangeEquals(new Range(3, 5, 2))); - - // equal ranges {200L, null, 400F, 800D} - assertTrue(equalsVisitor.rangeEquals(new Range(1, 2, 4))); - - // equal ranges without null {400F, 800D} - assertTrue(equalsVisitor.rangeEquals(new Range(3, 4, 2))); - - // equal ranges with only null {null} - assertTrue(equalsVisitor.rangeEquals(new Range(2, 3, 1))); - - // equal ranges with single element {100} - assertTrue(equalsVisitor.rangeEquals(new Range(0, 6, 1))); - - // different ranges with single element {100} != {200L} - assertFalse(equalsVisitor.rangeEquals(new Range(0, 2, 1))); - } - } - - @Disabled - @Test - public void testEqualsWithOutTypeCheck() { - try (final IntVector intVector = new IntVector("int", allocator); - final ZeroVector zeroVector = new ZeroVector("zero")) { - - assertTrue(VectorEqualsVisitor.vectorEquals(intVector, zeroVector, null)); - assertTrue(VectorEqualsVisitor.vectorEquals(zeroVector, intVector, null)); - } - } - - @Test - public void testFloat4ApproxEquals() { - try (final Float4Vector vector1 = new Float4Vector("float", allocator); - final Float4Vector vector2 = new Float4Vector("float", allocator); - final Float4Vector vector3 = new Float4Vector("float", allocator)) { - - final float epsilon = 1.0E-6f; - setVector(vector1, 1.1f, 2.2f); - setVector(vector2, 1.1f + epsilon / 2, 2.2f + epsilon / 2); - setVector(vector3, 1.1f + epsilon * 2, 2.2f + epsilon * 2); - - Range range = new Range(0, 0, vector1.getValueCount()); - - ApproxEqualsVisitor visitor12 = new ApproxEqualsVisitor(vector1, vector2, epsilon, epsilon); - assertTrue(visitor12.rangeEquals(range)); - - ApproxEqualsVisitor visitor13 = new ApproxEqualsVisitor(vector1, vector3, epsilon, epsilon); - 
assertFalse(visitor13.rangeEquals(range)); - } - } - - @Test - public void testFloat8ApproxEquals() { - try (final Float8Vector vector1 = new Float8Vector("float", allocator); - final Float8Vector vector2 = new Float8Vector("float", allocator); - final Float8Vector vector3 = new Float8Vector("float", allocator)) { - - final float epsilon = 1.0E-6f; - setVector(vector1, 1.1, 2.2); - setVector(vector2, 1.1 + epsilon / 2, 2.2 + epsilon / 2); - setVector(vector3, 1.1 + epsilon * 2, 2.2 + epsilon * 2); - - Range range = new Range(0, 0, vector1.getValueCount()); - assertTrue(new ApproxEqualsVisitor(vector1, vector2, epsilon, epsilon).rangeEquals(range)); - assertFalse(new ApproxEqualsVisitor(vector1, vector3, epsilon, epsilon).rangeEquals(range)); - } - } - - @Test - public void testStructVectorApproxEquals() { - try (final StructVector right = StructVector.empty("struct", allocator); - final StructVector left1 = StructVector.empty("struct", allocator); - final StructVector left2 = StructVector.empty("struct", allocator)) { - right.addOrGet( - "f0", - FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), - Float4Vector.class); - right.addOrGet( - "f1", - FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Float8Vector.class); - left1.addOrGet( - "f0", - FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), - Float4Vector.class); - left1.addOrGet( - "f1", - FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Float8Vector.class); - left2.addOrGet( - "f0", - FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), - Float4Vector.class); - left2.addOrGet( - "f1", - FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), - Float8Vector.class); - - final float epsilon = 1.0E-6f; - - NullableStructWriter rightWriter = right.getWriter(); - rightWriter.allocate(); - writeStructVector(rightWriter, 1.1f, 2.2); - 
writeStructVector(rightWriter, 2.02f, 4.04); - rightWriter.setValueCount(2); - - NullableStructWriter leftWriter1 = left1.getWriter(); - leftWriter1.allocate(); - writeStructVector(leftWriter1, 1.1f + epsilon / 2, 2.2 + epsilon / 2); - writeStructVector(leftWriter1, 2.02f - epsilon / 2, 4.04 - epsilon / 2); - leftWriter1.setValueCount(2); - - NullableStructWriter leftWriter2 = left2.getWriter(); - leftWriter2.allocate(); - writeStructVector(leftWriter2, 1.1f + epsilon * 2, 2.2 + epsilon * 2); - writeStructVector(leftWriter2, 2.02f - epsilon * 2, 4.04 - epsilon * 2); - leftWriter2.setValueCount(2); - - Range range = new Range(0, 0, right.getValueCount()); - assertTrue(new ApproxEqualsVisitor(left1, right, epsilon, epsilon).rangeEquals(range)); - assertFalse(new ApproxEqualsVisitor(left2, right, epsilon, epsilon).rangeEquals(range)); - } - } - - @Test - public void testUnionVectorApproxEquals() { - try (final UnionVector right = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null); - final UnionVector left1 = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null); - final UnionVector left2 = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null); ) { - - final NullableFloat4Holder float4Holder = new NullableFloat4Holder(); - float4Holder.value = 1.01f; - float4Holder.isSet = 1; - - final NullableFloat8Holder float8Holder = new NullableFloat8Holder(); - float8Holder.value = 2.02f; - float8Holder.isSet = 1; - - final float epsilon = 1.0E-6f; - - right.setType(0, Types.MinorType.FLOAT4); - right.setSafe(0, float4Holder); - right.setType(1, Types.MinorType.FLOAT8); - right.setSafe(1, float8Holder); - right.setValueCount(2); - - float4Holder.value += epsilon / 2; - float8Holder.value += epsilon / 2; - - left1.setType(0, Types.MinorType.FLOAT4); - left1.setSafe(0, float4Holder); - left1.setType(1, Types.MinorType.FLOAT8); - left1.setSafe(1, float8Holder); - left1.setValueCount(2); - - 
float4Holder.value += epsilon * 2; - float8Holder.value += epsilon * 2; - - left2.setType(0, Types.MinorType.FLOAT4); - left2.setSafe(0, float4Holder); - left2.setType(1, Types.MinorType.FLOAT8); - left2.setSafe(1, float8Holder); - left2.setValueCount(2); - - Range range = new Range(0, 0, right.getValueCount()); - assertTrue(new ApproxEqualsVisitor(left1, right, epsilon, epsilon).rangeEquals(range)); - assertFalse(new ApproxEqualsVisitor(left2, right, epsilon, epsilon).rangeEquals(range)); - } - } - - @Test - public void testDenseUnionVectorApproxEquals() { - final NullableFloat4Holder float4Holder = new NullableFloat4Holder(); - float4Holder.isSet = 1; - - final NullableFloat8Holder float8Holder = new NullableFloat8Holder(); - float8Holder.isSet = 1; - - final float floatEpsilon = 0.02F; - final double doubleEpsilon = 0.02; - - try (final DenseUnionVector vector1 = new DenseUnionVector("vector1", allocator, null, null); - final DenseUnionVector vector2 = new DenseUnionVector("vector2", allocator, null, null); - final DenseUnionVector vector3 = new DenseUnionVector("vector2", allocator, null, null)) { - - vector1.allocateNew(); - vector2.allocateNew(); - vector3.allocateNew(); - - // populate vector1: {1.0f, 2.0D} - byte floatTypeId = - vector1.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType())); - byte doubleTypeId = - vector1.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType())); - - float4Holder.value = 1.0f; - vector1.setTypeId(0, floatTypeId); - vector1.setSafe(0, float4Holder); - float8Holder.value = 2.0D; - vector1.setTypeId(1, doubleTypeId); - vector1.setSafe(1, float8Holder); - vector1.setValueCount(2); - - // populate vector2: {1.01f, 2.01D} - floatTypeId = - vector2.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType())); - doubleTypeId = - vector2.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType())); - - float4Holder.value = 1.01f; - vector2.setTypeId(0, 
floatTypeId); - vector2.setSafe(0, float4Holder); - float8Holder.value = 2.01D; - vector2.setTypeId(1, doubleTypeId); - vector2.setSafe(1, float8Holder); - vector2.setValueCount(2); - - // populate vector3: {1.05f, 2.05D} - floatTypeId = - vector3.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType())); - doubleTypeId = - vector3.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType())); - - float4Holder.value = 1.05f; - vector3.setTypeId(0, floatTypeId); - vector3.setSafe(0, float4Holder); - float8Holder.value = 2.05D; - vector3.setTypeId(1, doubleTypeId); - vector3.setSafe(1, float8Holder); - vector3.setValueCount(2); - - // verify comparison results - Range range = new Range(0, 0, 2); - - // compare vector1 and vector2 - ApproxEqualsVisitor approxEqualsVisitor = - new ApproxEqualsVisitor( - vector1, - vector2, - new ValueEpsilonEqualizers.Float4EpsilonEqualizer(floatEpsilon), - new ValueEpsilonEqualizers.Float8EpsilonEqualizer(doubleEpsilon), - (v1, v2) -> - new TypeEqualsVisitor(v2, /* check name */ false, /* check meta */ true) - .equals(v1)); - assertTrue(approxEqualsVisitor.rangeEquals(range)); - - // compare vector1 and vector3 - approxEqualsVisitor = - new ApproxEqualsVisitor( - vector1, - vector3, - new ValueEpsilonEqualizers.Float4EpsilonEqualizer(floatEpsilon), - new ValueEpsilonEqualizers.Float8EpsilonEqualizer(doubleEpsilon), - (v1, v2) -> - new TypeEqualsVisitor(v2, /* check name */ false, /* check meta */ true) - .equals(v1)); - assertFalse(approxEqualsVisitor.rangeEquals(range)); - } - } - - @Test - public void testListVectorApproxEquals() { - try (final ListVector right = ListVector.empty("list", allocator); - final ListVector left1 = ListVector.empty("list", allocator); - final ListVector left2 = ListVector.empty("list", allocator); ) { - - final float epsilon = 1.0E-6f; - - UnionListWriter rightWriter = right.getWriter(); - rightWriter.allocate(); - writeListVector(rightWriter, new double[] {1, 2}); - 
writeListVector(rightWriter, new double[] {1.01, 2.02}); - rightWriter.setValueCount(2); - - UnionListWriter leftWriter1 = left1.getWriter(); - leftWriter1.allocate(); - writeListVector(leftWriter1, new double[] {1, 2}); - writeListVector(leftWriter1, new double[] {1.01 + epsilon / 2, 2.02 - epsilon / 2}); - leftWriter1.setValueCount(2); - - UnionListWriter leftWriter2 = left2.getWriter(); - leftWriter2.allocate(); - writeListVector(leftWriter2, new double[] {1, 2}); - writeListVector(leftWriter2, new double[] {1.01 + epsilon * 2, 2.02 - epsilon * 2}); - leftWriter2.setValueCount(2); - - Range range = new Range(0, 0, right.getValueCount()); - assertTrue(new ApproxEqualsVisitor(left1, right, epsilon, epsilon).rangeEquals(range)); - assertFalse(new ApproxEqualsVisitor(left2, right, epsilon, epsilon).rangeEquals(range)); - } - } - - @Test - public void testListViewVectorApproxEquals() { - try (final ListViewVector right = ListViewVector.empty("listview", allocator); - final ListViewVector left1 = ListViewVector.empty("listview", allocator); - final ListViewVector left2 = ListViewVector.empty("listview", allocator); ) { - - final float epsilon = 1.0E-6f; - - UnionListViewWriter rightWriter = right.getWriter(); - rightWriter.allocate(); - writeListViewVector(rightWriter, new double[] {1, 2}); - writeListViewVector(rightWriter, new double[] {1.01, 2.02}); - rightWriter.setValueCount(2); - - UnionListViewWriter leftWriter1 = left1.getWriter(); - leftWriter1.allocate(); - writeListViewVector(leftWriter1, new double[] {1, 2}); - writeListViewVector(leftWriter1, new double[] {1.01 + epsilon / 2, 2.02 - epsilon / 2}); - leftWriter1.setValueCount(2); - - UnionListViewWriter leftWriter2 = left2.getWriter(); - leftWriter2.allocate(); - writeListViewVector(leftWriter2, new double[] {1, 2}); - writeListViewVector(leftWriter2, new double[] {1.01 + epsilon * 2, 2.02 - epsilon * 2}); - leftWriter2.setValueCount(2); - - Range range = new Range(0, 0, right.getValueCount()); - 
assertTrue(new ApproxEqualsVisitor(left1, right, epsilon, epsilon).rangeEquals(range)); - assertFalse(new ApproxEqualsVisitor(left2, right, epsilon, epsilon).rangeEquals(range)); - } - } - - @Test - public void testLargeListViewVectorApproxEquals() { - try (final LargeListViewVector right = LargeListViewVector.empty("largelistview", allocator); - final LargeListViewVector left1 = LargeListViewVector.empty("largelistview", allocator); - final LargeListViewVector left2 = LargeListViewVector.empty("largelistview", allocator); ) { - - final float epsilon = 1.0E-6f; - - UnionLargeListViewWriter rightWriter = right.getWriter(); - rightWriter.allocate(); - writeLargeListViewVector(rightWriter, new double[] {1, 2}); - writeLargeListViewVector(rightWriter, new double[] {1.01, 2.02}); - rightWriter.setValueCount(2); - - UnionLargeListViewWriter leftWriter1 = left1.getWriter(); - leftWriter1.allocate(); - writeLargeListViewVector(leftWriter1, new double[] {1, 2}); - writeLargeListViewVector(leftWriter1, new double[] {1.01 + epsilon / 2, 2.02 - epsilon / 2}); - leftWriter1.setValueCount(2); - - UnionLargeListViewWriter leftWriter2 = left2.getWriter(); - leftWriter2.allocate(); - writeLargeListViewVector(leftWriter2, new double[] {1, 2}); - writeLargeListViewVector(leftWriter2, new double[] {1.01 + epsilon * 2, 2.02 - epsilon * 2}); - leftWriter2.setValueCount(2); - - Range range = new Range(0, 0, right.getValueCount()); - assertTrue(new ApproxEqualsVisitor(left1, right, epsilon, epsilon).rangeEquals(range)); - assertFalse(new ApproxEqualsVisitor(left2, right, epsilon, epsilon).rangeEquals(range)); - } - } - - private void writeStructVector(NullableStructWriter writer, int value1, long value2) { - writer.start(); - writer.integer("f0").writeInt(value1); - writer.bigInt("f1").writeBigInt(value2); - writer.end(); - } - - private void writeStructVector(NullableStructWriter writer, float value1, double value2) { - writer.start(); - writer.float4("f0").writeFloat4(value1); - 
writer.float8("f1").writeFloat8(value2); - writer.end(); - } - - private void writeListVector(UnionListWriter writer, int[] values) { - writer.startList(); - for (int v : values) { - writer.integer().writeInt(v); - } - writer.endList(); - } - - private void writeListViewVector(UnionListViewWriter writer, int[] values) { - writer.startListView(); - for (int v : values) { - writer.integer().writeInt(v); - } - writer.endListView(); - } - - private void writeLargeListViewVector(UnionLargeListViewWriter writer, int[] values) { - writer.startListView(); - for (int v : values) { - writer.integer().writeInt(v); - } - writer.endListView(); - } - - private void writeFixedSizeListVector(UnionFixedSizeListWriter writer, int[] values) { - writer.startList(); - for (int v : values) { - writer.integer().writeInt(v); - } - writer.endList(); - } - - private void writeListVector(UnionListWriter writer, double[] values) { - writer.startList(); - for (double v : values) { - writer.float8().writeFloat8(v); - } - writer.endList(); - } - - private void writeListViewVector(UnionListViewWriter writer, double[] values) { - writer.startListView(); - for (double v : values) { - writer.float8().writeFloat8(v); - } - writer.endListView(); - } - - private void writeLargeListViewVector(UnionLargeListViewWriter writer, double[] values) { - writer.startListView(); - for (double v : values) { - writer.float8().writeFloat8(v); - } - writer.endListView(); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java deleted file mode 100644 index ce029493473bb..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.compare; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.Map; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.ViewVarBinaryVector; -import org.apache.arrow.vector.ViewVarCharVector; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.LargeListViewVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.AfterEach; -import 
org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestTypeEqualsVisitor { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testTypeEqualsWithName() { - try (final IntVector right = new IntVector("int", allocator); - final IntVector left1 = new IntVector("int", allocator); - final IntVector left2 = new IntVector("int2", allocator)) { - - TypeEqualsVisitor visitor = new TypeEqualsVisitor(right); - assertTrue(visitor.equals(left1)); - assertFalse(visitor.equals(left2)); - } - } - - @Test - public void testTypeEqualsWithMetadata() { - Map metadata = new HashMap<>(); - metadata.put("key1", "value1"); - FieldType typeWithoutMeta = new FieldType(true, new ArrowType.Int(32, true), null, null); - FieldType typeWithMeta = new FieldType(true, new ArrowType.Int(32, true), null, metadata); - - try (IntVector right = - (IntVector) typeWithoutMeta.createNewSingleVector("int", allocator, null); - IntVector left1 = - (IntVector) typeWithoutMeta.createNewSingleVector("int", allocator, null); - IntVector left2 = (IntVector) typeWithMeta.createNewSingleVector("int", allocator, null)) { - - TypeEqualsVisitor visitor = new TypeEqualsVisitor(right); - assertTrue(visitor.equals(left1)); - assertFalse(visitor.equals(left2)); - } - } - - @Test - public void testListTypeEquals() { - try (final ListVector right = ListVector.empty("list", allocator); - final ListVector left1 = ListVector.empty("list", allocator); - final ListVector left2 = ListVector.empty("list", allocator)) { - - right.addOrGetVector(FieldType.nullable(new ArrowType.Utf8())); - left1.addOrGetVector(FieldType.nullable(new ArrowType.Utf8())); - left2.addOrGetVector(FieldType.nullable(new ArrowType.FixedSizeBinary(2))); - - TypeEqualsVisitor visitor = new TypeEqualsVisitor(right); - 
assertTrue(visitor.equals(left1)); - assertFalse(visitor.equals(left2)); - } - } - - @Test - public void testListViewTypeEquals() { - try (final ListViewVector right = ListViewVector.empty("listview", allocator); - final ListViewVector left1 = ListViewVector.empty("listview", allocator); - final ListViewVector left2 = ListViewVector.empty("listview", allocator)) { - - right.addOrGetVector(FieldType.nullable(new ArrowType.Utf8())); - left1.addOrGetVector(FieldType.nullable(new ArrowType.Utf8())); - left2.addOrGetVector(FieldType.nullable(new ArrowType.FixedSizeBinary(2))); - - TypeEqualsVisitor visitor = new TypeEqualsVisitor(right); - assertTrue(visitor.equals(left1)); - assertFalse(visitor.equals(left2)); - } - } - - @Test - public void testLargeListViewTypeEquals() { - try (final LargeListViewVector right = LargeListViewVector.empty("largelistview", allocator); - final LargeListViewVector left1 = LargeListViewVector.empty("largelistview", allocator); - final LargeListViewVector left2 = LargeListViewVector.empty("largelistview", allocator)) { - - right.addOrGetVector(FieldType.nullable(new ArrowType.Utf8())); - left1.addOrGetVector(FieldType.nullable(new ArrowType.Utf8())); - left2.addOrGetVector(FieldType.nullable(new ArrowType.FixedSizeBinary(2))); - - TypeEqualsVisitor visitor = new TypeEqualsVisitor(right); - assertTrue(visitor.equals(left1)); - assertFalse(visitor.equals(left2)); - } - } - - @Test - public void testStructTypeEquals() { - try (final StructVector right = StructVector.empty("struct", allocator); - final StructVector left1 = StructVector.empty("struct", allocator); - final StructVector left2 = StructVector.empty("struct", allocator)) { - - right.addOrGet("child", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); - left1.addOrGet("child", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); - left2.addOrGet("child2", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); - - TypeEqualsVisitor visitor = new 
TypeEqualsVisitor(right); - assertTrue(visitor.equals(left1)); - assertFalse(visitor.equals(left2)); - } - } - - @Test - public void testUnionTypeEquals() { - try (final UnionVector right = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null); - final UnionVector left1 = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null); - final UnionVector left2 = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null)) { - - right.addVector(new IntVector("int", allocator)); - left1.addVector(new IntVector("int", allocator)); - left2.addVector(new BigIntVector("bigint", allocator)); - - TypeEqualsVisitor visitor = new TypeEqualsVisitor(right); - assertTrue(visitor.equals(left1)); - assertFalse(visitor.equals(left2)); - } - } - - @Test - public void testDenseUnionTypeEquals() { - try (DenseUnionVector vector1 = new DenseUnionVector("vector1", allocator, null, null); - DenseUnionVector vector2 = new DenseUnionVector("vector2", allocator, null, null)) { - vector1.allocateNew(); - vector2.allocateNew(); - - // set children for vector1 - byte intTypeId = - vector1.registerNewTypeId(Field.nullable("int", Types.MinorType.INT.getType())); - byte longTypeId = - vector1.registerNewTypeId(Field.nullable("long", Types.MinorType.BIGINT.getType())); - byte floatTypeId = - vector1.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType())); - byte doubleTypeId = - vector1.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType())); - - vector1.addVector(floatTypeId, new Float4Vector("", allocator)); - vector1.addVector(longTypeId, new BigIntVector("", allocator)); - vector1.addVector(intTypeId, new IntVector("", allocator)); - vector1.addVector(doubleTypeId, new Float8Vector("", allocator)); - - // set children for vector2 - intTypeId = vector2.registerNewTypeId(Field.nullable("int", Types.MinorType.INT.getType())); - longTypeId = - 
vector2.registerNewTypeId(Field.nullable("long", Types.MinorType.BIGINT.getType())); - floatTypeId = - vector2.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType())); - doubleTypeId = - vector2.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType())); - - // add vectors in a different order - vector2.addVector(intTypeId, new IntVector("", allocator)); - vector2.addVector(floatTypeId, new Float4Vector("", allocator)); - vector2.addVector(doubleTypeId, new Float8Vector("", allocator)); - vector2.addVector(longTypeId, new BigIntVector("", allocator)); - - // compare ranges - TypeEqualsVisitor typeVisitor = - new TypeEqualsVisitor(vector2, /* check name */ false, /* check meta data */ true); - assertTrue(typeVisitor.equals(vector1)); - - // if we check names, the types should be different - typeVisitor = - new TypeEqualsVisitor(vector2, /* check name */ true, /* check meta data */ true); - assertFalse(typeVisitor.equals(vector1)); - } - } - - @Test - public void testStringViewTypeEquals() { - try (final ViewVarCharVector varchar1 = new ViewVarCharVector("varchar1", allocator); - final ViewVarCharVector varchar2 = new ViewVarCharVector("varchar2", allocator); - final ViewVarBinaryVector binary = new ViewVarBinaryVector("binary", allocator)) { - final int valueCount = 2; - final byte[] str0 = "apache".getBytes(StandardCharsets.UTF_8); - final byte[] str1 = "arrow".getBytes(StandardCharsets.UTF_8); - - // add elements for varchar1 - varchar1.allocateNew(48, valueCount); - varchar1.set(0, str0); - varchar1.set(1, str1); - varchar1.setValueCount(valueCount); - - // add elements for varchar2 in a difference order - varchar2.allocateNew(48, valueCount); - varchar2.set(0, str1); - varchar2.set(1, str0); - varchar2.setValueCount(valueCount); - - // add elements for binary - binary.allocateNew(48, valueCount); - binary.set(0, str0); - binary.set(1, str1); - binary.setValueCount(valueCount); - - // compare ignore check name - 
TypeEqualsVisitor visitor = - new TypeEqualsVisitor(varchar1, /* check name */ false, /* check meta data */ true); - assertTrue(visitor.equals(varchar2)); - assertFalse(visitor.equals(binary)); - - // if we check names, the types should be different - visitor = new TypeEqualsVisitor(varchar1, /* check name */ true, /* check meta data */ true); - assertFalse(visitor.equals(varchar2)); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java deleted file mode 100644 index b692a205fc12f..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex; - -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.holders.NullableVarBinaryHolder; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.Test; - -public class TestDenseUnionBufferSize { - @Test - public void testBufferSize() { - try (BufferAllocator allocator = new RootAllocator(); - DenseUnionVector duv = - new DenseUnionVector( - "duv", - allocator, - FieldType.nullable(new ArrowType.Union(UnionMode.Dense, null)), - null)) { - - byte aTypeId = 42; - byte bTypeId = 7; - - duv.addVector( - aTypeId, - new IntVector("a", FieldType.notNullable(new ArrowType.Int(32, true)), allocator)); - duv.addVector( - bTypeId, - new VarBinaryVector("b", FieldType.notNullable(new ArrowType.Binary()), allocator)); - - NullableIntHolder intHolder = new NullableIntHolder(); - NullableVarBinaryHolder varBinaryHolder = new NullableVarBinaryHolder(); - - int aCount = BaseValueVector.INITIAL_VALUE_ALLOCATION + 1; - for (int i = 0; i < aCount; i++) { - duv.setTypeId(i, aTypeId); - duv.setSafe(i, intHolder); - } - - int bCount = 2; - for (int i = 0; i < bCount; i++) { - duv.setTypeId(i + aCount, bTypeId); - duv.setSafe(i + aCount, varBinaryHolder); - } - - int count = aCount + bCount; - duv.setValueCount(count); - - // will not necessarily see an error unless bounds checking is on. 
- assertDoesNotThrow(duv::getBufferSize); - - IntVector intVector = duv.getIntVector(aTypeId); - VarBinaryVector varBinaryVector = duv.getVarBinaryVector(bTypeId); - - long overhead = DenseUnionVector.TYPE_WIDTH + DenseUnionVector.OFFSET_WIDTH; - - assertEquals( - overhead * count + intVector.getBufferSize() + varBinaryVector.getBufferSize(), - duv.getBufferSize()); - - assertEquals( - overhead * (aCount + 1) - + intVector.getBufferSizeFor(aCount) - + varBinaryVector.getBufferSizeFor(1), - duv.getBufferSizeFor(aCount + 1)); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java deleted file mode 100644 index 3bc02c602983b..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java +++ /dev/null @@ -1,848 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex.impl; - -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.math.BigDecimal; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.compare.VectorEqualsVisitor; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.BaseWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.holders.DecimalHolder; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.DecimalUtility; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestComplexCopier { - - private BufferAllocator allocator; - - private static final int COUNT = 100; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testCopyFixedSizeListVector() { - try (FixedSizeListVector from = FixedSizeListVector.empty("v", 3, allocator); - FixedSizeListVector to = FixedSizeListVector.empty("v", 3, allocator)) { - - from.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())); - to.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())); - - // populate from vector - UnionFixedSizeListWriter writer = from.getWriter(); - 
for (int i = 0; i < COUNT; i++) { - writer.startList(); - writer.integer().writeInt(i); - writer.integer().writeInt(i * 2); - writer.integer().writeInt(i * 3); - writer.endList(); - } - from.setValueCount(COUNT); - to.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } - - @Test - public void testInvalidCopyFixedSizeListVector() { - try (FixedSizeListVector from = FixedSizeListVector.empty("v", 3, allocator); - FixedSizeListVector to = FixedSizeListVector.empty("v", 2, allocator)) { - - from.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())); - to.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())); - - // populate from vector - UnionFixedSizeListWriter writer = from.getWriter(); - for (int i = 0; i < COUNT; i++) { - writer.startList(); - writer.integer().writeInt(i); - writer.integer().writeInt(i * 2); - writer.integer().writeInt(i * 3); - writer.endList(); - } - from.setValueCount(COUNT); - to.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - IllegalStateException e = - assertThrows(IllegalStateException.class, () -> ComplexCopier.copy(in, out)); - assertTrue(e.getMessage().contains("greater than listSize")); - } - } - - @Test - public void testCopyMapVector() { - try (final MapVector from = MapVector.empty("v", allocator, false); - final MapVector to = MapVector.empty("v", allocator, false)) { - - from.allocateNew(); - - UnionMapWriter mapWriter = from.getWriter(); - for (int i = 0; i < COUNT; i++) { - mapWriter.setPosition(i); - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().integer().writeInt(i); - mapWriter.value().integer().writeInt(i); - mapWriter.endEntry(); - mapWriter.startEntry(); 
- mapWriter.key().decimal().writeDecimal(BigDecimal.valueOf(i * 2)); - mapWriter.value().decimal().writeDecimal(BigDecimal.valueOf(i * 2)); - mapWriter.endEntry(); - mapWriter.endMap(); - } - - from.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - to.setValueCount(COUNT); - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } - - @Test - public void testCopyListVector() { - try (ListVector from = ListVector.empty("v", allocator); - ListVector to = ListVector.empty("v", allocator)) { - - UnionListWriter listWriter = from.getWriter(); - listWriter.allocate(); - - for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); - listWriter.startList(); - - listWriter.integer().writeInt(i); - listWriter.integer().writeInt(i * 2); - - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(i); - listWriter.list().bigInt().writeBigInt(i * 2); - listWriter.list().bigInt().writeBigInt(i * 3); - listWriter.list().endList(); - - listWriter.list().startList(); - listWriter.list().decimal().writeDecimal(BigDecimal.valueOf(i * 4)); - listWriter.list().decimal().writeDecimal(BigDecimal.valueOf(i * 5)); - listWriter.list().endList(); - listWriter.endList(); - } - from.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - - to.setValueCount(COUNT); - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } - - @Test - public void testCopyListVectorToANonEmptyList() { - try (ListVector from = ListVector.empty("v", allocator); - ListVector to = ListVector.empty("v", allocator)) { - - UnionListWriter listWriter = from.getWriter(); - listWriter.allocate(); 
- - for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); - listWriter.startList(); - listWriter.integer().writeInt(i); - listWriter.integer().writeInt(i * 2); - listWriter.endList(); - } - from.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - to.setValueCount(COUNT); - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - - // Copy again to the target vector which is non-empty - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - to.setValueCount(COUNT); - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - - // copy using copyFromSafe method - for (int i = 0; i < COUNT; i++) { - to.copyFromSafe(i, i, from); - } - to.setValueCount(COUNT); - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } - - @Test - public void testCopyListVectorWithNulls() { - try (ListVector from = ListVector.empty("v", allocator); - ListVector to = ListVector.empty("v", allocator)) { - - UnionListWriter listWriter = from.getWriter(); - listWriter.allocate(); - - // writer null, [null,i,null,i*2,null] alternatively - for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); - if (i % 2 == 0) { - listWriter.writeNull(); - continue; - } - listWriter.startList(); - listWriter.integer().writeNull(); - listWriter.integer().writeInt(i); - listWriter.integer().writeNull(); - listWriter.integer().writeInt(i * 2); - listWriter.integer().writeNull(); - listWriter.endList(); - } - from.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - - to.setValueCount(COUNT); - - // validate equals - 
assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } - - @Test - public void testCopyListOfListVectorWithNulls() { - try (ListVector from = ListVector.empty("v", allocator); - ListVector to = ListVector.empty("v", allocator); ) { - - UnionListWriter listWriter = from.getWriter(); - listWriter.allocate(); - - // write null, [null,[50,100,null,200],null, - // [null,50,null,100,null,200,null],null] alternatively - for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); - if (i % 2 == 0) { - listWriter.writeNull(); - continue; - } - listWriter.startList(); - listWriter.list().writeNull(); - listWriter.list().startList(); - listWriter.list().bigInt().writeBigInt(50); - listWriter.list().bigInt().writeBigInt(100); - listWriter.list().bigInt().writeNull(); - listWriter.list().bigInt().writeBigInt(200); - listWriter.list().endList(); - listWriter.list().writeNull(); - listWriter.list().startList(); - listWriter.list().bigInt().writeNull(); - listWriter.list().bigInt().writeBigInt(50); - listWriter.list().bigInt().writeNull(); - listWriter.list().bigInt().writeBigInt(100); - listWriter.list().bigInt().writeNull(); - listWriter.list().bigInt().writeBigInt(200); - listWriter.list().bigInt().writeNull(); - listWriter.list().endList(); - listWriter.list().writeNull(); - listWriter.endList(); - } - from.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - - to.setValueCount(COUNT); - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } - - @Test - public void testCopyListOStructVectorWithNulls() { - try (ListVector from = ListVector.empty("v", allocator); - ListVector to = ListVector.empty("v", allocator); ) { - - UnionListWriter listWriter = from.getWriter(); - listWriter.allocate(); - - // write null, [null,{"f1":1,"f2":2},null, - // 
{"f1":1,"f2":2},null] alternatively - for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); - if (i % 2 == 0) { - listWriter.writeNull(); - continue; - } - listWriter.startList(); - listWriter.struct().writeNull(); - listWriter.struct().start(); - listWriter.struct().integer("f1").writeInt(1); - listWriter.struct().integer("f2").writeInt(2); - listWriter.struct().integer("f3").writeNull(); - listWriter.struct().end(); - listWriter.struct().writeNull(); - listWriter.struct().start(); - listWriter.struct().integer("f1").writeInt(1); - listWriter.struct().integer("f2").writeInt(2); - listWriter.struct().integer("f3").writeNull(); - listWriter.struct().end(); - listWriter.struct().writeNull(); - listWriter.endList(); - } - from.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - - to.setValueCount(COUNT); - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } - - @Test - public void testCopyListOfListOfStructVectorWithNulls() { - try (ListVector from = ListVector.empty("v", allocator); - ListVector to = ListVector.empty("v", allocator); ) { - - UnionListWriter listWriter = from.getWriter(); - listWriter.allocate(); - - // write null, - // [null,[{"f1":50},null,{"f1":100},null,{"f1":200}],null, - // [null,{"f1":50},null,{"f1":100},null,{"f1":200},null],null] - // alternatively - for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); - if (i % 2 == 0) { - listWriter.writeNull(); - continue; - } - listWriter.startList(); - listWriter.list().writeNull(); - listWriter.list().startList(); - listWriter.list().struct().start(); - listWriter.list().struct().bigInt("f1").writeBigInt(50); - listWriter.list().struct().end(); - listWriter.list().struct().writeNull(); - listWriter.list().struct().start(); - 
listWriter.list().struct().bigInt("f1").writeBigInt(100); - listWriter.list().struct().end(); - listWriter.list().struct().writeNull(); - listWriter.list().struct().start(); - listWriter.list().struct().bigInt("f1").writeBigInt(200); - listWriter.list().struct().end(); - listWriter.list().endList(); - - listWriter.list().writeNull(); - - listWriter.list().startList(); - listWriter.list().struct().writeNull(); - listWriter.list().struct().start(); - listWriter.list().struct().bigInt("f1").writeBigInt(50); - listWriter.list().struct().end(); - - listWriter.list().struct().writeNull(); - listWriter.list().struct().start(); - listWriter.list().struct().bigInt("f1").writeBigInt(100); - listWriter.list().struct().end(); - - listWriter.list().struct().writeNull(); - listWriter.list().struct().start(); - listWriter.list().struct().bigInt("f1").writeBigInt(200); - listWriter.list().struct().end(); - - listWriter.list().struct().writeNull(); - listWriter.list().endList(); - - listWriter.list().writeNull(); - - listWriter.endList(); - } - from.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - - to.setValueCount(COUNT); - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } - - @Test - public void testMapWithListValue() throws Exception { - try (MapVector from = MapVector.empty("map", allocator, false); - MapVector to = MapVector.empty("map", allocator, false)) { - - UnionMapWriter mapWriter = from.getWriter(); - BaseWriter.ListWriter valueWriter; - - /* allocate memory */ - mapWriter.allocate(); - - // write null, [{}, - // {"value":[]},{"key":1,"value":[null,50,null,100,null,200,null]}, - // null,{"key":2,"value":[null,75,null,125,null,150,null,175,null]}] - // alternatively - for (int i = 0; i < COUNT; i++) { - mapWriter.setPosition(i); - if (i % 2 == 0) { - 
mapWriter.writeNull(); - continue; - } - - mapWriter.startMap(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeNull(); - mapWriter.value().list().writeNull(); - mapWriter.endEntry(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeNull(); - valueWriter = mapWriter.value().list(); - valueWriter.startList(); - valueWriter.endList(); - mapWriter.endEntry(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(1); - valueWriter = mapWriter.value().list(); - valueWriter.startList(); - valueWriter.bigInt().writeNull(); - valueWriter.bigInt().writeBigInt(50); - valueWriter.bigInt().writeNull(); - valueWriter.bigInt().writeBigInt(100); - valueWriter.bigInt().writeNull(); - valueWriter.bigInt().writeBigInt(200); - valueWriter.bigInt().writeNull(); - valueWriter.endList(); - mapWriter.endEntry(); - - mapWriter.writeNull(); - - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(2); - valueWriter = mapWriter.value().list(); - valueWriter.startList(); - valueWriter.bigInt().writeNull(); - valueWriter.bigInt().writeBigInt(75); - valueWriter.bigInt().writeNull(); - valueWriter.bigInt().writeBigInt(125); - valueWriter.bigInt().writeNull(); - valueWriter.bigInt().writeBigInt(150); - valueWriter.bigInt().writeNull(); - valueWriter.bigInt().writeBigInt(175); - valueWriter.bigInt().writeNull(); - valueWriter.endList(); - mapWriter.endEntry(); - - mapWriter.endMap(); - } - mapWriter.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - - to.setValueCount(COUNT); - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } - - @Test - public void testCopyFixedSizedListOfDecimalsVector() { - try (FixedSizeListVector from = FixedSizeListVector.empty("v", 4, allocator); - FixedSizeListVector to = FixedSizeListVector.empty("v", 4, 
allocator)) { - from.addOrGetVector(FieldType.nullable(new ArrowType.Decimal(3, 0, 128))); - to.addOrGetVector(FieldType.nullable(new ArrowType.Decimal(3, 0, 128))); - - DecimalHolder holder = new DecimalHolder(); - holder.buffer = allocator.buffer(DecimalVector.TYPE_WIDTH); - ArrowType arrowType = new ArrowType.Decimal(3, 0, 128); - - // populate from vector - UnionFixedSizeListWriter writer = from.getWriter(); - for (int i = 0; i < COUNT; i++) { - writer.startList(); - writer.decimal().writeDecimal(BigDecimal.valueOf(i)); - - DecimalUtility.writeBigDecimalToArrowBuf( - new BigDecimal(i * 2), holder.buffer, 0, DecimalVector.TYPE_WIDTH); - holder.start = 0; - holder.scale = 0; - holder.precision = 3; - writer.decimal().write(holder); - - DecimalUtility.writeBigDecimalToArrowBuf( - new BigDecimal(i * 3), holder.buffer, 0, DecimalVector.TYPE_WIDTH); - writer.decimal().writeDecimal(0, holder.buffer, arrowType); - - writer - .decimal() - .writeBigEndianBytesToDecimal( - BigDecimal.valueOf(i * 4).unscaledValue().toByteArray(), arrowType); - - writer.endList(); - } - from.setValueCount(COUNT); - to.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - holder.buffer.close(); - } - } - - @Test - public void testCopyUnionListWithDecimal() { - try (ListVector from = ListVector.empty("v", allocator); - ListVector to = ListVector.empty("v", allocator)) { - - UnionListWriter listWriter = from.getWriter(); - listWriter.allocate(); - - for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); - listWriter.startList(); - - listWriter.decimal().writeDecimal(BigDecimal.valueOf(i * 2)); - listWriter.integer().writeInt(i); - listWriter - .decimal() - .writeBigEndianBytesToDecimal( - BigDecimal.valueOf(i * 
3).unscaledValue().toByteArray(), - new ArrowType.Decimal(3, 0, 128)); - - listWriter.endList(); - } - from.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - - to.setValueCount(COUNT); - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } - - @Test - public void testCopyStructVector() { - try (final StructVector from = StructVector.empty("v", allocator); - final StructVector to = StructVector.empty("v", allocator)) { - - from.allocateNewSafe(); - - NullableStructWriter structWriter = from.getWriter(); - for (int i = 0; i < COUNT; i++) { - structWriter.setPosition(i); - structWriter.start(); - structWriter.integer("int").writeInt(i); - structWriter.decimal("dec", 0, 38).writeDecimal(BigDecimal.valueOf(i * 2)); - StructWriter innerStructWriter = structWriter.struct("struc"); - innerStructWriter.start(); - innerStructWriter.integer("innerint").writeInt(i * 3); - innerStructWriter.decimal("innerdec", 0, 38).writeDecimal(BigDecimal.valueOf(i * 4)); - innerStructWriter - .decimal("innerdec", 0, 38) - .writeBigEndianBytesToDecimal( - BigDecimal.valueOf(i * 4).unscaledValue().toByteArray(), - new ArrowType.Decimal(3, 0, 128)); - innerStructWriter.end(); - structWriter.end(); - } - - from.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - to.setValueCount(COUNT); - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } - - @Test - public void testCopyDecimalVectorWrongScale() { - try (FixedSizeListVector from = FixedSizeListVector.empty("v", 3, allocator); - FixedSizeListVector to = FixedSizeListVector.empty("v", 3, allocator)) { - 
from.addOrGetVector(FieldType.nullable(new ArrowType.Decimal(3, 2, 128))); - to.addOrGetVector(FieldType.nullable(new ArrowType.Decimal(3, 1, 128))); - - // populate from vector - UnionFixedSizeListWriter writer = from.getWriter(); - for (int i = 0; i < COUNT; i++) { - writer.startList(); - writer.decimal().writeDecimal(BigDecimal.valueOf(1.23)); - writer.decimal().writeDecimal(BigDecimal.valueOf(2.45)); - writer.endList(); - } - from.setValueCount(COUNT); - to.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - UnsupportedOperationException e = - assertThrows(UnsupportedOperationException.class, () -> ComplexCopier.copy(in, out)); - assertTrue( - e.getMessage().contains("BigDecimal scale must equal that in the Arrow vector: 2 != 1")); - } - } - - @Test - public void testCopyStructVectorWithNulls() { - try (StructVector from = StructVector.empty("v", allocator); - StructVector to = StructVector.empty("v", allocator)) { - - NullableStructWriter writer = from.getWriter(); - - for (int i = 0; i < COUNT; ++i) { - writer.setPosition(i); - writer.start(); - writer.integer("int").writeInt(i); - if (i % 3 == 0) { - writer.float4("child").writeFloat4(12.3f); - } else if (i % 3 == 1) { - writer.integer("child").writeInt(123); - } else { - writer.integer("child").writeNull(); - } - writer.end(); - } - from.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - to.setValueCount(COUNT); - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } - - @Test - public void testCopyStructOfMap() { - try (final StructVector from = StructVector.empty("v", allocator); - final StructVector to = StructVector.empty("v", allocator); ) { - - from.allocateNew(); - - NullableStructWriter structWriter = from.getWriter(); - for 
(int i = 0; i < COUNT; i++) { - structWriter.setPosition(i); - structWriter.start(); - BaseWriter.MapWriter innerMapWriter = structWriter.map("f1"); - innerMapWriter.startMap(); - innerMapWriter.startEntry(); - innerMapWriter.key().integer().writeInt(i); - innerMapWriter.value().integer().writeInt(i); - innerMapWriter.endEntry(); - innerMapWriter.endMap(); - structWriter.end(); - } - - from.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - to.setValueCount(COUNT); - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } - - @Test - public void testCopyMapVectorWithMapValue() { - try (final MapVector from = MapVector.empty("v", allocator, false); - final MapVector to = MapVector.empty("v", allocator, false)) { - - from.allocateNew(); - - UnionMapWriter mapWriter = from.getWriter(); - for (int i = 0; i < COUNT; i++) { - mapWriter.setPosition(i); - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().integer().writeInt(i); - BaseWriter.MapWriter innerMapWriter = mapWriter.value().map(false); - innerMapWriter.startMap(); - innerMapWriter.startEntry(); - innerMapWriter.key().integer().writeInt(i); - innerMapWriter.value().integer().writeInt(i); - innerMapWriter.endEntry(); - innerMapWriter.endMap(); - mapWriter.endEntry(); - mapWriter.endMap(); - } - - from.setValueCount(COUNT); - - // copy values - FieldReader in = from.getReader(); - FieldWriter out = to.getWriter(); - for (int i = 0; i < COUNT; i++) { - in.setPosition(i); - out.setPosition(i); - ComplexCopier.copy(in, out); - } - to.setValueCount(COUNT); - - // validate equals - assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java 
b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java deleted file mode 100644 index 19b26b6d0ed07..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ /dev/null @@ -1,732 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex.impl; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.charset.StandardCharsets; -import java.util.Objects; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.DirtyRootAllocator; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.holders.DurationHolder; -import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; -import org.apache.arrow.vector.holders.NullableTimeStampMilliTZHolder; -import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.Text; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestPromotableWriter { - private static final String EMPTY_SCHEMA_PATH = ""; - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new 
DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testPromoteToUnion() throws Exception { - - try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); - final StructVector v = container.addOrGetStruct("test"); - final PromotableWriter writer = new PromotableWriter(v, container)) { - - container.allocateNew(); - - writer.start(); - - writer.setPosition(0); - writer.bit("A").writeBit(0); - - writer.setPosition(1); - writer.bit("A").writeBit(1); - - writer.decimal("dec", 10, 10); - - writer.setPosition(2); - writer.integer("A").writeInt(10); - - // we don't write anything in 3 - - writer.setPosition(4); - writer.integer("A").writeInt(100); - - writer.setPosition(5); - writer.timeStampMilliTZ("A").writeTimeStampMilliTZ(123123); - - // Also try the holder version for timeStampMilliTZ - writer.setPosition(6); - TimeStampMilliTZHolder tsmtzHolder = new TimeStampMilliTZHolder(); - // This has to be UTC since the vector above was initialized using the non holder - // version that defaults to UTC. 
- tsmtzHolder.timezone = "UTC"; - tsmtzHolder.value = 12345L; - writer.timeStampMilliTZ("A").write(tsmtzHolder); - - writer.setPosition(7); - DurationHolder durationHolder = new DurationHolder(); - durationHolder.unit = TimeUnit.SECOND; - durationHolder.value = 444413; - writer.duration("A").write(durationHolder); - - writer.setPosition(8); - ArrowBuf buf = allocator.buffer(4); - buf.setInt(0, 18978); - FixedSizeBinaryHolder binHolder = new FixedSizeBinaryHolder(); - binHolder.byteWidth = 4; - binHolder.buffer = buf; - writer.fixedSizeBinary("A", 4).write(binHolder); - - writer.end(); - - container.setValueCount(9); - - final UnionVector uv = v.getChild("A", UnionVector.class); - - assertFalse(uv.isNull(0), "0 shouldn't be null"); - assertEquals(false, uv.getObject(0)); - - assertFalse(uv.isNull(1), "1 shouldn't be null"); - assertEquals(true, uv.getObject(1)); - - assertFalse(uv.isNull(2), "2 shouldn't be null"); - assertEquals(10, uv.getObject(2)); - - assertNull(uv.getObject(3), "3 should be null"); - - assertFalse(uv.isNull(4), "4 shouldn't be null"); - assertEquals(100, uv.getObject(4)); - - assertFalse(uv.isNull(5), "5 shouldn't be null"); - assertEquals(123123L, uv.getObject(5)); - - assertFalse(uv.isNull(6), "6 shouldn't be null"); - NullableTimeStampMilliTZHolder readBackHolder = new NullableTimeStampMilliTZHolder(); - uv.getTimeStampMilliTZVector().get(6, readBackHolder); - assertEquals(12345L, readBackHolder.value); - assertEquals("UTC", readBackHolder.timezone); - - assertFalse(uv.isNull(7), "7 shouldn't be null"); - assertEquals(444413L, ((java.time.Duration) uv.getObject(7)).getSeconds()); - - assertFalse(uv.isNull(8), "8 shouldn't be null"); - assertEquals( - 18978, - ByteBuffer.wrap(uv.getFixedSizeBinaryVector().get(8)) - .order(ByteOrder.nativeOrder()) - .getInt()); - - container.clear(); - container.allocateNew(); - - ComplexWriterImpl newWriter = new ComplexWriterImpl(EMPTY_SCHEMA_PATH, container); - - StructWriter newStructWriter = 
newWriter.rootAsStruct(); - - newStructWriter.start(); - - newStructWriter.setPosition(2); - newStructWriter.integer("A").writeInt(10); - - Field childField1 = container.getField().getChildren().get(0).getChildren().get(0); - Field childField2 = container.getField().getChildren().get(0).getChildren().get(1); - assertEquals( - ArrowTypeID.Union, - childField1.getType().getTypeID(), - "Child field should be union type: " + childField1.getName()); - assertEquals( - ArrowTypeID.Decimal, - childField2.getType().getTypeID(), - "Child field should be decimal type: " + childField2.getName()); - - buf.close(); - } - } - - @Test - public void testNoPromoteFloat4ToUnionWithNull() throws Exception { - - try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); - final StructVector v = container.addOrGetStruct("test"); - final PromotableWriter writer = new PromotableWriter(v, container)) { - - container.allocateNew(); - - writer.start(); - writer.list("list").startList(); - writer.list("list").endList(); - writer.end(); - - FieldType childTypeOfListInContainer = - container - .getField() - .getChildren() - .get(0) - .getChildren() - .get(0) - .getChildren() - .get(0) - .getFieldType(); - - // create a listvector with same type as list in container to, say, hold a copy - // this will be a nullvector - ListVector lv = ListVector.empty("name", allocator); - lv.addOrGetVector(childTypeOfListInContainer); - assertEquals(childTypeOfListInContainer.getType(), Types.MinorType.NULL.getType()); - assertEquals( - lv.getChildrenFromFields().get(0).getMinorType().getType(), - Types.MinorType.NULL.getType()); - - writer.start(); - writer.list("list").startList(); - writer.list("list").float4().writeFloat4(1.36f); - writer.list("list").endList(); - writer.end(); - - container.setValueCount(2); - - childTypeOfListInContainer = - container - .getField() - .getChildren() - .get(0) - .getChildren() - .get(0) - .getChildren() - .get(0) - 
.getFieldType(); - - // repeat but now the type in container has been changed from null to float - // we expect same behaviour from listvector - lv.addOrGetVector(childTypeOfListInContainer); - assertEquals(childTypeOfListInContainer.getType(), Types.MinorType.FLOAT4.getType()); - assertEquals( - lv.getChildrenFromFields().get(0).getMinorType().getType(), - Types.MinorType.FLOAT4.getType()); - - lv.close(); - } - } - - @Test - public void testNoPromoteTimeStampMilliTZToUnionWithNull() throws Exception { - - try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); - final StructVector v = container.addOrGetStruct("test"); - final PromotableWriter writer = new PromotableWriter(v, container)) { - - container.allocateNew(); - - writer.start(); - writer.list("list").startList(); - writer.list("list").endList(); - writer.end(); - - FieldType childTypeOfListInContainer = - container - .getField() - .getChildren() - .get(0) - .getChildren() - .get(0) - .getChildren() - .get(0) - .getFieldType(); - - // create a listvector with same type as list in container to, say, hold a copy - // this will be a nullvector - ListVector lv = ListVector.empty("name", allocator); - lv.addOrGetVector(childTypeOfListInContainer); - assertEquals(childTypeOfListInContainer.getType(), Types.MinorType.NULL.getType()); - assertEquals( - lv.getChildrenFromFields().get(0).getMinorType().getType(), - Types.MinorType.NULL.getType()); - - writer.start(); - writer.list("list").startList(); - TimeStampMilliTZHolder holder = new TimeStampMilliTZHolder(); - holder.value = 12341234L; - holder.timezone = "FakeTimeZone"; - writer.list("list").timeStampMilliTZ().write(holder); - - // Test that we get an exception when the timezone doesn't match - holder.timezone = "SomeTimeZone"; - IllegalArgumentException ex = - assertThrows( - IllegalArgumentException.class, - () -> writer.list("list").timeStampMilliTZ().write(holder)); - assertEquals( - "holder.timezone: 
SomeTimeZone not equal to vector timezone: FakeTimeZone", - ex.getMessage()); - - writer.list("list").endList(); - writer.end(); - - container.setValueCount(2); - - childTypeOfListInContainer = - container - .getField() - .getChildren() - .get(0) - .getChildren() - .get(0) - .getChildren() - .get(0) - .getFieldType(); - - // repeat but now the type in container has been changed from null to float - // we expect same behaviour from listvector - lv.addOrGetVector(childTypeOfListInContainer); - assertEquals( - childTypeOfListInContainer.getType(), - new ArrowType.Timestamp(TimeUnit.MILLISECOND, "FakeTimeZone")); - assertEquals( - lv.getChildrenFromFields().get(0).getField().getType(), - new ArrowType.Timestamp(TimeUnit.MILLISECOND, "FakeTimeZone")); - - lv.close(); - } - } - - @Test - public void testNoPromoteDurationToUnionWithNull() throws Exception { - - try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); - final StructVector v = container.addOrGetStruct("test"); - final PromotableWriter writer = new PromotableWriter(v, container)) { - - container.allocateNew(); - - writer.start(); - writer.list("list").startList(); - writer.list("list").endList(); - writer.end(); - - FieldType childTypeOfListInContainer = - container - .getField() - .getChildren() - .get(0) - .getChildren() - .get(0) - .getChildren() - .get(0) - .getFieldType(); - - // create a listvector with same type as list in container to, say, hold a copy - // this will be a nullvector - ListVector lv = ListVector.empty("name", allocator); - lv.addOrGetVector(childTypeOfListInContainer); - assertEquals(childTypeOfListInContainer.getType(), Types.MinorType.NULL.getType()); - assertEquals( - lv.getChildrenFromFields().get(0).getMinorType().getType(), - Types.MinorType.NULL.getType()); - - writer.start(); - writer.list("list").startList(); - DurationHolder holder = new DurationHolder(); - holder.unit = TimeUnit.NANOSECOND; - holder.value = 567657L; - 
writer.list("list").duration().write(holder); - - // Test that we get an exception when the unit doesn't match - holder.unit = TimeUnit.MICROSECOND; - IllegalArgumentException ex = - assertThrows( - IllegalArgumentException.class, () -> writer.list("list").duration().write(holder)); - assertEquals( - "holder.unit: MICROSECOND not equal to vector unit: NANOSECOND", ex.getMessage()); - - writer.list("list").endList(); - writer.end(); - - container.setValueCount(2); - - childTypeOfListInContainer = - container - .getField() - .getChildren() - .get(0) - .getChildren() - .get(0) - .getChildren() - .get(0) - .getFieldType(); - - // repeat but now the type in container has been changed from null to float - // we expect same behaviour from listvector - lv.addOrGetVector(childTypeOfListInContainer); - assertEquals( - childTypeOfListInContainer.getType(), new ArrowType.Duration(TimeUnit.NANOSECOND)); - assertEquals( - lv.getChildrenFromFields().get(0).getField().getType(), - new ArrowType.Duration(TimeUnit.NANOSECOND)); - - lv.close(); - } - } - - @Test - public void testNoPromoteFixedSizeBinaryToUnionWithNull() throws Exception { - - try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); - final StructVector v = container.addOrGetStruct("test"); - final PromotableWriter writer = new PromotableWriter(v, container)) { - - container.allocateNew(); - - writer.start(); - writer.list("list").startList(); - writer.list("list").endList(); - writer.end(); - - FieldType childTypeOfListInContainer = - container - .getField() - .getChildren() - .get(0) - .getChildren() - .get(0) - .getChildren() - .get(0) - .getFieldType(); - - // create a listvector with same type as list in container to, say, hold a copy - // this will be a nullvector - ListVector lv = ListVector.empty("name", allocator); - lv.addOrGetVector(childTypeOfListInContainer); - assertEquals(childTypeOfListInContainer.getType(), Types.MinorType.NULL.getType()); - 
assertEquals( - lv.getChildrenFromFields().get(0).getMinorType().getType(), - Types.MinorType.NULL.getType()); - - writer.start(); - writer.list("list").startList(); - ArrowBuf buf = allocator.buffer(4); - buf.setInt(0, 22222); - FixedSizeBinaryHolder holder = new FixedSizeBinaryHolder(); - holder.byteWidth = 4; - holder.buffer = buf; - writer.list("list").fixedSizeBinary().write(holder); - - // Test that we get an exception when the unit doesn't match - holder.byteWidth = 7; - IllegalArgumentException ex = - assertThrows( - IllegalArgumentException.class, - () -> writer.list("list").fixedSizeBinary().write(holder)); - assertEquals("holder.byteWidth: 7 not equal to vector byteWidth: 4", ex.getMessage()); - - writer.list("list").endList(); - writer.end(); - - container.setValueCount(2); - - childTypeOfListInContainer = - container - .getField() - .getChildren() - .get(0) - .getChildren() - .get(0) - .getChildren() - .get(0) - .getFieldType(); - - // repeat but now the type in container has been changed from null to float - // we expect same behaviour from listvector - lv.addOrGetVector(childTypeOfListInContainer); - assertEquals(childTypeOfListInContainer.getType(), new ArrowType.FixedSizeBinary(4)); - assertEquals( - lv.getChildrenFromFields().get(0).getField().getType(), new ArrowType.FixedSizeBinary(4)); - - lv.close(); - buf.close(); - } - } - - @Test - public void testPromoteLargeVarCharHelpersOnStruct() throws Exception { - try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); - final StructVector v = container.addOrGetStruct("test"); - final PromotableWriter writer = new PromotableWriter(v, container)) { - container.allocateNew(); - - writer.start(); - writer.setPosition(0); - writer.largeVarChar("c").writeLargeVarChar(new Text("foo")); - writer.setPosition(1); - writer.largeVarChar("c").writeLargeVarChar("foo2"); - writer.end(); - - final LargeVarCharVector uv = v.getChild("c", 
LargeVarCharVector.class); - assertEquals("foo", Objects.requireNonNull(uv.getObject(0)).toString()); - assertEquals("foo2", Objects.requireNonNull(uv.getObject(1)).toString()); - } - } - - @Test - public void testPromoteVarCharHelpersOnStruct() throws Exception { - try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); - final StructVector v = container.addOrGetStruct("test"); - final PromotableWriter writer = new PromotableWriter(v, container)) { - container.allocateNew(); - - writer.start(); - writer.setPosition(0); - writer.varChar("c").writeVarChar(new Text("foo")); - writer.setPosition(1); - writer.varChar("c").writeVarChar("foo2"); - writer.end(); - - final VarCharVector uv = v.getChild("c", VarCharVector.class); - assertEquals("foo", Objects.requireNonNull(uv.getObject(0)).toString()); - assertEquals("foo2", Objects.requireNonNull(uv.getObject(1)).toString()); - } - } - - @Test - public void testPromoteVarCharHelpersDirect() throws Exception { - try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); - final StructVector v = container.addOrGetStruct("test"); - final PromotableWriter writer = new PromotableWriter(v, container)) { - container.allocateNew(); - - writer.start(); - writer.setPosition(0); - writer.writeVarChar(new Text("foo")); - writer.setPosition(1); - writer.writeVarChar("foo2"); - writer.end(); - - // The "test" vector in the parent container should have been replaced with a UnionVector. 
- UnionVector promotedVector = container.getChild("test", UnionVector.class); - VarCharVector vector = promotedVector.getVarCharVector(); - assertEquals("foo", Objects.requireNonNull(vector.getObject(0)).toString()); - assertEquals("foo2", Objects.requireNonNull(vector.getObject(1)).toString()); - } - } - - @Test - public void testPromoteLargeVarCharHelpersDirect() throws Exception { - try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); - final StructVector v = container.addOrGetStruct("test"); - final PromotableWriter writer = new PromotableWriter(v, container)) { - container.allocateNew(); - - writer.start(); - writer.setPosition(0); - writer.writeLargeVarChar(new Text("foo")); - writer.setPosition(1); - writer.writeLargeVarChar("foo2"); - writer.end(); - - // The "test" vector in the parent container should have been replaced with a UnionVector. - UnionVector promotedVector = container.getChild("test", UnionVector.class); - LargeVarCharVector vector = promotedVector.getLargeVarCharVector(); - assertEquals("foo", Objects.requireNonNull(vector.getObject(0)).toString()); - assertEquals("foo2", Objects.requireNonNull(vector.getObject(1)).toString()); - } - } - - @Test - public void testPromoteVarBinaryHelpersOnStruct() throws Exception { - try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); - final StructVector v = container.addOrGetStruct("test"); - final PromotableWriter writer = new PromotableWriter(v, container)) { - container.allocateNew(); - - writer.start(); - writer.setPosition(0); - writer.varBinary("c").writeVarBinary("row1".getBytes(StandardCharsets.UTF_8)); - writer.setPosition(1); - writer - .varBinary("c") - .writeVarBinary( - "row2".getBytes(StandardCharsets.UTF_8), - 0, - "row2".getBytes(StandardCharsets.UTF_8).length); - writer.setPosition(2); - writer - .varBinary("c") - 
.writeVarBinary(ByteBuffer.wrap("row3".getBytes(StandardCharsets.UTF_8))); - writer.setPosition(3); - writer - .varBinary("c") - .writeVarBinary( - ByteBuffer.wrap("row4".getBytes(StandardCharsets.UTF_8)), - 0, - "row4".getBytes(StandardCharsets.UTF_8).length); - writer.end(); - - final VarBinaryVector uv = v.getChild("c", VarBinaryVector.class); - assertEquals("row1", new String(Objects.requireNonNull(uv.get(0)), StandardCharsets.UTF_8)); - assertEquals("row2", new String(Objects.requireNonNull(uv.get(1)), StandardCharsets.UTF_8)); - assertEquals("row3", new String(Objects.requireNonNull(uv.get(2)), StandardCharsets.UTF_8)); - assertEquals("row4", new String(Objects.requireNonNull(uv.get(3)), StandardCharsets.UTF_8)); - } - } - - @Test - public void testPromoteVarBinaryHelpersDirect() throws Exception { - try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); - final StructVector v = container.addOrGetStruct("test"); - final PromotableWriter writer = new PromotableWriter(v, container)) { - container.allocateNew(); - - writer.start(); - writer.setPosition(0); - writer.writeVarBinary("row1".getBytes(StandardCharsets.UTF_8)); - writer.setPosition(1); - writer.writeVarBinary( - "row2".getBytes(StandardCharsets.UTF_8), - 0, - "row2".getBytes(StandardCharsets.UTF_8).length); - writer.setPosition(2); - writer.writeVarBinary(ByteBuffer.wrap("row3".getBytes(StandardCharsets.UTF_8))); - writer.setPosition(3); - writer.writeVarBinary( - ByteBuffer.wrap("row4".getBytes(StandardCharsets.UTF_8)), - 0, - "row4".getBytes(StandardCharsets.UTF_8).length); - writer.end(); - - // The "test" vector in the parent container should have been replaced with a UnionVector. 
- UnionVector promotedVector = container.getChild("test", UnionVector.class); - VarBinaryVector uv = promotedVector.getVarBinaryVector(); - assertEquals("row1", new String(Objects.requireNonNull(uv.get(0)), StandardCharsets.UTF_8)); - assertEquals("row2", new String(Objects.requireNonNull(uv.get(1)), StandardCharsets.UTF_8)); - assertEquals("row3", new String(Objects.requireNonNull(uv.get(2)), StandardCharsets.UTF_8)); - assertEquals("row4", new String(Objects.requireNonNull(uv.get(3)), StandardCharsets.UTF_8)); - } - } - - @Test - public void testPromoteLargeVarBinaryHelpersOnStruct() throws Exception { - try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); - final StructVector v = container.addOrGetStruct("test"); - final PromotableWriter writer = new PromotableWriter(v, container)) { - container.allocateNew(); - - writer.start(); - writer.setPosition(0); - writer.largeVarBinary("c").writeLargeVarBinary("row1".getBytes(StandardCharsets.UTF_8)); - writer.setPosition(1); - writer - .largeVarBinary("c") - .writeLargeVarBinary( - "row2".getBytes(StandardCharsets.UTF_8), - 0, - "row2".getBytes(StandardCharsets.UTF_8).length); - writer.setPosition(2); - writer - .largeVarBinary("c") - .writeLargeVarBinary(ByteBuffer.wrap("row3".getBytes(StandardCharsets.UTF_8))); - writer.setPosition(3); - writer - .largeVarBinary("c") - .writeLargeVarBinary( - ByteBuffer.wrap("row4".getBytes(StandardCharsets.UTF_8)), - 0, - "row4".getBytes(StandardCharsets.UTF_8).length); - writer.end(); - - final LargeVarBinaryVector uv = v.getChild("c", LargeVarBinaryVector.class); - assertEquals("row1", new String(Objects.requireNonNull(uv.get(0)), StandardCharsets.UTF_8)); - assertEquals("row2", new String(Objects.requireNonNull(uv.get(1)), StandardCharsets.UTF_8)); - assertEquals("row3", new String(Objects.requireNonNull(uv.get(2)), StandardCharsets.UTF_8)); - assertEquals("row4", new String(Objects.requireNonNull(uv.get(3)), 
StandardCharsets.UTF_8)); - } - } - - @Test - public void testPromoteLargeVarBinaryHelpersDirect() throws Exception { - try (final NonNullableStructVector container = - NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); - final StructVector v = container.addOrGetStruct("test"); - final PromotableWriter writer = new PromotableWriter(v, container)) { - container.allocateNew(); - - writer.start(); - writer.setPosition(0); - writer.writeLargeVarBinary("row1".getBytes(StandardCharsets.UTF_8)); - writer.setPosition(1); - writer.writeLargeVarBinary( - "row2".getBytes(StandardCharsets.UTF_8), - 0, - "row2".getBytes(StandardCharsets.UTF_8).length); - writer.setPosition(2); - writer.writeLargeVarBinary(ByteBuffer.wrap("row3".getBytes(StandardCharsets.UTF_8))); - writer.setPosition(3); - writer.writeLargeVarBinary( - ByteBuffer.wrap("row4".getBytes(StandardCharsets.UTF_8)), - 0, - "row4".getBytes(StandardCharsets.UTF_8).length); - writer.end(); - - // The "test" vector in the parent container should have been replaced with a UnionVector. 
- UnionVector promotedVector = container.getChild("test", UnionVector.class); - LargeVarBinaryVector uv = promotedVector.getLargeVarBinaryVector(); - assertEquals("row1", new String(Objects.requireNonNull(uv.get(0)), StandardCharsets.UTF_8)); - assertEquals("row2", new String(Objects.requireNonNull(uv.get(1)), StandardCharsets.UTF_8)); - assertEquals("row3", new String(Objects.requireNonNull(uv.get(2)), StandardCharsets.UTF_8)); - assertEquals("row4", new String(Objects.requireNonNull(uv.get(3)), StandardCharsets.UTF_8)); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java deleted file mode 100644 index 2745386db4e22..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ /dev/null @@ -1,2492 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.complex.writer; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.math.BigDecimal; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.time.LocalDateTime; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.SchemaChangeCallBack; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.ViewVarCharVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; -import org.apache.arrow.vector.complex.impl.NullableStructReaderImpl; -import org.apache.arrow.vector.complex.impl.NullableStructWriter; -import org.apache.arrow.vector.complex.impl.SingleStructReaderImpl; -import 
org.apache.arrow.vector.complex.impl.SingleStructWriter; -import org.apache.arrow.vector.complex.impl.UnionListReader; -import org.apache.arrow.vector.complex.impl.UnionListViewReader; -import org.apache.arrow.vector.complex.impl.UnionListViewWriter; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.complex.impl.UnionMapReader; -import org.apache.arrow.vector.complex.impl.UnionReader; -import org.apache.arrow.vector.complex.impl.UnionWriter; -import org.apache.arrow.vector.complex.reader.BaseReader.StructReader; -import org.apache.arrow.vector.complex.reader.BigIntReader; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.reader.Float4Reader; -import org.apache.arrow.vector.complex.reader.Float8Reader; -import org.apache.arrow.vector.complex.reader.IntReader; -import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.holders.DecimalHolder; -import org.apache.arrow.vector.holders.DurationHolder; -import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; -import org.apache.arrow.vector.holders.IntHolder; -import org.apache.arrow.vector.holders.NullableDurationHolder; -import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder; -import org.apache.arrow.vector.holders.NullableTimeStampMilliTZHolder; -import org.apache.arrow.vector.holders.NullableTimeStampNanoTZHolder; -import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; -import org.apache.arrow.vector.types.pojo.ArrowType.Int; -import 
org.apache.arrow.vector.types.pojo.ArrowType.Struct; -import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; -import org.apache.arrow.vector.types.pojo.ArrowType.Union; -import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.DecimalUtility; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.apache.arrow.vector.util.JsonStringHashMap; -import org.apache.arrow.vector.util.Text; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestComplexWriter { - - private BufferAllocator allocator; - - private static final int COUNT = 100; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Integer.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - /* Test Utils */ - - private void checkNullableStruct(NonNullableStructVector structVector) { - StructReader rootReader = new SingleStructReaderImpl(structVector).reader("root"); - for (int i = 0; i < COUNT; i++) { - rootReader.setPosition(i); - assertTrue(rootReader.isSet(), "index is set: " + i); - FieldReader struct = rootReader.reader("struct"); - if (i % 2 == 0) { - assertTrue(struct.isSet(), "index is set: " + i); - assertNotNull(struct.readObject(), "index is set: " + i); - assertEquals(i, struct.reader("nested").readLong().longValue()); - } else { - assertFalse(struct.isSet(), "index is not set: " + i); - assertNull(struct.readObject(), "index is not set: " + i); - } - } - } - - private void createListTypeVectorWithScalarType(FieldWriter writer) { - for (int i = 0; i < COUNT; i++) { - writer.startList(); - for (int j = 0; j < i % 7; j++) { - if (j % 2 == 0) { - writer.writeInt(j); - } else { - IntHolder holder 
= new IntHolder(); - holder.value = j; - writer.write(holder); - } - } - writer.endList(); - } - } - - private void checkListTypeVectorWithScalarType(FieldReader reader) { - for (int i = 0; i < COUNT; i++) { - reader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - reader.next(); - assertEquals(j, reader.reader().readInteger().intValue()); - } - } - } - - private void createListTypeVectorWithScalarNull(FieldWriter writer) { - for (int i = 0; i < COUNT; i++) { - writer.startList(); - for (int j = 0; j < i % 7; j++) { - if (j % 2 == 0) { - writer.writeNull(); - } else { - IntHolder holder = new IntHolder(); - holder.value = j; - writer.write(holder); - } - } - writer.endList(); - } - } - - private void checkListTypeVectorWithScalarNull(FieldReader reader) { - for (int i = 0; i < COUNT; i++) { - reader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - reader.next(); - if (j % 2 == 0) { - assertFalse(reader.reader().isSet(), "index is set: " + j); - } else { - assertTrue(reader.reader().isSet(), "index is not set: " + j); - assertEquals(j, reader.reader().readInteger().intValue()); - } - } - } - } - - private void createListTypeVectorWithDecimalType(FieldWriter writer, DecimalHolder holder) { - holder.buffer = allocator.buffer(DecimalVector.TYPE_WIDTH); - ArrowType arrowType = new ArrowType.Decimal(10, 0, 128); - for (int i = 0; i < COUNT; i++) { - writer.startList(); - for (int j = 0; j < i % 7; j++) { - if (j % 4 == 0) { - writer.writeDecimal(new BigDecimal(j)); - } else if (j % 4 == 1) { - DecimalUtility.writeBigDecimalToArrowBuf( - new BigDecimal(j), holder.buffer, 0, DecimalVector.TYPE_WIDTH); - holder.start = 0; - holder.scale = 0; - holder.precision = 10; - writer.write(holder); - } else if (j % 4 == 2) { - DecimalUtility.writeBigDecimalToArrowBuf( - new BigDecimal(j), holder.buffer, 0, DecimalVector.TYPE_WIDTH); - writer.writeDecimal(0, holder.buffer, arrowType); - } else { - byte[] value = BigDecimal.valueOf(j).unscaledValue().toByteArray(); - 
writer.writeBigEndianBytesToDecimal(value, arrowType); - } - } - writer.endList(); - } - } - - private void checkListTypeVectorWithDecimalType(FieldReader reader) { - for (int i = 0; i < COUNT; i++) { - reader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - reader.next(); - Object expected = new BigDecimal(j); - Object actual = reader.reader().readBigDecimal(); - assertEquals(expected, actual); - } - } - } - - private void createListTypeVectorWithTimeStampMilliTZType(FieldWriter writer) { - for (int i = 0; i < COUNT; i++) { - writer.startList(); - for (int j = 0; j < i % 7; j++) { - if (j % 2 == 0) { - writer.writeNull(); - } else { - TimeStampMilliTZHolder holder = new TimeStampMilliTZHolder(); - holder.timezone = "FakeTimeZone"; - holder.value = j; - writer.timeStampMilliTZ().write(holder); - } - } - writer.endList(); - } - } - - private void checkListTypeVectorWithTimeStampMilliTZType(FieldReader reader) { - for (int i = 0; i < COUNT; i++) { - reader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - reader.next(); - if (j % 2 == 0) { - assertFalse(reader.reader().isSet(), "index is set: " + j); - } else { - NullableTimeStampMilliTZHolder actual = new NullableTimeStampMilliTZHolder(); - reader.reader().read(actual); - assertEquals(j, actual.value); - assertEquals("FakeTimeZone", actual.timezone); - } - } - } - } - - private void createNullsWithListWriters(FieldWriter writer) { - for (int i = 0; i < COUNT; i++) { - writer.setPosition(i); - if (i % 2 == 0) { - writer.startList(); - if (i % 4 == 0) { - writer.integer().writeNull(); - } else { - writer.integer().writeInt(i); - writer.integer().writeInt(i * 2); - } - writer.endList(); - } else { - writer.writeNull(); - } - } - } - - private void checkNullsWithListWriters(FieldReader reader) { - for (int i = 0; i < COUNT; i++) { - reader.setPosition(i); - if (i % 2 == 0) { - assertTrue(reader.isSet()); - reader.next(); - if (i % 4 == 0) { - assertNull(reader.reader().readInteger()); - } else { - 
assertEquals(i, reader.reader().readInteger().intValue()); - reader.next(); - assertEquals(i * 2, reader.reader().readInteger().intValue()); - } - } else { - assertFalse(reader.isSet()); - } - } - } - - /* Test Cases */ - - @Test - public void simpleNestedTypes() { - NonNullableStructVector parent = populateStructVector(null); - StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); - for (int i = 0; i < COUNT; i++) { - rootReader.setPosition(i); - assertEquals(i, rootReader.reader("int").readInteger().intValue()); - assertEquals(i, rootReader.reader("bigInt").readLong().longValue()); - } - - parent.close(); - } - - @Test - public void transferPairSchemaChange() { - SchemaChangeCallBack callBack1 = new SchemaChangeCallBack(); - try (NonNullableStructVector parent = populateStructVector(callBack1)) { - - ComplexWriter writer = new ComplexWriterImpl("newWriter", parent); - StructWriter rootWriter = writer.rootAsStruct(); - IntWriter intWriter = rootWriter.integer("newInt"); - intWriter.writeInt(1); - writer.setValueCount(1); - - assertTrue(callBack1.getSchemaChangedAndReset()); - // The second vector should not have registered a schema change - assertFalse(callBack1.getSchemaChangedAndReset()); - } - } - - private NonNullableStructVector populateStructVector(CallBack callBack) { - NonNullableStructVector parent = - new NonNullableStructVector( - "parent", allocator, new FieldType(false, Struct.INSTANCE, null, null), callBack); - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - IntWriter intWriter = rootWriter.integer("int"); - BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); - for (int i = 0; i < COUNT; i++) { - rootWriter.start(); - intWriter.writeInt(i); - bigIntWriter.writeBigInt(i); - rootWriter.end(); - } - writer.setValueCount(COUNT); - return parent; - } - - @Test - public void nullableStruct() { - try (NonNullableStructVector structVector = - 
NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", structVector); - StructWriter rootWriter = writer.rootAsStruct(); - for (int i = 0; i < COUNT; i++) { - rootWriter.start(); - if (i % 2 == 0) { - StructWriter structWriter = rootWriter.struct("struct"); - structWriter.setPosition(i); - structWriter.start(); - structWriter.bigInt("nested").writeBigInt(i); - structWriter.end(); - } - rootWriter.end(); - } - writer.setValueCount(COUNT); - checkNullableStruct(structVector); - } - } - - /** - * This test is similar to {@link #nullableStruct()} ()} but we get the inner struct writer once - * at the beginning. - */ - @Test - public void nullableStruct2() { - try (NonNullableStructVector structVector = - NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", structVector); - StructWriter rootWriter = writer.rootAsStruct(); - StructWriter structWriter = rootWriter.struct("struct"); - - for (int i = 0; i < COUNT; i++) { - rootWriter.start(); - if (i % 2 == 0) { - structWriter.setPosition(i); - structWriter.start(); - structWriter.bigInt("nested").writeBigInt(i); - structWriter.end(); - } - rootWriter.end(); - } - writer.setValueCount(COUNT); - checkNullableStruct(structVector); - } - } - - @Test - public void testList() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - - rootWriter.start(); - rootWriter.bigInt("int").writeBigInt(0); - rootWriter.list("list").startList(); - rootWriter.list("list").bigInt().writeBigInt(0); - rootWriter.list("list").endList(); - rootWriter.end(); - - rootWriter.start(); - rootWriter.bigInt("int").writeBigInt(1); - rootWriter.end(); - - writer.setValueCount(2); - - StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); - - rootReader.setPosition(0); - 
assertTrue(rootReader.reader("list").isSet(), "row 0 list is not set"); - assertEquals(Long.valueOf(0), rootReader.reader("list").reader().readLong()); - - rootReader.setPosition(1); - assertFalse(rootReader.reader("list").isSet(), "row 1 list is set"); - } - } - - private void createListTypeVectorWithDurationType(FieldWriter writer) { - for (int i = 0; i < COUNT; i++) { - writer.startList(); - for (int j = 0; j < i % 7; j++) { - if (j % 2 == 0) { - writer.writeNull(); - } else { - DurationHolder holder = new DurationHolder(); - holder.unit = TimeUnit.MICROSECOND; - holder.value = j; - writer.duration().write(holder); - } - } - writer.endList(); - } - } - - private void checkListTypeVectorWithDurationType(FieldReader reader) { - for (int i = 0; i < COUNT; i++) { - reader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - reader.next(); - if (j % 2 == 0) { - assertFalse(reader.reader().isSet(), "index is set: " + j); - } else { - NullableDurationHolder actual = new NullableDurationHolder(); - reader.reader().read(actual); - assertEquals(TimeUnit.MICROSECOND, actual.unit); - assertEquals(j, actual.value); - } - } - } - } - - private void createScalarTypeVectorWithNullableType(FieldWriter writer) { - for (int i = 0; i < COUNT; i++) { - if (i % 2 == 0) { - writer.setPosition(i); - writer.startList(); - for (int j = 0; j < i % 7; j++) { - writer.writeInt(j); - } - writer.endList(); - } - } - } - - private void checkScalarTypeVectorWithNullableType(FieldReader reader) { - for (int i = 0; i < COUNT; i++) { - reader.setPosition(i); - if (i % 2 == 0) { - assertTrue(reader.isSet(), "index is set: " + i); - assertEquals(i % 7, ((List) reader.readObject()).size(), "correct length at: " + i); - } else { - assertFalse(reader.isSet(), "index is not set: " + i); - assertNull(reader.readObject(), "index is not set: " + i); - } - } - } - - private void createListTypeVectorWithStructType( - FieldWriter fieldWriter, StructWriter structWriter) { - for (int i = 0; i < COUNT; i++) { - 
fieldWriter.startList(); - for (int j = 0; j < i % 7; j++) { - structWriter.start(); - structWriter.integer("int").writeInt(j); - structWriter.bigInt("bigInt").writeBigInt(j); - structWriter.end(); - } - fieldWriter.endList(); - } - } - - private void checkListTypeVectorWithStructType(FieldReader reader) { - for (int i = 0; i < COUNT; i++) { - reader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - reader.next(); - assertEquals(j, reader.reader().reader("int").readInteger().intValue(), "record: " + i); - assertEquals(j, reader.reader().reader("bigInt").readLong().longValue()); - } - } - } - - private void checkListOfListTypes(final FieldReader reader) { - for (int i = 0; i < COUNT; i++) { - reader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - reader.next(); - FieldReader innerListReader = reader.reader(); - for (int k = 0; k < i % 13; k++) { - innerListReader.next(); - assertEquals(k, innerListReader.reader().readInteger().intValue(), "record: " + i); - } - } - } - } - - private void checkUnionListType(FieldReader reader) { - for (int i = 0; i < COUNT; i++) { - reader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - reader.next(); - FieldReader innerListReader = reader.reader(); - for (int k = 0; k < i % 13; k++) { - innerListReader.next(); - if (k % 2 == 0) { - assertEquals(k, innerListReader.reader().readInteger().intValue(), "record: " + i); - } else { - assertEquals(k, innerListReader.reader().readLong().longValue(), "record: " + i); - } - } - } - } - } - - private static void createListTypeVectorWithMapType(FieldWriter writer) { - MapWriter innerMapWriter = writer.map(true); - for (int i = 0; i < COUNT; i++) { - writer.startList(); - for (int j = 0; j < i % 7; j++) { - innerMapWriter.startMap(); - for (int k = 0; k < i % 13; k++) { - innerMapWriter.startEntry(); - innerMapWriter.key().integer().writeInt(k); - if (k % 2 == 0) { - innerMapWriter.value().bigInt().writeBigInt(k); - } - innerMapWriter.endEntry(); - } - innerMapWriter.endMap(); - } 
- writer.endList(); - } - } - - private void checkListTypeMap(FieldReader reader) { - for (int i = 0; i < COUNT; i++) { - reader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - reader.next(); - UnionMapReader mapReader = (UnionMapReader) reader.reader(); - for (int k = 0; k < i % 13; k++) { - mapReader.next(); - assertEquals(k, mapReader.key().readInteger().intValue(), "record key: " + i); - if (k % 2 == 0) { - assertEquals(k, mapReader.value().readLong().longValue(), "record value: " + i); - } else { - assertNull(mapReader.value().readLong(), "record value: " + i); - } - } - } - } - } - - /* Test Cases */ - - private void createListTypeVectorWithFixedSizeBinaryType( - FieldWriter writer, List buffers) { - for (int i = 0; i < COUNT; i++) { - writer.startList(); - for (int j = 0; j < i % 7; j++) { - if (j % 2 == 0) { - writer.writeNull(); - } else { - ArrowBuf buf = allocator.buffer(4); - buf.setInt(0, j); - FixedSizeBinaryHolder holder = new FixedSizeBinaryHolder(); - holder.byteWidth = 4; - holder.buffer = buf; - writer.fixedSizeBinary().write(holder); - buffers.add(buf); - } - } - writer.endList(); - } - } - - private void checkListTypeVectorWithFixedSizeBinaryType(FieldReader reader) { - for (int i = 0; i < COUNT; i++) { - reader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - reader.next(); - if (j % 2 == 0) { - assertFalse(reader.reader().isSet(), "index is set: " + j); - } else { - NullableFixedSizeBinaryHolder actual = new NullableFixedSizeBinaryHolder(); - reader.reader().read(actual); - assertEquals(j, actual.buffer.getInt(0)); - assertEquals(4, actual.byteWidth); - } - } - } - } - - @Test - public void listScalarType() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - createListTypeVectorWithScalarType(listWriter); - listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - // validate - 
checkListTypeVectorWithScalarType(listReader); - } - } - - @Test - public void listViewScalarType() { - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); - createListTypeVectorWithScalarType(listViewWriter); - listViewWriter.setValueCount(COUNT); - UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); - // validate - checkListTypeVectorWithScalarType(listViewReader); - } - } - - @Test - public void testListScalarNull() { - /* Write to an integer list vector - * each list of size 8 - * and having its data values alternating between null and a non-null. - * Read and verify - */ - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - createListTypeVectorWithScalarNull(listWriter); - listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - checkListTypeVectorWithScalarNull(listReader); - } - } - - @Test - public void testListViewScalarNull() { - /* Write to an integer list vector - * each list of size 8 - * and having its data values alternating between null and a non-null. 
- * Read and verify - */ - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); - createListTypeVectorWithScalarNull(listViewWriter); - listViewWriter.setValueCount(COUNT); - UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); - checkListTypeVectorWithScalarNull(listViewReader); - } - } - - @Test - public void listDecimalType() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - DecimalHolder holder = new DecimalHolder(); - createListTypeVectorWithDecimalType(listWriter, holder); - listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - checkListTypeVectorWithDecimalType(listReader); - holder.buffer.close(); - } - } - - @Test - public void listViewDecimalType() { - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); - DecimalHolder holder = new DecimalHolder(); - createListTypeVectorWithDecimalType(listViewWriter, holder); - listViewWriter.setValueCount(COUNT); - UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); - checkListTypeVectorWithDecimalType(listViewReader); - holder.buffer.close(); - } - } - - @Test - public void listTimeStampMilliTZType() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - createListTypeVectorWithTimeStampMilliTZType(listWriter); - listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - checkListTypeVectorWithTimeStampMilliTZType(listReader); - } - } - - @Test - public void 
listViewTimeStampMilliTZType() { - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); - createListTypeVectorWithTimeStampMilliTZType(listViewWriter); - listViewWriter.setValueCount(COUNT); - UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); - checkListTypeVectorWithTimeStampMilliTZType(listViewReader); - } - } - - @Test - public void listDurationType() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - createListTypeVectorWithDurationType(listWriter); - listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - checkListTypeVectorWithDurationType(listReader); - } - } - - @Test - public void listViewDurationType() { - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); - createListTypeVectorWithDurationType(listViewWriter); - listViewWriter.setValueCount(COUNT); - UnionListViewReader listReader = new UnionListViewReader(listViewVector); - checkListTypeVectorWithDurationType(listReader); - } - } - - @Test - public void listFixedSizeBinaryType() throws Exception { - List buffers = new ArrayList<>(); - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - createListTypeVectorWithFixedSizeBinaryType(listWriter, buffers); - listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - checkListTypeVectorWithFixedSizeBinaryType(listReader); - } - AutoCloseables.close(buffers); - } - - @Test - public void listViewFixedSizeBinaryType() throws Exception { - List buffers 
= new ArrayList<>(); - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); - createListTypeVectorWithFixedSizeBinaryType(listViewWriter, buffers); - listViewWriter.setValueCount(COUNT); - UnionListViewReader listReader = new UnionListViewReader(listViewVector); - checkListTypeVectorWithFixedSizeBinaryType(listReader); - } - AutoCloseables.close(buffers); - } - - @Test - public void listScalarTypeNullable() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - createScalarTypeVectorWithNullableType(listWriter); - listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - checkScalarTypeVectorWithNullableType(listReader); - } - } - - @Test - public void listViewScalarTypeNullable() { - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); - createScalarTypeVectorWithNullableType(listViewWriter); - listViewWriter.setValueCount(COUNT); - UnionListViewReader listReader = new UnionListViewReader(listViewVector); - checkScalarTypeVectorWithNullableType(listReader); - } - } - - @Test - public void listStructType() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listViewWriter = new UnionListWriter(listVector); - StructWriter structWriter = listViewWriter.struct(); - createListTypeVectorWithStructType(listViewWriter, structWriter); - listViewWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - checkListTypeVectorWithStructType(listReader); - } - } - - @Test - public void listViewStructType() { - try (ListViewVector listViewVector = 
ListViewVector.empty("listview", allocator)) { - listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); - StructWriter structWriter = listViewWriter.struct(); - createListTypeVectorWithStructType(listViewWriter, structWriter); - listViewWriter.setValueCount(COUNT); - UnionListViewReader listReader = new UnionListViewReader(listViewVector); - checkListTypeVectorWithStructType(listReader); - } - } - - @Test - public void listListType() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - for (int i = 0; i < COUNT; i++) { - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { - ListWriter innerListWriter = listWriter.list(); - innerListWriter.startList(); - for (int k = 0; k < i % 13; k++) { - innerListWriter.integer().writeInt(k); - } - innerListWriter.endList(); - } - listWriter.endList(); - } - listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - checkListOfListTypes(listReader); - } - } - - @Test - public void listViewListType() { - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); - for (int i = 0; i < COUNT; i++) { - listViewWriter.startListView(); - for (int j = 0; j < i % 7; j++) { - ListWriter innerListWriter = listViewWriter.listView(); - innerListWriter.startListView(); - for (int k = 0; k < i % 13; k++) { - innerListWriter.integer().writeInt(k); - } - innerListWriter.endListView(); - } - listViewWriter.endListView(); - } - listViewWriter.setValueCount(COUNT); - UnionListViewReader listReader = new UnionListViewReader(listViewVector); - checkListOfListTypes(listReader); - } - } - - /** - * This test is similar to {@link #listListType()} but we get the inner list writer once at the - * beginning. 
- */ - @Test - public void listListType2() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - ListWriter innerListWriter = listWriter.list(); - for (int i = 0; i < COUNT; i++) { - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { - innerListWriter.startList(); - for (int k = 0; k < i % 13; k++) { - innerListWriter.integer().writeInt(k); - } - innerListWriter.endList(); - } - listWriter.endList(); - } - listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - checkListOfListTypes(listReader); - } - } - - @Test - public void listViewListType2() { - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); - ListWriter innerListWriter = listViewWriter.list(); - for (int i = 0; i < COUNT; i++) { - listViewWriter.startListView(); - for (int j = 0; j < i % 7; j++) { - innerListWriter.startListView(); - for (int k = 0; k < i % 13; k++) { - innerListWriter.integer().writeInt(k); - } - innerListWriter.endListView(); - } - listViewWriter.endListView(); - } - listViewWriter.setValueCount(COUNT); - UnionListViewReader listReader = new UnionListViewReader(listViewVector); - checkListOfListTypes(listReader); - } - } - - @Test - public void unionListListType() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - for (int i = 0; i < COUNT; i++) { - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { - ListWriter innerListWriter = listWriter.list(); - innerListWriter.startList(); - for (int k = 0; k < i % 13; k++) { - if (k % 2 == 0) { - innerListWriter.integer().writeInt(k); - } else { - innerListWriter.bigInt().writeBigInt(k); - } - } - 
innerListWriter.endList(); - } - listWriter.endList(); - } - listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - checkUnionListType(listReader); - } - } - - @Test - public void unionListViewListType() { - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); - for (int i = 0; i < COUNT; i++) { - listViewWriter.startList(); - for (int j = 0; j < i % 7; j++) { - ListWriter innerListWriter = listViewWriter.listView(); - innerListWriter.startListView(); - for (int k = 0; k < i % 13; k++) { - if (k % 2 == 0) { - innerListWriter.integer().writeInt(k); - } else { - innerListWriter.bigInt().writeBigInt(k); - } - } - innerListWriter.endListView(); - } - listViewWriter.endListView(); - } - listViewWriter.setValueCount(COUNT); - UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); - checkUnionListType(listViewReader); - } - } - - /** - * This test is similar to {@link #unionListListType()} but we get the inner list writer once at - * the beginning. 
- */ - @Test - public void unionListListType2() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - ListWriter innerListWriter = listWriter.listView(); - for (int i = 0; i < COUNT; i++) { - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { - innerListWriter.startList(); - for (int k = 0; k < i % 13; k++) { - if (k % 2 == 0) { - innerListWriter.integer().writeInt(k); - } else { - innerListWriter.bigInt().writeBigInt(k); - } - } - innerListWriter.endList(); - } - listWriter.endList(); - } - listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - checkUnionListType(listReader); - } - } - - /** - * This test is similar to {@link #unionListViewListType()} but we get the inner list writer once - * at the beginning. - */ - @Test - public void unionListViewListType2() { - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); - ListWriter innerListWriter = listViewWriter.listView(); - for (int i = 0; i < COUNT; i++) { - listViewWriter.startListView(); - for (int j = 0; j < i % 7; j++) { - innerListWriter.startListView(); - for (int k = 0; k < i % 13; k++) { - if (k % 2 == 0) { - innerListWriter.integer().writeInt(k); - } else { - innerListWriter.bigInt().writeBigInt(k); - } - } - innerListWriter.endListView(); - } - listViewWriter.endListView(); - } - listViewWriter.setValueCount(COUNT); - UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); - checkUnionListType(listViewReader); - } - } - - @Test - public void testListMapType() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - - createListTypeVectorWithMapType(listWriter); - 
listWriter.setValueCount(COUNT); - UnionListReader listReader = new UnionListReader(listVector); - checkListTypeMap(listReader); - // Verify that the map vector has keysSorted = true - MapVector mapVector = (MapVector) listVector.getDataVector(); - ArrowType arrowType = mapVector.getField().getFieldType().getType(); - assertTrue(((ArrowType.Map) arrowType).getKeysSorted()); - } - } - - @Test - public void testListViewMapType() { - try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { - listViewVector.allocateNew(); - UnionListViewWriter listViewWriter = new UnionListViewWriter(listViewVector); - - createListTypeVectorWithMapType(listViewWriter); - listViewWriter.setValueCount(COUNT); - UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); - checkListTypeMap(listViewReader); - // Verify that the map vector has keysSorted = true - MapVector mapVector = (MapVector) listViewVector.getDataVector(); - ArrowType arrowType = mapVector.getField().getFieldType().getType(); - assertTrue(((ArrowType.Map) arrowType).getKeysSorted()); - } - } - - @Test - public void simpleUnion() throws Exception { - List bufs = new ArrayList(); - UnionVector vector = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null); - UnionWriter unionWriter = new UnionWriter(vector); - unionWriter.allocate(); - for (int i = 0; i < COUNT; i++) { - unionWriter.setPosition(i); - if (i % 5 == 0) { - unionWriter.writeInt(i); - } else if (i % 5 == 1) { - TimeStampMilliTZHolder holder = new TimeStampMilliTZHolder(); - holder.value = (long) i; - holder.timezone = "AsdfTimeZone"; - unionWriter.write(holder); - } else if (i % 5 == 2) { - DurationHolder holder = new DurationHolder(); - holder.value = (long) i; - holder.unit = TimeUnit.NANOSECOND; - unionWriter.write(holder); - } else if (i % 5 == 3) { - FixedSizeBinaryHolder holder = new FixedSizeBinaryHolder(); - ArrowBuf buf = allocator.buffer(4); - buf.setInt(0, i); - 
holder.byteWidth = 4; - holder.buffer = buf; - unionWriter.write(holder); - bufs.add(buf); - } else { - unionWriter.writeFloat4((float) i); - } - } - vector.setValueCount(COUNT); - UnionReader unionReader = new UnionReader(vector); - for (int i = 0; i < COUNT; i++) { - unionReader.setPosition(i); - if (i % 5 == 0) { - assertEquals(i, unionReader.readInteger().intValue()); - } else if (i % 5 == 1) { - NullableTimeStampMilliTZHolder holder = new NullableTimeStampMilliTZHolder(); - unionReader.read(holder); - assertEquals(i, holder.value); - assertEquals("AsdfTimeZone", holder.timezone); - } else if (i % 5 == 2) { - NullableDurationHolder holder = new NullableDurationHolder(); - unionReader.read(holder); - assertEquals(i, holder.value); - assertEquals(TimeUnit.NANOSECOND, holder.unit); - } else if (i % 5 == 3) { - NullableFixedSizeBinaryHolder holder = new NullableFixedSizeBinaryHolder(); - unionReader.read(holder); - assertEquals(i, holder.buffer.getInt(0)); - assertEquals(4, holder.byteWidth); - } else { - assertEquals((float) i, unionReader.readFloat(), 1e-12); - } - } - vector.close(); - AutoCloseables.close(bufs); - } - - @Test - public void promotableWriter() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - for (int i = 0; i < 100; i++) { - BigIntWriter bigIntWriter = rootWriter.bigInt("a"); - bigIntWriter.setPosition(i); - bigIntWriter.writeBigInt(i); - } - Field field = parent.getField().getChildren().get(0).getChildren().get(0); - assertEquals("a", field.getName()); - assertEquals(Int.TYPE_TYPE, field.getType().getTypeID()); - Int intType = (Int) field.getType(); - - assertEquals(64, intType.getBitWidth()); - assertTrue(intType.getIsSigned()); - for (int i = 100; i < 200; i++) { - VarCharWriter varCharWriter = rootWriter.varChar("a"); - varCharWriter.setPosition(i); - byte[] bytes = 
Integer.toString(i).getBytes(StandardCharsets.UTF_8); - ArrowBuf tempBuf = allocator.buffer(bytes.length); - tempBuf.setBytes(0, bytes); - varCharWriter.writeVarChar(0, bytes.length, tempBuf); - tempBuf.close(); - } - field = parent.getField().getChildren().get(0).getChildren().get(0); - assertEquals("a", field.getName()); - assertEquals(Union.TYPE_TYPE, field.getType().getTypeID()); - assertEquals(Int.TYPE_TYPE, field.getChildren().get(0).getType().getTypeID()); - assertEquals(Utf8.TYPE_TYPE, field.getChildren().get(1).getType().getTypeID()); - StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); - for (int i = 0; i < 100; i++) { - rootReader.setPosition(i); - FieldReader reader = rootReader.reader("a"); - Long value = reader.readLong(); - assertNotNull(value, "index: " + i); - assertEquals(i, value.intValue()); - } - for (int i = 100; i < 200; i++) { - rootReader.setPosition(i); - FieldReader reader = rootReader.reader("a"); - Text value = reader.readText(); - assertEquals(Integer.toString(i), value.toString()); - } - } - } - - /** Even without writing to the writer, the union schema is created correctly. 
*/ - @Test - public void promotableWriterSchema() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - rootWriter.bigInt("a"); - rootWriter.varChar("a"); - - Field field = parent.getField().getChildren().get(0).getChildren().get(0); - assertEquals("a", field.getName()); - assertEquals(ArrowTypeID.Union, field.getType().getTypeID()); - - assertEquals(ArrowTypeID.Int, field.getChildren().get(0).getType().getTypeID()); - Int intType = (Int) field.getChildren().get(0).getType(); - assertEquals(64, intType.getBitWidth()); - assertTrue(intType.getIsSigned()); - assertEquals(ArrowTypeID.Utf8, field.getChildren().get(1).getType().getTypeID()); - } - } - - private Set getFieldNames(List fields) { - Set fieldNames = new HashSet<>(); - for (Field field : fields) { - fieldNames.add(field.getName()); - if (!field.getChildren().isEmpty()) { - for (String name : getFieldNames(field.getChildren())) { - fieldNames.add(field.getName() + "::" + name); - } - } - } - return fieldNames; - } - - @Test - public void structWriterMixedCaseFieldNames() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - // test case-sensitive StructWriter - ComplexWriter writer = new ComplexWriterImpl("rootCaseSensitive", parent, false, true); - StructWriter rootWriterCaseSensitive = writer.rootAsStruct(); - rootWriterCaseSensitive.bigInt("int_field"); - rootWriterCaseSensitive.bigInt("Int_Field"); - rootWriterCaseSensitive.float4("float_field"); - rootWriterCaseSensitive.float4("Float_Field"); - StructWriter structFieldWriterCaseSensitive = rootWriterCaseSensitive.struct("struct_field"); - structFieldWriterCaseSensitive.varChar("char_field"); - structFieldWriterCaseSensitive.varChar("Char_Field"); - ListWriter listFieldWriterCaseSensitive = rootWriterCaseSensitive.list("list_field"); - StructWriter 
listStructFieldWriterCaseSensitive = listFieldWriterCaseSensitive.struct(); - listStructFieldWriterCaseSensitive.bit("bit_field"); - listStructFieldWriterCaseSensitive.bit("Bit_Field"); - - List fieldsCaseSensitive = parent.getField().getChildren().get(0).getChildren(); - Set fieldNamesCaseSensitive = getFieldNames(fieldsCaseSensitive); - assertEquals(11, fieldNamesCaseSensitive.size()); - assertTrue(fieldNamesCaseSensitive.contains("int_field")); - assertTrue(fieldNamesCaseSensitive.contains("Int_Field")); - assertTrue(fieldNamesCaseSensitive.contains("float_field")); - assertTrue(fieldNamesCaseSensitive.contains("Float_Field")); - assertTrue(fieldNamesCaseSensitive.contains("struct_field")); - assertTrue(fieldNamesCaseSensitive.contains("struct_field::char_field")); - assertTrue(fieldNamesCaseSensitive.contains("struct_field::Char_Field")); - assertTrue(fieldNamesCaseSensitive.contains("list_field")); - assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$")); - assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field")); - assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::Bit_Field")); - - // test case-insensitive StructWriter - ComplexWriter writerCaseInsensitive = - new ComplexWriterImpl("rootCaseInsensitive", parent, false, false); - StructWriter rootWriterCaseInsensitive = writerCaseInsensitive.rootAsStruct(); - - rootWriterCaseInsensitive.bigInt("int_field"); - rootWriterCaseInsensitive.bigInt("Int_Field"); - rootWriterCaseInsensitive.float4("float_field"); - rootWriterCaseInsensitive.float4("Float_Field"); - StructWriter structFieldWriterCaseInsensitive = - rootWriterCaseInsensitive.struct("struct_field"); - structFieldWriterCaseInsensitive.varChar("char_field"); - structFieldWriterCaseInsensitive.varChar("Char_Field"); - ListWriter listFieldWriterCaseInsensitive = rootWriterCaseInsensitive.list("list_field"); - StructWriter listStructFieldWriterCaseInsensitive = listFieldWriterCaseInsensitive.struct(); - 
listStructFieldWriterCaseInsensitive.bit("bit_field"); - listStructFieldWriterCaseInsensitive.bit("Bit_Field"); - - List fieldsCaseInsensitive = parent.getField().getChildren().get(1).getChildren(); - Set fieldNamesCaseInsensitive = getFieldNames(fieldsCaseInsensitive); - assertEquals(7, fieldNamesCaseInsensitive.size()); - assertTrue(fieldNamesCaseInsensitive.contains("int_field")); - assertTrue(fieldNamesCaseInsensitive.contains("float_field")); - assertTrue(fieldNamesCaseInsensitive.contains("struct_field")); - assertTrue(fieldNamesCaseInsensitive.contains("struct_field::char_field")); - assertTrue(fieldNamesCaseSensitive.contains("list_field")); - assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$")); - assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field")); - } - } - - @Test - public void timeStampSecWriter() throws Exception { - // test values - final long expectedSecs = 981173106L; - final LocalDateTime expectedSecDateTime = LocalDateTime.of(2001, 2, 3, 4, 5, 6, 0); - - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - // write - - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - - { - TimeStampSecWriter timeStampSecWriter = rootWriter.timeStampSec("sec"); - timeStampSecWriter.setPosition(0); - timeStampSecWriter.writeTimeStampSec(expectedSecs); - } - { - TimeStampSecTZWriter timeStampSecTZWriter = rootWriter.timeStampSecTZ("secTZ", "UTC"); - timeStampSecTZWriter.setPosition(1); - timeStampSecTZWriter.writeTimeStampSecTZ(expectedSecs); - } - // schema - List children = parent.getField().getChildren().get(0).getChildren(); - checkTimestampField(children.get(0), "sec"); - checkTimestampTZField(children.get(1), "secTZ", "UTC"); - - // read - StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); - { - FieldReader secReader = rootReader.reader("sec"); - secReader.setPosition(0); - LocalDateTime 
secDateTime = secReader.readLocalDateTime(); - assertEquals(expectedSecDateTime, secDateTime); - long secLong = secReader.readLong(); - assertEquals(expectedSecs, secLong); - } - { - FieldReader secTZReader = rootReader.reader("secTZ"); - secTZReader.setPosition(1); - long secTZLong = secTZReader.readLong(); - assertEquals(expectedSecs, secTZLong); - } - } - } - - @Test - public void timeStampMilliWriters() throws Exception { - // test values - final long expectedMillis = 981173106123L; - final LocalDateTime expectedMilliDateTime = - LocalDateTime.of(2001, 2, 3, 4, 5, 6, 123 * 1_000_000); - - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator); ) { - // write - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - { - TimeStampMilliWriter timeStampWriter = rootWriter.timeStampMilli("milli"); - timeStampWriter.setPosition(0); - timeStampWriter.writeTimeStampMilli(expectedMillis); - } - String tz = "UTC"; - { - TimeStampMilliTZWriter timeStampTZWriter = rootWriter.timeStampMilliTZ("milliTZ", tz); - timeStampTZWriter.setPosition(0); - timeStampTZWriter.writeTimeStampMilliTZ(expectedMillis); - } - // schema - List children = parent.getField().getChildren().get(0).getChildren(); - checkTimestampField(children.get(0), "milli"); - checkTimestampTZField(children.get(1), "milliTZ", tz); - - // read - StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); - - { - FieldReader milliReader = rootReader.reader("milli"); - milliReader.setPosition(0); - LocalDateTime milliDateTime = milliReader.readLocalDateTime(); - assertEquals(expectedMilliDateTime, milliDateTime); - long milliLong = milliReader.readLong(); - assertEquals(expectedMillis, milliLong); - } - { - FieldReader milliTZReader = rootReader.reader("milliTZ"); - milliTZReader.setPosition(0); - long milliTZLong = milliTZReader.readLong(); - assertEquals(expectedMillis, milliTZLong); - } - } - } - - 
private void checkTimestampField(Field field, String name) { - assertEquals(name, field.getName()); - assertEquals(ArrowType.Timestamp.TYPE_TYPE, field.getType().getTypeID()); - } - - private void checkTimestampTZField(Field field, String name, String tz) { - checkTimestampField(field, name); - assertEquals(tz, ((Timestamp) field.getType()).getTimezone()); - } - - @Test - public void timeStampMicroWriters() throws Exception { - // test values - final long expectedMicros = 981173106123456L; - final LocalDateTime expectedMicroDateTime = - LocalDateTime.of(2001, 2, 3, 4, 5, 6, 123456 * 1000); - - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - // write - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - - { - TimeStampMicroWriter timeStampMicroWriter = rootWriter.timeStampMicro("micro"); - timeStampMicroWriter.setPosition(0); - timeStampMicroWriter.writeTimeStampMicro(expectedMicros); - } - String tz = "UTC"; - { - TimeStampMicroTZWriter timeStampMicroWriter = rootWriter.timeStampMicroTZ("microTZ", tz); - timeStampMicroWriter.setPosition(1); - timeStampMicroWriter.writeTimeStampMicroTZ(expectedMicros); - } - - // schema - List children = parent.getField().getChildren().get(0).getChildren(); - checkTimestampField(children.get(0), "micro"); - checkTimestampTZField(children.get(1), "microTZ", tz); - - // read - StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); - { - FieldReader microReader = rootReader.reader("micro"); - microReader.setPosition(0); - LocalDateTime microDateTime = microReader.readLocalDateTime(); - assertEquals(expectedMicroDateTime, microDateTime); - long microLong = microReader.readLong(); - assertEquals(expectedMicros, microLong); - } - { - FieldReader microReader = rootReader.reader("microTZ"); - microReader.setPosition(1); - long microLong = microReader.readLong(); - assertEquals(expectedMicros, microLong); - } - } 
- } - - @Test - public void timeStampNanoWriters() throws Exception { - // test values - final long expectedNanos = 981173106123456789L; - final LocalDateTime expectedNanoDateTime = LocalDateTime.of(2001, 2, 3, 4, 5, 6, 123456789); - - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - // write - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - - { - TimeStampNanoWriter timeStampNanoWriter = rootWriter.timeStampNano("nano"); - timeStampNanoWriter.setPosition(0); - timeStampNanoWriter.writeTimeStampNano(expectedNanos); - } - String tz = "UTC"; - { - TimeStampNanoTZWriter timeStampNanoWriter = rootWriter.timeStampNanoTZ("nanoTZ", tz); - timeStampNanoWriter.setPosition(0); - timeStampNanoWriter.writeTimeStampNanoTZ(expectedNanos); - } - // schema - List children = parent.getField().getChildren().get(0).getChildren(); - checkTimestampField(children.get(0), "nano"); - checkTimestampTZField(children.get(1), "nanoTZ", tz); - // read - StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); - - { - FieldReader nanoReader = rootReader.reader("nano"); - nanoReader.setPosition(0); - LocalDateTime nanoDateTime = nanoReader.readLocalDateTime(); - assertEquals(expectedNanoDateTime, nanoDateTime); - long nanoLong = nanoReader.readLong(); - assertEquals(expectedNanos, nanoLong); - } - { - FieldReader nanoReader = rootReader.reader("nanoTZ"); - nanoReader.setPosition(0); - long nanoLong = nanoReader.readLong(); - assertEquals(expectedNanos, nanoLong); - NullableTimeStampNanoTZHolder h = new NullableTimeStampNanoTZHolder(); - nanoReader.read(h); - assertEquals(expectedNanos, h.value); - } - } - } - - @Test - public void fixedSizeBinaryWriters() throws Exception { - // test values - int numValues = 10; - int byteWidth = 9; - byte[][] values = new byte[numValues][byteWidth]; - for (int i = 0; i < numValues; i++) { - for (int j = 0; j < byteWidth; j++) { - 
values[i][j] = ((byte) i); - } - } - ArrowBuf[] bufs = new ArrowBuf[numValues]; - for (int i = 0; i < numValues; i++) { - bufs[i] = allocator.buffer(byteWidth); - bufs[i].setBytes(0, values[i]); - } - - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - // write - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - - String fieldName = "fixedSizeBinary"; - FixedSizeBinaryWriter fixedSizeBinaryWriter = - rootWriter.fixedSizeBinary(fieldName, byteWidth); - for (int i = 0; i < numValues; i++) { - fixedSizeBinaryWriter.setPosition(i); - fixedSizeBinaryWriter.writeFixedSizeBinary(bufs[i]); - } - - // schema - List children = parent.getField().getChildren().get(0).getChildren(); - assertEquals(fieldName, children.get(0).getName()); - assertEquals(ArrowType.FixedSizeBinary.TYPE_TYPE, children.get(0).getType().getTypeID()); - - // read - StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); - - FieldReader fixedSizeBinaryReader = rootReader.reader(fieldName); - for (int i = 0; i < numValues; i++) { - fixedSizeBinaryReader.setPosition(i); - byte[] readValues = fixedSizeBinaryReader.readByteArray(); - assertArrayEquals(values[i], readValues); - } - } - - AutoCloseables.close(bufs); - } - - @Test - public void complexCopierWithList() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - ListWriter listWriter = rootWriter.list("list"); - - StructWriter innerStructWriter = listWriter.struct(); - IntWriter outerIntWriter = listWriter.integer(); - rootWriter.start(); - listWriter.startList(); - outerIntWriter.writeInt(1); - outerIntWriter.writeInt(2); - innerStructWriter.start(); - IntWriter intWriter = innerStructWriter.integer("a"); - intWriter.writeInt(1); - innerStructWriter.end(); - 
innerStructWriter.start(); - intWriter = innerStructWriter.integer("a"); - intWriter.writeInt(2); - innerStructWriter.end(); - listWriter.endList(); - rootWriter.end(); - writer.setValueCount(1); - - StructVector structVector = (StructVector) parent.getChild("root"); - TransferPair tp = structVector.getTransferPair(allocator); - tp.splitAndTransfer(0, 1); - NonNullableStructVector toStructVector = (NonNullableStructVector) tp.getTo(); - JsonStringHashMap toMapValue = (JsonStringHashMap) toStructVector.getObject(0); - JsonStringArrayList object = (JsonStringArrayList) toMapValue.get("list"); - assertEquals(1, object.get(0)); - assertEquals(2, object.get(1)); - JsonStringHashMap innerStruct = (JsonStringHashMap) object.get(2); - assertEquals(1, innerStruct.get("a")); - innerStruct = (JsonStringHashMap) object.get(3); - assertEquals(2, innerStruct.get("a")); - toStructVector.close(); - } - } - - @Test - public void complexCopierWithListView() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - ListWriter listViewWriter = rootWriter.listView("listView"); - - StructWriter innerStructWriter = listViewWriter.struct(); - IntWriter outerIntWriter = listViewWriter.integer(); - rootWriter.start(); - listViewWriter.startListView(); - outerIntWriter.writeInt(1); - outerIntWriter.writeInt(2); - innerStructWriter.start(); - IntWriter intWriter = innerStructWriter.integer("a"); - intWriter.writeInt(1); - innerStructWriter.end(); - innerStructWriter.start(); - intWriter = innerStructWriter.integer("a"); - intWriter.writeInt(2); - innerStructWriter.end(); - listViewWriter.endListView(); - rootWriter.end(); - writer.setValueCount(1); - - StructVector structVector = (StructVector) parent.getChild("root"); - TransferPair tp = structVector.getTransferPair(allocator); - tp.splitAndTransfer(0, 1); - NonNullableStructVector 
toStructVector = (NonNullableStructVector) tp.getTo(); - JsonStringHashMap toMapValue = (JsonStringHashMap) toStructVector.getObject(0); - JsonStringArrayList object = (JsonStringArrayList) toMapValue.get("listView"); - assertEquals(1, object.get(0)); - assertEquals(2, object.get(1)); - JsonStringHashMap innerStruct = (JsonStringHashMap) object.get(2); - assertEquals(1, innerStruct.get("a")); - innerStruct = (JsonStringHashMap) object.get(3); - assertEquals(2, innerStruct.get("a")); - toStructVector.close(); - } - } - - @Test - public void testSingleStructWriter1() { - /* initialize a SingleStructWriter with empty StructVector and then lazily - * create all vectors with expected initialCapacity. - */ - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - SingleStructWriter singleStructWriter = new SingleStructWriter(parent); - - int initialCapacity = 1024; - singleStructWriter.setInitialCapacity(initialCapacity); - - IntWriter intWriter = singleStructWriter.integer("intField"); - BigIntWriter bigIntWriter = singleStructWriter.bigInt("bigIntField"); - Float4Writer float4Writer = singleStructWriter.float4("float4Field"); - Float8Writer float8Writer = singleStructWriter.float8("float8Field"); - ListWriter listWriter = singleStructWriter.list("listField"); - ListWriter listViewWriter = singleStructWriter.listView("listViewField"); - MapWriter mapWriter = singleStructWriter.map("mapField", false); - - int intValue = 100; - long bigIntValue = 10000; - float float4Value = 100.5f; - double float8Value = 100.375; - - for (int i = 0; i < initialCapacity; i++) { - singleStructWriter.start(); - - intWriter.writeInt(intValue + i); - bigIntWriter.writeBigInt(bigIntValue + (long) i); - float4Writer.writeFloat4(float4Value + (float) i); - float8Writer.writeFloat8(float8Value + (double) i); - - listWriter.setPosition(i); - listWriter.startList(); - listWriter.integer().writeInt(intValue + i); - listWriter.integer().writeInt(intValue + i + 
1); - listWriter.integer().writeInt(intValue + i + 2); - listWriter.integer().writeInt(intValue + i + 3); - listWriter.endList(); - - listViewWriter.setPosition(i); - listViewWriter.startListView(); - listViewWriter.integer().writeInt(intValue + i); - listViewWriter.integer().writeInt(intValue + i + 1); - listViewWriter.integer().writeInt(intValue + i + 2); - listViewWriter.integer().writeInt(intValue + i + 3); - listViewWriter.endListView(); - - mapWriter.setPosition(i); - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().integer().writeInt(intValue + i); - mapWriter.value().integer().writeInt(intValue + i + 1); - mapWriter.endEntry(); - mapWriter.startEntry(); - mapWriter.key().integer().writeInt(intValue + i + 2); - mapWriter.value().integer().writeInt(intValue + i + 3); - mapWriter.endEntry(); - mapWriter.endMap(); - - singleStructWriter.end(); - } - - IntVector intVector = (IntVector) parent.getChild("intField"); - BigIntVector bigIntVector = (BigIntVector) parent.getChild("bigIntField"); - Float4Vector float4Vector = (Float4Vector) parent.getChild("float4Field"); - Float8Vector float8Vector = (Float8Vector) parent.getChild("float8Field"); - - int capacity = singleStructWriter.getValueCapacity(); - assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2); - capacity = intVector.getValueCapacity(); - assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2); - capacity = bigIntVector.getValueCapacity(); - assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2); - capacity = float4Vector.getValueCapacity(); - assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2); - capacity = float8Vector.getValueCapacity(); - assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2); - - StructReader singleStructReader = new SingleStructReaderImpl(parent); - - IntReader intReader = singleStructReader.reader("intField"); - BigIntReader bigIntReader = 
singleStructReader.reader("bigIntField"); - Float4Reader float4Reader = singleStructReader.reader("float4Field"); - Float8Reader float8Reader = singleStructReader.reader("float8Field"); - UnionListReader listReader = (UnionListReader) singleStructReader.reader("listField"); - UnionListViewReader listViewReader = - (UnionListViewReader) singleStructReader.reader("listViewField"); - UnionMapReader mapReader = (UnionMapReader) singleStructReader.reader("mapField"); - - for (int i = 0; i < initialCapacity; i++) { - intReader.setPosition(i); - bigIntReader.setPosition(i); - float4Reader.setPosition(i); - float8Reader.setPosition(i); - listReader.setPosition(i); - listViewReader.setPosition(i); - mapReader.setPosition(i); - - assertEquals(intValue + i, intReader.readInteger().intValue()); - assertEquals(bigIntValue + (long) i, bigIntReader.readLong().longValue()); - assertEquals(float4Value + (float) i, float4Reader.readFloat().floatValue(), 0); - assertEquals(float8Value + (double) i, float8Reader.readDouble().doubleValue(), 0); - - for (int j = 0; j < 4; j++) { - listReader.next(); - assertEquals(intValue + i + j, listReader.reader().readInteger().intValue()); - } - - for (int j = 0; j < 4; j++) { - listViewReader.next(); - assertEquals(intValue + i + j, listViewReader.reader().readInteger().intValue()); - } - - for (int k = 0; k < 4; k += 2) { - mapReader.next(); - assertEquals(intValue + k + i, mapReader.key().readInteger().intValue()); - assertEquals(intValue + k + i + 1, mapReader.value().readInteger().intValue()); - } - } - } - } - - @Test - public void testListWriterWithNulls() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.setInitialCapacity(COUNT); - listVector.allocateNew(); - listVector.getValidityBuffer().setOne(0, (int) listVector.getValidityBuffer().capacity()); - - UnionListWriter listWriter = listVector.getWriter(); - - // expected listVector : [[null], null, [2, 4], null, [null], null, [6, 12], ...] 
- createNullsWithListWriters(listWriter); - listVector.setValueCount(COUNT); - - UnionListReader listReader = new UnionListReader(listVector); - checkNullsWithListWriters(listReader); - } - } - - @Test - public void testListViewWriterWithNulls() { - try (ListViewVector listViewVector = ListViewVector.empty("listView", allocator)) { - listViewVector.setInitialCapacity(COUNT); - listViewVector.allocateNew(); - listViewVector - .getValidityBuffer() - .setOne(0, (int) listViewVector.getValidityBuffer().capacity()); - - UnionListViewWriter listWriter = listViewVector.getWriter(); - - // expected listVector : [[null], null, [2, 4], null, [null], null, [6, 12], ...] - createNullsWithListWriters(listWriter); - listViewVector.setValueCount(COUNT); - - UnionListViewReader listReader = new UnionListViewReader(listViewVector); - checkNullsWithListWriters(listReader); - } - } - - @Test - public void testListOfListWriterWithNulls() { - try (ListVector listVector = ListVector.empty("listoflist", allocator)) { - listVector.setInitialCapacity(COUNT); - listVector.allocateNew(); - listVector.getValidityBuffer().setOne(0, (int) listVector.getValidityBuffer().capacity()); - - UnionListWriter listWriter = listVector.getWriter(); - - // create list : [ [null], null, [[null, 2, 4]], null, [null], null, [[null, 6, 12]], ... 
] - for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); - if (i % 2 == 0) { - listWriter.startList(); - if (i % 4 == 0) { - listWriter.list().writeNull(); - } else { - listWriter.list().startList(); - listWriter.list().integer().writeNull(); - listWriter.list().integer().writeInt(i); - listWriter.list().integer().writeInt(i * 2); - listWriter.list().endList(); - } - listWriter.endList(); - } else { - listWriter.writeNull(); - } - } - listVector.setValueCount(COUNT); - - UnionListReader listReader = new UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - if (i % 2 == 0) { - assertTrue(listReader.isSet()); - listReader.next(); - if (i % 4 == 0) { - assertFalse(listReader.reader().isSet()); - } else { - listReader.reader().next(); - assertFalse(listReader.reader().reader().isSet()); - listReader.reader().next(); - assertEquals(i, listReader.reader().reader().readInteger().intValue()); - listReader.reader().next(); - assertEquals(i * 2, listReader.reader().reader().readInteger().intValue()); - } - } else { - assertFalse(listReader.isSet()); - } - } - } - } - - @Test - public void testListViewOfListViewWriterWithNulls() { - try (ListViewVector listViewVector = ListViewVector.empty("listViewoflistView", allocator)) { - listViewVector.setInitialCapacity(COUNT); - listViewVector.allocateNew(); - listViewVector - .getValidityBuffer() - .setOne(0, (int) listViewVector.getValidityBuffer().capacity()); - - UnionListViewWriter listViewWriter = listViewVector.getWriter(); - - // create list : [ [null], null, [[null, 2, 4]], null, [null], null, [[null, 6, 12]], ... 
] - for (int i = 0; i < COUNT; i++) { - listViewWriter.setPosition(i); - if (i % 2 == 0) { - listViewWriter.startListView(); - if (i % 4 == 0) { - listViewWriter.listView().writeNull(); - } else { - listViewWriter.listView().startListView(); - listViewWriter.listView().integer().writeNull(); - listViewWriter.listView().integer().writeInt(i); - listViewWriter.listView().integer().writeInt(i * 2); - listViewWriter.listView().endListView(); - } - listViewWriter.endListView(); - } else { - listViewWriter.writeNull(); - } - } - listViewVector.setValueCount(COUNT); - - UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); - for (int i = 0; i < COUNT; i++) { - listViewReader.setPosition(i); - if (i % 2 == 0) { - assertTrue(listViewReader.isSet()); - listViewReader.next(); - if (i % 4 == 0) { - assertFalse(listViewReader.reader().isSet()); - } else { - listViewReader.reader().next(); - assertFalse(listViewReader.reader().reader().isSet()); - listViewReader.reader().next(); - assertEquals(i, listViewReader.reader().reader().readInteger().intValue()); - listViewReader.reader().next(); - assertEquals(i * 2, listViewReader.reader().reader().readInteger().intValue()); - } - } else { - assertFalse(listViewReader.isSet()); - } - } - } - } - - @Test - public void testListOfListOfListWriterWithNulls() { - try (ListVector listVector = ListVector.empty("listoflistoflist", allocator)) { - listVector.setInitialCapacity(COUNT); - listVector.allocateNew(); - listVector.getValidityBuffer().setOne(0, (int) listVector.getValidityBuffer().capacity()); - - UnionListWriter listWriter = listVector.getWriter(); - - // create list : [ null, [null], [[null]], [[[null, 1, 2]]], null, [null], ... 
- for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); - if (i % 4 == 0) { - listWriter.writeNull(); - } else { - listWriter.startList(); - if (i % 4 == 1) { - listWriter.list().writeNull(); - } else if (i % 4 == 2) { - listWriter.list().startList(); - listWriter.list().list().writeNull(); - listWriter.list().endList(); - } else { - listWriter.list().startList(); - listWriter.list().list().startList(); - listWriter.list().list().integer().writeNull(); - listWriter.list().list().integer().writeInt(i); - listWriter.list().list().integer().writeInt(i * 2); - listWriter.list().list().endList(); - listWriter.list().endList(); - } - listWriter.endList(); - } - } - listVector.setValueCount(COUNT); - - UnionListReader listReader = new UnionListReader(listVector); - for (int i = 0; i < COUNT; i++) { - listReader.setPosition(i); - if (i % 4 == 0) { - assertFalse(listReader.isSet()); - } else { - assertTrue(listReader.isSet()); - listReader.next(); - if (i % 4 == 1) { - assertFalse(listReader.reader().isSet()); - } else if (i % 4 == 2) { - listReader.reader().next(); - assertFalse(listReader.reader().reader().isSet()); - } else { - listReader.reader().next(); - listReader.reader().reader().next(); - assertFalse(listReader.reader().reader().reader().isSet()); - listReader.reader().reader().next(); - assertEquals(i, listReader.reader().reader().reader().readInteger().intValue()); - listReader.reader().reader().next(); - assertEquals(i * 2, listReader.reader().reader().reader().readInteger().intValue()); - } - } - } - } - } - - @Test - public void testListViewOfListViewOfListViewWriterWithNulls() { - try (ListViewVector listViewVector = - ListViewVector.empty("listViewoflistViewoflistView", allocator)) { - listViewVector.setInitialCapacity(COUNT); - listViewVector.allocateNew(); - listViewVector - .getValidityBuffer() - .setOne(0, (int) listViewVector.getValidityBuffer().capacity()); - - UnionListViewWriter listViewWriter = listViewVector.getWriter(); - - // create 
list : [ null, [null], [[null]], [[[null, 1, 2]]], null, [null], ... - for (int i = 0; i < COUNT; i++) { - listViewWriter.setPosition(i); - if (i % 4 == 0) { - listViewWriter.writeNull(); - } else { - listViewWriter.startListView(); - if (i % 4 == 1) { - listViewWriter.listView().writeNull(); - } else if (i % 4 == 2) { - listViewWriter.listView().startListView(); - listViewWriter.listView().listView().writeNull(); - listViewWriter.listView().endListView(); - } else { - listViewWriter.listView().startListView(); - listViewWriter.listView().listView().startListView(); - listViewWriter.listView().listView().integer().writeNull(); - listViewWriter.listView().listView().integer().writeInt(i); - listViewWriter.listView().listView().integer().writeInt(i * 2); - listViewWriter.listView().listView().endListView(); - listViewWriter.listView().endListView(); - } - listViewWriter.endListView(); - } - } - listViewVector.setValueCount(COUNT); - - UnionListViewReader listViewReader = new UnionListViewReader(listViewVector); - for (int i = 0; i < COUNT; i++) { - listViewReader.setPosition(i); - if (i % 4 == 0) { - assertFalse(listViewReader.isSet()); - } else { - assertTrue(listViewReader.isSet()); - listViewReader.next(); - if (i % 4 == 1) { - assertFalse(listViewReader.reader().isSet()); - } else if (i % 4 == 2) { - listViewReader.reader().next(); - assertFalse(listViewReader.reader().reader().isSet()); - } else { - listViewReader.reader().next(); - listViewReader.reader().reader().next(); - assertFalse(listViewReader.reader().reader().reader().isSet()); - listViewReader.reader().reader().next(); - assertEquals(i, listViewReader.reader().reader().reader().readInteger().intValue()); - listViewReader.reader().reader().next(); - assertEquals(i * 2, listViewReader.reader().reader().reader().readInteger().intValue()); - } - } - } - } - } - - @Test - public void testStructOfList() { - try (StructVector structVector = StructVector.empty("struct1", allocator)) { - 
structVector.addOrGetList("childList1"); - NullableStructReaderImpl structReader = structVector.getReader(); - FieldReader childListReader = structReader.reader("childList1"); - assertNotNull(childListReader); - } - - try (StructVector structVector = StructVector.empty("struct2", allocator)) { - structVector.addOrGetList("childList2"); - NullableStructWriter structWriter = structVector.getWriter(); - structWriter.start(); - ListWriter listWriter = structWriter.list("childList2"); - listWriter.startList(); - listWriter.integer().writeInt(10); - listWriter.endList(); - structWriter.end(); - - NullableStructReaderImpl structReader = structVector.getReader(); - FieldReader childListReader = structReader.reader("childList2"); - int size = childListReader.size(); - assertEquals(1, size); - int data = childListReader.reader().readInteger(); - assertEquals(10, data); - } - - try (StructVector structVector = StructVector.empty("struct3", allocator)) { - structVector.addOrGetList("childList3"); - NullableStructWriter structWriter = structVector.getWriter(); - for (int i = 0; i < 5; ++i) { - structWriter.setPosition(i); - structWriter.start(); - ListWriter listWriter = structWriter.list("childList3"); - listWriter.startList(); - listWriter.integer().writeInt(i); - listWriter.endList(); - structWriter.end(); - } - - NullableStructReaderImpl structReader = structVector.getReader(); - structReader.setPosition(3); - FieldReader childListReader = structReader.reader("childList3"); - int size = childListReader.size(); - assertEquals(1, size); - int data = ((List) childListReader.readObject()).get(0); - assertEquals(3, data); - } - - try (StructVector structVector = StructVector.empty("struct4", allocator)) { - structVector.addOrGetList("childList4"); - NullableStructWriter structWriter = structVector.getWriter(); - for (int i = 0; i < 5; ++i) { - structWriter.setPosition(i); - structWriter.start(); - structWriter.writeNull(); - structWriter.end(); - } - - NullableStructReaderImpl 
structReader = structVector.getReader(); - structReader.setPosition(3); - FieldReader childListReader = structReader.reader("childList4"); - int size = childListReader.size(); - assertEquals(0, size); - } - } - - @Test - public void testMap() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent); - MapWriter mapWriter = writer.rootAsMap(false); - for (int i = 0; i < COUNT; i++) { - mapWriter.startMap(); - for (int j = 0; j < i % 7; j++) { - mapWriter.startEntry(); - if (j % 2 == 0) { - mapWriter.key().integer().writeInt(j); - mapWriter.value().integer().writeInt(j + 1); - } else { - IntHolder keyHolder = new IntHolder(); - keyHolder.value = j; - IntHolder valueHolder = new IntHolder(); - valueHolder.value = j + 1; - mapWriter.key().integer().write(keyHolder); - mapWriter.value().integer().write(valueHolder); - } - mapWriter.endEntry(); - } - mapWriter.endMap(); - } - writer.setValueCount(COUNT); - UnionMapReader mapReader = (UnionMapReader) new SingleStructReaderImpl(parent).reader("root"); - for (int i = 0; i < COUNT; i++) { - mapReader.setPosition(i); - for (int j = 0; j < i % 7; j++) { - mapReader.next(); - assertEquals(j, mapReader.key().readInteger().intValue()); - assertEquals(j + 1, mapReader.value().readInteger().intValue()); - } - } - } - } - - @Test - public void testMapWithNulls() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent); - MapWriter mapWriter = writer.rootAsMap(false); - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().integer().writeNull(); - mapWriter.value().integer().writeInt(1); - mapWriter.endEntry(); - mapWriter.endMap(); - writer.setValueCount(1); - UnionMapReader mapReader = (UnionMapReader) new SingleStructReaderImpl(parent).reader("root"); - assertNull(mapReader.key().readInteger()); - assertEquals(1, 
mapReader.value().readInteger().intValue()); - } - } - - @Test - public void testMapWithListKey() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent); - MapWriter mapWriter = writer.rootAsMap(false); - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().list().startList(); - for (int i = 0; i < 3; i++) { - mapWriter.key().list().integer().writeInt(i); - } - mapWriter.key().list().endList(); - mapWriter.value().integer().writeInt(1); - mapWriter.endEntry(); - mapWriter.endMap(); - writer.setValueCount(1); - UnionMapReader mapReader = (UnionMapReader) new SingleStructReaderImpl(parent).reader("root"); - mapReader.key().next(); - assertEquals(0, mapReader.key().reader().readInteger().intValue()); - mapReader.key().next(); - assertEquals(1, mapReader.key().reader().readInteger().intValue()); - mapReader.key().next(); - assertEquals(2, mapReader.key().reader().readInteger().intValue()); - assertEquals(1, mapReader.value().readInteger().intValue()); - } - } - - @Test - public void testMapWithStructKey() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent); - MapWriter mapWriter = writer.rootAsMap(false); - mapWriter.startMap(); - mapWriter.startEntry(); - mapWriter.key().struct().start(); - mapWriter.key().struct().integer("value1").writeInt(1); - mapWriter.key().struct().integer("value2").writeInt(2); - mapWriter.key().struct().end(); - mapWriter.value().integer().writeInt(1); - mapWriter.endEntry(); - mapWriter.endMap(); - writer.setValueCount(1); - UnionMapReader mapReader = (UnionMapReader) new SingleStructReaderImpl(parent).reader("root"); - assertEquals(1, mapReader.key().reader("value1").readInteger().intValue()); - assertEquals(2, mapReader.key().reader("value2").readInteger().intValue()); - assertEquals(1, 
mapReader.value().readInteger().intValue()); - } - } - - @Test - public void structWriterVarCharHelpers() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent, false, true); - StructWriter rootWriter = writer.rootAsStruct(); - rootWriter.start(); - rootWriter.setPosition(0); - rootWriter.varChar("c").writeVarChar(new Text("row1")); - rootWriter.setPosition(1); - rootWriter.varChar("c").writeVarChar("row2"); - rootWriter.end(); - - VarCharVector vector = - parent.getChild("root", StructVector.class).getChild("c", VarCharVector.class); - - assertEquals("row1", vector.getObject(0).toString()); - assertEquals("row2", vector.getObject(1).toString()); - } - } - - @Test - public void structWriterVarCharViewHelpers() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent, false, true); - StructWriter rootWriter = writer.rootAsStruct(); - rootWriter.start(); - rootWriter.setPosition(0); - rootWriter.viewVarChar("c").writeViewVarChar(new Text("row1")); - rootWriter.setPosition(1); - rootWriter.viewVarChar("c").writeViewVarChar("row2"); - rootWriter.end(); - - ViewVarCharVector vector = - parent.getChild("root", StructVector.class).getChild("c", ViewVarCharVector.class); - - assertEquals("row1", vector.getObject(0).toString()); - assertEquals("row2", vector.getObject(1).toString()); - } - } - - @Test - public void structWriterLargeVarCharHelpers() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent, false, true); - StructWriter rootWriter = writer.rootAsStruct(); - rootWriter.start(); - rootWriter.setPosition(0); - rootWriter.largeVarChar("c").writeLargeVarChar(new Text("row1")); - rootWriter.setPosition(1); - rootWriter.largeVarChar("c").writeLargeVarChar("row2"); - 
rootWriter.end(); - - LargeVarCharVector vector = - parent.getChild("root", StructVector.class).getChild("c", LargeVarCharVector.class); - - assertEquals("row1", vector.getObject(0).toString()); - assertEquals("row2", vector.getObject(1).toString()); - } - } - - @Test - public void structWriterVarBinaryHelpers() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent, false, true); - StructWriter rootWriter = writer.rootAsStruct(); - rootWriter.start(); - rootWriter.setPosition(0); - rootWriter.varBinary("c").writeVarBinary("row1".getBytes(StandardCharsets.UTF_8)); - rootWriter.setPosition(1); - rootWriter - .varBinary("c") - .writeVarBinary( - "row2".getBytes(StandardCharsets.UTF_8), - 0, - "row2".getBytes(StandardCharsets.UTF_8).length); - rootWriter.setPosition(2); - rootWriter - .varBinary("c") - .writeVarBinary(ByteBuffer.wrap("row3".getBytes(StandardCharsets.UTF_8))); - rootWriter.setPosition(3); - rootWriter - .varBinary("c") - .writeVarBinary( - ByteBuffer.wrap("row4".getBytes(StandardCharsets.UTF_8)), - 0, - "row4".getBytes(StandardCharsets.UTF_8).length); - rootWriter.end(); - - VarBinaryVector uv = - parent.getChild("root", StructVector.class).getChild("c", VarBinaryVector.class); - - assertEquals("row1", new String(uv.get(0), StandardCharsets.UTF_8)); - assertEquals("row2", new String(uv.get(1), StandardCharsets.UTF_8)); - assertEquals("row3", new String(uv.get(2), StandardCharsets.UTF_8)); - assertEquals("row4", new String(uv.get(3), StandardCharsets.UTF_8)); - } - } - - @Test - public void structWriterLargeVarBinaryHelpers() { - try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { - ComplexWriter writer = new ComplexWriterImpl("root", parent, false, true); - StructWriter rootWriter = writer.rootAsStruct(); - rootWriter.start(); - rootWriter.setPosition(0); - 
rootWriter.largeVarBinary("c").writeLargeVarBinary("row1".getBytes(StandardCharsets.UTF_8)); - rootWriter.setPosition(1); - rootWriter - .largeVarBinary("c") - .writeLargeVarBinary( - "row2".getBytes(StandardCharsets.UTF_8), - 0, - "row2".getBytes(StandardCharsets.UTF_8).length); - rootWriter.setPosition(2); - rootWriter - .largeVarBinary("c") - .writeLargeVarBinary(ByteBuffer.wrap("row3".getBytes(StandardCharsets.UTF_8))); - rootWriter.setPosition(3); - rootWriter - .largeVarBinary("c") - .writeLargeVarBinary( - ByteBuffer.wrap("row4".getBytes(StandardCharsets.UTF_8)), - 0, - "row4".getBytes(StandardCharsets.UTF_8).length); - rootWriter.end(); - - LargeVarBinaryVector uv = - parent.getChild("root", StructVector.class).getChild("c", LargeVarBinaryVector.class); - - assertEquals("row1", new String(uv.get(0), StandardCharsets.UTF_8)); - assertEquals("row2", new String(uv.get(1), StandardCharsets.UTF_8)); - assertEquals("row3", new String(uv.get(2), StandardCharsets.UTF_8)); - assertEquals("row4", new String(uv.get(3), StandardCharsets.UTF_8)); - } - } - - @Test - public void listVarCharHelpers() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - listWriter.startList(); - listWriter.writeVarChar("row1"); - listWriter.writeVarChar(new Text("row2")); - listWriter.endList(); - listWriter.setValueCount(1); - assertEquals("row1", listVector.getObject(0).get(0).toString()); - assertEquals("row2", listVector.getObject(0).get(1).toString()); - } - } - - @Test - public void listLargeVarCharHelpers() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - listWriter.startList(); - listWriter.writeLargeVarChar("row1"); - listWriter.writeLargeVarChar(new Text("row2")); - listWriter.endList(); - listWriter.setValueCount(1); - assertEquals("row1", 
listVector.getObject(0).get(0).toString()); - assertEquals("row2", listVector.getObject(0).get(1).toString()); - } - } - - @Test - public void listVarBinaryHelpers() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - listWriter.startList(); - listWriter.writeVarBinary("row1".getBytes(StandardCharsets.UTF_8)); - listWriter.writeVarBinary( - "row2".getBytes(StandardCharsets.UTF_8), - 0, - "row2".getBytes(StandardCharsets.UTF_8).length); - listWriter.writeVarBinary(ByteBuffer.wrap("row3".getBytes(StandardCharsets.UTF_8))); - listWriter.writeVarBinary( - ByteBuffer.wrap("row4".getBytes(StandardCharsets.UTF_8)), - 0, - "row4".getBytes(StandardCharsets.UTF_8).length); - listWriter.endList(); - listWriter.setValueCount(1); - assertEquals( - "row1", new String((byte[]) listVector.getObject(0).get(0), StandardCharsets.UTF_8)); - assertEquals( - "row2", new String((byte[]) listVector.getObject(0).get(1), StandardCharsets.UTF_8)); - assertEquals( - "row3", new String((byte[]) listVector.getObject(0).get(2), StandardCharsets.UTF_8)); - assertEquals( - "row4", new String((byte[]) listVector.getObject(0).get(3), StandardCharsets.UTF_8)); - } - } - - @Test - public void listLargeVarBinaryHelpers() { - try (ListVector listVector = ListVector.empty("list", allocator)) { - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - listWriter.startList(); - listWriter.writeLargeVarBinary("row1".getBytes(StandardCharsets.UTF_8)); - listWriter.writeLargeVarBinary( - "row2".getBytes(StandardCharsets.UTF_8), - 0, - "row2".getBytes(StandardCharsets.UTF_8).length); - listWriter.writeLargeVarBinary(ByteBuffer.wrap("row3".getBytes(StandardCharsets.UTF_8))); - listWriter.writeLargeVarBinary( - ByteBuffer.wrap("row4".getBytes(StandardCharsets.UTF_8)), - 0, - "row4".getBytes(StandardCharsets.UTF_8).length); - listWriter.endList(); - 
listWriter.setValueCount(1); - assertEquals( - "row1", new String((byte[]) listVector.getObject(0).get(0), StandardCharsets.UTF_8)); - assertEquals( - "row2", new String((byte[]) listVector.getObject(0).get(1), StandardCharsets.UTF_8)); - assertEquals( - "row3", new String((byte[]) listVector.getObject(0).get(2), StandardCharsets.UTF_8)); - assertEquals( - "row4", new String((byte[]) listVector.getObject(0).get(3), StandardCharsets.UTF_8)); - } - } - - @Test - public void unionWithVarCharAndBinaryHelpers() throws Exception { - try (UnionVector vector = - new UnionVector("union", allocator, /* field type */ null, /* call-back */ null)) { - UnionWriter unionWriter = new UnionWriter(vector); - unionWriter.allocate(); - unionWriter.start(); - unionWriter.setPosition(0); - unionWriter.writeVarChar("row1"); - unionWriter.setPosition(1); - unionWriter.writeVarChar(new Text("row2")); - unionWriter.setPosition(2); - unionWriter.writeLargeVarChar("row3"); - unionWriter.setPosition(3); - unionWriter.writeLargeVarChar(new Text("row4")); - unionWriter.setPosition(4); - unionWriter.writeVarBinary("row5".getBytes(StandardCharsets.UTF_8)); - unionWriter.setPosition(5); - unionWriter.writeVarBinary( - "row6".getBytes(StandardCharsets.UTF_8), - 0, - "row6".getBytes(StandardCharsets.UTF_8).length); - unionWriter.setPosition(6); - unionWriter.writeVarBinary(ByteBuffer.wrap("row7".getBytes(StandardCharsets.UTF_8))); - unionWriter.setPosition(7); - unionWriter.writeVarBinary( - ByteBuffer.wrap("row8".getBytes(StandardCharsets.UTF_8)), - 0, - "row8".getBytes(StandardCharsets.UTF_8).length); - unionWriter.setPosition(8); - unionWriter.writeLargeVarBinary("row9".getBytes(StandardCharsets.UTF_8)); - unionWriter.setPosition(9); - unionWriter.writeLargeVarBinary( - "row10".getBytes(StandardCharsets.UTF_8), - 0, - "row10".getBytes(StandardCharsets.UTF_8).length); - unionWriter.setPosition(10); - unionWriter.writeLargeVarBinary(ByteBuffer.wrap("row11".getBytes(StandardCharsets.UTF_8))); - 
unionWriter.setPosition(11); - unionWriter.writeLargeVarBinary( - ByteBuffer.wrap("row12".getBytes(StandardCharsets.UTF_8)), - 0, - "row12".getBytes(StandardCharsets.UTF_8).length); - unionWriter.end(); - - assertEquals("row1", new String(vector.getVarCharVector().get(0), StandardCharsets.UTF_8)); - assertEquals("row2", new String(vector.getVarCharVector().get(1), StandardCharsets.UTF_8)); - assertEquals( - "row3", new String(vector.getLargeVarCharVector().get(2), StandardCharsets.UTF_8)); - assertEquals( - "row4", new String(vector.getLargeVarCharVector().get(3), StandardCharsets.UTF_8)); - assertEquals("row5", new String(vector.getVarBinaryVector().get(4), StandardCharsets.UTF_8)); - assertEquals("row6", new String(vector.getVarBinaryVector().get(5), StandardCharsets.UTF_8)); - assertEquals("row7", new String(vector.getVarBinaryVector().get(6), StandardCharsets.UTF_8)); - assertEquals("row8", new String(vector.getVarBinaryVector().get(7), StandardCharsets.UTF_8)); - assertEquals( - "row9", new String(vector.getLargeVarBinaryVector().get(8), StandardCharsets.UTF_8)); - assertEquals( - "row10", new String(vector.getLargeVarBinaryVector().get(9), StandardCharsets.UTF_8)); - assertEquals( - "row11", new String(vector.getLargeVarBinaryVector().get(10), StandardCharsets.UTF_8)); - assertEquals( - "row12", new String(vector.getLargeVarBinaryVector().get(11), StandardCharsets.UTF_8)); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java deleted file mode 100644 index 5bb596270417c..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.complex.writer; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.nio.ByteBuffer; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.complex.impl.LargeVarBinaryWriterImpl; -import org.apache.arrow.vector.complex.impl.LargeVarCharWriterImpl; -import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl; -import org.apache.arrow.vector.complex.impl.VarCharWriterImpl; -import org.apache.arrow.vector.util.Text; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestSimpleWriter { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Integer.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testWriteByteArrayToVarBinary() throws Exception { - try (VarBinaryVector 
vector = new VarBinaryVector("test", allocator); - VarBinaryWriter writer = new VarBinaryWriterImpl(vector)) { - byte[] input = new byte[] {0x01, 0x02}; - writer.writeVarBinary(input); - byte[] result = vector.get(0); - assertArrayEquals(input, result); - } - } - - @Test - public void testWriteByteArrayWithOffsetToVarBinary() throws Exception { - try (VarBinaryVector vector = new VarBinaryVector("test", allocator); - VarBinaryWriter writer = new VarBinaryWriterImpl(vector)) { - byte[] input = new byte[] {0x01, 0x02}; - writer.writeVarBinary(input, 1, 1); - byte[] result = vector.get(0); - assertArrayEquals(new byte[] {0x02}, result); - } - } - - @Test - public void testWriteByteBufferToVarBinary() throws Exception { - try (VarBinaryVector vector = new VarBinaryVector("test", allocator); - VarBinaryWriter writer = new VarBinaryWriterImpl(vector)) { - byte[] input = new byte[] {0x01, 0x02}; - ByteBuffer buffer = ByteBuffer.wrap(input); - writer.writeVarBinary(buffer); - byte[] result = vector.get(0); - assertArrayEquals(input, result); - } - } - - @Test - public void testWriteByteBufferWithOffsetToVarBinary() throws Exception { - try (VarBinaryVector vector = new VarBinaryVector("test", allocator); - VarBinaryWriter writer = new VarBinaryWriterImpl(vector)) { - byte[] input = new byte[] {0x01, 0x02}; - ByteBuffer buffer = ByteBuffer.wrap(input); - writer.writeVarBinary(buffer, 1, 1); - byte[] result = vector.get(0); - assertArrayEquals(new byte[] {0x02}, result); - } - } - - @Test - public void testWriteByteArrayToLargeVarBinary() throws Exception { - try (LargeVarBinaryVector vector = new LargeVarBinaryVector("test", allocator); - LargeVarBinaryWriter writer = new LargeVarBinaryWriterImpl(vector)) { - byte[] input = new byte[] {0x01, 0x02}; - writer.writeLargeVarBinary(input); - byte[] result = vector.get(0); - assertArrayEquals(input, result); - } - } - - @Test - public void testWriteByteArrayWithOffsetToLargeVarBinary() throws Exception { - try 
(LargeVarBinaryVector vector = new LargeVarBinaryVector("test", allocator); - LargeVarBinaryWriter writer = new LargeVarBinaryWriterImpl(vector)) { - byte[] input = new byte[] {0x01, 0x02}; - writer.writeLargeVarBinary(input, 1, 1); - byte[] result = vector.get(0); - assertArrayEquals(new byte[] {0x02}, result); - } - } - - @Test - public void testWriteByteBufferToLargeVarBinary() throws Exception { - try (LargeVarBinaryVector vector = new LargeVarBinaryVector("test", allocator); - LargeVarBinaryWriter writer = new LargeVarBinaryWriterImpl(vector)) { - byte[] input = new byte[] {0x01, 0x02}; - ByteBuffer buffer = ByteBuffer.wrap(input); - writer.writeLargeVarBinary(buffer); - byte[] result = vector.get(0); - assertArrayEquals(input, result); - } - } - - @Test - public void testWriteByteBufferWithOffsetToLargeVarBinary() throws Exception { - try (LargeVarBinaryVector vector = new LargeVarBinaryVector("test", allocator); - LargeVarBinaryWriter writer = new LargeVarBinaryWriterImpl(vector)) { - byte[] input = new byte[] {0x01, 0x02}; - ByteBuffer buffer = ByteBuffer.wrap(input); - writer.writeLargeVarBinary(buffer, 1, 1); - byte[] result = vector.get(0); - assertArrayEquals(new byte[] {0x02}, result); - } - } - - @Test - public void testWriteStringToVarChar() throws Exception { - try (VarCharVector vector = new VarCharVector("test", allocator); - VarCharWriter writer = new VarCharWriterImpl(vector)) { - String input = "testInput"; - writer.writeVarChar(input); - String result = vector.getObject(0).toString(); - assertEquals(input, result); - } - } - - @Test - public void testWriteTextToVarChar() throws Exception { - try (VarCharVector vector = new VarCharVector("test", allocator); - VarCharWriter writer = new VarCharWriterImpl(vector)) { - String input = "testInput"; - writer.writeVarChar(new Text(input)); - String result = vector.getObject(0).toString(); - assertEquals(input, result); - } - } - - @Test - public void testWriteStringToLargeVarChar() throws Exception { 
- try (LargeVarCharVector vector = new LargeVarCharVector("test", allocator); - LargeVarCharWriter writer = new LargeVarCharWriterImpl(vector)) { - String input = "testInput"; - writer.writeLargeVarChar(input); - String result = vector.getObject(0).toString(); - assertEquals(input, result); - } - } - - @Test - public void testWriteTextToLargeVarChar() throws Exception { - try (LargeVarCharVector vector = new LargeVarCharVector("test", allocator); - LargeVarCharWriter writer = new LargeVarCharWriterImpl(vector)) { - String input = "testInput"; - writer.writeLargeVarChar(new Text(input)); - String result = vector.getObject(0).toString(); - assertEquals(input, result); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java deleted file mode 100644 index 281f050dfb662..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java +++ /dev/null @@ -1,889 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import static org.apache.arrow.vector.TestUtils.newVarCharVector; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.IOException; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.nio.charset.StandardCharsets; -import java.time.LocalDateTime; -import java.time.LocalTime; -import java.time.ZoneId; -import java.time.ZoneOffset; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.Collections2; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.complex.impl.UnionMapReader; -import 
org.apache.arrow.vector.complex.impl.UnionMapWriter; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.complex.writer.BigIntWriter; -import org.apache.arrow.vector.complex.writer.DateMilliWriter; -import org.apache.arrow.vector.complex.writer.Float4Writer; -import org.apache.arrow.vector.complex.writer.IntWriter; -import org.apache.arrow.vector.complex.writer.TimeMilliWriter; -import org.apache.arrow.vector.complex.writer.TimeStampMilliTZWriter; -import org.apache.arrow.vector.complex.writer.TimeStampMilliWriter; -import org.apache.arrow.vector.complex.writer.TimeStampNanoWriter; -import org.apache.arrow.vector.complex.writer.UInt1Writer; -import org.apache.arrow.vector.complex.writer.UInt2Writer; -import org.apache.arrow.vector.complex.writer.UInt4Writer; -import org.apache.arrow.vector.complex.writer.UInt8Writer; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryEncoder; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.util.JsonStringArrayList; -import org.apache.arrow.vector.util.Text; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Helps testing the file formats. 
*/ -public class BaseFileTest { - private static final Logger LOGGER = LoggerFactory.getLogger(BaseFileTest.class); - protected static final int COUNT = 10; - protected BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Integer.MAX_VALUE); - } - - @AfterEach - public void tearDown() { - allocator.close(); - } - - private static short[] uint1Values = new short[] {0, 255, 1, 128, 2}; - private static char[] uint2Values = - new char[] {0, Character.MAX_VALUE, 1, Short.MAX_VALUE * 2, 2}; - private static long[] uint4Values = - new long[] {0, Integer.MAX_VALUE + 1L, 1, Integer.MAX_VALUE * 2L, 2}; - private static BigInteger[] uint8Values = - new BigInteger[] { - BigInteger.valueOf(0), - BigInteger.valueOf(Long.MAX_VALUE).multiply(BigInteger.valueOf(2)), - BigInteger.valueOf(2), - BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.valueOf(1)), - BigInteger.valueOf(2) - }; - - protected void writeData(int count, StructVector parent) { - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - IntWriter intWriter = rootWriter.integer("int"); - UInt1Writer uint1Writer = rootWriter.uInt1("uint1"); - UInt2Writer uint2Writer = rootWriter.uInt2("uint2"); - UInt4Writer uint4Writer = rootWriter.uInt4("uint4"); - UInt8Writer uint8Writer = rootWriter.uInt8("uint8"); - BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); - Float4Writer float4Writer = rootWriter.float4("float"); - for (int i = 0; i < count; i++) { - intWriter.setPosition(i); - intWriter.writeInt(i); - uint1Writer.setPosition(i); - // TODO: Fix add safe write methods on uint methods. 
- uint1Writer.setPosition(i); - uint1Writer.writeUInt1((byte) uint1Values[i % uint1Values.length]); - uint2Writer.setPosition(i); - uint2Writer.writeUInt2((char) uint2Values[i % uint2Values.length]); - uint4Writer.setPosition(i); - uint4Writer.writeUInt4((int) uint4Values[i % uint4Values.length]); - uint8Writer.setPosition(i); - uint8Writer.writeUInt8(uint8Values[i % uint8Values.length].longValue()); - bigIntWriter.setPosition(i); - bigIntWriter.writeBigInt(i); - float4Writer.setPosition(i); - float4Writer.writeFloat4(i == 0 ? Float.NaN : i); - } - writer.setValueCount(count); - } - - protected void validateContent(int count, VectorSchemaRoot root) { - for (int i = 0; i < count; i++) { - assertEquals(i, root.getVector("int").getObject(i)); - assertEquals( - (Short) uint1Values[i % uint1Values.length], - ((UInt1Vector) root.getVector("uint1")).getObjectNoOverflow(i)); - assertEquals( - (Character) uint2Values[i % uint2Values.length], - (Character) ((UInt2Vector) root.getVector("uint2")).get(i), - "Failed for index: " + i); - assertEquals( - (Long) uint4Values[i % uint4Values.length], - ((UInt4Vector) root.getVector("uint4")).getObjectNoOverflow(i), - "Failed for index: " + i); - assertEquals( - uint8Values[i % uint8Values.length], - ((UInt8Vector) root.getVector("uint8")).getObjectNoOverflow(i), - "Failed for index: " + i); - assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i)); - assertEquals(i == 0 ? 
Float.NaN : i, root.getVector("float").getObject(i)); - } - } - - protected void writeComplexData(int count, StructVector parent) { - ArrowBuf varchar = allocator.buffer(3); - varchar.readerIndex(0); - varchar.setByte(0, 'a'); - varchar.setByte(1, 'b'); - varchar.setByte(2, 'c'); - varchar.writerIndex(3); - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - IntWriter intWriter = rootWriter.integer("int"); - BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); - ListWriter listWriter = rootWriter.list("list"); - StructWriter structWriter = rootWriter.struct("struct"); - for (int i = 0; i < count; i++) { - if (i % 5 != 3) { - intWriter.setPosition(i); - intWriter.writeInt(i); - } - bigIntWriter.setPosition(i); - bigIntWriter.writeBigInt(i); - listWriter.setPosition(i); - listWriter.startList(); - for (int j = 0; j < i % 3; j++) { - listWriter.varChar().writeVarChar(0, 3, varchar); - } - listWriter.endList(); - structWriter.setPosition(i); - structWriter.start(); - structWriter.timeStampMilli("timestamp").writeTimeStampMilli(i); - structWriter.end(); - } - writer.setValueCount(count); - varchar.getReferenceManager().release(); - } - - public void printVectors(List vectors) { - for (FieldVector vector : vectors) { - LOGGER.debug(vector.getField().getName()); - int valueCount = vector.getValueCount(); - for (int i = 0; i < valueCount; i++) { - LOGGER.debug(String.valueOf(vector.getObject(i))); - } - } - } - - protected void validateComplexContent(int count, VectorSchemaRoot root) { - assertEquals(count, root.getRowCount()); - printVectors(root.getFieldVectors()); - for (int i = 0; i < count; i++) { - - Object intVal = root.getVector("int").getObject(i); - if (i % 5 != 3) { - assertEquals(i, intVal); - } else { - assertNull(intVal); - } - assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i)); - assertEquals(i % 3, ((List) root.getVector("list").getObject(i)).size()); - 
NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder(); - FieldReader structReader = root.getVector("struct").getReader(); - structReader.setPosition(i); - structReader.reader("timestamp").read(h); - assertEquals(i, h.value); - } - } - - private LocalDateTime makeDateTimeFromCount(int i) { - return LocalDateTime.of(2000 + i, 1 + i, 1 + i, i, i, i, i * 100_000_000 + i); - } - - protected void writeDateTimeData(int count, StructVector parent) { - assertTrue(count < 100); - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - DateMilliWriter dateWriter = rootWriter.dateMilli("date"); - TimeMilliWriter timeWriter = rootWriter.timeMilli("time"); - TimeStampMilliWriter timeStampMilliWriter = rootWriter.timeStampMilli("timestamp-milli"); - TimeStampMilliTZWriter timeStampMilliTZWriter = - rootWriter.timeStampMilliTZ("timestamp-milliTZ", "Europe/Paris"); - TimeStampNanoWriter timeStampNanoWriter = rootWriter.timeStampNano("timestamp-nano"); - for (int i = 0; i < count; i++) { - LocalDateTime dt = makeDateTimeFromCount(i); - // Number of days in milliseconds since epoch, stored as 64-bit integer, only date part is - // used - dateWriter.setPosition(i); - long dateLong = dt.toLocalDate().atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli(); - dateWriter.writeDateMilli(dateLong); - // Time is a value in milliseconds since midnight, stored as 32-bit integer - timeWriter.setPosition(i); - int milliOfDay = - (int) java.util.concurrent.TimeUnit.NANOSECONDS.toMillis(dt.toLocalTime().toNanoOfDay()); - timeWriter.writeTimeMilli(milliOfDay); - // Timestamp as milliseconds since the epoch, stored as 64-bit integer - timeStampMilliWriter.setPosition(i); - timeStampMilliWriter.writeTimeStampMilli(dt.toInstant(ZoneOffset.UTC).toEpochMilli()); - // Timestamp as milliseconds since epoch with timezone - timeStampMilliTZWriter.setPosition(i); - timeStampMilliTZWriter.writeTimeStampMilliTZ( - 
dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli()); - // Timestamp as nanoseconds since epoch - timeStampNanoWriter.setPosition(i); - long tsNanos = - dt.toInstant(ZoneOffset.UTC).toEpochMilli() * 1_000_000 - + i; // need to add back in nano val - timeStampNanoWriter.writeTimeStampNano(tsNanos); - } - writer.setValueCount(count); - } - - protected void validateDateTimeContent(int count, VectorSchemaRoot root) { - assertEquals(count, root.getRowCount()); - printVectors(root.getFieldVectors()); - for (int i = 0; i < count; i++) { - LocalDateTime dt = makeDateTimeFromCount(i); - LocalDateTime dtMilli = dt.minusNanos(i); - LocalDateTime dateVal = ((DateMilliVector) root.getVector("date")).getObject(i); - LocalDateTime dateExpected = dt.toLocalDate().atStartOfDay(); - assertEquals(dateExpected, dateVal); - LocalTime timeVal = ((TimeMilliVector) root.getVector("time")).getObject(i).toLocalTime(); - assertEquals(dtMilli.toLocalTime(), timeVal); - Object timestampMilliVal = root.getVector("timestamp-milli").getObject(i); - assertEquals(dtMilli, timestampMilliVal); - Object timestampMilliTZVal = root.getVector("timestamp-milliTZ").getObject(i); - assertEquals( - dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli(), timestampMilliTZVal); - Object timestampNanoVal = root.getVector("timestamp-nano").getObject(i); - assertEquals(dt, timestampNanoVal); - } - } - - protected VectorSchemaRoot writeFlatDictionaryData( - BufferAllocator bufferAllocator, DictionaryProvider.MapDictionaryProvider provider) { - - // Define dictionaries and add to provider - VarCharVector dictionary1Vector = newVarCharVector("D1", bufferAllocator); - dictionary1Vector.allocateNewSafe(); - dictionary1Vector.set(0, "foo".getBytes(StandardCharsets.UTF_8)); - dictionary1Vector.set(1, "bar".getBytes(StandardCharsets.UTF_8)); - dictionary1Vector.set(2, "baz".getBytes(StandardCharsets.UTF_8)); - dictionary1Vector.setValueCount(3); - - Dictionary dictionary1 = - new 
Dictionary(dictionary1Vector, new DictionaryEncoding(1L, false, null)); - provider.put(dictionary1); - - VarCharVector dictionary2Vector = newVarCharVector("D2", bufferAllocator); - dictionary2Vector.allocateNewSafe(); - dictionary2Vector.set(0, "micro".getBytes(StandardCharsets.UTF_8)); - dictionary2Vector.set(1, "small".getBytes(StandardCharsets.UTF_8)); - dictionary2Vector.set(2, "large".getBytes(StandardCharsets.UTF_8)); - dictionary2Vector.setValueCount(3); - - Dictionary dictionary2 = - new Dictionary(dictionary2Vector, new DictionaryEncoding(2L, false, null)); - provider.put(dictionary2); - - // Populate the vectors - VarCharVector vector1A = newVarCharVector("varcharA", bufferAllocator); - vector1A.allocateNewSafe(); - vector1A.set(0, "foo".getBytes(StandardCharsets.UTF_8)); - vector1A.set(1, "bar".getBytes(StandardCharsets.UTF_8)); - vector1A.set(3, "baz".getBytes(StandardCharsets.UTF_8)); - vector1A.set(4, "bar".getBytes(StandardCharsets.UTF_8)); - vector1A.set(5, "baz".getBytes(StandardCharsets.UTF_8)); - vector1A.setValueCount(6); - - FieldVector encodedVector1A = (FieldVector) DictionaryEncoder.encode(vector1A, dictionary1); - vector1A.close(); // Done with this vector after encoding - - // Write this vector using indices instead of encoding - IntVector encodedVector1B = new IntVector("varcharB", bufferAllocator); - encodedVector1B.allocateNewSafe(); - encodedVector1B.set(0, 2); // "baz" - encodedVector1B.set(1, 1); // "bar" - encodedVector1B.set(2, 2); // "baz" - encodedVector1B.set(4, 1); // "bar" - encodedVector1B.set(5, 0); // "foo" - encodedVector1B.setValueCount(6); - - VarCharVector vector2 = newVarCharVector("sizes", bufferAllocator); - vector2.allocateNewSafe(); - vector2.set(1, "large".getBytes(StandardCharsets.UTF_8)); - vector2.set(2, "small".getBytes(StandardCharsets.UTF_8)); - vector2.set(3, "small".getBytes(StandardCharsets.UTF_8)); - vector2.set(4, "large".getBytes(StandardCharsets.UTF_8)); - vector2.setValueCount(6); - - FieldVector 
encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector2, dictionary2); - vector2.close(); // Done with this vector after encoding - - List fields = - Arrays.asList( - encodedVector1A.getField(), encodedVector1B.getField(), encodedVector2.getField()); - List vectors = - Collections2.asImmutableList(encodedVector1A, encodedVector1B, encodedVector2); - - return new VectorSchemaRoot(fields, vectors, encodedVector1A.getValueCount()); - } - - protected void validateFlatDictionary(VectorSchemaRoot root, DictionaryProvider provider) { - FieldVector vector1A = root.getVector("varcharA"); - assertNotNull(vector1A); - - DictionaryEncoding encoding1A = vector1A.getField().getDictionary(); - assertNotNull(encoding1A); - assertEquals(1L, encoding1A.getId()); - - assertEquals(6, vector1A.getValueCount()); - assertEquals(0, vector1A.getObject(0)); - assertEquals(1, vector1A.getObject(1)); - assertEquals(null, vector1A.getObject(2)); - assertEquals(2, vector1A.getObject(3)); - assertEquals(1, vector1A.getObject(4)); - assertEquals(2, vector1A.getObject(5)); - - FieldVector vector1B = root.getVector("varcharB"); - assertNotNull(vector1B); - - DictionaryEncoding encoding1B = vector1A.getField().getDictionary(); - assertNotNull(encoding1B); - assertTrue(encoding1A.equals(encoding1B)); - assertEquals(1L, encoding1B.getId()); - - assertEquals(6, vector1B.getValueCount()); - assertEquals(2, vector1B.getObject(0)); - assertEquals(1, vector1B.getObject(1)); - assertEquals(2, vector1B.getObject(2)); - assertEquals(null, vector1B.getObject(3)); - assertEquals(1, vector1B.getObject(4)); - assertEquals(0, vector1B.getObject(5)); - - FieldVector vector2 = root.getVector("sizes"); - assertNotNull(vector2); - - DictionaryEncoding encoding2 = vector2.getField().getDictionary(); - assertNotNull(encoding2); - assertEquals(2L, encoding2.getId()); - - assertEquals(6, vector2.getValueCount()); - assertEquals(null, vector2.getObject(0)); - assertEquals(2, vector2.getObject(1)); - assertEquals(1, 
vector2.getObject(2)); - assertEquals(1, vector2.getObject(3)); - assertEquals(2, vector2.getObject(4)); - assertEquals(null, vector2.getObject(5)); - - Dictionary dictionary1 = provider.lookup(1L); - assertNotNull(dictionary1); - VarCharVector dictionaryVector = ((VarCharVector) dictionary1.getVector()); - assertEquals(3, dictionaryVector.getValueCount()); - assertEquals(new Text("foo"), dictionaryVector.getObject(0)); - assertEquals(new Text("bar"), dictionaryVector.getObject(1)); - assertEquals(new Text("baz"), dictionaryVector.getObject(2)); - - Dictionary dictionary2 = provider.lookup(2L); - assertNotNull(dictionary2); - dictionaryVector = ((VarCharVector) dictionary2.getVector()); - assertEquals(3, dictionaryVector.getValueCount()); - assertEquals(new Text("micro"), dictionaryVector.getObject(0)); - assertEquals(new Text("small"), dictionaryVector.getObject(1)); - assertEquals(new Text("large"), dictionaryVector.getObject(2)); - } - - protected VectorSchemaRoot writeNestedDictionaryData( - BufferAllocator bufferAllocator, DictionaryProvider.MapDictionaryProvider provider) { - - // Define the dictionary and add to the provider - VarCharVector dictionaryVector = newVarCharVector("D2", bufferAllocator); - dictionaryVector.allocateNewSafe(); - dictionaryVector.set(0, "foo".getBytes(StandardCharsets.UTF_8)); - dictionaryVector.set(1, "bar".getBytes(StandardCharsets.UTF_8)); - dictionaryVector.setValueCount(2); - - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(2L, false, null)); - provider.put(dictionary); - - // Write the vector data using dictionary indices - ListVector listVector = ListVector.empty("list", bufferAllocator); - DictionaryEncoding encoding = dictionary.getEncoding(); - listVector.addOrGetVector(new FieldType(true, encoding.getIndexType(), encoding)); - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - listWriter.startList(); - listWriter.writeInt(0); - 
listWriter.writeInt(1); - listWriter.endList(); - listWriter.startList(); - listWriter.writeInt(0); - listWriter.endList(); - listWriter.startList(); - listWriter.writeInt(1); - listWriter.endList(); - listWriter.setValueCount(3); - - List fields = Collections2.asImmutableList(listVector.getField()); - List vectors = Collections2.asImmutableList(listVector); - return new VectorSchemaRoot(fields, vectors, 3); - } - - protected void validateNestedDictionary(VectorSchemaRoot root, DictionaryProvider provider) { - FieldVector vector = root.getFieldVectors().get(0); - assertNotNull(vector); - assertNull(vector.getField().getDictionary()); - Field nestedField = vector.getField().getChildren().get(0); - - DictionaryEncoding encoding = nestedField.getDictionary(); - assertNotNull(encoding); - assertEquals(2L, encoding.getId()); - assertEquals(new ArrowType.Int(32, true), encoding.getIndexType()); - - assertEquals(3, vector.getValueCount()); - assertEquals(Arrays.asList(0, 1), vector.getObject(0)); - assertEquals(Arrays.asList(0), vector.getObject(1)); - assertEquals(Arrays.asList(1), vector.getObject(2)); - - Dictionary dictionary = provider.lookup(2L); - assertNotNull(dictionary); - VarCharVector dictionaryVector = ((VarCharVector) dictionary.getVector()); - assertEquals(2, dictionaryVector.getValueCount()); - assertEquals(new Text("foo"), dictionaryVector.getObject(0)); - assertEquals(new Text("bar"), dictionaryVector.getObject(1)); - } - - protected VectorSchemaRoot writeDecimalData(BufferAllocator bufferAllocator) { - DecimalVector decimalVector1 = new DecimalVector("decimal1", bufferAllocator, 10, 3); - DecimalVector decimalVector2 = new DecimalVector("decimal2", bufferAllocator, 4, 2); - DecimalVector decimalVector3 = new DecimalVector("decimal3", bufferAllocator, 16, 8); - - int count = 10; - decimalVector1.allocateNew(count); - decimalVector2.allocateNew(count); - decimalVector3.allocateNew(count); - - for (int i = 0; i < count; i++) { - decimalVector1.setSafe(i, 
new BigDecimal(BigInteger.valueOf(i), 3)); - decimalVector2.setSafe(i, new BigDecimal(BigInteger.valueOf(i * (1 << 10)), 2)); - decimalVector3.setSafe(i, new BigDecimal(BigInteger.valueOf(i * 1111111111111111L), 8)); - } - - decimalVector1.setValueCount(count); - decimalVector2.setValueCount(count); - decimalVector3.setValueCount(count); - - List fields = - Collections2.asImmutableList( - decimalVector1.getField(), decimalVector2.getField(), decimalVector3.getField()); - List vectors = - Collections2.asImmutableList(decimalVector1, decimalVector2, decimalVector3); - return new VectorSchemaRoot(fields, vectors, count); - } - - protected void validateDecimalData(VectorSchemaRoot root) { - DecimalVector decimalVector1 = (DecimalVector) root.getVector("decimal1"); - DecimalVector decimalVector2 = (DecimalVector) root.getVector("decimal2"); - DecimalVector decimalVector3 = (DecimalVector) root.getVector("decimal3"); - int count = 10; - assertEquals(count, root.getRowCount()); - - for (int i = 0; i < count; i++) { - // Verify decimal 1 vector - BigDecimal readValue = decimalVector1.getObject(i); - ArrowType.Decimal type = (ArrowType.Decimal) decimalVector1.getField().getType(); - BigDecimal genValue = new BigDecimal(BigInteger.valueOf(i), type.getScale()); - assertEquals(genValue, readValue); - - // Verify decimal 2 vector - readValue = decimalVector2.getObject(i); - type = (ArrowType.Decimal) decimalVector2.getField().getType(); - genValue = new BigDecimal(BigInteger.valueOf(i * (1 << 10)), type.getScale()); - assertEquals(genValue, readValue); - - // Verify decimal 3 vector - readValue = decimalVector3.getObject(i); - type = (ArrowType.Decimal) decimalVector3.getField().getType(); - genValue = new BigDecimal(BigInteger.valueOf(i * 1111111111111111L), type.getScale()); - assertEquals(genValue, readValue); - } - } - - protected VectorSchemaRoot writeNullData(int valueCount) { - NullVector nullVector1 = new NullVector("vector1"); - NullVector nullVector2 = new 
NullVector("vector2"); - nullVector1.setValueCount(valueCount); - nullVector2.setValueCount(valueCount); - - List fields = - Collections2.asImmutableList(nullVector1.getField(), nullVector2.getField()); - List vectors = Collections2.asImmutableList(nullVector1, nullVector2); - return new VectorSchemaRoot(fields, vectors, valueCount); - } - - protected void validateNullData(VectorSchemaRoot root, int valueCount) { - - NullVector vector1 = (NullVector) root.getFieldVectors().get(0); - NullVector vector2 = (NullVector) root.getFieldVectors().get(1); - - assertEquals(valueCount, vector1.getValueCount()); - assertEquals(valueCount, vector2.getValueCount()); - } - - public void validateUnionData(int count, VectorSchemaRoot root) { - FieldReader unionReader = root.getVector("union").getReader(); - for (int i = 0; i < count; i++) { - unionReader.setPosition(i); - switch (i % 5) { - case 0: - assertEquals(i, unionReader.readInteger().intValue()); - break; - case 1: - assertEquals(i, unionReader.readLong().longValue()); - break; - case 2: - assertEquals(i % 3, unionReader.size()); - break; - case 3: - assertEquals(3, unionReader.size()); - break; - case 4: - NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder(); - unionReader.reader("timestamp").read(h); - assertEquals(i, h.value); - break; - default: - assert false : "Unexpected value in switch statement: " + i; - } - } - } - - public void writeUnionData(int count, StructVector parent) { - ArrowBuf varchar = allocator.buffer(3); - varchar.readerIndex(0); - varchar.setByte(0, 'a'); - varchar.setByte(1, 'b'); - varchar.setByte(2, 'c'); - varchar.writerIndex(3); - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - IntWriter intWriter = rootWriter.integer("union"); - BigIntWriter bigIntWriter = rootWriter.bigInt("union"); - ListWriter listWriter = rootWriter.list("union"); - ListWriter listViewWriter = rootWriter.listView("union"); - StructWriter 
structWriter = rootWriter.struct("union"); - for (int i = 0; i < count; i++) { - switch (i % 5) { - case 0: - intWriter.setPosition(i); - intWriter.writeInt(i); - break; - case 1: - bigIntWriter.setPosition(i); - bigIntWriter.writeBigInt(i); - break; - case 2: - listWriter.setPosition(i); - listWriter.startList(); - for (int j = 0; j < i % 3; j++) { - listWriter.varChar().writeVarChar(0, 3, varchar); - } - listWriter.endList(); - break; - case 3: - listViewWriter.setPosition(i); - listViewWriter.startListView(); - for (int j = 0; j < i % 5; j++) { - listViewWriter.varChar().writeVarChar(0, 3, varchar); - } - listViewWriter.endListView(); - break; - case 4: - structWriter.setPosition(i); - structWriter.start(); - structWriter.timeStampMilli("timestamp").writeTimeStampMilli(i); - structWriter.end(); - break; - default: - assert false : "Unexpected value in switch statement: " + i; - } - } - writer.setValueCount(count); - varchar.getReferenceManager().release(); - } - - protected void writeVarBinaryData(int count, StructVector parent) { - assertTrue(count < 100); - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - ListWriter listWriter = rootWriter.list("list"); - ArrowBuf varbin = allocator.buffer(count); - for (int i = 0; i < count; i++) { - varbin.setByte(i, i); - listWriter.setPosition(i); - listWriter.startList(); - for (int j = 0; j < i % 3; j++) { - listWriter.varBinary().writeVarBinary(0, i + 1, varbin); - } - listWriter.endList(); - } - writer.setValueCount(count); - varbin.getReferenceManager().release(); - } - - protected void validateVarBinary(int count, VectorSchemaRoot root) { - assertEquals(count, root.getRowCount()); - ListVector listVector = (ListVector) root.getVector("list"); - byte[] expectedArray = new byte[count]; - int numVarBinaryValues = 0; - for (int i = 0; i < count; i++) { - expectedArray[i] = (byte) i; - List objList = listVector.getObject(i); - if (i % 3 == 0) { - 
assertTrue(objList.isEmpty()); - } else { - byte[] expected = Arrays.copyOfRange(expectedArray, 0, i + 1); - for (int j = 0; j < i % 3; j++) { - byte[] result = (byte[]) objList.get(j); - assertArrayEquals(result, expected); - numVarBinaryValues++; - } - } - } - - // ListVector lastSet should be the index of last value + 1 - assertEquals(listVector.getLastSet(), count - 1); - - // VarBinaryVector lastSet should be the index of last value - VarBinaryVector binaryVector = (VarBinaryVector) listVector.getChildrenFromFields().get(0); - assertEquals(binaryVector.getLastSet(), numVarBinaryValues - 1); - } - - protected void writeBatchData(ArrowWriter writer, IntVector vector, VectorSchemaRoot root) - throws IOException { - writer.start(); - - vector.setNull(0); - vector.setSafe(1, 1); - vector.setSafe(2, 2); - vector.setNull(3); - vector.setSafe(4, 1); - vector.setValueCount(5); - root.setRowCount(5); - writer.writeBatch(); - - vector.setNull(0); - vector.setSafe(1, 1); - vector.setSafe(2, 2); - vector.setValueCount(3); - root.setRowCount(3); - writer.writeBatch(); - - writer.end(); - } - - protected void validateBatchData(ArrowReader reader, IntVector vector) throws IOException { - reader.loadNextBatch(); - - assertEquals(5, vector.getValueCount()); - assertTrue(vector.isNull(0)); - assertEquals(1, vector.get(1)); - assertEquals(2, vector.get(2)); - assertTrue(vector.isNull(3)); - assertEquals(1, vector.get(4)); - - reader.loadNextBatch(); - - assertEquals(3, vector.getValueCount()); - assertTrue(vector.isNull(0)); - assertEquals(1, vector.get(1)); - assertEquals(2, vector.get(2)); - } - - protected VectorSchemaRoot writeMapData(BufferAllocator bufferAllocator) { - MapVector mapVector = MapVector.empty("map", bufferAllocator, false); - MapVector sortedMapVector = MapVector.empty("mapSorted", bufferAllocator, true); - mapVector.allocateNew(); - sortedMapVector.allocateNew(); - UnionMapWriter mapWriter = mapVector.getWriter(); - UnionMapWriter sortedMapWriter = 
sortedMapVector.getWriter(); - - final int count = 10; - for (int i = 0; i < count; i++) { - // Write mapVector with NULL values - // i == 1 is a NULL - if (i != 1) { - mapWriter.setPosition(i); - mapWriter.startMap(); - // i == 3 is an empty map - if (i != 3) { - for (int j = 0; j < i + 1; j++) { - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(j); - // i == 5 maps to a NULL value - if (i != 5) { - mapWriter.value().integer().writeInt(j); - } - mapWriter.endEntry(); - } - } - mapWriter.endMap(); - } - // Write sortedMapVector - sortedMapWriter.setPosition(i); - sortedMapWriter.startMap(); - for (int j = 0; j < i + 1; j++) { - sortedMapWriter.startEntry(); - sortedMapWriter.key().bigInt().writeBigInt(j); - sortedMapWriter.value().integer().writeInt(j); - sortedMapWriter.endEntry(); - } - sortedMapWriter.endMap(); - } - mapWriter.setValueCount(COUNT); - sortedMapWriter.setValueCount(COUNT); - - List fields = - Collections2.asImmutableList(mapVector.getField(), sortedMapVector.getField()); - List vectors = Collections2.asImmutableList(mapVector, sortedMapVector); - return new VectorSchemaRoot(fields, vectors, count); - } - - protected void validateMapData(VectorSchemaRoot root) { - MapVector mapVector = (MapVector) root.getVector("map"); - MapVector sortedMapVector = (MapVector) root.getVector("mapSorted"); - - final int count = 10; - assertEquals(count, root.getRowCount()); - - UnionMapReader mapReader = new UnionMapReader(mapVector); - UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector); - for (int i = 0; i < count; i++) { - // Read mapVector with NULL values - mapReader.setPosition(i); - if (i == 1) { - assertFalse(mapReader.isSet()); - } else { - if (i == 3) { - JsonStringArrayList result = (JsonStringArrayList) mapReader.readObject(); - assertTrue(result.isEmpty()); - } else { - for (int j = 0; j < i + 1; j++) { - mapReader.next(); - assertEquals(j, mapReader.key().readLong().longValue()); - if (i == 5) { - 
assertFalse(mapReader.value().isSet()); - } else { - assertEquals(j, mapReader.value().readInteger().intValue()); - } - } - } - } - // Read sortedMapVector - sortedMapReader.setPosition(i); - for (int j = 0; j < i + 1; j++) { - sortedMapReader.next(); - assertEquals(j, sortedMapReader.key().readLong().longValue()); - assertEquals(j, sortedMapReader.value().readInteger().intValue()); - } - } - } - - protected VectorSchemaRoot writeListAsMapData(BufferAllocator bufferAllocator) { - ListVector mapEntryList = ListVector.empty("entryList", bufferAllocator); - FieldType mapEntryType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - StructVector mapEntryData = new StructVector("entryData", bufferAllocator, mapEntryType, null); - mapEntryData.addOrGet( - "myKey", new FieldType(false, new ArrowType.Int(64, true), null), BigIntVector.class); - mapEntryData.addOrGet( - "myValue", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - mapEntryList.initializeChildrenFromFields( - Collections2.asImmutableList(mapEntryData.getField())); - UnionListWriter entryWriter = mapEntryList.getWriter(); - entryWriter.allocate(); - - final int count = 10; - for (int i = 0; i < count; i++) { - entryWriter.setPosition(i); - entryWriter.startList(); - for (int j = 0; j < i + 1; j++) { - entryWriter.struct().start(); - entryWriter.struct().bigInt("myKey").writeBigInt(j); - entryWriter.struct().integer("myValue").writeInt(j); - entryWriter.struct().end(); - } - entryWriter.endList(); - } - entryWriter.setValueCount(COUNT); - - MapVector mapVector = MapVector.empty("map", bufferAllocator, false); - mapEntryList.makeTransferPair(mapVector).transfer(); - - List fields = Collections2.asImmutableList(mapVector.getField()); - List vectors = Collections2.asImmutableList(mapVector); - return new VectorSchemaRoot(fields, vectors, count); - } - - protected void validateListAsMapData(VectorSchemaRoot root) { - MapVector sortedMapVector = (MapVector) root.getVector("map"); - - 
final int count = 10; - assertEquals(count, root.getRowCount()); - - UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector); - sortedMapReader.setKeyValueNames("myKey", "myValue"); - for (int i = 0; i < count; i++) { - sortedMapReader.setPosition(i); - for (int j = 0; j < i + 1; j++) { - sortedMapReader.next(); - assertEquals(j, sortedMapReader.key().readLong().longValue()); - assertEquals(j, sortedMapReader.value().readInteger().intValue()); - } - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java deleted file mode 100644 index 04ea16a73c888..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.Arrays; -import java.util.Map; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Integration test for reading/writing {@link org.apache.arrow.vector.VectorSchemaRoot} with large - * (more than 2GB) buffers by {@link ArrowReader} and {@link ArrowWriter}.. To run this test, please - * make sure there is at least 8GB free memory, and 8GB free.disk space in the system. 
- */ -public class ITTestIPCWithLargeArrowBuffers { - - private static final Logger logger = - LoggerFactory.getLogger(ITTestIPCWithLargeArrowBuffers.class); - - // 4GB buffer size - static final long BUFFER_SIZE = 4 * 1024 * 1024 * 1024L; - - static final int DICTIONARY_VECTOR_SIZE = (int) (BUFFER_SIZE / BigIntVector.TYPE_WIDTH); - - static final int ENCODED_VECTOR_SIZE = (int) (BUFFER_SIZE / IntVector.TYPE_WIDTH); - - static final String FILE_NAME = "largeArrowData.data"; - - static final long DICTIONARY_ID = 123L; - - static final ArrowType.Int ENCODED_VECTOR_TYPE = new ArrowType.Int(32, true); - - static final DictionaryEncoding DICTIONARY_ENCODING = - new DictionaryEncoding(DICTIONARY_ID, false, ENCODED_VECTOR_TYPE); - - static final FieldType ENCODED_FIELD_TYPE = - new FieldType(true, ENCODED_VECTOR_TYPE, DICTIONARY_ENCODING, null); - - static final Field ENCODED_VECTOR_FIELD = new Field("encoded vector", ENCODED_FIELD_TYPE, null); - - private void testWriteLargeArrowData(boolean streamMode) throws IOException { - // simulate encoding big int as int - try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); - BigIntVector dictVector = new BigIntVector("dic vector", allocator); - FileOutputStream out = new FileOutputStream(FILE_NAME); - IntVector encodedVector = (IntVector) ENCODED_VECTOR_FIELD.createVector(allocator)) { - - // prepare dictionary provider. 
- DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - Dictionary dictionary = new Dictionary(dictVector, DICTIONARY_ENCODING); - provider.put(dictionary); - - // populate the dictionary vector - dictVector.allocateNew(DICTIONARY_VECTOR_SIZE); - for (int i = 0; i < DICTIONARY_VECTOR_SIZE; i++) { - dictVector.set(i, i); - } - dictVector.setValueCount(DICTIONARY_VECTOR_SIZE); - assertTrue(dictVector.getDataBuffer().capacity() > Integer.MAX_VALUE); - logger.trace("Populating dictionary vector finished"); - - // populate the encoded vector - encodedVector.allocateNew(ENCODED_VECTOR_SIZE); - for (int i = 0; i < ENCODED_VECTOR_SIZE; i++) { - encodedVector.set(i, i % DICTIONARY_VECTOR_SIZE); - } - encodedVector.setValueCount(ENCODED_VECTOR_SIZE); - assertTrue(encodedVector.getDataBuffer().capacity() > Integer.MAX_VALUE); - logger.trace("Populating encoded vector finished"); - - // build vector schema root and write data. - try (VectorSchemaRoot root = - new VectorSchemaRoot( - Arrays.asList(ENCODED_VECTOR_FIELD), - Arrays.asList(encodedVector), - ENCODED_VECTOR_SIZE); - ArrowWriter writer = - streamMode - ? new ArrowStreamWriter(root, provider, out) - : new ArrowFileWriter(root, provider, out.getChannel())) { - writer.start(); - writer.writeBatch(); - writer.end(); - logger.trace("Writing data finished"); - } - } - - assertTrue(new File(FILE_NAME).exists()); - } - - private void testReadLargeArrowData(boolean streamMode) throws IOException { - try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); - FileInputStream in = new FileInputStream(FILE_NAME); - ArrowReader reader = - streamMode - ? 
new ArrowStreamReader(in, allocator) - : new ArrowFileReader(in.getChannel(), allocator)) { - - // verify schema - Schema readSchema = reader.getVectorSchemaRoot().getSchema(); - assertEquals(1, readSchema.getFields().size()); - assertEquals(ENCODED_VECTOR_FIELD, readSchema.getFields().get(0)); - logger.trace("Verifying schema finished"); - - // verify vector schema root - assertTrue(reader.loadNextBatch()); - VectorSchemaRoot root = reader.getVectorSchemaRoot(); - - assertEquals(ENCODED_VECTOR_SIZE, root.getRowCount()); - assertEquals(1, root.getFieldVectors().size()); - assertTrue(root.getFieldVectors().get(0) instanceof IntVector); - - IntVector encodedVector = (IntVector) root.getVector(0); - for (int i = 0; i < ENCODED_VECTOR_SIZE; i++) { - assertEquals(i % DICTIONARY_VECTOR_SIZE, encodedVector.get(i)); - } - logger.trace("Verifying encoded vector finished"); - - // verify dictionary - Map dictVectors = reader.getDictionaryVectors(); - assertEquals(1, dictVectors.size()); - Dictionary dictionary = dictVectors.get(DICTIONARY_ID); - assertNotNull(dictionary); - - assertTrue(dictionary.getVector() instanceof BigIntVector); - BigIntVector dictVector = (BigIntVector) dictionary.getVector(); - assertEquals(DICTIONARY_VECTOR_SIZE, dictVector.getValueCount()); - for (int i = 0; i < DICTIONARY_VECTOR_SIZE; i++) { - assertEquals(i, dictVector.get(i)); - } - logger.trace("Verifying dictionary vector finished"); - - // ensure no more data available - assertFalse(reader.loadNextBatch()); - } finally { - File dataFile = new File(FILE_NAME); - dataFile.delete(); - assertFalse(dataFile.exists()); - } - } - - @Test - public void testIPC() throws IOException { - logger.trace("Start testing reading/writing large arrow stream data"); - testWriteLargeArrowData(true); - testReadLargeArrowData(true); - logger.trace("Finish testing reading/writing large arrow stream data"); - - logger.trace("Start testing reading/writing large arrow file data"); - testWriteLargeArrowData(false); - 
testReadLargeArrowData(false); - logger.trace("Finish testing reading/writing large arrow file data"); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java deleted file mode 100644 index 0a41b6c599029..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import static java.util.Arrays.asList; -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.channels.Channels; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.ArrowMessage; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.MetadataVersion; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; - -public class MessageSerializerTest { - - public static ArrowBuf buf(BufferAllocator alloc, byte[] bytes) { - ArrowBuf buffer = alloc.buffer(bytes.length); - buffer.writeBytes(bytes); - return buffer; - } - - public static byte[] array(ArrowBuf buf) { - byte[] bytes = new byte[checkedCastToInt(buf.readableBytes())]; - buf.readBytes(bytes); - return bytes; - } - - private int intToByteRoundtrip(int v, byte[] bytes) { - MessageSerializer.intToBytes(v, bytes); - return 
MessageSerializer.bytesToInt(bytes); - } - - @Test - public void testIntToBytes() { - byte[] bytes = new byte[4]; - int[] values = new int[] {1, 15, 1 << 8, 1 << 16, Integer.MAX_VALUE}; - for (int v : values) { - assertEquals(intToByteRoundtrip(v, bytes), v); - } - } - - @Test - public void testWriteMessageBufferAligned() throws IOException { - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - WriteChannel out = new WriteChannel(Channels.newChannel(outputStream)); - - // This is not a valid Arrow Message, only to test writing and alignment - ByteBuffer buffer = ByteBuffer.allocate(8).order(ByteOrder.nativeOrder()); - buffer.putInt(1); - buffer.putInt(2); - buffer.flip(); - - int bytesWritten = MessageSerializer.writeMessageBuffer(out, 8, buffer); - assertEquals(16, bytesWritten); - - buffer.rewind(); - buffer.putInt(3); - buffer.flip(); - bytesWritten = MessageSerializer.writeMessageBuffer(out, 4, buffer); - assertEquals(16, bytesWritten); - - ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray()); - ReadChannel in = new ReadChannel(Channels.newChannel(inputStream)); - ByteBuffer result = ByteBuffer.allocate(32).order(ByteOrder.nativeOrder()); - in.readFully(result); - result.rewind(); - - // First message continuation, size, and 2 int values - assertEquals(MessageSerializer.IPC_CONTINUATION_TOKEN, result.getInt()); - // message length is represented in little endian - result.order(ByteOrder.LITTLE_ENDIAN); - assertEquals(8, result.getInt()); - result.order(ByteOrder.nativeOrder()); - assertEquals(1, result.getInt()); - assertEquals(2, result.getInt()); - - // Second message continuation, size, 1 int value and 4 bytes padding - assertEquals(MessageSerializer.IPC_CONTINUATION_TOKEN, result.getInt()); - // message length is represented in little endian - result.order(ByteOrder.LITTLE_ENDIAN); - assertEquals(8, result.getInt()); - result.order(ByteOrder.nativeOrder()); - assertEquals(3, result.getInt()); - 
assertEquals(0, result.getInt()); - } - - @Test - public void testSchemaMessageSerialization() throws IOException { - Schema schema = testSchema(); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - long size = MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), schema); - assertEquals(size, out.toByteArray().length); - - ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); - Schema deserialized = - MessageSerializer.deserializeSchema(new ReadChannel(Channels.newChannel(in))); - assertEquals(schema, deserialized); - assertEquals(1, deserialized.getFields().size()); - } - - @Test - public void testSchemaDictionaryMessageSerialization() throws IOException { - DictionaryEncoding dictionary = new DictionaryEncoding(9L, false, new ArrowType.Int(8, true)); - Field field = - new Field("test", new FieldType(true, ArrowType.Utf8.INSTANCE, dictionary, null), null); - Schema schema = new Schema(Collections.singletonList(field)); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - long size = MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), schema); - assertEquals(size, out.toByteArray().length); - - ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); - Schema deserialized = - MessageSerializer.deserializeSchema(new ReadChannel(Channels.newChannel(in))); - assertEquals(schema, deserialized); - } - - @Test - public void testSerializeRecordBatchV4() throws IOException { - byte[] validity = new byte[] {(byte) 255, 0}; - // second half is "undefined" - byte[] values = new byte[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - - BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE); - ArrowBuf validityb = buf(alloc, validity); - ArrowBuf valuesb = buf(alloc, values); - - ArrowRecordBatch batch = - new ArrowRecordBatch(16, asList(new ArrowFieldNode(16, 8)), asList(validityb, valuesb)); - - // avoid writing legacy ipc format by default - IpcOption option = new 
IpcOption(false, MetadataVersion.V4); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), batch, option); - - ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); - ReadChannel channel = new ReadChannel(Channels.newChannel(in)); - ArrowMessage deserialized = MessageSerializer.deserializeMessageBatch(channel, alloc); - assertEquals(ArrowRecordBatch.class, deserialized.getClass()); - verifyBatch((ArrowRecordBatch) deserialized, validity, values); - } - - @Test - public void testSerializeRecordBatchV5() throws Exception { - byte[] validity = new byte[] {(byte) 255, 0}; - // second half is "undefined" - byte[] values = new byte[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - - BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE); - ArrowBuf validityb = buf(alloc, validity); - ArrowBuf valuesb = buf(alloc, values); - - ArrowRecordBatch batch = - new ArrowRecordBatch(16, asList(new ArrowFieldNode(16, 8)), asList(validityb, valuesb)); - - // avoid writing legacy ipc format by default - IpcOption option = new IpcOption(false, MetadataVersion.V5); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), batch, option); - validityb.close(); - valuesb.close(); - batch.close(); - - { - ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); - ReadChannel channel = new ReadChannel(Channels.newChannel(in)); - ArrowMessage deserialized = MessageSerializer.deserializeMessageBatch(channel, alloc); - assertEquals(ArrowRecordBatch.class, deserialized.getClass()); - verifyBatch((ArrowRecordBatch) deserialized, validity, values); - deserialized.close(); - } - - { - byte[] validBytes = out.toByteArray(); - byte[] missingBytes = Arrays.copyOfRange(validBytes, /*from=*/ 0, validBytes.length - 1); - - ByteArrayInputStream in = new ByteArrayInputStream(missingBytes); - ReadChannel 
channel = new ReadChannel(Channels.newChannel(in)); - - assertThrows( - IOException.class, () -> MessageSerializer.deserializeMessageBatch(channel, alloc)); - } - - alloc.close(); - } - - public static Schema testSchema() { - return new Schema( - asList( - new Field( - "testField", - FieldType.nullable(new ArrowType.Int(8, true)), - Collections.emptyList()))); - } - - // Verifies batch contents matching test schema. - public static void verifyBatch(ArrowRecordBatch batch, byte[] validity, byte[] values) { - assertTrue(batch != null); - List nodes = batch.getNodes(); - assertEquals(1, nodes.size()); - ArrowFieldNode node = nodes.get(0); - assertEquals(16, node.getLength()); - assertEquals(8, node.getNullCount()); - List buffers = batch.getBuffers(); - assertEquals(2, buffers.size()); - assertArrayEquals(validity, MessageSerializerTest.array(buffers.get(0))); - assertArrayEquals(values, MessageSerializerTest.array(buffers.get(1))); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java deleted file mode 100644 index c8185623d680a..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc; - -import static java.nio.channels.Channels.newChannel; -import static org.apache.arrow.vector.TestUtils.newVarCharVector; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.util.Collections2; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.types.pojo.Field; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestArrowFile extends BaseFileTest { - private static final Logger LOGGER = LoggerFactory.getLogger(TestArrowFile.class); - - @Test - public void testWrite() throws IOException { - File file = new File("target/mytest_write.arrow"); - int count = COUNT; - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - StructVector parent = StructVector.empty("parent", vectorAllocator)) { - writeData(count, parent); - write(parent.getChild("root"), file, new ByteArrayOutputStream()); - } - } - - @Test - public void testWriteComplex() throws IOException { - File file = new File("target/mytest_write_complex.arrow"); - int count = COUNT; - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - StructVector parent = StructVector.empty("parent", 
vectorAllocator)) { - writeComplexData(count, parent); - FieldVector root = parent.getChild("root"); - validateComplexContent(count, new VectorSchemaRoot(root)); - write(root, file, new ByteArrayOutputStream()); - } - } - - /** - * Writes the contents of parents to file. If outStream is non-null, also writes it to outStream - * in the streaming serialized format. - */ - private void write(FieldVector parent, File file, OutputStream outStream) throws IOException { - VectorSchemaRoot root = new VectorSchemaRoot(parent); - - try (FileOutputStream fileOutputStream = new FileOutputStream(file); - ArrowFileWriter arrowWriter = - new ArrowFileWriter(root, null, fileOutputStream.getChannel()); ) { - LOGGER.debug("writing schema: " + root.getSchema()); - arrowWriter.start(); - arrowWriter.writeBatch(); - arrowWriter.end(); - } - - // Also try serializing to the stream writer. - if (outStream != null) { - try (ArrowStreamWriter arrowWriter = new ArrowStreamWriter(root, null, outStream)) { - arrowWriter.start(); - arrowWriter.writeBatch(); - arrowWriter.end(); - } - } - } - - @Test - public void testFileStreamHasEos() throws IOException { - - try (VarCharVector vector1 = newVarCharVector("varchar1", allocator)) { - vector1.allocateNewSafe(); - vector1.set(0, "foo".getBytes(StandardCharsets.UTF_8)); - vector1.set(1, "bar".getBytes(StandardCharsets.UTF_8)); - vector1.set(3, "baz".getBytes(StandardCharsets.UTF_8)); - vector1.set(4, "bar".getBytes(StandardCharsets.UTF_8)); - vector1.set(5, "baz".getBytes(StandardCharsets.UTF_8)); - vector1.setValueCount(6); - - List fields = Arrays.asList(vector1.getField()); - List vectors = Collections2.asImmutableList(vector1); - VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector1.getValueCount()); - - // write data - ByteArrayOutputStream out = new ByteArrayOutputStream(); - ArrowFileWriter writer = new ArrowFileWriter(root, null, newChannel(out)); - writer.start(); - writer.writeBatch(); - writer.end(); - - byte[] bytes = 
out.toByteArray(); - byte[] bytesWithoutMagic = new byte[bytes.length - 8]; - System.arraycopy(bytes, 8, bytesWithoutMagic, 0, bytesWithoutMagic.length); - - try (ArrowStreamReader reader = - new ArrowStreamReader(new ByteArrayInputStream(bytesWithoutMagic), allocator)) { - assertTrue(reader.loadNextBatch()); - // here will throw exception if read footer instead of eos. - assertFalse(reader.loadNextBatch()); - } - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java deleted file mode 100644 index c439510370e5e..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import static java.util.Arrays.asList; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import com.google.flatbuffers.FlatBufferBuilder; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.flatbuf.Footer; -import org.apache.arrow.vector.ipc.message.ArrowBlock; -import org.apache.arrow.vector.ipc.message.ArrowFooter; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; - -public class TestArrowFooter { - - @Test - public void test() { - Schema schema = - new Schema( - asList( - new Field( - "a", - FieldType.nullable(new ArrowType.Int(8, true)), - Collections.emptyList()))); - ArrowFooter footer = - new ArrowFooter( - schema, Collections.emptyList(), Collections.emptyList()); - ArrowFooter newFooter = roundTrip(footer); - assertEquals(footer, newFooter); - - List ids = new ArrayList<>(); - ids.add(new ArrowBlock(0, 1, 2)); - ids.add(new ArrowBlock(4, 5, 6)); - footer = new ArrowFooter(schema, ids, ids); - assertEquals(footer, roundTrip(footer)); - } - - private ArrowFooter roundTrip(ArrowFooter footer) { - FlatBufferBuilder builder = new FlatBufferBuilder(); - int i = footer.writeTo(builder); - builder.finish(i); - ByteBuffer dataBuffer = builder.dataBuffer(); - ArrowFooter newFooter = new ArrowFooter(Footer.getRootAsFooter(dataBuffer)); - return newFooter; - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java deleted file mode 100644 index 74ff95d41d69c..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java +++ /dev/null @@ -1,1049 +0,0 @@ -/* - * Licensed to 
the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc; - -import static java.nio.channels.Channels.newChannel; -import static java.util.Arrays.asList; -import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.apache.arrow.vector.TestUtils.newVarCharVector; -import static org.apache.arrow.vector.TestUtils.newVector; -import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.channels.Channels; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; 
-import java.util.Map; -import java.util.function.BiFunction; -import java.util.stream.Collectors; -import org.apache.arrow.flatbuf.FieldNode; -import org.apache.arrow.flatbuf.Message; -import org.apache.arrow.flatbuf.RecordBatch; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.AutoCloseables; -import org.apache.arrow.util.Collections2; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.TestUtils; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorLoader; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.compare.Range; -import org.apache.arrow.vector.compare.RangeEqualsVisitor; -import org.apache.arrow.vector.compare.TypeEqualsVisitor; -import org.apache.arrow.vector.compare.VectorEqualsVisitor; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryEncoder; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.ArrowBlock; -import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch; -import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.MetadataVersion; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import 
org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -import org.apache.arrow.vector.util.DictionaryUtility; -import org.apache.arrow.vector.util.TransferPair; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestArrowReaderWriter { - - private BufferAllocator allocator; - - private VarCharVector dictionaryVector1; - private VarCharVector dictionaryVector2; - private VarCharVector dictionaryVector3; - private StructVector dictionaryVector4; - - private Dictionary dictionary1; - private Dictionary dictionary2; - private Dictionary dictionary3; - private Dictionary dictionary4; - - private Schema schema; - private Schema encodedSchema; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - - dictionaryVector1 = newVarCharVector("D1", allocator); - setVector( - dictionaryVector1, - "foo".getBytes(StandardCharsets.UTF_8), - "bar".getBytes(StandardCharsets.UTF_8), - "baz".getBytes(StandardCharsets.UTF_8)); - - dictionaryVector2 = newVarCharVector("D2", allocator); - setVector( - dictionaryVector2, - "aa".getBytes(StandardCharsets.UTF_8), - "bb".getBytes(StandardCharsets.UTF_8), - "cc".getBytes(StandardCharsets.UTF_8)); - - dictionaryVector3 = newVarCharVector("D3", allocator); - setVector( - dictionaryVector3, - "foo".getBytes(StandardCharsets.UTF_8), - "bar".getBytes(StandardCharsets.UTF_8), - "baz".getBytes(StandardCharsets.UTF_8), - "aa".getBytes(StandardCharsets.UTF_8), - "bb".getBytes(StandardCharsets.UTF_8), - "cc".getBytes(StandardCharsets.UTF_8)); - - dictionaryVector4 = newVector(StructVector.class, "D4", MinorType.STRUCT, allocator); - final Map> dictionaryValues4 = new HashMap<>(); - dictionaryValues4.put("a", Arrays.asList(1, 2, 3)); - dictionaryValues4.put("b", Arrays.asList(4, 5, 6)); - setVector(dictionaryVector4, 
dictionaryValues4); - - dictionary1 = - new Dictionary( - dictionaryVector1, - new DictionaryEncoding(/*id=*/ 1L, /*ordered=*/ false, /*indexType=*/ null)); - dictionary2 = - new Dictionary( - dictionaryVector2, - new DictionaryEncoding(/*id=*/ 2L, /*ordered=*/ false, /*indexType=*/ null)); - dictionary3 = - new Dictionary( - dictionaryVector3, - new DictionaryEncoding(/*id=*/ 1L, /*ordered=*/ false, /*indexType=*/ null)); - dictionary4 = - new Dictionary( - dictionaryVector4, - new DictionaryEncoding(/*id=*/ 3L, /*ordered=*/ false, /*indexType=*/ null)); - } - - @AfterEach - public void terminate() throws Exception { - dictionaryVector1.close(); - dictionaryVector2.close(); - dictionaryVector3.close(); - dictionaryVector4.close(); - allocator.close(); - } - - ArrowBuf buf(byte[] bytes) { - ArrowBuf buffer = allocator.buffer(bytes.length); - buffer.writeBytes(bytes); - return buffer; - } - - byte[] array(ArrowBuf buf) { - byte[] bytes = new byte[checkedCastToInt(buf.readableBytes())]; - buf.readBytes(bytes); - return bytes; - } - - @Test - public void test() throws IOException { - Schema schema = - new Schema( - asList( - new Field( - "testField", - FieldType.nullable(new ArrowType.Int(8, true)), - Collections.emptyList()))); - ArrowType type = schema.getFields().get(0).getType(); - FieldVector vector = TestUtils.newVector(FieldVector.class, "testField", type, allocator); - vector.initializeChildrenFromFields(schema.getFields().get(0).getChildren()); - - byte[] validity = new byte[] {(byte) 255, 0}; - // second half is "undefined" - byte[] values = new byte[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - try (VectorSchemaRoot root = new VectorSchemaRoot(schema.getFields(), asList(vector), 16); - ArrowFileWriter writer = new ArrowFileWriter(root, null, newChannel(out))) { - ArrowBuf validityb = buf(validity); - ArrowBuf valuesb = buf(values); - ArrowRecordBatch batch = - new 
ArrowRecordBatch(16, asList(new ArrowFieldNode(16, 8)), asList(validityb, valuesb)); - VectorLoader loader = new VectorLoader(root); - loader.load(batch); - writer.writeBatch(); - - validityb.close(); - valuesb.close(); - batch.close(); - } - - byte[] byteArray = out.toByteArray(); - - try (SeekableReadChannel channel = - new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(byteArray)); - ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { - Schema readSchema = reader.getVectorSchemaRoot().getSchema(); - assertEquals(schema, readSchema); - // TODO: dictionaries - List recordBatches = reader.getRecordBlocks(); - assertEquals(1, recordBatches.size()); - reader.loadNextBatch(); - VectorUnloader unloader = new VectorUnloader(reader.getVectorSchemaRoot()); - ArrowRecordBatch recordBatch = unloader.getRecordBatch(); - List nodes = recordBatch.getNodes(); - assertEquals(1, nodes.size()); - ArrowFieldNode node = nodes.get(0); - assertEquals(16, node.getLength()); - assertEquals(8, node.getNullCount()); - List buffers = recordBatch.getBuffers(); - assertEquals(2, buffers.size()); - assertArrayEquals(validity, array(buffers.get(0))); - assertArrayEquals(values, array(buffers.get(1))); - - // Read just the header. This demonstrates being able to read without need to - // deserialize the buffer. 
- ByteBuffer headerBuffer = ByteBuffer.allocate(recordBatches.get(0).getMetadataLength()); - headerBuffer.put(byteArray, (int) recordBatches.get(0).getOffset(), headerBuffer.capacity()); - // new format prefix_size ==8 - headerBuffer.position(8); - Message messageFB = Message.getRootAsMessage(headerBuffer); - RecordBatch recordBatchFB = (RecordBatch) messageFB.header(new RecordBatch()); - assertEquals(2, recordBatchFB.buffersLength()); - assertEquals(1, recordBatchFB.nodesLength()); - FieldNode nodeFB = recordBatchFB.nodes(0); - assertEquals(16, nodeFB.length()); - assertEquals(8, nodeFB.nullCount()); - - recordBatch.close(); - } - } - - @Test - public void testWriteReadNullVector() throws IOException { - - int valueCount = 3; - - NullVector nullVector = new NullVector("vector"); - nullVector.setValueCount(valueCount); - - Schema schema = new Schema(asList(nullVector.getField())); - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - try (VectorSchemaRoot root = - new VectorSchemaRoot(schema.getFields(), asList(nullVector), valueCount); - ArrowFileWriter writer = new ArrowFileWriter(root, null, newChannel(out))) { - ArrowRecordBatch batch = - new ArrowRecordBatch( - valueCount, asList(new ArrowFieldNode(valueCount, 0)), Collections.emptyList()); - VectorLoader loader = new VectorLoader(root); - loader.load(batch); - writer.writeBatch(); - } - - byte[] byteArray = out.toByteArray(); - - try (SeekableReadChannel channel = - new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(byteArray)); - ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { - Schema readSchema = reader.getVectorSchemaRoot().getSchema(); - assertEquals(schema, readSchema); - List recordBatches = reader.getRecordBlocks(); - assertEquals(1, recordBatches.size()); - - assertTrue(reader.loadNextBatch()); - assertEquals(1, reader.getVectorSchemaRoot().getFieldVectors().size()); - - NullVector readNullVector = - (NullVector) 
reader.getVectorSchemaRoot().getFieldVectors().get(0); - assertEquals(valueCount, readNullVector.getValueCount()); - } - } - - @Test - public void testWriteReadWithDictionaries() throws IOException { - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - provider.put(dictionary1); - - VarCharVector vector1 = newVarCharVector("varchar1", allocator); - vector1.allocateNewSafe(); - vector1.set(0, "foo".getBytes(StandardCharsets.UTF_8)); - vector1.set(1, "bar".getBytes(StandardCharsets.UTF_8)); - vector1.set(3, "baz".getBytes(StandardCharsets.UTF_8)); - vector1.set(4, "bar".getBytes(StandardCharsets.UTF_8)); - vector1.set(5, "baz".getBytes(StandardCharsets.UTF_8)); - vector1.setValueCount(6); - FieldVector encodedVector1 = (FieldVector) DictionaryEncoder.encode(vector1, dictionary1); - vector1.close(); - - VarCharVector vector2 = newVarCharVector("varchar2", allocator); - vector2.allocateNewSafe(); - vector2.set(0, "bar".getBytes(StandardCharsets.UTF_8)); - vector2.set(1, "baz".getBytes(StandardCharsets.UTF_8)); - vector2.set(2, "foo".getBytes(StandardCharsets.UTF_8)); - vector2.set(3, "foo".getBytes(StandardCharsets.UTF_8)); - vector2.set(4, "foo".getBytes(StandardCharsets.UTF_8)); - vector2.set(5, "bar".getBytes(StandardCharsets.UTF_8)); - vector2.setValueCount(6); - FieldVector encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector2, dictionary1); - vector2.close(); - - List fields = Arrays.asList(encodedVector1.getField(), encodedVector2.getField()); - List vectors = Collections2.asImmutableList(encodedVector1, encodedVector2); - try (VectorSchemaRoot root = - new VectorSchemaRoot(fields, vectors, encodedVector1.getValueCount()); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - ArrowFileWriter writer = new ArrowFileWriter(root, provider, newChannel(out)); ) { - - writer.start(); - writer.writeBatch(); - writer.end(); - - try (SeekableReadChannel channel = - new SeekableReadChannel(new 
ByteArrayReadableSeekableByteChannel(out.toByteArray())); - ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { - Schema readSchema = reader.getVectorSchemaRoot().getSchema(); - assertEquals(root.getSchema(), readSchema); - assertEquals(1, reader.getDictionaryBlocks().size()); - assertEquals(1, reader.getRecordBlocks().size()); - - reader.loadNextBatch(); - assertEquals(2, reader.getVectorSchemaRoot().getFieldVectors().size()); - } - } - } - - @Test - public void testWriteReadWithStructDictionaries() throws IOException { - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - provider.put(dictionary4); - - try (final StructVector vector = - newVector(StructVector.class, "D4", MinorType.STRUCT, allocator)) { - final Map> values = new HashMap<>(); - // Index: 0, 2, 1, 2, 1, 0, 0 - values.put("a", Arrays.asList(1, 3, 2, 3, 2, 1, 1)); - values.put("b", Arrays.asList(4, 6, 5, 6, 5, 4, 4)); - setVector(vector, values); - FieldVector encodedVector = (FieldVector) DictionaryEncoder.encode(vector, dictionary4); - - List fields = Arrays.asList(encodedVector.getField()); - List vectors = Collections2.asImmutableList(encodedVector); - try (VectorSchemaRoot root = - new VectorSchemaRoot(fields, vectors, encodedVector.getValueCount()); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - ArrowFileWriter writer = new ArrowFileWriter(root, provider, newChannel(out)); ) { - - writer.start(); - writer.writeBatch(); - writer.end(); - - try (SeekableReadChannel channel = - new SeekableReadChannel( - new ByteArrayReadableSeekableByteChannel(out.toByteArray())); - ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { - final VectorSchemaRoot readRoot = reader.getVectorSchemaRoot(); - final Schema readSchema = readRoot.getSchema(); - assertEquals(root.getSchema(), readSchema); - assertEquals(1, reader.getDictionaryBlocks().size()); - assertEquals(1, reader.getRecordBlocks().size()); - - 
reader.loadNextBatch(); - assertEquals(1, readRoot.getFieldVectors().size()); - assertEquals(1, reader.getDictionaryVectors().size()); - - // Read the encoded vector and check it - final FieldVector readEncoded = readRoot.getVector(0); - assertEquals(encodedVector.getValueCount(), readEncoded.getValueCount()); - assertTrue( - new RangeEqualsVisitor(encodedVector, readEncoded) - .rangeEquals(new Range(0, 0, encodedVector.getValueCount()))); - - // Read the dictionary - final Map readDictionaryMap = reader.getDictionaryVectors(); - final Dictionary readDictionary = - readDictionaryMap.get(readEncoded.getField().getDictionary().getId()); - assertNotNull(readDictionary); - - // Assert the dictionary vector is correct - final FieldVector readDictionaryVector = readDictionary.getVector(); - assertEquals(dictionaryVector4.getValueCount(), readDictionaryVector.getValueCount()); - final BiFunction typeComparatorIgnoreName = - (v1, v2) -> new TypeEqualsVisitor(v1, false, true).equals(v2); - assertTrue( - new RangeEqualsVisitor( - dictionaryVector4, readDictionaryVector, typeComparatorIgnoreName) - .rangeEquals(new Range(0, 0, dictionaryVector4.getValueCount())), - "Dictionary vectors are not equal"); - - // Assert the decoded vector is correct - try (final ValueVector readVector = - DictionaryEncoder.decode(readEncoded, readDictionary)) { - assertEquals(vector.getValueCount(), readVector.getValueCount()); - assertTrue( - new RangeEqualsVisitor(vector, readVector, typeComparatorIgnoreName) - .rangeEquals(new Range(0, 0, vector.getValueCount())), - "Decoded vectors are not equal"); - } - } - } - } - } - - @Test - public void testEmptyStreamInFileIPC() throws IOException { - - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - provider.put(dictionary1); - - VarCharVector vector = newVarCharVector("varchar", allocator); - vector.allocateNewSafe(); - vector.set(0, "foo".getBytes(StandardCharsets.UTF_8)); - vector.set(1, 
"bar".getBytes(StandardCharsets.UTF_8)); - vector.set(3, "baz".getBytes(StandardCharsets.UTF_8)); - vector.set(4, "bar".getBytes(StandardCharsets.UTF_8)); - vector.set(5, "baz".getBytes(StandardCharsets.UTF_8)); - vector.setValueCount(6); - - FieldVector encodedVector1A = (FieldVector) DictionaryEncoder.encode(vector, dictionary1); - vector.close(); - - List fields = Arrays.asList(encodedVector1A.getField()); - List vectors = Collections2.asImmutableList(encodedVector1A); - - try (VectorSchemaRoot root = - new VectorSchemaRoot(fields, vectors, encodedVector1A.getValueCount()); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - ArrowFileWriter writer = new ArrowFileWriter(root, provider, newChannel(out))) { - - writer.start(); - writer.end(); - - try (SeekableReadChannel channel = - new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(out.toByteArray())); - ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { - Schema readSchema = reader.getVectorSchemaRoot().getSchema(); - assertEquals(root.getSchema(), readSchema); - assertEquals(1, reader.getDictionaryVectors().size()); - assertEquals(0, reader.getDictionaryBlocks().size()); - assertEquals(0, reader.getRecordBlocks().size()); - } - } - } - - @Test - public void testEmptyStreamInStreamingIPC() throws IOException { - - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - provider.put(dictionary1); - - VarCharVector vector = newVarCharVector("varchar", allocator); - vector.allocateNewSafe(); - vector.set(0, "foo".getBytes(StandardCharsets.UTF_8)); - vector.set(1, "bar".getBytes(StandardCharsets.UTF_8)); - vector.set(3, "baz".getBytes(StandardCharsets.UTF_8)); - vector.set(4, "bar".getBytes(StandardCharsets.UTF_8)); - vector.set(5, "baz".getBytes(StandardCharsets.UTF_8)); - vector.setValueCount(6); - - FieldVector encodedVector = (FieldVector) DictionaryEncoder.encode(vector, dictionary1); - vector.close(); - - List fields = 
Arrays.asList(encodedVector.getField()); - try (VectorSchemaRoot root = - new VectorSchemaRoot( - fields, Arrays.asList(encodedVector), encodedVector.getValueCount()); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - ArrowStreamWriter writer = new ArrowStreamWriter(root, provider, newChannel(out))) { - - writer.start(); - writer.end(); - - try (ArrowStreamReader reader = - new ArrowStreamReader( - new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator)) { - Schema readSchema = reader.getVectorSchemaRoot().getSchema(); - assertEquals(root.getSchema(), readSchema); - assertEquals(1, reader.getDictionaryVectors().size()); - assertFalse(reader.loadNextBatch()); - } - } - } - - @Test - public void testDictionaryReplacement() throws Exception { - VarCharVector vector1 = newVarCharVector("varchar1", allocator); - setVector( - vector1, - "foo".getBytes(StandardCharsets.UTF_8), - "bar".getBytes(StandardCharsets.UTF_8), - "baz".getBytes(StandardCharsets.UTF_8), - "bar".getBytes(StandardCharsets.UTF_8)); - - FieldVector encodedVector1 = (FieldVector) DictionaryEncoder.encode(vector1, dictionary1); - - VarCharVector vector2 = newVarCharVector("varchar2", allocator); - setVector( - vector2, - "foo".getBytes(StandardCharsets.UTF_8), - "foo".getBytes(StandardCharsets.UTF_8), - "foo".getBytes(StandardCharsets.UTF_8), - "foo".getBytes(StandardCharsets.UTF_8)); - - FieldVector encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector2, dictionary1); - - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - provider.put(dictionary1); - List schemaFields = new ArrayList<>(); - schemaFields.add( - DictionaryUtility.toMessageFormat(encodedVector1.getField(), provider, new HashSet<>())); - schemaFields.add( - DictionaryUtility.toMessageFormat(encodedVector2.getField(), provider, new HashSet<>())); - Schema schema = new Schema(schemaFields); - - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); 
- WriteChannel out = new WriteChannel(newChannel(outStream)); - - // write schema - MessageSerializer.serialize(out, schema); - - List closeableList = new ArrayList<>(); - - // write non-delta dictionary with id=1 - serializeDictionaryBatch(out, dictionary3, false, closeableList); - - // write non-delta dictionary with id=1 - serializeDictionaryBatch(out, dictionary1, false, closeableList); - - // write recordBatch2 - serializeRecordBatch(out, Arrays.asList(encodedVector1, encodedVector2), closeableList); - - // write eos - out.writeIntLittleEndian(0); - - try (ArrowStreamReader reader = - new ArrowStreamReader( - new ByteArrayReadableSeekableByteChannel(outStream.toByteArray()), allocator)) { - assertEquals(1, reader.getDictionaryVectors().size()); - assertTrue(reader.loadNextBatch()); - FieldVector dictionaryVector = reader.getDictionaryVectors().get(1L).getVector(); - // make sure the delta dictionary is concatenated. - assertTrue(VectorEqualsVisitor.vectorEquals(dictionaryVector, dictionaryVector1, null)); - assertFalse(reader.loadNextBatch()); - } - - vector1.close(); - vector2.close(); - AutoCloseables.close(closeableList); - } - - @Test - public void testDeltaDictionary() throws Exception { - VarCharVector vector1 = newVarCharVector("varchar1", allocator); - setVector( - vector1, - "foo".getBytes(StandardCharsets.UTF_8), - "bar".getBytes(StandardCharsets.UTF_8), - "baz".getBytes(StandardCharsets.UTF_8), - "bar".getBytes(StandardCharsets.UTF_8)); - - FieldVector encodedVector1 = (FieldVector) DictionaryEncoder.encode(vector1, dictionary1); - - VarCharVector vector2 = newVarCharVector("varchar2", allocator); - setVector( - vector2, - "foo".getBytes(StandardCharsets.UTF_8), - "aa".getBytes(StandardCharsets.UTF_8), - "bb".getBytes(StandardCharsets.UTF_8), - "cc".getBytes(StandardCharsets.UTF_8)); - - FieldVector encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector2, dictionary3); - - DictionaryProvider.MapDictionaryProvider provider = - new 
DictionaryProvider.MapDictionaryProvider(); - provider.put(dictionary1); - provider.put(dictionary3); - List schemaFields = new ArrayList<>(); - schemaFields.add( - DictionaryUtility.toMessageFormat(encodedVector1.getField(), provider, new HashSet<>())); - schemaFields.add( - DictionaryUtility.toMessageFormat(encodedVector2.getField(), provider, new HashSet<>())); - Schema schema = new Schema(schemaFields); - - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - WriteChannel out = new WriteChannel(newChannel(outStream)); - - // write schema - MessageSerializer.serialize(out, schema); - - List closeableList = new ArrayList<>(); - - // write non-delta dictionary with id=1 - serializeDictionaryBatch(out, dictionary1, false, closeableList); - - // write delta dictionary with id=1 - Dictionary deltaDictionary = - new Dictionary(dictionaryVector2, new DictionaryEncoding(1L, false, null)); - serializeDictionaryBatch(out, deltaDictionary, true, closeableList); - deltaDictionary.getVector().close(); - - // write recordBatch2 - serializeRecordBatch(out, Arrays.asList(encodedVector1, encodedVector2), closeableList); - - // write eos - out.writeIntLittleEndian(0); - - try (ArrowStreamReader reader = - new ArrowStreamReader( - new ByteArrayReadableSeekableByteChannel(outStream.toByteArray()), allocator)) { - assertEquals(1, reader.getDictionaryVectors().size()); - assertTrue(reader.loadNextBatch()); - FieldVector dictionaryVector = reader.getDictionaryVectors().get(1L).getVector(); - // make sure the delta dictionary is concatenated. 
- assertTrue(VectorEqualsVisitor.vectorEquals(dictionaryVector, dictionaryVector3, null)); - assertFalse(reader.loadNextBatch()); - } - - vector1.close(); - vector2.close(); - AutoCloseables.close(closeableList); - } - - // Tests that the ArrowStreamWriter re-emits dictionaries when they change - @Test - public void testWriteReadStreamWithDictionaryReplacement() throws Exception { - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - provider.put(dictionary1); - - String[] batch0 = {"foo", "bar", "baz", "bar", "baz"}; - String[] batch1 = {"foo", "aa", "bar", "bb", "baz", "cc"}; - - VarCharVector vector = newVarCharVector("varchar", allocator); - vector.allocateNewSafe(); - for (int i = 0; i < batch0.length; ++i) { - vector.set(i, batch0[i].getBytes(StandardCharsets.UTF_8)); - } - vector.setValueCount(batch0.length); - FieldVector encodedVector1 = (FieldVector) DictionaryEncoder.encode(vector, dictionary1); - - List fields = Arrays.asList(encodedVector1.getField()); - try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { - try (VectorSchemaRoot root = - new VectorSchemaRoot( - fields, Arrays.asList(encodedVector1), encodedVector1.getValueCount()); - ArrowStreamWriter writer = new ArrowStreamWriter(root, provider, newChannel(out))) { - writer.start(); - - // Write batch with initial data and dictionary - writer.writeBatch(); - - // Create data for the next batch, using an extended dictionary with the same id - vector.reset(); - for (int i = 0; i < batch1.length; ++i) { - vector.set(i, batch1[i].getBytes(StandardCharsets.UTF_8)); - } - vector.setValueCount(batch1.length); - - // Re-encode and move encoded data into the vector schema root - provider.put(dictionary3); - FieldVector encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector, dictionary3); - TransferPair transferPair = encodedVector2.makeTransferPair(root.getVector(0)); - transferPair.transfer(); - - // Write second batch - 
root.setRowCount(batch1.length); - writer.writeBatch(); - - writer.end(); - } - - try (ArrowStreamReader reader = - new ArrowStreamReader( - new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator)) { - VectorSchemaRoot root = reader.getVectorSchemaRoot(); - - // Read and verify first batch - assertTrue(reader.loadNextBatch()); - assertEquals(batch0.length, root.getRowCount()); - FieldVector readEncoded1 = root.getVector(0); - long dictionaryId = readEncoded1.getField().getDictionary().getId(); - try (VarCharVector decodedValues = - (VarCharVector) DictionaryEncoder.decode(readEncoded1, reader.lookup(dictionaryId))) { - for (int i = 0; i < batch0.length; ++i) { - assertEquals(batch0[i], new String(decodedValues.get(i), StandardCharsets.UTF_8)); - } - } - - // Read and verify second batch - assertTrue(reader.loadNextBatch()); - assertEquals(batch1.length, root.getRowCount()); - FieldVector readEncoded2 = root.getVector(0); - dictionaryId = readEncoded2.getField().getDictionary().getId(); - try (VarCharVector decodedValues = - (VarCharVector) DictionaryEncoder.decode(readEncoded2, reader.lookup(dictionaryId))) { - for (int i = 0; i < batch1.length; ++i) { - assertEquals(batch1[i], new String(decodedValues.get(i), StandardCharsets.UTF_8)); - } - } - - assertFalse(reader.loadNextBatch()); - } - } - - vector.close(); - } - - private void serializeDictionaryBatch( - WriteChannel out, Dictionary dictionary, boolean isDelta, List closeables) - throws IOException { - - FieldVector dictVector = dictionary.getVector(); - VectorSchemaRoot root = - new VectorSchemaRoot( - Collections.singletonList(dictVector.getField()), - Collections.singletonList(dictVector), - dictVector.getValueCount()); - ArrowDictionaryBatch batch = - new ArrowDictionaryBatch( - dictionary.getEncoding().getId(), new VectorUnloader(root).getRecordBatch(), isDelta); - MessageSerializer.serialize(out, batch); - closeables.add(batch); - closeables.add(root); - } - - private void 
serializeRecordBatch( - WriteChannel out, List vectors, List closeables) - throws IOException { - - List fields = vectors.stream().map(v -> v.getField()).collect(Collectors.toList()); - VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vectors.get(0).getValueCount()); - VectorUnloader unloader = new VectorUnloader(root); - ArrowRecordBatch batch = unloader.getRecordBatch(); - MessageSerializer.serialize(out, batch); - closeables.add(batch); - closeables.add(root); - } - - @Test - public void testReadInterleavedData() throws IOException { - List batches = createRecordBatches(); - - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - WriteChannel out = new WriteChannel(newChannel(outStream)); - - // write schema - MessageSerializer.serialize(out, schema); - - // write dictionary1 - FieldVector dictVector1 = dictionary1.getVector(); - VectorSchemaRoot dictRoot1 = - new VectorSchemaRoot( - Collections.singletonList(dictVector1.getField()), - Collections.singletonList(dictVector1), - dictVector1.getValueCount()); - ArrowDictionaryBatch dictionaryBatch1 = - new ArrowDictionaryBatch(1, new VectorUnloader(dictRoot1).getRecordBatch()); - MessageSerializer.serialize(out, dictionaryBatch1); - dictionaryBatch1.close(); - dictRoot1.close(); - - // write recordBatch1 - MessageSerializer.serialize(out, batches.get(0)); - - // write dictionary2 - FieldVector dictVector2 = dictionary2.getVector(); - VectorSchemaRoot dictRoot2 = - new VectorSchemaRoot( - Collections.singletonList(dictVector2.getField()), - Collections.singletonList(dictVector2), - dictVector2.getValueCount()); - ArrowDictionaryBatch dictionaryBatch2 = - new ArrowDictionaryBatch(2, new VectorUnloader(dictRoot2).getRecordBatch()); - MessageSerializer.serialize(out, dictionaryBatch2); - dictionaryBatch2.close(); - dictRoot2.close(); - - // write recordBatch1 - MessageSerializer.serialize(out, batches.get(1)); - - // write eos - out.writeIntLittleEndian(0); - - try (ArrowStreamReader reader = - 
new ArrowStreamReader( - new ByteArrayReadableSeekableByteChannel(outStream.toByteArray()), allocator)) { - Schema readSchema = reader.getVectorSchemaRoot().getSchema(); - assertEquals(encodedSchema, readSchema); - assertEquals(2, reader.getDictionaryVectors().size()); - assertTrue(reader.loadNextBatch()); - assertTrue(reader.loadNextBatch()); - assertFalse(reader.loadNextBatch()); - } - - batches.forEach(batch -> batch.close()); - } - - private List createRecordBatches() { - List batches = new ArrayList<>(); - - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - provider.put(dictionary1); - provider.put(dictionary2); - - VarCharVector vectorA1 = newVarCharVector("varcharA1", allocator); - vectorA1.allocateNewSafe(); - vectorA1.set(0, "foo".getBytes(StandardCharsets.UTF_8)); - vectorA1.set(1, "bar".getBytes(StandardCharsets.UTF_8)); - vectorA1.set(3, "baz".getBytes(StandardCharsets.UTF_8)); - vectorA1.set(4, "bar".getBytes(StandardCharsets.UTF_8)); - vectorA1.set(5, "baz".getBytes(StandardCharsets.UTF_8)); - vectorA1.setValueCount(6); - - VarCharVector vectorA2 = newVarCharVector("varcharA2", allocator); - vectorA2.setValueCount(6); - FieldVector encodedVectorA1 = (FieldVector) DictionaryEncoder.encode(vectorA1, dictionary1); - vectorA1.close(); - FieldVector encodedVectorA2 = (FieldVector) DictionaryEncoder.encode(vectorA1, dictionary2); - vectorA2.close(); - - List fields = Arrays.asList(encodedVectorA1.getField(), encodedVectorA2.getField()); - List vectors = Collections2.asImmutableList(encodedVectorA1, encodedVectorA2); - VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, encodedVectorA1.getValueCount()); - VectorUnloader unloader = new VectorUnloader(root); - batches.add(unloader.getRecordBatch()); - root.close(); - - VarCharVector vectorB1 = newVarCharVector("varcharB1", allocator); - vectorB1.setValueCount(6); - - VarCharVector vectorB2 = newVarCharVector("varcharB2", allocator); - 
vectorB2.allocateNew(); - vectorB2.setValueCount(6); - vectorB2.set(0, "aa".getBytes(StandardCharsets.UTF_8)); - vectorB2.set(1, "aa".getBytes(StandardCharsets.UTF_8)); - vectorB2.set(3, "bb".getBytes(StandardCharsets.UTF_8)); - vectorB2.set(4, "bb".getBytes(StandardCharsets.UTF_8)); - vectorB2.set(5, "cc".getBytes(StandardCharsets.UTF_8)); - vectorB2.setValueCount(6); - FieldVector encodedVectorB1 = (FieldVector) DictionaryEncoder.encode(vectorB1, dictionary1); - vectorB1.close(); - FieldVector encodedVectorB2 = (FieldVector) DictionaryEncoder.encode(vectorB2, dictionary2); - vectorB2.close(); - - List fieldsB = Arrays.asList(encodedVectorB1.getField(), encodedVectorB2.getField()); - List vectorsB = Collections2.asImmutableList(encodedVectorB1, encodedVectorB2); - VectorSchemaRoot rootB = new VectorSchemaRoot(fieldsB, vectorsB, 6); - VectorUnloader unloaderB = new VectorUnloader(rootB); - batches.add(unloaderB.getRecordBatch()); - rootB.close(); - - List schemaFields = new ArrayList<>(); - schemaFields.add( - DictionaryUtility.toMessageFormat(encodedVectorA1.getField(), provider, new HashSet<>())); - schemaFields.add( - DictionaryUtility.toMessageFormat(encodedVectorA2.getField(), provider, new HashSet<>())); - schema = new Schema(schemaFields); - - encodedSchema = - new Schema(Arrays.asList(encodedVectorA1.getField(), encodedVectorA2.getField())); - - return batches; - } - - @Test - public void testLegacyIpcBackwardsCompatibility() throws Exception { - Schema schema = new Schema(asList(Field.nullable("field", new ArrowType.Int(32, true)))); - IntVector vector = new IntVector("vector", allocator); - final int valueCount = 2; - vector.setValueCount(valueCount); - vector.setSafe(0, 1); - vector.setSafe(1, 2); - ArrowRecordBatch batch = - new ArrowRecordBatch( - valueCount, - asList(new ArrowFieldNode(valueCount, 0)), - asList(vector.getValidityBuffer(), vector.getDataBuffer())); - - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - WriteChannel out = 
new WriteChannel(newChannel(outStream)); - - // write legacy ipc format - IpcOption option = new IpcOption(true, MetadataVersion.DEFAULT); - MessageSerializer.serialize(out, schema, option); - MessageSerializer.serialize(out, batch); - - ReadChannel in = new ReadChannel(newChannel(new ByteArrayInputStream(outStream.toByteArray()))); - Schema readSchema = MessageSerializer.deserializeSchema(in); - assertEquals(schema, readSchema); - ArrowRecordBatch readBatch = MessageSerializer.deserializeRecordBatch(in, allocator); - assertEquals(batch.getLength(), readBatch.getLength()); - assertEquals(batch.computeBodyLength(), readBatch.computeBodyLength()); - readBatch.close(); - - // write ipc format with continuation - option = IpcOption.DEFAULT; - MessageSerializer.serialize(out, schema, option); - MessageSerializer.serialize(out, batch); - - ReadChannel in2 = - new ReadChannel(newChannel(new ByteArrayInputStream(outStream.toByteArray()))); - Schema readSchema2 = MessageSerializer.deserializeSchema(in2); - assertEquals(schema, readSchema2); - ArrowRecordBatch readBatch2 = MessageSerializer.deserializeRecordBatch(in2, allocator); - assertEquals(batch.getLength(), readBatch2.getLength()); - assertEquals(batch.computeBodyLength(), readBatch2.computeBodyLength()); - readBatch2.close(); - - batch.close(); - vector.close(); - } - - @Test - public void testChannelReadFully() throws IOException { - final ByteBuffer buf = ByteBuffer.allocate(4).order(ByteOrder.nativeOrder()); - buf.putInt(200); - buf.rewind(); - - try (ReadChannel channel = - new ReadChannel(Channels.newChannel(new ByteArrayInputStream(buf.array()))); - ArrowBuf arrBuf = allocator.buffer(8)) { - arrBuf.setInt(0, 100); - arrBuf.writerIndex(4); - assertEquals(4, arrBuf.writerIndex()); - - long n = channel.readFully(arrBuf, 4); - assertEquals(4, n); - assertEquals(8, arrBuf.writerIndex()); - - assertEquals(100, arrBuf.getInt(0)); - assertEquals(200, arrBuf.getInt(4)); - } - } - - @Test - public void 
testChannelReadFullyEos() throws IOException { - final ByteBuffer buf = ByteBuffer.allocate(4).order(ByteOrder.nativeOrder()); - buf.putInt(10); - buf.rewind(); - - try (ReadChannel channel = - new ReadChannel(Channels.newChannel(new ByteArrayInputStream(buf.array()))); - ArrowBuf arrBuf = allocator.buffer(8)) { - int n = channel.readFully(arrBuf.nioBuffer(0, 8)); - assertEquals(4, n); - - // the input has only 4 bytes, so the number of bytes read should be 4 - assertEquals(4, channel.bytesRead()); - - // the first 4 bytes have been read successfully. - assertEquals(10, arrBuf.getInt(0)); - } - } - - @Test - public void testCustomMetaData() throws IOException { - - VarCharVector vector = newVarCharVector("varchar1", allocator); - - List fields = Arrays.asList(vector.getField()); - List vectors = Collections2.asImmutableList(vector); - Map metadata = new HashMap<>(); - metadata.put("key1", "value1"); - metadata.put("key2", "value2"); - try (VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector.getValueCount()); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - ArrowFileWriter writer = new ArrowFileWriter(root, null, newChannel(out), metadata); ) { - - writer.start(); - writer.end(); - - try (SeekableReadChannel channel = - new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(out.toByteArray())); - ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { - reader.getVectorSchemaRoot(); - - Map readMeta = reader.getMetaData(); - assertEquals(2, readMeta.size()); - assertEquals("value1", readMeta.get("key1")); - assertEquals("value2", readMeta.get("key2")); - } - } - } - - /** - * This test case covers the case for which the footer size is extremely large (much larger than - * the file size). Due to integer overflow, our implementation fails detect the problem, which - * leads to extremely large memory allocation and eventually causing an OutOfMemoryError. 
- */ - @Test - public void testFileFooterSizeOverflow() { - // copy of org.apache.arrow.vector.ipc.ArrowMagic#MAGIC - final byte[] magicBytes = "ARROW1".getBytes(StandardCharsets.UTF_8); - - // prepare input data - byte[] data = new byte[30]; - System.arraycopy(magicBytes, 0, data, 0, ArrowMagic.MAGIC_LENGTH); - int footerLength = Integer.MAX_VALUE; - byte[] footerLengthBytes = - ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putInt(footerLength).array(); - int footerOffset = data.length - ArrowMagic.MAGIC_LENGTH - 4; - System.arraycopy(footerLengthBytes, 0, data, footerOffset, 4); - System.arraycopy(magicBytes, 0, data, footerOffset + 4, ArrowMagic.MAGIC_LENGTH); - - // test file reader - InvalidArrowFileException e = - assertThrows( - InvalidArrowFileException.class, - () -> { - try (SeekableReadChannel channel = - new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(data)); - ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { - reader.getVectorSchemaRoot().getSchema(); - } - }); - - assertEquals("invalid footer length: " + footerLength, e.getMessage()); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java deleted file mode 100644 index aa6ceffa0605e..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.util.Collections; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; - -public class TestArrowStream extends BaseFileTest { - @Test - public void testEmptyStream() throws IOException { - Schema schema = MessageSerializerTest.testSchema(); - VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator); - - // Write the stream. 
- ByteArrayOutputStream out = new ByteArrayOutputStream(); - ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out); - writer.close(); - assertTrue(out.size() > 0); - - ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); - try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator)) { - assertEquals(schema, reader.getVectorSchemaRoot().getSchema()); - // Empty should return false - assertFalse(reader.loadNextBatch()); - assertEquals(0, reader.getVectorSchemaRoot().getRowCount()); - assertFalse(reader.loadNextBatch()); - assertEquals(0, reader.getVectorSchemaRoot().getRowCount()); - } - } - - @Test - public void testStreamZeroLengthBatch() throws IOException { - ByteArrayOutputStream os = new ByteArrayOutputStream(); - - try (IntVector vector = new IntVector("foo", allocator); ) { - Schema schema = new Schema(Collections.singletonList(vector.getField())); - try (VectorSchemaRoot root = - new VectorSchemaRoot( - schema, Collections.singletonList(vector), vector.getValueCount()); - ArrowStreamWriter writer = new ArrowStreamWriter(root, null, Channels.newChannel(os)); ) { - vector.setValueCount(0); - root.setRowCount(0); - writer.writeBatch(); - writer.end(); - } - } - - ByteArrayInputStream in = new ByteArrayInputStream(os.toByteArray()); - - try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator); ) { - VectorSchemaRoot root = reader.getVectorSchemaRoot(); - IntVector vector = (IntVector) root.getFieldVectors().get(0); - reader.loadNextBatch(); - assertEquals(0, vector.getValueCount()); - assertEquals(0, root.getRowCount()); - } - } - - @Test - public void testReadWrite() throws IOException { - Schema schema = MessageSerializerTest.testSchema(); - try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - int numBatches = 1; - - root.getFieldVectors().get(0).allocateNew(); - TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0); - for (int i = 0; i < 16; i++) { - vector.set(i, i < 8 
? 1 : 0, (byte) (i + 1)); - } - vector.setValueCount(16); - root.setRowCount(16); - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - long bytesWritten = 0; - try (ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out)) { - writer.start(); - for (int i = 0; i < numBatches; i++) { - writer.writeBatch(); - } - writer.end(); - bytesWritten = writer.bytesWritten(); - } - - ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); - try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator)) { - Schema readSchema = reader.getVectorSchemaRoot().getSchema(); - assertEquals(schema, readSchema); - for (int i = 0; i < numBatches; i++) { - assertTrue(reader.loadNextBatch()); - } - // TODO figure out why reader isn't getting padding bytes - assertEquals(bytesWritten, reader.bytesRead() + 8); - assertFalse(reader.loadNextBatch()); - assertEquals(0, reader.getVectorSchemaRoot().getRowCount()); - } - } - } - - @Test - public void testReadWriteMultipleBatches() throws IOException { - ByteArrayOutputStream os = new ByteArrayOutputStream(); - - try (IntVector vector = new IntVector("foo", allocator); ) { - Schema schema = new Schema(Collections.singletonList(vector.getField())); - try (VectorSchemaRoot root = - new VectorSchemaRoot( - schema, Collections.singletonList(vector), vector.getValueCount()); - ArrowStreamWriter writer = new ArrowStreamWriter(root, null, Channels.newChannel(os)); ) { - writeBatchData(writer, vector, root); - } - } - - ByteArrayInputStream in = new ByteArrayInputStream(os.toByteArray()); - - try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator); ) { - IntVector vector = (IntVector) reader.getVectorSchemaRoot().getFieldVectors().get(0); - validateBatchData(reader, vector); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java deleted file mode 100644 index 
c771d728423d1..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -import java.io.IOException; -import java.nio.channels.Pipe; -import java.nio.channels.ReadableByteChannel; -import java.nio.channels.WritableByteChannel; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; - -public class TestArrowStreamPipe { - Schema schema = MessageSerializerTest.testSchema(); - BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE); - - private final class WriterThread extends Thread { - - private final int numBatches; - private final ArrowStreamWriter writer; - private final VectorSchemaRoot root; - - public WriterThread(int numBatches, WritableByteChannel 
sinkChannel) throws IOException { - this.numBatches = numBatches; - BufferAllocator allocator = alloc.newChildAllocator("writer thread", 0, Integer.MAX_VALUE); - root = VectorSchemaRoot.create(schema, allocator); - writer = new ArrowStreamWriter(root, null, sinkChannel); - } - - @Override - public void run() { - try { - writer.start(); - for (int j = 0; j < numBatches; j++) { - root.getFieldVectors().get(0).allocateNew(); - TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0); - // Send a changing batch id first - vector.set(0, j); - for (int i = 1; i < 16; i++) { - vector.set(i, i < 8 ? 1 : 0, (byte) (i + 1)); - } - vector.setValueCount(16); - root.setRowCount(16); - - writer.writeBatch(); - } - writer.close(); - root.close(); - } catch (IOException e) { - e.printStackTrace(); - fail(e.toString()); // have to explicitly fail since we're in a separate thread - } - } - - public long bytesWritten() { - return writer.bytesWritten(); - } - } - - private final class ReaderThread extends Thread { - private int batchesRead = 0; - private final ArrowStreamReader reader; - private final BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE); - private boolean done = false; - - public ReaderThread(ReadableByteChannel sourceChannel) throws IOException { - reader = - new ArrowStreamReader(sourceChannel, alloc) { - - @Override - public boolean loadNextBatch() throws IOException { - if (super.loadNextBatch()) { - batchesRead++; - } else { - done = true; - return false; - } - VectorSchemaRoot root = getVectorSchemaRoot(); - assertEquals(16, root.getRowCount()); - TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0); - assertEquals((byte) (batchesRead - 1), vector.get(0)); - for (int i = 1; i < 16; i++) { - if (i < 8) { - assertEquals((byte) (i + 1), vector.get(i)); - } else { - assertTrue(vector.isNull(i)); - } - } - - return true; - } - }; - } - - @Override - public void run() { - try { - assertEquals(schema, 
reader.getVectorSchemaRoot().getSchema()); - while (!done) { - assertTrue(reader.loadNextBatch() != done); - } - reader.close(); - } catch (IOException e) { - e.printStackTrace(); - fail(e.toString()); // have to explicitly fail since we're in a separate thread - } - } - - public int getBatchesRead() { - return batchesRead; - } - - public long bytesRead() { - return reader.bytesRead(); - } - } - - // Starts up a producer and consumer thread to read/write batches. - @Test - public void pipeTest() throws IOException, InterruptedException { - final int NUM_BATCHES = 10; - Pipe pipe = Pipe.open(); - WriterThread writer = new WriterThread(NUM_BATCHES, pipe.sink()); - ReaderThread reader = new ReaderThread(pipe.source()); - - writer.start(); - reader.start(); - reader.join(); - writer.join(); - - assertEquals(NUM_BATCHES, reader.getBatchesRead()); - assertEquals(writer.bytesWritten(), reader.bytesRead()); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java deleted file mode 100644 index 8037212aaea21..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java +++ /dev/null @@ -1,524 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; - -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; -import org.apache.arrow.vector.complex.writer.BaseWriter; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.Validator; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestJSONFile extends BaseFileTest { - private static final Logger LOGGER = LoggerFactory.getLogger(TestJSONFile.class); - - @Test - public void testNoBatches() throws IOException { - File file = new File("target/no_batches.json"); - - try (BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { - BaseWriter.ComplexWriter writer = new ComplexWriterImpl("root", parent); - BaseWriter.StructWriter rootWriter = writer.rootAsStruct(); - 
rootWriter.integer("int"); - rootWriter.uInt1("uint1"); - rootWriter.bigInt("bigInt"); - rootWriter.float4("float"); - JsonFileWriter jsonWriter = new JsonFileWriter(file, JsonFileWriter.config().pretty(true)); - jsonWriter.start(new VectorSchemaRoot(parent.getChild("root")).getSchema(), null); - jsonWriter.close(); - } - - // read - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - JsonFileReader reader = new JsonFileReader(file, readerAllocator)) { - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); - } - } - - @Test - public void testWriteRead() throws IOException { - File file = new File("target/mytest.json"); - int count = COUNT; - - // write - try (BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { - writeData(count, parent); - writeJSON(file, new VectorSchemaRoot(parent.getChild("root")), null); - } - - // read - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - JsonFileReader reader = new JsonFileReader(file, readerAllocator)) { - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); - - // initialize vectors - try (VectorSchemaRoot root = reader.read(); ) { - validateContent(count, root); - } - } - } - - @Test - public void testWriteReadComplexJSON() throws IOException { - File file = new File("target/mytest_complex.json"); - int count = COUNT; - - // write - try (BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { - writeComplexData(count, parent); - writeJSON(file, new VectorSchemaRoot(parent.getChild("root")), null); - } - - // read - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, 
Integer.MAX_VALUE); - JsonFileReader reader = new JsonFileReader(file, readerAllocator); ) { - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); - - // initialize vectors - try (VectorSchemaRoot root = reader.read(); ) { - validateComplexContent(count, root); - } - } - } - - @Test - public void testWriteComplexJSON() throws IOException { - File file = new File("target/mytest_write_complex.json"); - int count = COUNT; - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - StructVector parent = StructVector.empty("parent", vectorAllocator)) { - writeComplexData(count, parent); - VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root")); - validateComplexContent(root.getRowCount(), root); - writeJSON(file, root, null); - } - } - - public void writeJSON(File file, VectorSchemaRoot root, DictionaryProvider provider) - throws IOException { - JsonFileWriter writer = new JsonFileWriter(file, JsonFileWriter.config().pretty(true)); - writer.start(root.getSchema(), provider); - writer.write(root); - writer.close(); - } - - @Test - public void testWriteReadUnionJSON() throws IOException { - File file = new File("target/mytest_write_union.json"); - int count = COUNT; - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - StructVector parent = StructVector.empty("parent", vectorAllocator)) { - writeUnionData(count, parent); - printVectors(parent.getChildrenFromFields()); - - try (VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"))) { - validateUnionData(count, root); - writeJSON(file, root, null); - - // read - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE)) { - JsonFileReader reader = new JsonFileReader(file, readerAllocator); - - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); - - try (VectorSchemaRoot rootFromJson = 
reader.read(); ) { - validateUnionData(count, rootFromJson); - Validator.compareVectorSchemaRoot(root, rootFromJson); - } - } - } - } - } - - @Test - public void testWriteReadDateTimeJSON() throws IOException { - File file = new File("target/mytest_datetime.json"); - int count = COUNT; - - // write - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - StructVector parent = StructVector.empty("parent", vectorAllocator)) { - - writeDateTimeData(count, parent); - - printVectors(parent.getChildrenFromFields()); - - VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root")); - validateDateTimeContent(count, root); - - writeJSON(file, new VectorSchemaRoot(parent.getChild("root")), null); - } - - // read - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - JsonFileReader reader = new JsonFileReader(file, readerAllocator)) { - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); - - // initialize vectors - try (VectorSchemaRoot root = reader.read(); ) { - validateDateTimeContent(count, root); - } - } - } - - @Test - public void testWriteReadDictionaryJSON() throws IOException { - File file = new File("target/mytest_dictionary.json"); - - // write - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE)) { - MapDictionaryProvider provider = new MapDictionaryProvider(); - - try (VectorSchemaRoot root = writeFlatDictionaryData(vectorAllocator, provider)) { - printVectors(root.getFieldVectors()); - validateFlatDictionary(root, provider); - writeJSON(file, root, provider); - } - - // Need to close dictionary vectors - for (long id : provider.getDictionaryIds()) { - provider.lookup(id).getVector().close(); - } - } - - // read - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - JsonFileReader reader = new 
JsonFileReader(file, readerAllocator)) { - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); - - // initialize vectors - try (VectorSchemaRoot root = reader.read(); ) { - validateFlatDictionary(root, reader); - } - } - } - - @Test - public void testWriteReadNestedDictionaryJSON() throws IOException { - File file = new File("target/mytest_dict_nested.json"); - - // data being written: - // [['foo', 'bar'], ['foo'], ['bar']] -> [[0, 1], [0], [1]] - - // write - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE)) { - MapDictionaryProvider provider = new MapDictionaryProvider(); - - try (VectorSchemaRoot root = writeNestedDictionaryData(vectorAllocator, provider)) { - printVectors(root.getFieldVectors()); - validateNestedDictionary(root, provider); - writeJSON(file, root, provider); - } - - // Need to close dictionary vectors - for (long id : provider.getDictionaryIds()) { - provider.lookup(id).getVector().close(); - } - } - - // read - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - JsonFileReader reader = new JsonFileReader(file, readerAllocator)) { - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); - - // initialize vectors - try (VectorSchemaRoot root = reader.read(); ) { - validateNestedDictionary(root, reader); - } - } - } - - @Test - public void testWriteReadDecimalJSON() throws IOException { - File file = new File("target/mytest_decimal.json"); - - // write - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - VectorSchemaRoot root = writeDecimalData(vectorAllocator)) { - printVectors(root.getFieldVectors()); - validateDecimalData(root); - writeJSON(file, root, null); - } - - // read - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - JsonFileReader reader = new 
JsonFileReader(file, readerAllocator)) { - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); - - // initialize vectors - try (VectorSchemaRoot root = reader.read(); ) { - validateDecimalData(root); - } - } - } - - @Test - public void testSetStructLength() throws IOException { - File file = new File("../../docs/source/format/integration_json_examples/struct.json"); - if (!file.exists()) { - file = new File("../docs/source/format/integration_json_examples/struct.json"); - } - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - JsonFileReader reader = new JsonFileReader(file, readerAllocator)) { - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); - - // initialize vectors - try (VectorSchemaRoot root = reader.read(); ) { - FieldVector vector = root.getVector("struct_nullable"); - assertEquals(7, vector.getValueCount()); - } - } - } - - @Test - public void testWriteReadVarBinJSON() throws IOException { - File file = new File("target/mytest_varbin.json"); - int count = COUNT; - - // write - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - StructVector parent = StructVector.empty("parent", vectorAllocator)) { - writeVarBinaryData(count, parent); - VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root")); - validateVarBinary(count, root); - writeJSON(file, new VectorSchemaRoot(parent.getChild("root")), null); - } - - // read - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - JsonFileReader reader = new JsonFileReader(file, readerAllocator)) { - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); - - // initialize vectors - try (VectorSchemaRoot root = reader.read(); ) { - validateVarBinary(count, root); - } - } - } - - @Test - public void testWriteReadMapJSON() throws IOException { - File file = new 
File("target/mytest_map.json"); - - // write - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - VectorSchemaRoot root = writeMapData(vectorAllocator)) { - printVectors(root.getFieldVectors()); - validateMapData(root); - writeJSON(file, root, null); - } - - // read - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - JsonFileReader reader = new JsonFileReader(file, readerAllocator)) { - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); - - // initialize vectors - try (VectorSchemaRoot root = reader.read(); ) { - validateMapData(root); - } - } - } - - @Test - public void testWriteReadNullJSON() throws IOException { - File file = new File("target/mytest_null.json"); - int valueCount = 10; - - // write - try (BufferAllocator vectorAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - VectorSchemaRoot root = writeNullData(valueCount)) { - printVectors(root.getFieldVectors()); - validateNullData(root, valueCount); - writeJSON(file, root, null); - } - - // read - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE); - JsonFileReader reader = new JsonFileReader(file, readerAllocator)) { - - Schema schema = reader.start(); - LOGGER.debug("reading schema: " + schema); - - // initialize vectors - try (VectorSchemaRoot root = reader.read(); ) { - validateNullData(root, valueCount); - } - } - } - - /** Regression test for ARROW-17107. 
*/ - @Test - public void testRoundtripEmptyVector() throws Exception { - final List fields = - Arrays.asList( - Field.nullable("utf8", ArrowType.Utf8.INSTANCE), - Field.nullable("largeutf8", ArrowType.LargeUtf8.INSTANCE), - Field.nullable("binary", ArrowType.Binary.INSTANCE), - Field.nullable("largebinary", ArrowType.LargeBinary.INSTANCE), - Field.nullable("fixedsizebinary", new ArrowType.FixedSizeBinary(2)), - Field.nullable("decimal128", new ArrowType.Decimal(3, 2, 128)), - Field.nullable("decimal128", new ArrowType.Decimal(3, 2, 256)), - new Field( - "list", - FieldType.nullable(ArrowType.List.INSTANCE), - Collections.singletonList(Field.nullable("items", new ArrowType.Int(32, true)))), - new Field( - "listview", - FieldType.nullable(ArrowType.ListView.INSTANCE), - Collections.singletonList(Field.nullable("items", new ArrowType.Int(32, true)))), - new Field( - "largelist", - FieldType.nullable(ArrowType.LargeList.INSTANCE), - Collections.singletonList(Field.nullable("items", new ArrowType.Int(32, true)))), - new Field( - "largelistview", - FieldType.nullable(ArrowType.LargeListView.INSTANCE), - Collections.singletonList(Field.nullable("items", new ArrowType.Int(32, true)))), - new Field( - "map", - FieldType.nullable(new ArrowType.Map(/*keyssorted*/ false)), - Collections.singletonList( - new Field( - "items", - FieldType.notNullable(ArrowType.Struct.INSTANCE), - Arrays.asList( - Field.notNullable("keys", new ArrowType.Int(32, true)), - Field.nullable("values", new ArrowType.Int(32, true)))))), - new Field( - "fixedsizelist", - FieldType.nullable(new ArrowType.FixedSizeList(2)), - Collections.singletonList(Field.nullable("items", new ArrowType.Int(32, true)))), - new Field( - "denseunion", - FieldType.nullable(new ArrowType.Union(UnionMode.Dense, new int[] {0})), - Collections.singletonList(Field.nullable("items", new ArrowType.Int(32, true)))), - new Field( - "sparseunion", - FieldType.nullable(new ArrowType.Union(UnionMode.Sparse, new int[] {0})), - 
Collections.singletonList(Field.nullable("items", new ArrowType.Int(32, true))))); - - for (final Field field : fields) { - final Schema schema = new Schema(Collections.singletonList(field)); - try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - Path outputPath = Files.createTempFile("arrow-", ".json"); - File outputFile = outputPath.toFile(); - outputFile.deleteOnExit(); - - // Try with no allocation - try (final JsonFileWriter jsonWriter = - new JsonFileWriter(outputFile, JsonFileWriter.config().pretty(true))) { - jsonWriter.start(schema, null); - jsonWriter.write(root); - } catch (Exception e) { - throw new RuntimeException("Test failed for empty vector of type " + field, e); - } - - try (JsonFileReader reader = new JsonFileReader(outputFile, allocator)) { - final Schema readSchema = reader.start(); - assertEquals(schema, readSchema); - try (final VectorSchemaRoot data = reader.read()) { - assertNotNull(data); - assertEquals(0, data.getRowCount()); - } - assertNull(reader.read()); - } - - // Try with an explicit allocation - root.allocateNew(); - root.setRowCount(0); - try (final JsonFileWriter jsonWriter = - new JsonFileWriter(outputFile, JsonFileWriter.config().pretty(true))) { - jsonWriter.start(schema, null); - jsonWriter.write(root); - } catch (Exception e) { - throw new RuntimeException("Test failed for empty vector of type " + field, e); - } - - try (JsonFileReader reader = new JsonFileReader(outputFile, allocator)) { - final Schema readSchema = reader.start(); - assertEquals(schema, readSchema); - try (final VectorSchemaRoot data = reader.read()) { - assertNotNull(data); - assertEquals(0, data.getRowCount()); - } - assertNull(reader.read()); - } - } - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java deleted file mode 100644 index 65a3791dd4ab1..0000000000000 --- 
a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java +++ /dev/null @@ -1,720 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc; - -import static org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assumptions.assumeTrue; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.BiConsumer; -import java.util.stream.Stream; -import 
org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.Collections2; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.VectorUnloader; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.ArrowBlock; -import org.apache.arrow.vector.ipc.message.ArrowBuffer; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.ipc.message.IpcOption; -import org.apache.arrow.vector.ipc.message.MessageMetadataResult; -import org.apache.arrow.vector.ipc.message.MessageSerializer; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.MetadataVersion; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.MethodSource; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestRoundTrip extends BaseFileTest { - private static final Logger LOGGER = LoggerFactory.getLogger(TestRoundTrip.class); - private static BufferAllocator allocator; - - static Stream getWriteOption() { - final IpcOption legacy = new IpcOption(true, MetadataVersion.V4); - final IpcOption version4 = new IpcOption(false, MetadataVersion.V4); - return 
Stream.of( - new Object[] {"V4Legacy", legacy}, - new Object[] {"V4", version4}, - new Object[] {"V5", IpcOption.DEFAULT}); - } - - @BeforeAll - public static void setUpClass() { - allocator = new RootAllocator(Integer.MAX_VALUE); - } - - @AfterAll - public static void tearDownClass() { - allocator.close(); - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testStruct(String name, IpcOption writeOption) throws Exception { - try (final BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); - final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { - writeData(COUNT, parent); - roundTrip( - name, - writeOption, - new VectorSchemaRoot(parent.getChild("root")), - /* dictionaryProvider */ null, - TestRoundTrip::writeSingleBatch, - validateFileBatches(new int[] {COUNT}, this::validateContent), - validateStreamBatches(new int[] {COUNT}, this::validateContent)); - } - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testComplex(String name, IpcOption writeOption) throws Exception { - try (final BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); - final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { - writeComplexData(COUNT, parent); - roundTrip( - name, - writeOption, - new VectorSchemaRoot(parent.getChild("root")), - /* dictionaryProvider */ null, - TestRoundTrip::writeSingleBatch, - validateFileBatches(new int[] {COUNT}, this::validateComplexContent), - validateStreamBatches(new int[] {COUNT}, this::validateComplexContent)); - } - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testMultipleRecordBatches(String name, IpcOption writeOption) throws Exception { - int[] counts = {10, 5}; - try (final BufferAllocator originalVectorAllocator = - 
allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); - final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { - writeData(counts[0], parent); - roundTrip( - name, - writeOption, - new VectorSchemaRoot(parent.getChild("root")), - /* dictionaryProvider */ null, - (root, writer) -> { - writer.start(); - parent.allocateNew(); - writeData(counts[0], parent); - root.setRowCount(counts[0]); - writer.writeBatch(); - - parent.allocateNew(); - // if we write the same data we don't catch that the metadata is stored in the wrong - // order. - writeData(counts[1], parent); - root.setRowCount(counts[1]); - writer.writeBatch(); - - writer.end(); - }, - validateFileBatches(counts, this::validateContent), - validateStreamBatches(counts, this::validateContent)); - } - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testUnionV4(String name, IpcOption writeOption) throws Exception { - assumeTrue(writeOption.metadataVersion == MetadataVersion.V4); - final File temp = File.createTempFile("arrow-test-" + name + "-", ".arrow"); - temp.deleteOnExit(); - final ByteArrayOutputStream memoryStream = new ByteArrayOutputStream(); - - try (final BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); - final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { - writeUnionData(COUNT, parent); - final VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root")); - IllegalArgumentException e = - assertThrows( - IllegalArgumentException.class, - () -> { - try (final FileOutputStream fileStream = new FileOutputStream(temp)) { - new ArrowFileWriter(root, null, fileStream.getChannel(), writeOption); - new ArrowStreamWriter(root, null, Channels.newChannel(memoryStream), writeOption); - } - }); - assertTrue(e.getMessage().contains("Cannot write union with V4 metadata"), e.getMessage()); - e = - assertThrows( - 
IllegalArgumentException.class, - () -> { - new ArrowStreamWriter(root, null, Channels.newChannel(memoryStream), writeOption); - }); - assertTrue(e.getMessage().contains("Cannot write union with V4 metadata"), e.getMessage()); - } - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testUnionV5(String name, IpcOption writeOption) throws Exception { - assumeTrue(writeOption.metadataVersion == MetadataVersion.V5); - try (final BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); - final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { - writeUnionData(COUNT, parent); - VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root")); - validateUnionData(COUNT, root); - roundTrip( - name, - writeOption, - root, - /* dictionaryProvider */ null, - TestRoundTrip::writeSingleBatch, - validateFileBatches(new int[] {COUNT}, this::validateUnionData), - validateStreamBatches(new int[] {COUNT}, this::validateUnionData)); - } - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testTiny(String name, IpcOption writeOption) throws Exception { - try (final VectorSchemaRoot root = - VectorSchemaRoot.create(MessageSerializerTest.testSchema(), allocator)) { - root.getFieldVectors().get(0).allocateNew(); - int count = 16; - TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0); - for (int i = 0; i < count; i++) { - vector.set(i, i < 8 ? 
1 : 0, (byte) (i + 1)); - } - vector.setValueCount(count); - root.setRowCount(count); - - roundTrip( - name, - writeOption, - root, - /* dictionaryProvider */ null, - TestRoundTrip::writeSingleBatch, - validateFileBatches(new int[] {count}, this::validateTinyData), - validateStreamBatches(new int[] {count}, this::validateTinyData)); - } - } - - private void validateTinyData(int count, VectorSchemaRoot root) { - assertEquals(count, root.getRowCount()); - TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0); - for (int i = 0; i < count; i++) { - if (i < 8) { - assertEquals((byte) (i + 1), vector.get(i)); - } else { - assertTrue(vector.isNull(i)); - } - } - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testMetadata(String name, IpcOption writeOption) throws Exception { - List childFields = new ArrayList<>(); - childFields.add( - new Field( - "varchar-child", - new FieldType(true, ArrowType.Utf8.INSTANCE, null, metadata(1)), - null)); - childFields.add( - new Field( - "float-child", - new FieldType( - true, - new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), - null, - metadata(2)), - null)); - childFields.add( - new Field( - "int-child", - new FieldType(false, new ArrowType.Int(32, true), null, metadata(3)), - null)); - childFields.add( - new Field( - "list-child", - new FieldType(true, ArrowType.List.INSTANCE, null, metadata(4)), - Collections2.asImmutableList( - new Field("l1", FieldType.nullable(new ArrowType.Int(16, true)), null)))); - Field field = - new Field( - "meta", new FieldType(true, ArrowType.Struct.INSTANCE, null, metadata(0)), childFields); - Map metadata = new HashMap<>(); - metadata.put("s1", "v1"); - metadata.put("s2", "v2"); - Schema originalSchema = new Schema(Collections2.asImmutableList(field), metadata); - assertEquals(metadata, originalSchema.getCustomMetadata()); - - try (final BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 
0, allocator.getLimit()); - final StructVector vector = (StructVector) field.createVector(originalVectorAllocator)) { - vector.allocateNewSafe(); - vector.setValueCount(0); - - List vectors = Collections2.asImmutableList(vector); - VectorSchemaRoot root = new VectorSchemaRoot(originalSchema, vectors, 0); - - BiConsumer validate = - (count, readRoot) -> { - Schema schema = readRoot.getSchema(); - assertEquals(originalSchema, schema); - assertEquals(originalSchema.getCustomMetadata(), schema.getCustomMetadata()); - Field top = schema.getFields().get(0); - assertEquals(metadata(0), top.getMetadata()); - for (int i = 0; i < 4; i++) { - assertEquals(metadata(i + 1), top.getChildren().get(i).getMetadata()); - } - }; - roundTrip( - name, - writeOption, - root, - /* dictionaryProvider */ null, - TestRoundTrip::writeSingleBatch, - validateFileBatches(new int[] {0}, validate), - validateStreamBatches(new int[] {0}, validate)); - } - } - - private Map metadata(int i) { - Map map = new HashMap<>(); - map.put("k_" + i, "v_" + i); - map.put("k2_" + i, "v2_" + i); - return Collections.unmodifiableMap(map); - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testFlatDictionary(String name, IpcOption writeOption) throws Exception { - AtomicInteger numDictionaryBlocksWritten = new AtomicInteger(); - MapDictionaryProvider provider = new MapDictionaryProvider(); - try (final BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); - final VectorSchemaRoot root = writeFlatDictionaryData(originalVectorAllocator, provider)) { - roundTrip( - name, - writeOption, - root, - provider, - (ignored, writer) -> { - writer.start(); - writer.writeBatch(); - writer.end(); - if (writer instanceof ArrowFileWriter) { - numDictionaryBlocksWritten.set( - ((ArrowFileWriter) writer).getDictionaryBlocks().size()); - } - }, - (fileReader) -> { - VectorSchemaRoot readRoot = 
fileReader.getVectorSchemaRoot(); - Schema schema = readRoot.getSchema(); - LOGGER.debug("reading schema: " + schema); - assertTrue(fileReader.loadNextBatch()); - validateFlatDictionary(readRoot, fileReader); - assertEquals(numDictionaryBlocksWritten.get(), fileReader.getDictionaryBlocks().size()); - }, - (streamReader) -> { - VectorSchemaRoot readRoot = streamReader.getVectorSchemaRoot(); - Schema schema = readRoot.getSchema(); - LOGGER.debug("reading schema: " + schema); - assertTrue(streamReader.loadNextBatch()); - validateFlatDictionary(readRoot, streamReader); - }); - - // Need to close dictionary vectors - for (long id : provider.getDictionaryIds()) { - provider.lookup(id).getVector().close(); - } - } - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testNestedDictionary(String name, IpcOption writeOption) throws Exception { - AtomicInteger numDictionaryBlocksWritten = new AtomicInteger(); - MapDictionaryProvider provider = new MapDictionaryProvider(); - // data being written: - // [['foo', 'bar'], ['foo'], ['bar']] -> [[0, 1], [0], [1]] - try (final BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); - final VectorSchemaRoot root = - writeNestedDictionaryData(originalVectorAllocator, provider)) { - CheckedConsumer validateDictionary = - (streamReader) -> { - VectorSchemaRoot readRoot = streamReader.getVectorSchemaRoot(); - Schema schema = readRoot.getSchema(); - LOGGER.debug("reading schema: " + schema); - assertTrue(streamReader.loadNextBatch()); - validateNestedDictionary(readRoot, streamReader); - }; - roundTrip( - name, - writeOption, - root, - provider, - (ignored, writer) -> { - writer.start(); - writer.writeBatch(); - writer.end(); - if (writer instanceof ArrowFileWriter) { - numDictionaryBlocksWritten.set( - ((ArrowFileWriter) writer).getDictionaryBlocks().size()); - } - }, - validateDictionary, - validateDictionary); - - // Need to 
close dictionary vectors - for (long id : provider.getDictionaryIds()) { - provider.lookup(id).getVector().close(); - } - } - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testFixedSizeBinary(String name, IpcOption writeOption) throws Exception { - final int count = 10; - final int typeWidth = 11; - byte[][] byteValues = new byte[count][typeWidth]; - for (int i = 0; i < count; i++) { - for (int j = 0; j < typeWidth; j++) { - byteValues[i][j] = ((byte) i); - } - } - - BiConsumer validator = - (expectedCount, root) -> { - for (int i = 0; i < expectedCount; i++) { - assertArrayEquals( - byteValues[i], ((byte[]) root.getVector("fixed-binary").getObject(i))); - } - }; - - try (final BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); - final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { - FixedSizeBinaryVector fixedSizeBinaryVector = - parent.addOrGet( - "fixed-binary", - FieldType.nullable(new ArrowType.FixedSizeBinary(typeWidth)), - FixedSizeBinaryVector.class); - parent.allocateNew(); - for (int i = 0; i < count; i++) { - fixedSizeBinaryVector.set(i, byteValues[i]); - } - parent.setValueCount(count); - - roundTrip( - name, - writeOption, - new VectorSchemaRoot(parent), - /* dictionaryProvider */ null, - TestRoundTrip::writeSingleBatch, - validateFileBatches(new int[] {count}, validator), - validateStreamBatches(new int[] {count}, validator)); - } - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testFixedSizeList(String name, IpcOption writeOption) throws Exception { - BiConsumer validator = - (expectedCount, root) -> { - for (int i = 0; i < expectedCount; i++) { - assertEquals( - Collections2.asImmutableList(i + 0.1f, i + 10.1f), - root.getVector("float-pairs").getObject(i)); - assertEquals(i, root.getVector("ints").getObject(i)); - } - }; - - try (final BufferAllocator 
originalVectorAllocator = - allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); - final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { - FixedSizeListVector tuples = - parent.addOrGet( - "float-pairs", - FieldType.nullable(new ArrowType.FixedSizeList(2)), - FixedSizeListVector.class); - Float4Vector floats = - (Float4Vector) - tuples - .addOrGetVector(FieldType.nullable(Types.MinorType.FLOAT4.getType())) - .getVector(); - IntVector ints = - parent.addOrGet("ints", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - parent.allocateNew(); - for (int i = 0; i < COUNT; i++) { - tuples.setNotNull(i); - floats.set(i * 2, i + 0.1f); - floats.set(i * 2 + 1, i + 10.1f); - ints.set(i, i); - } - parent.setValueCount(COUNT); - - roundTrip( - name, - writeOption, - new VectorSchemaRoot(parent), - /* dictionaryProvider */ null, - TestRoundTrip::writeSingleBatch, - validateFileBatches(new int[] {COUNT}, validator), - validateStreamBatches(new int[] {COUNT}, validator)); - } - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testVarBinary(String name, IpcOption writeOption) throws Exception { - try (final BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); - final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { - writeVarBinaryData(COUNT, parent); - VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root")); - validateVarBinary(COUNT, root); - - roundTrip( - name, - writeOption, - root, - /* dictionaryProvider */ null, - TestRoundTrip::writeSingleBatch, - validateFileBatches(new int[] {COUNT}, this::validateVarBinary), - validateStreamBatches(new int[] {COUNT}, this::validateVarBinary)); - } - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testReadWriteMultipleBatches(String name, IpcOption writeOption) 
throws IOException { - File file = new File("target/mytest_nulls_multibatch.arrow"); - int numBlocksWritten = 0; - - try (IntVector vector = new IntVector("foo", allocator); ) { - Schema schema = new Schema(Collections.singletonList(vector.getField())); - try (FileOutputStream fileOutputStream = new FileOutputStream(file); - VectorSchemaRoot root = - new VectorSchemaRoot( - schema, Collections.singletonList((FieldVector) vector), vector.getValueCount()); - ArrowFileWriter writer = - new ArrowFileWriter(root, null, fileOutputStream.getChannel(), writeOption)) { - writeBatchData(writer, vector, root); - numBlocksWritten = writer.getRecordBlocks().size(); - } - } - - try (FileInputStream fileInputStream = new FileInputStream(file); - ArrowFileReader reader = new ArrowFileReader(fileInputStream.getChannel(), allocator); ) { - IntVector vector = (IntVector) reader.getVectorSchemaRoot().getFieldVectors().get(0); - validateBatchData(reader, vector); - assertEquals(numBlocksWritten, reader.getRecordBlocks().size()); - } - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testMap(String name, IpcOption writeOption) throws Exception { - try (final BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); - final VectorSchemaRoot root = writeMapData(originalVectorAllocator)) { - roundTrip( - name, - writeOption, - root, - /* dictionaryProvider */ null, - TestRoundTrip::writeSingleBatch, - validateFileBatches( - new int[] {root.getRowCount()}, (count, readRoot) -> validateMapData(readRoot)), - validateStreamBatches( - new int[] {root.getRowCount()}, (count, readRoot) -> validateMapData(readRoot))); - } - } - - @ParameterizedTest(name = "options = {0}") - @MethodSource("getWriteOption") - public void testListAsMap(String name, IpcOption writeOption) throws Exception { - try (final BufferAllocator originalVectorAllocator = - allocator.newChildAllocator("original vectors", 
0, allocator.getLimit()); - final VectorSchemaRoot root = writeListAsMapData(originalVectorAllocator)) { - roundTrip( - name, - writeOption, - root, - /* dictionaryProvider */ null, - TestRoundTrip::writeSingleBatch, - validateFileBatches( - new int[] {root.getRowCount()}, (count, readRoot) -> validateListAsMapData(readRoot)), - validateStreamBatches( - new int[] {root.getRowCount()}, - (count, readRoot) -> validateListAsMapData(readRoot))); - } - } - - // Generic test helpers - - private static void writeSingleBatch(VectorSchemaRoot root, ArrowWriter writer) - throws IOException { - writer.start(); - writer.writeBatch(); - writer.end(); - } - - private CheckedConsumer validateFileBatches( - int[] counts, BiConsumer validator) { - return (arrowReader) -> { - VectorSchemaRoot root = arrowReader.getVectorSchemaRoot(); - VectorUnloader unloader = new VectorUnloader(root); - Schema schema = root.getSchema(); - LOGGER.debug("reading schema: " + schema); - int i = 0; - List recordBatches = arrowReader.getRecordBlocks(); - assertEquals(counts.length, recordBatches.size()); - long previousOffset = 0; - for (ArrowBlock rbBlock : recordBatches) { - assertTrue( - rbBlock.getOffset() > previousOffset, rbBlock.getOffset() + " > " + previousOffset); - previousOffset = rbBlock.getOffset(); - arrowReader.loadRecordBatch(rbBlock); - assertEquals(counts[i], root.getRowCount(), "RB #" + i); - validator.accept(counts[i], root); - try (final ArrowRecordBatch batch = unloader.getRecordBatch()) { - List buffersLayout = batch.getBuffersLayout(); - for (ArrowBuffer arrowBuffer : buffersLayout) { - assertEquals(0, arrowBuffer.getOffset() % 8); - } - } - ++i; - } - }; - } - - private CheckedConsumer validateStreamBatches( - int[] counts, BiConsumer validator) { - return (arrowReader) -> { - VectorSchemaRoot root = arrowReader.getVectorSchemaRoot(); - VectorUnloader unloader = new VectorUnloader(root); - Schema schema = root.getSchema(); - LOGGER.debug("reading schema: " + schema); - int i = 
0; - - for (int n = 0; n < counts.length; n++) { - assertTrue(arrowReader.loadNextBatch()); - assertEquals(counts[i], root.getRowCount(), "RB #" + i); - validator.accept(counts[i], root); - try (final ArrowRecordBatch batch = unloader.getRecordBatch()) { - final List buffersLayout = batch.getBuffersLayout(); - for (ArrowBuffer arrowBuffer : buffersLayout) { - assertEquals(0, arrowBuffer.getOffset() % 8); - } - } - ++i; - } - assertFalse(arrowReader.loadNextBatch()); - }; - } - - @FunctionalInterface - interface CheckedConsumer { - void accept(T t) throws Exception; - } - - @FunctionalInterface - interface CheckedBiConsumer { - void accept(T t, U u) throws Exception; - } - - private void roundTrip( - String name, - IpcOption writeOption, - VectorSchemaRoot root, - DictionaryProvider provider, - CheckedBiConsumer writer, - CheckedConsumer fileValidator, - CheckedConsumer streamValidator) - throws Exception { - final File temp = File.createTempFile("arrow-test-" + name + "-", ".arrow"); - temp.deleteOnExit(); - final ByteArrayOutputStream memoryStream = new ByteArrayOutputStream(); - final Map metadata = new HashMap<>(); - metadata.put("foo", "bar"); - try (final FileOutputStream fileStream = new FileOutputStream(temp); - final ArrowFileWriter fileWriter = - new ArrowFileWriter(root, provider, fileStream.getChannel(), metadata, writeOption); - final ArrowStreamWriter streamWriter = - new ArrowStreamWriter(root, provider, Channels.newChannel(memoryStream), writeOption)) { - writer.accept(root, fileWriter); - writer.accept(root, streamWriter); - } - - MessageMetadataResult metadataResult = - MessageSerializer.readMessage( - new ReadChannel( - Channels.newChannel(new ByteArrayInputStream(memoryStream.toByteArray())))); - assertNotNull(metadataResult); - assertEquals(writeOption.metadataVersion.toFlatbufID(), metadataResult.getMessage().version()); - - try (BufferAllocator readerAllocator = - allocator.newChildAllocator("reader", 0, allocator.getLimit()); - 
FileInputStream fileInputStream = new FileInputStream(temp); - ByteArrayInputStream inputStream = new ByteArrayInputStream(memoryStream.toByteArray()); - ArrowFileReader fileReader = - new ArrowFileReader(fileInputStream.getChannel(), readerAllocator); - ArrowStreamReader streamReader = new ArrowStreamReader(inputStream, readerAllocator)) { - fileValidator.accept(fileReader); - streamValidator.accept(streamReader); - assertEquals(writeOption.metadataVersion, fileReader.getFooter().getMetadataVersion()); - assertEquals(metadata, fileReader.getMetaData()); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java deleted file mode 100644 index a47d4bd43887e..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.ipc; - -import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.channels.Channels; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Map; -import java.util.function.ToIntBiFunction; -import java.util.stream.Stream; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -/** Test the round-trip of dictionary encoding, with unsigned integer as indices. 
*/ -public class TestUIntDictionaryRoundTrip { - - private BufferAllocator allocator; - - private DictionaryProvider.MapDictionaryProvider dictionaryProvider; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - dictionaryProvider = new DictionaryProvider.MapDictionaryProvider(); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - private byte[] writeData(boolean streamMode, FieldVector encodedVector) throws IOException { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - VectorSchemaRoot root = - new VectorSchemaRoot( - Arrays.asList(encodedVector.getField()), - Arrays.asList(encodedVector), - encodedVector.getValueCount()); - try (ArrowWriter writer = - streamMode - ? new ArrowStreamWriter(root, dictionaryProvider, out) - : new ArrowFileWriter(root, dictionaryProvider, Channels.newChannel(out))) { - writer.start(); - writer.writeBatch(); - writer.end(); - - return out.toByteArray(); - } - } - - private void readData( - boolean streamMode, - byte[] data, - Field expectedField, - ToIntBiFunction valGetter, - long dictionaryID, - int[] expectedIndices, - String[] expectedDictItems) - throws IOException { - try (ArrowReader reader = - streamMode - ? 
new ArrowStreamReader(new ByteArrayInputStream(data), allocator) - : new ArrowFileReader( - new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(data)), - allocator)) { - - // verify schema - Schema readSchema = reader.getVectorSchemaRoot().getSchema(); - assertEquals(1, readSchema.getFields().size()); - assertEquals(expectedField, readSchema.getFields().get(0)); - - // verify vector schema root - assertTrue(reader.loadNextBatch()); - VectorSchemaRoot root = reader.getVectorSchemaRoot(); - - assertEquals(1, root.getFieldVectors().size()); - ValueVector encodedVector = root.getVector(0); - assertEquals(expectedIndices.length, encodedVector.getValueCount()); - - for (int i = 0; i < expectedIndices.length; i++) { - assertEquals(expectedIndices[i], valGetter.applyAsInt(encodedVector, i)); - } - - // verify dictionary - Map dictVectors = reader.getDictionaryVectors(); - assertEquals(1, dictVectors.size()); - Dictionary dictionary = dictVectors.get(dictionaryID); - assertNotNull(dictionary); - - assertTrue(dictionary.getVector() instanceof VarCharVector); - VarCharVector dictVector = (VarCharVector) dictionary.getVector(); - assertEquals(expectedDictItems.length, dictVector.getValueCount()); - for (int i = 0; i < dictVector.getValueCount(); i++) { - assertArrayEquals(expectedDictItems[i].getBytes(StandardCharsets.UTF_8), dictVector.get(i)); - } - } - } - - private ValueVector createEncodedVector(int bitWidth, VarCharVector dictionaryVector) { - final DictionaryEncoding dictionaryEncoding = - new DictionaryEncoding(bitWidth, false, new ArrowType.Int(bitWidth, false)); - Dictionary dictionary = new Dictionary(dictionaryVector, dictionaryEncoding); - dictionaryProvider.put(dictionary); - - final FieldType type = - new FieldType(true, dictionaryEncoding.getIndexType(), dictionaryEncoding, null); - final Field field = new Field("encoded", type, null); - return field.createVector(allocator); - } - - @ParameterizedTest(name = "stream mode = {0}") - 
@MethodSource("getRepeat") - public void testUInt1RoundTrip(boolean streamMode) throws IOException { - final int vectorLength = UInt1Vector.MAX_UINT1 & UInt1Vector.PROMOTION_MASK; - try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); - UInt1Vector encodedVector1 = (UInt1Vector) createEncodedVector(8, dictionaryVector)) { - int[] indices = new int[vectorLength]; - String[] dictionaryItems = new String[vectorLength]; - for (int i = 0; i < vectorLength; i++) { - encodedVector1.setSafe(i, (byte) i); - indices[i] = i; - dictionaryItems[i] = String.valueOf(i); - } - encodedVector1.setValueCount(vectorLength); - setVector(dictionaryVector, dictionaryItems); - byte[] data = writeData(streamMode, encodedVector1); - readData( - streamMode, - data, - encodedVector1.getField(), - (vector, index) -> (int) ((UInt1Vector) vector).getValueAsLong(index), - 8L, - indices, - dictionaryItems); - } - } - - @ParameterizedTest(name = "stream mode = {0}") - @MethodSource("getRepeat") - public void testUInt2RoundTrip(boolean streamMode) throws IOException { - try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); - UInt2Vector encodedVector2 = (UInt2Vector) createEncodedVector(16, dictionaryVector)) { - int[] indices = new int[] {1, 3, 5, 7, 9, UInt2Vector.MAX_UINT2}; - String[] dictItems = new String[UInt2Vector.MAX_UINT2]; - for (int i = 0; i < UInt2Vector.MAX_UINT2; i++) { - dictItems[i] = String.valueOf(i); - } - - setVector( - encodedVector2, (char) 1, (char) 3, (char) 5, (char) 7, (char) 9, UInt2Vector.MAX_UINT2); - setVector(dictionaryVector, dictItems); - - byte[] data = writeData(streamMode, encodedVector2); - readData( - streamMode, - data, - encodedVector2.getField(), - (vector, index) -> (int) ((UInt2Vector) vector).getValueAsLong(index), - 16L, - indices, - dictItems); - } - } - - @ParameterizedTest(name = "stream mode = {0}") - @MethodSource("getRepeat") - public void testUInt4RoundTrip(boolean streamMode) throws 
IOException { - final int dictLength = 10; - try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); - UInt4Vector encodedVector4 = (UInt4Vector) createEncodedVector(32, dictionaryVector)) { - int[] indices = new int[] {1, 3, 5, 7, 9}; - String[] dictItems = new String[dictLength]; - for (int i = 0; i < dictLength; i++) { - dictItems[i] = String.valueOf(i); - } - - setVector(encodedVector4, 1, 3, 5, 7, 9); - setVector(dictionaryVector, dictItems); - - setVector(encodedVector4, 1, 3, 5, 7, 9); - byte[] data = writeData(streamMode, encodedVector4); - readData( - streamMode, - data, - encodedVector4.getField(), - (vector, index) -> (int) ((UInt4Vector) vector).getValueAsLong(index), - 32L, - indices, - dictItems); - } - } - - @ParameterizedTest(name = "stream mode = {0}") - @MethodSource("getRepeat") - public void testUInt8RoundTrip(boolean streamMode) throws IOException { - final int dictLength = 10; - try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); - UInt8Vector encodedVector8 = (UInt8Vector) createEncodedVector(64, dictionaryVector)) { - int[] indices = new int[] {1, 3, 5, 7, 9}; - String[] dictItems = new String[dictLength]; - for (int i = 0; i < dictLength; i++) { - dictItems[i] = String.valueOf(i); - } - - setVector(encodedVector8, 1L, 3L, 5L, 7L, 9L); - setVector(dictionaryVector, dictItems); - - byte[] data = writeData(streamMode, encodedVector8); - readData( - streamMode, - data, - encodedVector8.getField(), - (vector, index) -> (int) ((UInt8Vector) vector).getValueAsLong(index), - 64L, - indices, - dictItems); - } - } - - static Stream getRepeat() { - return Stream.of(Arguments.of(true), Arguments.of(false)); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java deleted file mode 100644 index cd5f89b745243..0000000000000 --- 
a/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.ipc.message; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.nio.ByteBuffer; -import org.junit.jupiter.api.Test; - -public class TestMessageMetadataResult { - - @Test - public void getMessageLength_returnsConstructValue() { - // This API is used by spark. - MessageMetadataResult result = - new MessageMetadataResult( - 1, ByteBuffer.allocate(0), new org.apache.arrow.flatbuf.Message()); - assertEquals(1, result.getMessageLength()); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java deleted file mode 100644 index 62e5355d05c8b..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.pojo; - -import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; -import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; - -import com.google.flatbuffers.FlatBufferBuilder; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.util.Collections2; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; -import org.apache.arrow.vector.types.pojo.ArrowType.Int; -import org.apache.arrow.vector.types.pojo.ArrowType.List; -import org.apache.arrow.vector.types.pojo.ArrowType.Struct; -import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; -import org.apache.arrow.vector.types.pojo.ArrowType.Union; -import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; -import org.apache.arrow.vector.types.pojo.Field; -import 
org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; - -/** Test conversion between Flatbuf and Pojo field representations. */ -public class TestConvert { - - @Test - public void simple() { - Field initialField = new Field("a", FieldType.nullable(new Int(32, true)), null); - run(initialField); - } - - @Test - public void complex() { - java.util.List children = new ArrayList<>(); - children.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null)); - children.add( - new Field( - "child2", FieldType.nullable(new FloatingPoint(SINGLE)), Collections.emptyList())); - - Field initialField = new Field("a", FieldType.nullable(Struct.INSTANCE), children); - run(initialField); - } - - @Test - public void list() throws Exception { - java.util.List children = new ArrayList<>(); - try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - ListVector writeVector = ListVector.empty("list", allocator); - FixedSizeListVector writeFixedVector = - FixedSizeListVector.empty("fixedlist", 5, allocator)) { - Field listVectorField = writeVector.getField(); - children.add(listVectorField); - Field listFixedVectorField = writeFixedVector.getField(); - children.add(listFixedVectorField); - } - - Field initialField = new Field("a", FieldType.nullable(Struct.INSTANCE), children); - java.util.List parent = new ArrayList<>(); - parent.add(initialField); - FlatBufferBuilder builder = new FlatBufferBuilder(); - builder.finish(initialField.getField(builder)); - org.apache.arrow.flatbuf.Field flatBufField = - org.apache.arrow.flatbuf.Field.getRootAsField(builder.dataBuffer()); - Field finalField = Field.convertField(flatBufField); - assertEquals(initialField, finalField); - assertFalse(finalField.toString().contains("[DEFAULT]")); - - Schema initialSchema = new Schema(parent); - String jsonSchema = initialSchema.toJson(); - String modifiedSchema = jsonSchema.replace("$data$", "[DEFAULT]"); - - Schema 
tempSchema = Schema.fromJSON(modifiedSchema); - FlatBufferBuilder schemaBuilder = new FlatBufferBuilder(); - org.apache.arrow.vector.types.pojo.Schema schema = - new org.apache.arrow.vector.types.pojo.Schema(tempSchema.getFields()); - schemaBuilder.finish(schema.getSchema(schemaBuilder)); - Schema finalSchema = Schema.deserialize(ByteBuffer.wrap(schemaBuilder.sizedByteArray())); - assertFalse(finalSchema.toString().contains("[DEFAULT]")); - } - - @Test - public void schema() { - java.util.List children = new ArrayList<>(); - children.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null)); - children.add( - new Field( - "child2", FieldType.nullable(new FloatingPoint(SINGLE)), Collections.emptyList())); - Schema initialSchema = new Schema(children); - run(initialSchema); - } - - @Test - public void schemaMetadata() { - java.util.List children = new ArrayList<>(); - children.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null)); - children.add( - new Field( - "child2", FieldType.nullable(new FloatingPoint(SINGLE)), Collections.emptyList())); - Map metadata = new HashMap<>(); - metadata.put("key1", "value1"); - metadata.put("key2", "value2"); - Schema initialSchema = new Schema(children, metadata); - run(initialSchema); - } - - @Test - public void nestedSchema() { - java.util.List children = new ArrayList<>(); - children.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null)); - children.add( - new Field( - "child2", FieldType.nullable(new FloatingPoint(SINGLE)), Collections.emptyList())); - children.add( - new Field( - "child3", - FieldType.nullable(new Struct()), - Collections2.asImmutableList( - new Field("child3.1", FieldType.nullable(Utf8.INSTANCE), null), - new Field( - "child3.2", - FieldType.nullable(new FloatingPoint(DOUBLE)), - Collections.emptyList())))); - children.add( - new Field( - "child4", - FieldType.nullable(new List()), - Collections2.asImmutableList( - new Field("child4.1", FieldType.nullable(Utf8.INSTANCE), 
null)))); - children.add( - new Field( - "child5", - FieldType.nullable( - new Union( - UnionMode.Sparse, - new int[] {MinorType.TIMESTAMPMILLI.ordinal(), MinorType.FLOAT8.ordinal()})), - Collections2.asImmutableList( - new Field( - "child5.1", - FieldType.nullable(new Timestamp(TimeUnit.MILLISECOND, null)), - null), - new Field( - "child5.2", - FieldType.nullable(new FloatingPoint(DOUBLE)), - Collections.emptyList()), - new Field( - "child5.3", - FieldType.nullable(new Timestamp(TimeUnit.MILLISECOND, "UTC")), - null)))); - Schema initialSchema = new Schema(children); - run(initialSchema); - } - - private void run(Field initialField) { - FlatBufferBuilder builder = new FlatBufferBuilder(); - builder.finish(initialField.getField(builder)); - org.apache.arrow.flatbuf.Field flatBufField = - org.apache.arrow.flatbuf.Field.getRootAsField(builder.dataBuffer()); - Field finalField = Field.convertField(flatBufField); - assertEquals(initialField, finalField); - } - - private void run(Schema initialSchema) { - FlatBufferBuilder builder = new FlatBufferBuilder(); - builder.finish(initialSchema.getSchema(builder)); - org.apache.arrow.flatbuf.Schema flatBufSchema = - org.apache.arrow.flatbuf.Schema.getRootAsSchema(builder.dataBuffer()); - Schema finalSchema = Schema.convertSchema(flatBufSchema); - assertEquals(initialSchema, finalSchema); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/table/BaseTableTest.java b/java/vector/src/test/java/org/apache/arrow/vector/table/BaseTableTest.java deleted file mode 100644 index a07e9fc0352d7..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/table/BaseTableTest.java +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.table; - -import static org.apache.arrow.vector.table.TestUtils.INT_VECTOR_NAME; -import static org.apache.arrow.vector.table.TestUtils.INT_VECTOR_NAME_1; -import static org.apache.arrow.vector.table.TestUtils.INT_VECTOR_NAME_2; -import static org.apache.arrow.vector.table.TestUtils.intPlusVarcharColumns; -import static org.apache.arrow.vector.table.TestUtils.twoIntColumns; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.dictionary.DictionaryEncoder; -import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.types.pojo.ArrowType; -import 
org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class BaseTableTest { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @Test - void getReaderByName() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - assertNotNull(t.getReader(INT_VECTOR_NAME_1)); - } - } - - @Test - void getReaderByIndex() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - assertNotNull(t.getReader(0)); - } - } - - @Test - void getReaderByField() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - assertNotNull(t.getReader(t.getField(INT_VECTOR_NAME_1))); - } - } - - @Test - void getSchema() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - assertNotNull(t.getSchema()); - assertEquals(2, t.getSchema().getFields().size()); - } - } - - @Test - void insertVector() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - ArrowType intArrowType = new ArrowType.Int(32, true); - FieldType intFieldType = new FieldType(true, intArrowType, null); - IntVector v3 = new IntVector("3", intFieldType, allocator); - List revisedVectors = t.insertVector(2, v3); - assertEquals(3, revisedVectors.size()); - assertEquals(v3, revisedVectors.get(2)); - } - } - - @Test - void insertVectorFirstPosition() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - ArrowType intArrowType = new ArrowType.Int(32, true); - FieldType intFieldType = new FieldType(true, intArrowType, null); - IntVector v3 = new IntVector("3", intFieldType, allocator); - List revisedVectors = t.insertVector(0, v3); - assertEquals(3, revisedVectors.size()); - assertEquals(v3, revisedVectors.get(0)); 
- } - } - - @Test - void extractVector() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - List revisedVectors = t.extractVector(0); - assertEquals(2, t.getVectorCount()); // vector not removed from table yet - assertEquals(1, revisedVectors.size()); - } - } - - @Test - void close() { - IntVector v = new IntVector(INT_VECTOR_NAME, allocator); - v.setSafe(0, 132); - List vectors = new ArrayList<>(); - vectors.add(v); - v.setValueCount(1); - try (Table t = new Table(vectors)) { - t.close(); - for (FieldVector fieldVector : t.fieldVectors) { - assertEquals(0, fieldVector.getValueCount()); - } - } - } - - @Test - void getRowCount() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - assertEquals(2, t.getRowCount()); - } - } - - @Test - void toVectorSchemaRoot() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - assertNotNull(t.getVector(INT_VECTOR_NAME_1)); - assertNotNull(t.getVector(INT_VECTOR_NAME_2)); - VectorSchemaRoot vsr = t.toVectorSchemaRoot(); - assertNotNull(vsr.getVector(INT_VECTOR_NAME_1)); - assertNotNull(vsr.getVector(INT_VECTOR_NAME_2)); - assertEquals( - t.getSchema().findField(INT_VECTOR_NAME_1), vsr.getSchema().findField(INT_VECTOR_NAME_1)); - } - } - - @Test - void getVector() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - assertNotNull(t.getVector(0)); - } - } - - @Test - void testGetVector() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - assertNotNull(t.getVector(INT_VECTOR_NAME_1)); - assertThrows(IllegalArgumentException.class, () -> t.getVector("wrong name")); - } - } - - @Test - void getVectorCopyByIndex() { - List vectorList = twoIntColumns(allocator); - List vectorList2 = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - // compare value by value - for (int vIdx = 0; vIdx < vectorList.size(); vIdx++) { - 
IntVector original = (IntVector) vectorList2.get(vIdx); - IntVector copy = (IntVector) t.getVectorCopy(vIdx); - assertNotNull(copy); - assertEquals(2, copy.getValueCount()); - assertEquals(0, copy.getNullCount()); - for (int i = 0; i < t.getRowCount(); i++) { - assertEquals(original.getObject(i), copy.getObject(i)); - } - } - assertThrows(IllegalArgumentException.class, () -> t.getVector("wrong name")); - } - } - - @Test - void getVectorCopyByName() { - List vectorList = twoIntColumns(allocator); - List vectorList2 = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - assertNotNull(t.getVectorCopy(INT_VECTOR_NAME_1)); - for (int vIdx = 0; vIdx < vectorList.size(); vIdx++) { - IntVector original = (IntVector) vectorList2.get(vIdx); - IntVector copy = (IntVector) t.getVectorCopy(original.getName()); - assertEquals(2, copy.getValueCount()); - assertEquals(0, copy.getNullCount()); - for (int i = 0; i < t.getRowCount(); i++) { - assertEquals(original.getObject(i), copy.getObject(i)); - } - } - assertThrows(IllegalArgumentException.class, () -> t.getVector("wrong name")); - } - } - - @Test - void immutableCursor() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - assertNotNull(t.immutableRow()); - } - } - - @Test - void contentToTsvString() { - IntVector v = new IntVector(INT_VECTOR_NAME, allocator); - v.setSafe(0, 1); - v.setSafe(1, 2); - v.setSafe(2, 3); - v.setValueCount(3); - - try (Table t = Table.of(v)) { - assertEquals(3, t.rowCount); - List values = new ArrayList<>(); - for (Row r : t) { - values.add(r.getInt(INT_VECTOR_NAME)); - } - assertEquals(3, values.size()); - List intList = new ArrayList<>(); - intList.add(1); - intList.add(2); - intList.add(3); - assertTrue(values.containsAll(intList)); - String printed = "intCol\n" + "1\n" + "2\n" + "3\n"; - assertEquals(printed, t.contentToTSVString()); - } - } - - @Test - void isDeletedRow() { - List vectorList = twoIntColumns(allocator); - try (Table t = new 
Table(vectorList)) { - assertFalse(t.isRowDeleted(0)); - assertFalse(t.isRowDeleted(1)); - } - } - - @Test - void testEncode() { - List vectorList = intPlusVarcharColumns(allocator); - VarCharVector original = (VarCharVector) vectorList.get(1); - DictionaryProvider provider = getDictionary(); - try (Table t = new Table(vectorList, vectorList.get(0).getValueCount(), provider)) { - IntVector v = (IntVector) t.encode(original.getName(), 1L); - assertNotNull(v); - assertEquals(0, v.get(0)); - assertEquals(1, v.get(1)); - } - } - - @Test - void testDecode() { - List vectorList = intPlusVarcharColumns(allocator); - VarCharVector original = (VarCharVector) vectorList.get(1); - - VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); - dictionaryVector.allocateNew(2); - dictionaryVector.set(0, "one".getBytes(StandardCharsets.UTF_8)); - dictionaryVector.set(1, "two".getBytes(StandardCharsets.UTF_8)); - dictionaryVector.setValueCount(2); - Dictionary dictionary = - new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); - - DictionaryEncoder encoder = new DictionaryEncoder(dictionary, allocator); - IntVector encoded = (IntVector) encoder.encode(original); - vectorList.remove(original); - vectorList.add(encoded); - DictionaryProvider provider = getDictionary(); - - try (Table t = new Table(vectorList, vectorList.get(0).getValueCount(), provider)) { - VarCharVector v = (VarCharVector) t.decode(encoded.getName(), 1L); - assertNotNull(v); - assertEquals("one", new String(Objects.requireNonNull(v.get(0)), StandardCharsets.UTF_8)); - assertEquals("two", new String(Objects.requireNonNull(v.get(1)), StandardCharsets.UTF_8)); - } - } - - @Test - void getProvider() { - List vectorList = intPlusVarcharColumns(allocator); - DictionaryProvider provider = getDictionary(); - try (Table t = new Table(vectorList, vectorList.get(0).getValueCount(), provider)) { - assertEquals(provider, t.getDictionaryProvider()); - } - } - - private DictionaryProvider 
getDictionary() { - - DictionaryProvider.MapDictionaryProvider provider = - new DictionaryProvider.MapDictionaryProvider(); - DictionaryEncoding encoding = new DictionaryEncoding(1L, false, null); - - VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); - dictionaryVector.allocateNew(2); - dictionaryVector.set(0, "one".getBytes(StandardCharsets.UTF_8)); - dictionaryVector.set(1, "two".getBytes(StandardCharsets.UTF_8)); - dictionaryVector.setValueCount(2); - - Dictionary dictionary = new Dictionary(dictionaryVector, encoding); - provider.put(dictionary); - return provider; - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/table/RowTest.java b/java/vector/src/test/java/org/apache/arrow/vector/table/RowTest.java deleted file mode 100644 index c1125d407e145..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/table/RowTest.java +++ /dev/null @@ -1,854 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.table; - -import static org.apache.arrow.vector.table.TestUtils.BIGINT_INT_MAP_VECTOR_NAME; -import static org.apache.arrow.vector.table.TestUtils.FIXEDBINARY_VECTOR_NAME_1; -import static org.apache.arrow.vector.table.TestUtils.INT_LIST_VECTOR_NAME; -import static org.apache.arrow.vector.table.TestUtils.INT_VECTOR_NAME_1; -import static org.apache.arrow.vector.table.TestUtils.STRUCT_VECTOR_NAME; -import static org.apache.arrow.vector.table.TestUtils.UNION_VECTOR_NAME; -import static org.apache.arrow.vector.table.TestUtils.VARBINARY_VECTOR_NAME_1; -import static org.apache.arrow.vector.table.TestUtils.VARCHAR_VECTOR_NAME_1; -import static org.apache.arrow.vector.table.TestUtils.fixedWidthVectors; -import static org.apache.arrow.vector.table.TestUtils.intPlusFixedBinaryColumns; -import static org.apache.arrow.vector.table.TestUtils.intPlusLargeVarBinaryColumns; -import static org.apache.arrow.vector.table.TestUtils.intPlusLargeVarcharColumns; -import static org.apache.arrow.vector.table.TestUtils.intPlusVarBinaryColumns; -import static org.apache.arrow.vector.table.TestUtils.intPlusVarcharColumns; -import static org.apache.arrow.vector.table.TestUtils.simpleDenseUnionVector; -import static org.apache.arrow.vector.table.TestUtils.simpleListVector; -import static org.apache.arrow.vector.table.TestUtils.simpleMapVector; -import static org.apache.arrow.vector.table.TestUtils.simpleStructVector; -import static org.apache.arrow.vector.table.TestUtils.simpleUnionVector; -import static org.apache.arrow.vector.table.TestUtils.timezoneTemporalVectors; -import static org.apache.arrow.vector.table.TestUtils.twoIntColumns; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import 
static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; -import java.time.Duration; -import java.time.LocalDateTime; -import java.time.Period; -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.PeriodDuration; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.holders.NullableBigIntHolder; -import org.apache.arrow.vector.holders.NullableBitHolder; -import org.apache.arrow.vector.holders.NullableDecimalHolder; -import org.apache.arrow.vector.holders.NullableDurationHolder; -import org.apache.arrow.vector.holders.NullableFloat4Holder; -import org.apache.arrow.vector.holders.NullableFloat8Holder; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.holders.NullableIntervalDayHolder; -import org.apache.arrow.vector.holders.NullableIntervalMonthDayNanoHolder; -import org.apache.arrow.vector.holders.NullableIntervalYearHolder; -import org.apache.arrow.vector.holders.NullableSmallIntHolder; -import org.apache.arrow.vector.holders.NullableTimeMicroHolder; -import 
org.apache.arrow.vector.holders.NullableTimeMilliHolder; -import org.apache.arrow.vector.holders.NullableTimeNanoHolder; -import org.apache.arrow.vector.holders.NullableTimeSecHolder; -import org.apache.arrow.vector.holders.NullableTimeStampMicroHolder; -import org.apache.arrow.vector.holders.NullableTimeStampMicroTZHolder; -import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder; -import org.apache.arrow.vector.holders.NullableTimeStampMilliTZHolder; -import org.apache.arrow.vector.holders.NullableTimeStampNanoHolder; -import org.apache.arrow.vector.holders.NullableTimeStampNanoTZHolder; -import org.apache.arrow.vector.holders.NullableTimeStampSecHolder; -import org.apache.arrow.vector.holders.NullableTimeStampSecTZHolder; -import org.apache.arrow.vector.holders.NullableTinyIntHolder; -import org.apache.arrow.vector.holders.NullableUInt1Holder; -import org.apache.arrow.vector.holders.NullableUInt2Holder; -import org.apache.arrow.vector.holders.NullableUInt4Holder; -import org.apache.arrow.vector.holders.NullableUInt8Holder; -import org.apache.arrow.vector.types.IntervalUnit; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.TestExtensionType; -import org.apache.arrow.vector.util.JsonStringHashMap; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class RowTest { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() { - allocator.close(); - } - - @Test - void constructor() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - assertEquals(StandardCharsets.UTF_8, c.getDefaultCharacterSet()); - } - } - - @Test - void at() { - List vectorList = 
twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - assertEquals(c.getRowNumber(), -1); - c.setPosition(1); - assertEquals(c.getRowNumber(), 1); - } - } - - @Test - void getIntByVectorIndex() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertEquals(2, c.getInt(0)); - } - } - - @Test - void getIntByVectorName() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertEquals(2, c.getInt(INT_VECTOR_NAME_1)); - } - } - - @Test - void testNameNotFound() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertThrows(IllegalArgumentException.class, () -> c.getVarCharObj("wrong name")); - } - } - - @Test - void testWrongType() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertThrows(ClassCastException.class, () -> c.getVarCharObj(INT_VECTOR_NAME_1)); - } - } - - @Test - void getDecimal() { - List vectors = new ArrayList<>(); - DecimalVector decimalVector = new DecimalVector("decimal_vector", allocator, 55, 10); - vectors.add(decimalVector); - decimalVector.setSafe(0, new BigDecimal("0.0543278923")); - decimalVector.setSafe(1, new BigDecimal("2.0543278923")); - decimalVector.setValueCount(2); - BigDecimal one = decimalVector.getObject(1); - - NullableDecimalHolder holder1 = new NullableDecimalHolder(); - NullableDecimalHolder holder2 = new NullableDecimalHolder(); - try (Table t = new Table(vectors)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertEquals(one, c.getDecimalObj("decimal_vector")); - assertEquals(one, c.getDecimalObj(0)); - c.getDecimal(0, holder1); - c.getDecimal("decimal_vector", holder2); - assertEquals(holder1.buffer, holder2.buffer); - 
assertEquals(c.getDecimal(0).memoryAddress(), c.getDecimal("decimal_vector").memoryAddress()); - } - } - - @Test - void getDuration() { - List vectors = new ArrayList<>(); - TimeUnit unit = TimeUnit.SECOND; - final FieldType fieldType = FieldType.nullable(new ArrowType.Duration(unit)); - - DurationVector durationVector = new DurationVector("duration_vector", fieldType, allocator); - NullableDurationHolder holder1 = new NullableDurationHolder(); - NullableDurationHolder holder2 = new NullableDurationHolder(); - - holder1.value = 100; - holder1.unit = TimeUnit.SECOND; - holder1.isSet = 1; - holder2.value = 200; - holder2.unit = TimeUnit.SECOND; - holder2.isSet = 1; - - vectors.add(durationVector); - durationVector.setSafe(0, holder1); - durationVector.setSafe(1, holder2); - durationVector.setValueCount(2); - - Duration one = durationVector.getObject(1); - try (Table t = new Table(vectors)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertEquals(one, c.getDurationObj("duration_vector")); - assertEquals(one, c.getDurationObj(0)); - c.getDuration(0, holder1); - c.getDuration("duration_vector", holder2); - assertEquals(holder1.value, holder2.value); - ArrowBuf durationBuf1 = c.getDuration(0); - ArrowBuf durationBuf2 = c.getDuration("duration_vector"); - assertEquals(durationBuf1.memoryAddress(), durationBuf2.memoryAddress()); - } - } - - @Test - void getIntervalDay() { - List vectors = new ArrayList<>(); - IntervalUnit unit = IntervalUnit.DAY_TIME; - final FieldType fieldType = FieldType.nullable(new ArrowType.Interval(unit)); - - IntervalDayVector intervalDayVector = - new IntervalDayVector("intervalDay_vector", fieldType, allocator); - NullableIntervalDayHolder holder1 = new NullableIntervalDayHolder(); - NullableIntervalDayHolder holder2 = new NullableIntervalDayHolder(); - - holder1.days = 100; - holder1.milliseconds = 1000; - holder1.isSet = 1; - holder2.days = 200; - holder2.milliseconds = 2000; - holder2.isSet = 1; - - vectors.add(intervalDayVector); - 
intervalDayVector.setSafe(0, holder1); - intervalDayVector.setSafe(1, holder2); - intervalDayVector.setValueCount(2); - - Duration one = intervalDayVector.getObject(1); - try (Table t = new Table(vectors)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertEquals(one, c.getIntervalDayObj("intervalDay_vector")); - assertEquals(one, c.getIntervalDayObj(0)); - c.getIntervalDay(0, holder1); - c.getIntervalDay("intervalDay_vector", holder2); - assertEquals(holder1.days, holder2.days); - assertEquals(holder1.milliseconds, holder2.milliseconds); - ArrowBuf intDayBuf1 = c.getIntervalDay(0); - ArrowBuf intDayBuf2 = c.getIntervalDay("intervalDay_vector"); - assertEquals(intDayBuf1.memoryAddress(), intDayBuf2.memoryAddress()); - } - } - - @Test - void getIntervalMonth() { - List vectors = new ArrayList<>(); - IntervalUnit unit = IntervalUnit.MONTH_DAY_NANO; - final FieldType fieldType = FieldType.nullable(new ArrowType.Interval(unit)); - - IntervalMonthDayNanoVector intervalMonthVector = - new IntervalMonthDayNanoVector("intervalMonth_vector", fieldType, allocator); - NullableIntervalMonthDayNanoHolder holder1 = new NullableIntervalMonthDayNanoHolder(); - NullableIntervalMonthDayNanoHolder holder2 = new NullableIntervalMonthDayNanoHolder(); - - holder1.days = 1; - holder1.months = 10; - holder1.isSet = 1; - holder2.days = 2; - holder2.months = 20; - holder2.isSet = 1; - - vectors.add(intervalMonthVector); - intervalMonthVector.setSafe(0, holder1); - intervalMonthVector.setSafe(1, holder2); - intervalMonthVector.setValueCount(2); - - PeriodDuration one = intervalMonthVector.getObject(1); - try (Table t = new Table(vectors)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertEquals(one, c.getIntervalMonthDayNanoObj("intervalMonth_vector")); - assertEquals(one, c.getIntervalMonthDayNanoObj(0)); - c.getIntervalMonthDayNano(0, holder1); - c.getIntervalMonthDayNano("intervalMonth_vector", holder2); - assertEquals(holder1.days, holder2.days); - 
assertEquals(holder1.months, holder2.months); - ArrowBuf intMonthBuf1 = c.getIntervalMonthDayNano(0); - ArrowBuf intMonthBuf2 = c.getIntervalMonthDayNano("intervalMonth_vector"); - assertEquals(intMonthBuf1.memoryAddress(), intMonthBuf2.memoryAddress()); - } - } - - @Test - void getIntervalYear() { - List vectors = new ArrayList<>(); - IntervalUnit unit = IntervalUnit.YEAR_MONTH; - final FieldType fieldType = FieldType.nullable(new ArrowType.Interval(unit)); - - IntervalYearVector intervalYearVector = - new IntervalYearVector("intervalYear_vector", fieldType, allocator); - NullableIntervalYearHolder holder1 = new NullableIntervalYearHolder(); - NullableIntervalYearHolder holder2 = new NullableIntervalYearHolder(); - - holder1.value = 1; - holder1.isSet = 1; - holder2.value = 2; - holder2.isSet = 1; - - vectors.add(intervalYearVector); - intervalYearVector.setSafe(0, holder1); - intervalYearVector.setSafe(1, holder2); - intervalYearVector.setValueCount(2); - - Period one = intervalYearVector.getObject(1); - try (Table t = new Table(vectors)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertEquals(one, c.getIntervalYearObj("intervalYear_vector")); - assertEquals(one, c.getIntervalYearObj(0)); - c.getIntervalYear(0, holder1); - c.getIntervalYear("intervalYear_vector", holder2); - assertEquals(holder1.value, holder2.value); - int intYear1 = c.getIntervalYear(0); - int intYear2 = c.getIntervalYear("intervalYear_vector"); - assertEquals(2, intYear1); - assertEquals(intYear1, intYear2); - } - } - - @Test - void getBit() { - List vectors = new ArrayList<>(); - - BitVector bitVector = new BitVector("bit_vector", allocator); - NullableBitHolder holder1 = new NullableBitHolder(); - NullableBitHolder holder2 = new NullableBitHolder(); - - vectors.add(bitVector); - bitVector.setSafe(0, 0); - bitVector.setSafe(1, 1); - bitVector.setValueCount(2); - - int one = bitVector.get(1); - try (Table t = new Table(vectors)) { - Row c = t.immutableRow(); - c.setPosition(1); - 
assertEquals(one, c.getBit("bit_vector")); - assertEquals(one, c.getBit(0)); - c.getBit(0, holder1); - c.getBit("bit_vector", holder2); - assertEquals(holder1.value, holder2.value); - } - } - - @Test - void hasNext() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - assertTrue(c.hasNext()); - c.setPosition(1); - assertFalse(c.hasNext()); - } - } - - @Test - void next() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - c.setPosition(0); - c.next(); - assertEquals(1, c.getRowNumber()); - } - } - - @Test - void isNull() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertFalse(c.isNull(0)); - } - } - - @Test - void isNullByFieldName() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertFalse(c.isNull(INT_VECTOR_NAME_1)); - } - } - - @Test - void fixedWidthVectorTest() { - List vectorList = fixedWidthVectors(allocator, 2); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - c.setPosition(1); - // integer tests using vector name and index - assertFalse(c.isNull("bigInt_vector")); - assertEquals(c.getInt("int_vector"), c.getInt(0)); - assertEquals(c.getBigInt("bigInt_vector"), c.getBigInt(1)); - assertEquals(c.getSmallInt("smallInt_vector"), c.getSmallInt(2)); - assertEquals(c.getTinyInt("tinyInt_vector"), c.getTinyInt(3)); - - // integer tests using Nullable Holders - NullableIntHolder int4Holder = new NullableIntHolder(); - NullableTinyIntHolder int1Holder = new NullableTinyIntHolder(); - NullableSmallIntHolder int2Holder = new NullableSmallIntHolder(); - NullableBigIntHolder int8Holder = new NullableBigIntHolder(); - c.getInt(0, int4Holder); - c.getBigInt(1, int8Holder); - c.getSmallInt(2, int2Holder); - c.getTinyInt(3, 
int1Holder); - assertEquals(c.getInt("int_vector"), int4Holder.value); - assertEquals(c.getBigInt("bigInt_vector"), int8Holder.value); - assertEquals(c.getSmallInt("smallInt_vector"), int2Holder.value); - assertEquals(c.getTinyInt("tinyInt_vector"), int1Holder.value); - - c.getInt("int_vector", int4Holder); - c.getBigInt("bigInt_vector", int8Holder); - c.getSmallInt("smallInt_vector", int2Holder); - c.getTinyInt("tinyInt_vector", int1Holder); - assertEquals(c.getInt("int_vector"), int4Holder.value); - assertEquals(c.getBigInt("bigInt_vector"), int8Holder.value); - assertEquals(c.getSmallInt("smallInt_vector"), int2Holder.value); - assertEquals(c.getTinyInt("tinyInt_vector"), int1Holder.value); - - // uint tests using vector name and index - assertEquals(c.getUInt1("uInt1_vector"), c.getUInt1(4)); - assertEquals(c.getUInt2("uInt2_vector"), c.getUInt2(5)); - assertEquals(c.getUInt4("uInt4_vector"), c.getUInt4(6)); - assertEquals(c.getUInt8("uInt8_vector"), c.getUInt8(7)); - - // UInt tests using Nullable Holders - NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); - NullableUInt1Holder uInt1Holder = new NullableUInt1Holder(); - NullableUInt2Holder uInt2Holder = new NullableUInt2Holder(); - NullableUInt8Holder uInt8Holder = new NullableUInt8Holder(); - // fill the holders using vector index and test - c.getUInt1(4, uInt1Holder); - c.getUInt2(5, uInt2Holder); - c.getUInt4(6, uInt4Holder); - c.getUInt8(7, uInt8Holder); - assertEquals(c.getUInt1("uInt1_vector"), uInt1Holder.value); - assertEquals(c.getUInt2("uInt2_vector"), uInt2Holder.value); - assertEquals(c.getUInt4("uInt4_vector"), uInt4Holder.value); - assertEquals(c.getUInt8("uInt8_vector"), uInt8Holder.value); - - // refill the holders using vector name and retest - c.getUInt1("uInt1_vector", uInt1Holder); - c.getUInt2("uInt2_vector", uInt2Holder); - c.getUInt4("uInt4_vector", uInt4Holder); - c.getUInt8("uInt8_vector", uInt8Holder); - assertEquals(c.getUInt1("uInt1_vector"), uInt1Holder.value); - 
assertEquals(c.getUInt2("uInt2_vector"), uInt2Holder.value); - assertEquals(c.getUInt4("uInt4_vector"), uInt4Holder.value); - assertEquals(c.getUInt8("uInt8_vector"), uInt8Holder.value); - - // tests floating point - assertEquals(c.getFloat4("float4_vector"), c.getFloat4(8)); - assertEquals(c.getFloat8("float8_vector"), c.getFloat8(9)); - - // floating point tests using Nullable Holders - NullableFloat4Holder float4Holder = new NullableFloat4Holder(); - NullableFloat8Holder float8Holder = new NullableFloat8Holder(); - // fill the holders using vector index and test - c.getFloat4(8, float4Holder); - c.getFloat8(9, float8Holder); - assertEquals(c.getFloat4("float4_vector"), float4Holder.value); - assertEquals(c.getFloat8("float8_vector"), float8Holder.value); - - // refill the holders using vector name and retest - c.getFloat4("float4_vector", float4Holder); - c.getFloat8("float8_vector", float8Holder); - assertEquals(c.getFloat4("float4_vector"), float4Holder.value); - assertEquals(c.getFloat8("float8_vector"), float8Holder.value); - - // test time values using vector name versus vector index - assertEquals(c.getTimeSec("timeSec_vector"), c.getTimeSec(10)); - assertEquals(c.getTimeMilli("timeMilli_vector"), c.getTimeMilli(11)); - assertEquals(c.getTimeMicro("timeMicro_vector"), c.getTimeMicro(12)); - assertEquals(c.getTimeNano("timeNano_vector"), c.getTimeNano(13)); - - // time tests using Nullable Holders - NullableTimeSecHolder timeSecHolder = new NullableTimeSecHolder(); - NullableTimeMilliHolder timeMilliHolder = new NullableTimeMilliHolder(); - NullableTimeMicroHolder timeMicroHolder = new NullableTimeMicroHolder(); - NullableTimeNanoHolder timeNanoHolder = new NullableTimeNanoHolder(); - // fill the holders using vector index and test - c.getTimeSec(10, timeSecHolder); - c.getTimeMilli(11, timeMilliHolder); - c.getTimeMicro(12, timeMicroHolder); - c.getTimeNano(13, timeNanoHolder); - assertEquals(c.getTimeSec("timeSec_vector"), timeSecHolder.value); - 
assertEquals(c.getTimeMilli("timeMilli_vector"), timeMilliHolder.value); - assertEquals(c.getTimeMicro("timeMicro_vector"), timeMicroHolder.value); - assertEquals(c.getTimeNano("timeNano_vector"), timeNanoHolder.value); - - LocalDateTime milliDT = c.getTimeMilliObj(11); - assertNotNull(milliDT); - assertEquals(milliDT, c.getTimeMilliObj("timeMilli_vector")); - - // refill the holders using vector name and retest - c.getTimeSec("timeSec_vector", timeSecHolder); - c.getTimeMilli("timeMilli_vector", timeMilliHolder); - c.getTimeMicro("timeMicro_vector", timeMicroHolder); - c.getTimeNano("timeNano_vector", timeNanoHolder); - assertEquals(c.getTimeSec("timeSec_vector"), timeSecHolder.value); - assertEquals(c.getTimeMilli("timeMilli_vector"), timeMilliHolder.value); - assertEquals(c.getTimeMicro("timeMicro_vector"), timeMicroHolder.value); - assertEquals(c.getTimeNano("timeNano_vector"), timeNanoHolder.value); - - assertEquals(c.getTimeStampSec("timeStampSec_vector"), c.getTimeStampSec(14)); - assertEquals(c.getTimeStampMilli("timeStampMilli_vector"), c.getTimeStampMilli(15)); - assertEquals(c.getTimeStampMicro("timeStampMicro_vector"), c.getTimeStampMicro(16)); - assertEquals(c.getTimeStampNano("timeStampNano_vector"), c.getTimeStampNano(17)); - - // time stamp tests using Nullable Holders - NullableTimeStampSecHolder timeStampSecHolder = new NullableTimeStampSecHolder(); - NullableTimeStampMilliHolder timeStampMilliHolder = new NullableTimeStampMilliHolder(); - NullableTimeStampMicroHolder timeStampMicroHolder = new NullableTimeStampMicroHolder(); - NullableTimeStampNanoHolder timeStampNanoHolder = new NullableTimeStampNanoHolder(); - // fill the holders using vector index and test - c.getTimeStampSec(14, timeStampSecHolder); - c.getTimeStampMilli(15, timeStampMilliHolder); - c.getTimeStampMicro(16, timeStampMicroHolder); - c.getTimeStampNano(17, timeStampNanoHolder); - assertEquals(c.getTimeStampSec("timeStampSec_vector"), timeStampSecHolder.value); - 
assertEquals(c.getTimeStampMilli("timeStampMilli_vector"), timeStampMilliHolder.value); - assertEquals(c.getTimeStampMicro("timeStampMicro_vector"), timeStampMicroHolder.value); - assertEquals(c.getTimeStampNano("timeStampNano_vector"), timeStampNanoHolder.value); - - LocalDateTime secDT = c.getTimeStampSecObj(14); - assertNotNull(secDT); - assertEquals(secDT, c.getTimeStampSecObj("timeStampSec_vector")); - - LocalDateTime milliDT1 = c.getTimeStampMilliObj(15); - assertNotNull(milliDT1); - assertEquals(milliDT1, c.getTimeStampMilliObj("timeStampMilli_vector")); - - LocalDateTime microDT = c.getTimeStampMicroObj(16); - assertNotNull(microDT); - assertEquals(microDT, c.getTimeStampMicroObj("timeStampMicro_vector")); - - LocalDateTime nanoDT = c.getTimeStampNanoObj(17); - assertNotNull(nanoDT); - assertEquals(nanoDT, c.getTimeStampNanoObj("timeStampNano_vector")); - - // refill the holders using vector name and retest - c.getTimeStampSec("timeStampSec_vector", timeStampSecHolder); - c.getTimeStampMilli("timeStampMilli_vector", timeStampMilliHolder); - c.getTimeStampMicro("timeStampMicro_vector", timeStampMicroHolder); - c.getTimeStampNano("timeStampNano_vector", timeStampNanoHolder); - assertEquals(c.getTimeStampSec("timeStampSec_vector"), timeStampSecHolder.value); - assertEquals(c.getTimeStampMilli("timeStampMilli_vector"), timeStampMilliHolder.value); - assertEquals(c.getTimeStampMicro("timeStampMicro_vector"), timeStampMicroHolder.value); - assertEquals(c.getTimeStampNano("timeStampNano_vector"), timeStampNanoHolder.value); - } - } - - @Test - void timestampsWithTimezones() { - List vectorList = timezoneTemporalVectors(allocator, 2); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - c.setPosition(1); - - assertEquals(c.getTimeStampSecTZ("timeStampSecTZ_vector"), c.getTimeStampSecTZ(0)); - assertEquals(c.getTimeStampMilliTZ("timeStampMilliTZ_vector"), c.getTimeStampMilliTZ(1)); - assertEquals(c.getTimeStampMicroTZ("timeStampMicroTZ_vector"), 
c.getTimeStampMicroTZ(2)); - assertEquals(c.getTimeStampNanoTZ("timeStampNanoTZ_vector"), c.getTimeStampNanoTZ(3)); - - // time stamp tests using Nullable Holders - NullableTimeStampSecTZHolder timeStampSecHolder = new NullableTimeStampSecTZHolder(); - NullableTimeStampMilliTZHolder timeStampMilliHolder = new NullableTimeStampMilliTZHolder(); - NullableTimeStampMicroTZHolder timeStampMicroHolder = new NullableTimeStampMicroTZHolder(); - NullableTimeStampNanoTZHolder timeStampNanoHolder = new NullableTimeStampNanoTZHolder(); - - // fill the holders using vector index and test - c.getTimeStampSecTZ(0, timeStampSecHolder); - c.getTimeStampMilliTZ(1, timeStampMilliHolder); - c.getTimeStampMicroTZ(2, timeStampMicroHolder); - c.getTimeStampNanoTZ(3, timeStampNanoHolder); - - long tsSec = timeStampSecHolder.value; - long tsMil = timeStampMilliHolder.value; - long tsMic = timeStampMicroHolder.value; - long tsNan = timeStampNanoHolder.value; - - assertEquals(c.getTimeStampSecTZ("timeStampSecTZ_vector"), timeStampSecHolder.value); - assertEquals(c.getTimeStampMilliTZ("timeStampMilliTZ_vector"), timeStampMilliHolder.value); - assertEquals(c.getTimeStampMicroTZ("timeStampMicroTZ_vector"), timeStampMicroHolder.value); - assertEquals(c.getTimeStampNanoTZ("timeStampNanoTZ_vector"), timeStampNanoHolder.value); - - // fill the holders using vector index and test - c.getTimeStampSecTZ("timeStampSecTZ_vector", timeStampSecHolder); - c.getTimeStampMilliTZ("timeStampMilliTZ_vector", timeStampMilliHolder); - c.getTimeStampMicroTZ("timeStampMicroTZ_vector", timeStampMicroHolder); - c.getTimeStampNanoTZ("timeStampNanoTZ_vector", timeStampNanoHolder); - - assertEquals(tsSec, timeStampSecHolder.value); - assertEquals(tsMil, timeStampMilliHolder.value); - assertEquals(tsMic, timeStampMicroHolder.value); - assertEquals(tsNan, timeStampNanoHolder.value); - } - } - - @Test - void getVarChar() { - List vectorList = intPlusVarcharColumns(allocator); - try (Table t = new Table(vectorList)) { - Row 
c = t.immutableRow(); - c.setPosition(1); - assertEquals(c.getVarCharObj(1), "two"); - assertEquals(c.getVarCharObj(1), c.getVarCharObj(VARCHAR_VECTOR_NAME_1)); - assertArrayEquals( - "two".getBytes(StandardCharsets.UTF_8), c.getVarChar(VARCHAR_VECTOR_NAME_1)); - assertArrayEquals("two".getBytes(StandardCharsets.UTF_8), c.getVarChar(1)); - } - } - - @Test - void getVarBinary() { - List vectorList = intPlusVarBinaryColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertArrayEquals(c.getVarBinary(1), "two".getBytes(StandardCharsets.UTF_8)); - assertArrayEquals(c.getVarBinary(1), c.getVarBinary(VARBINARY_VECTOR_NAME_1)); - } - } - - @Test - void getLargeVarBinary() { - List vectorList = intPlusLargeVarBinaryColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertArrayEquals(c.getLargeVarBinary(1), "two".getBytes(StandardCharsets.UTF_8)); - assertArrayEquals(c.getLargeVarBinary(1), c.getLargeVarBinary(VARBINARY_VECTOR_NAME_1)); - } - } - - @Test - void getLargeVarChar() { - List vectorList = intPlusLargeVarcharColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertEquals(c.getLargeVarCharObj(1), "two"); - assertEquals(c.getLargeVarCharObj(1), c.getLargeVarCharObj(VARCHAR_VECTOR_NAME_1)); - assertArrayEquals( - "two".getBytes(StandardCharsets.UTF_8), c.getLargeVarChar(VARCHAR_VECTOR_NAME_1)); - assertArrayEquals("two".getBytes(StandardCharsets.UTF_8), c.getLargeVarChar(1)); - } - } - - @Test - void getFixedBinary() { - List vectorList = intPlusFixedBinaryColumns(allocator); - try (Table t = new Table(vectorList)) { - Row c = t.immutableRow(); - c.setPosition(1); - assertArrayEquals(c.getFixedSizeBinary(1), "two".getBytes(StandardCharsets.UTF_8)); - assertArrayEquals(c.getFixedSizeBinary(1), c.getFixedSizeBinary(FIXEDBINARY_VECTOR_NAME_1)); - } - } - - @Test - void 
testSimpleListVector1() { - try (ListVector listVector = simpleListVector(allocator); - VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.of(listVector); - Table table = new Table(vectorSchemaRoot)) { - for (Row c : table) { - @SuppressWarnings("unchecked") - List list = (List) c.getList(INT_LIST_VECTOR_NAME); - assertEquals(10, list.size()); - } - } - } - - @Test - void testSimpleListVector2() { - try (ListVector listVector = simpleListVector(allocator); - VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.of(listVector); - Table table = new Table(vectorSchemaRoot)) { - for (Row c : table) { - @SuppressWarnings("unchecked") - List list = (List) c.getList(0); - assertEquals(10, list.size()); - } - } - } - - @Test - void testSimpleStructVector1() { - try (StructVector structVector = simpleStructVector(allocator); - VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.of(structVector); - Table table = new Table(vectorSchemaRoot)) { - for (Row c : table) { - @SuppressWarnings("unchecked") - JsonStringHashMap struct = - (JsonStringHashMap) c.getStruct(STRUCT_VECTOR_NAME); - @SuppressWarnings("unchecked") - JsonStringHashMap struct1 = (JsonStringHashMap) c.getStruct(0); - int a = (int) struct.get("struct_int_child"); - double b = (double) struct.get("struct_flt_child"); - int a1 = (int) struct1.get("struct_int_child"); - double b1 = (double) struct1.get("struct_flt_child"); - assertNotNull(struct); - assertEquals(a, a1); - assertEquals(b, b1); - assertTrue(a >= 0); - assertTrue(b <= a, String.format("a = %s and b = %s", a, b)); - } - } - } - - @Test - void testSimpleUnionVector() { - try (UnionVector unionVector = simpleUnionVector(allocator); - VectorSchemaRoot vsr = VectorSchemaRoot.of(unionVector); - Table table = new Table(vsr)) { - Row c = table.immutableRow(); - c.setPosition(0); - Object object0 = c.getUnion(UNION_VECTOR_NAME); - Object object1 = c.getUnion(0); - assertEquals(object0, object1); - c.setPosition(1); - assertNull(c.getUnion(UNION_VECTOR_NAME)); 
- c.setPosition(2); - Object object2 = c.getUnion(UNION_VECTOR_NAME); - assertEquals(100, object0); - assertEquals(100, object2); - } - } - - @Test - void testSimpleDenseUnionVector() { - try (DenseUnionVector unionVector = simpleDenseUnionVector(allocator); - VectorSchemaRoot vsr = VectorSchemaRoot.of(unionVector); - Table table = new Table(vsr)) { - Row c = table.immutableRow(); - c.setPosition(0); - Object object0 = c.getDenseUnion(UNION_VECTOR_NAME); - Object object1 = c.getDenseUnion(0); - assertEquals(object0, object1); - c.setPosition(1); - assertNull(c.getDenseUnion(UNION_VECTOR_NAME)); - c.setPosition(2); - Object object2 = c.getDenseUnion(UNION_VECTOR_NAME); - assertEquals(100, object0); - assertEquals(100, object2); - } - } - - @Test - void testExtensionTypeVector() { - TestExtensionType.LocationVector vector = - new TestExtensionType.LocationVector("location", allocator); - vector.allocateNew(); - vector.set(0, 34.073814f, -118.240784f); - vector.setValueCount(1); - - try (VectorSchemaRoot vsr = VectorSchemaRoot.of(vector); - Table table = new Table(vsr)) { - Row c = table.immutableRow(); - c.setPosition(0); - Object object0 = c.getExtensionType("location"); - Object object1 = c.getExtensionType(0); - assertEquals(object0, object1); - @SuppressWarnings("unchecked") - JsonStringHashMap struct0 = (JsonStringHashMap) object0; - assertEquals(34.073814f, struct0.get("Latitude")); - } - } - - @Test - void testSimpleMapVector1() { - try (MapVector mapVector = simpleMapVector(allocator); - Table table = Table.of(mapVector)) { - - int i = 1; - for (Row c : table) { - @SuppressWarnings("unchecked") - List> list = - (List>) c.getMap(BIGINT_INT_MAP_VECTOR_NAME); - @SuppressWarnings("unchecked") - List> list1 = (List>) c.getMap(0); - for (int j = 0; j < list1.size(); j++) { - assertEquals(list.get(j), list1.get(j)); - } - if (list != null && !list.isEmpty()) { - assertEquals(i, list.size()); - for (JsonStringHashMap sv : list) { - assertEquals(2, sv.size()); - Long 
o1 = (Long) sv.get("key"); - Integer o2 = (Integer) sv.get("value"); - assertEquals(o1, o2.longValue()); - } - } - i++; - } - } - } - - @Test - void resetPosition() { - try (ListVector listVector = simpleListVector(allocator); - VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.of(listVector); - Table table = new Table(vectorSchemaRoot)) { - Row row = table.immutableRow(); - row.next(); - assertEquals(0, row.rowNumber); - row.resetPosition(); - assertEquals(-1, row.rowNumber); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/table/TableTest.java b/java/vector/src/test/java/org/apache/arrow/vector/table/TableTest.java deleted file mode 100644 index 0be2bf9a17ee5..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/table/TableTest.java +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.table; - -import static org.apache.arrow.vector.table.TestUtils.INT_VECTOR_NAME_1; -import static org.apache.arrow.vector.table.TestUtils.twoIntColumns; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -class TableTest { - - private final ArrowType intArrowType = new ArrowType.Int(32, true); - private final FieldType intFieldType = new FieldType(true, intArrowType, null); - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @Test - void of() { - List vectorList = twoIntColumns(allocator); - try (Table t = Table.of(vectorList.toArray(new FieldVector[2]))) { - Row c = t.immutableRow(); - assertEquals(2, t.getRowCount()); - assertEquals(2, t.getVectorCount()); - IntVector intVector1 = (IntVector) vectorList.get(0); - assertEquals(INT_VECTOR_NAME_1, intVector1.getName()); - c.setPosition(0); - - // Now test changes to the first vector - // first Table value is 1 - assertEquals(1, c.getInt(INT_VECTOR_NAME_1)); - - // original vector is updated to set first value to 44 - intVector1.setSafe(0, 44); - assertEquals(44, intVector1.get(0)); - - // first Table value is still 1 for the zeroth vector - assertEquals(1, c.getInt(0)); - } 
- } - - @Test - void constructor() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList, 2)) { - assertEquals(2, t.getRowCount()); - assertEquals(2, t.getVectorCount()); - Row c = t.immutableRow(); - IntVector intVector1 = (IntVector) vectorList.get(0); - c.setPosition(0); - - // Now test changes to the first vector - // first Table value is 1 - assertEquals(1, c.getInt(INT_VECTOR_NAME_1)); - - // original vector is updated to set first value to 44 - intVector1.setSafe(0, 44); - assertEquals(44, intVector1.get(0)); - assertEquals(44, ((IntVector) vectorList.get(0)).get(0)); - - // first Table value is still 1 for the zeroth vector - assertEquals(1, c.getInt(INT_VECTOR_NAME_1)); - } - } - - /** - * Tests construction with an iterable that's not a list (there is a specialty constructor for - * Lists). - */ - @Test - void constructor2() { - List vectorList = twoIntColumns(allocator); - Iterable iterable = new HashSet<>(vectorList); - try (Table t = new Table(iterable)) { - assertEquals(2, t.getRowCount()); - assertEquals(2, t.getVectorCount()); - Row c = t.immutableRow(); - IntVector intVector1 = (IntVector) vectorList.get(0); - c.setPosition(0); - - // Now test changes to the first vector - // first Table value is 1 - assertEquals(1, c.getInt(INT_VECTOR_NAME_1)); - - // original vector is updated to set first value to 44 - intVector1.setSafe(0, 44); - assertEquals(44, intVector1.get(0)); - assertEquals(44, ((IntVector) vectorList.get(0)).get(0)); - - // first Table value is still 1 for the zeroth vector - assertEquals(1, c.getInt(INT_VECTOR_NAME_1)); - } - } - - @Test - void copy() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - assertEquals(2, t.getVectorCount()); - try (Table copy = t.copy()) { - for (FieldVector v : t.fieldVectors) { - FieldVector vCopy = copy.getVector(v.getName()); - assertNotNull(vCopy); - assertEquals(v.getValueCount(), vCopy.getValueCount()); - for (int i = 0; i < 
v.getValueCount(); i++) { - Integer vValue = ((IntVector) v).getObject(i); - Integer vCopyValue = ((IntVector) vCopy).getObject(i); - assertEquals(vValue, vCopyValue); - } - } - } - } - } - - @Test - void addVector() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - IntVector v3 = new IntVector("3", intFieldType, allocator); - Table t2 = t.addVector(2, v3); - assertEquals(3, t2.fieldVectors.size()); - assertTrue(t2.getVector("3").isNull(0)); - assertTrue(t2.getVector("3").isNull(1)); - t2.close(); - } - } - - @Test - void removeVector() { - List vectorList = twoIntColumns(allocator); - IntVector v2 = (IntVector) vectorList.get(1); - int val1 = v2.get(0); - int val2 = v2.get(1); - try (Table t = new Table(vectorList)) { - - Table t2 = t.removeVector(0); - assertEquals(1, t2.fieldVectors.size()); - assertEquals(val1, ((IntVector) t2.getVector(0)).get(0)); - assertEquals(val2, ((IntVector) t2.getVector(0)).get(1)); - } - } - - /** Tests table iterator in enhanced for loop. */ - @Test - void iterator1() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Iterator iterator = t.iterator(); - assertNotNull(iterator); - assertTrue(iterator.hasNext()); - int sum = 0; - for (Row row : t) { - sum += row.getInt(0); - } - assertEquals(3, sum); - } - } - - /** Tests explicit iterator. */ - @SuppressWarnings("WhileLoopReplaceableByForEach") - @Test - void iterator2() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Iterator iterator = t.iterator(); - assertNotNull(iterator); - assertTrue(iterator.hasNext()); - int sum = 0; - Iterator it = t.iterator(); - while (it.hasNext()) { - Row row = it.next(); - sum += row.getInt(0); - } - assertEquals(3, sum); - } - } - - /** - * Tests a slice operation where no length is provided, so the range extends to the end of the - * table. 
- */ - @Test - void sliceToEnd() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Table slice = t.slice(1); - assertEquals(1, slice.rowCount); - assertEquals(2, t.rowCount); // memory is copied for slice, not transferred - slice.close(); - } - } - - /** Tests a slice operation with a given length parameter. */ - @Test - void sliceRange() { - List vectorList = twoIntColumns(allocator); - try (Table t = new Table(vectorList)) { - Table slice = t.slice(1, 1); - assertEquals(1, slice.rowCount); - assertEquals(2, t.rowCount); // memory is copied for slice, not transferred - slice.close(); - } - } - - /** - * Tests creation of a table from a VectorSchemaRoot. - * - *

    Also tests that updates to the source Vectors do not impact the values in the Table - */ - @Test - void constructFromVsr() { - List vectorList = twoIntColumns(allocator); - try (VectorSchemaRoot vsr = new VectorSchemaRoot(vectorList)) { - Table t = new Table(vsr); - Row c = t.immutableRow(); - assertEquals(2, t.rowCount); - assertEquals(0, vsr.getRowCount()); // memory is copied for slice, not transferred - IntVector intVector1 = (IntVector) vectorList.get(0); - c.setPosition(0); - - // Now test changes to the first vector - // first Table value is 1 - assertEquals(1, c.getInt(INT_VECTOR_NAME_1)); - - // original vector is updated to set first value to 44 - intVector1.setSafe(0, 44); - assertEquals(44, intVector1.get(0)); - assertEquals(44, ((IntVector) vsr.getVector(0)).get(0)); - - // first Table value is still 1 for the zeroth vector - assertEquals(1, c.getInt(INT_VECTOR_NAME_1)); - - // TEST FIELDS // - Schema schema = t.schema; - Field f1 = t.getField(INT_VECTOR_NAME_1); - FieldVector fv1 = vectorList.get(0); - assertEquals(f1, fv1.getField()); - assertEquals(f1, schema.findField(INT_VECTOR_NAME_1)); - t.close(); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/table/TestUtils.java b/java/vector/src/test/java/org/apache/arrow/vector/table/TestUtils.java deleted file mode 100644 index d39f92e19880b..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/table/TestUtils.java +++ /dev/null @@ -1,441 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.table; - -import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; - -import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.GenerateSampleData; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalMonthDayNanoVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import 
org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.MapVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.impl.NullableStructWriter; -import org.apache.arrow.vector.complex.impl.UnionMapWriter; -import org.apache.arrow.vector.complex.writer.Float8Writer; -import org.apache.arrow.vector.complex.writer.IntWriter; -import org.apache.arrow.vector.holders.NullableUInt4Holder; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; - -public class TestUtils { - - public static final String INT_VECTOR_NAME = "intCol"; - public static final String INT_VECTOR_NAME_1 = "intCol1"; - public static final String VARCHAR_VECTOR_NAME_1 = "varcharCol1"; - public static final String VARBINARY_VECTOR_NAME_1 = "varbinaryCol1"; - public static final String FIXEDBINARY_VECTOR_NAME_1 = "varbinaryCol1"; - public static final String INT_VECTOR_NAME_2 = "intCol2"; - public static final String INT_LIST_VECTOR_NAME = "int list vector"; - public static final String 
BIGINT_INT_MAP_VECTOR_NAME = "bigint-int map vector"; - public static final String STRUCT_VECTOR_NAME = "struct_vector"; - public static final String UNION_VECTOR_NAME = "union_vector"; - - /** - * Returns a list of two IntVectors to be used to instantiate Tables for testing. Each IntVector - * has two values set. - */ - static List twoIntColumns(BufferAllocator allocator) { - List vectorList = new ArrayList<>(); - IntVector v1 = getSimpleIntVector(allocator); - IntVector v2 = new IntVector(INT_VECTOR_NAME_2, allocator); - v2.allocateNew(2); - v2.set(0, 3); - v2.set(1, 4); - v2.setValueCount(2); - vectorList.add(v1); - vectorList.add(v2); - return vectorList; - } - - /** - * Returns a list of two FieldVectors to be used to instantiate Tables for testing. The first - * vector is an IntVector and the second is a VarCharVector. Each vector has two values set. - */ - static List intPlusVarcharColumns(BufferAllocator allocator) { - List vectorList = new ArrayList<>(); - IntVector v1 = getSimpleIntVector(allocator); - VarCharVector v2 = new VarCharVector(VARCHAR_VECTOR_NAME_1, allocator); - v2.allocateNew(2); - v2.set(0, "one".getBytes(StandardCharsets.UTF_8)); - v2.set(1, "two".getBytes(StandardCharsets.UTF_8)); - v2.setValueCount(2); - vectorList.add(v1); - vectorList.add(v2); - return vectorList; - } - - /** - * Returns a list of two FieldVectors to be used to instantiate Tables for testing. The first - * vector is an IntVector and the second is a LargeVarCharVector. Each vector has two values set. 
- */ - static List intPlusLargeVarcharColumns(BufferAllocator allocator) { - List vectorList = new ArrayList<>(); - IntVector v1 = getSimpleIntVector(allocator); - LargeVarCharVector v2 = new LargeVarCharVector(VARCHAR_VECTOR_NAME_1, allocator); - v2.allocateNew(2); - v2.set(0, "one".getBytes(StandardCharsets.UTF_8)); - v2.set(1, "two".getBytes(StandardCharsets.UTF_8)); - v2.setValueCount(2); - vectorList.add(v1); - vectorList.add(v2); - return vectorList; - } - - /** - * Returns a list of two FieldVectors to be used to instantiate Tables for testing. The first - * vector is an IntVector and the second is a VarBinaryVector. Each vector has two values set. The - * large binary vectors values are "one" and "two" encoded with UTF-8 - */ - static List intPlusVarBinaryColumns(BufferAllocator allocator) { - List vectorList = new ArrayList<>(); - IntVector v1 = getSimpleIntVector(allocator); - VarBinaryVector v2 = new VarBinaryVector(VARBINARY_VECTOR_NAME_1, allocator); - v2.allocateNew(2); - v2.set(0, "one".getBytes(StandardCharsets.UTF_8)); - v2.set(1, "two".getBytes(StandardCharsets.UTF_8)); - v2.setValueCount(2); - vectorList.add(v1); - vectorList.add(v2); - return vectorList; - } - - /** - * Returns a list of two FieldVectors to be used to instantiate Tables for testing. The first - * vector is an IntVector and the second is a VarBinaryVector. Each vector has two values set. 
The - * large binary vectors values are "one" and "two" encoded with UTF-8 - */ - static List intPlusLargeVarBinaryColumns(BufferAllocator allocator) { - List vectorList = new ArrayList<>(); - IntVector v1 = getSimpleIntVector(allocator); - LargeVarBinaryVector v2 = new LargeVarBinaryVector(VARBINARY_VECTOR_NAME_1, allocator); - v2.allocateNew(2); - v2.set(0, "one".getBytes(StandardCharsets.UTF_8)); - v2.set(1, "two".getBytes(StandardCharsets.UTF_8)); - v2.setValueCount(2); - vectorList.add(v1); - vectorList.add(v2); - return vectorList; - } - - /** - * Returns a list of two FieldVectors to be used to instantiate Tables for testing. The first - * vector is an IntVector and the second is a FixedSizeBinary vector. Each vector has two values - * set. The large binary vectors values are "one" and "two" encoded with UTF-8 - */ - static List intPlusFixedBinaryColumns(BufferAllocator allocator) { - List vectorList = new ArrayList<>(); - IntVector v1 = getSimpleIntVector(allocator); - FixedSizeBinaryVector v2 = new FixedSizeBinaryVector(FIXEDBINARY_VECTOR_NAME_1, allocator, 3); - v2.allocateNew(2); - v2.set(0, "one".getBytes(StandardCharsets.UTF_8)); - v2.set(1, "two".getBytes(StandardCharsets.UTF_8)); - v2.setValueCount(2); - vectorList.add(v1); - vectorList.add(v2); - return vectorList; - } - - private static IntVector getSimpleIntVector(BufferAllocator allocator) { - IntVector v1 = new IntVector(INT_VECTOR_NAME_1, allocator); - v1.allocateNew(2); - v1.set(0, 1); - v1.set(1, 2); - v1.setValueCount(2); - return v1; - } - - /** - * Returns a list of fixed-width vectors for testing. It includes - * - *

      - *
    1. all integral and floating point types - *
    2. all basic times and timestamps (second, milli, micro, nano - *
    - * - * The vector names are based on their type name (e.g. BigIntVector is called "bigInt_vector" - */ - static List fixedWidthVectors(BufferAllocator allocator, int rowCount) { - List vectors = new ArrayList<>(); - numericVectors(vectors, allocator, rowCount); - simpleTemporalVectors(vectors, allocator, rowCount); - return vectors; - } - - /** - * Returns a list of all integral and floating point vectors. The vector names are based on their - * type name (e.g. BigIntVector is called "bigInt_vector" - */ - static List numericVectors( - List vectors, BufferAllocator allocator, int rowCount) { - vectors.add(new IntVector("int_vector", allocator)); - vectors.add(new BigIntVector("bigInt_vector", allocator)); - vectors.add(new SmallIntVector("smallInt_vector", allocator)); - vectors.add(new TinyIntVector("tinyInt_vector", allocator)); - vectors.add(new UInt1Vector("uInt1_vector", allocator)); - vectors.add(new UInt2Vector("uInt2_vector", allocator)); - vectors.add(new UInt4Vector("uInt4_vector", allocator)); - vectors.add(new UInt8Vector("uInt8_vector", allocator)); - vectors.add(new Float4Vector("float4_vector", allocator)); - vectors.add(new Float8Vector("float8_vector", allocator)); - vectors.forEach(vec -> GenerateSampleData.generateTestData(vec, rowCount)); - return vectors; - } - - static List numericVectors(BufferAllocator allocator, int rowCount) { - List vectors = new ArrayList<>(); - return numericVectors(vectors, allocator, rowCount); - } - - static List simpleTemporalVectors( - List vectors, BufferAllocator allocator, int rowCount) { - vectors.add(new TimeSecVector("timeSec_vector", allocator)); - vectors.add(new TimeMilliVector("timeMilli_vector", allocator)); - vectors.add(new TimeMicroVector("timeMicro_vector", allocator)); - vectors.add(new TimeNanoVector("timeNano_vector", allocator)); - - vectors.add(new TimeStampSecVector("timeStampSec_vector", allocator)); - vectors.add(new TimeStampMilliVector("timeStampMilli_vector", allocator)); - 
vectors.add(new TimeStampMicroVector("timeStampMicro_vector", allocator)); - vectors.add(new TimeStampNanoVector("timeStampNano_vector", allocator)); - - vectors.add(new DateMilliVector("dateMilli_vector", allocator)); - vectors.add(new DateDayVector("dateDay_vector", allocator)); - - vectors.forEach(vec -> GenerateSampleData.generateTestData(vec, rowCount)); - return vectors; - } - - static List simpleTemporalVectors(BufferAllocator allocator, int rowCount) { - List vectors = new ArrayList<>(); - return simpleTemporalVectors(vectors, allocator, rowCount); - } - - static List timezoneTemporalVectors(BufferAllocator allocator, int rowCount) { - List vectors = new ArrayList<>(); - vectors.add(new TimeStampSecTZVector("timeStampSecTZ_vector", allocator, "UTC")); - vectors.add(new TimeStampMilliTZVector("timeStampMilliTZ_vector", allocator, "UTC")); - vectors.add(new TimeStampMicroTZVector("timeStampMicroTZ_vector", allocator, "UTC")); - vectors.add(new TimeStampNanoTZVector("timeStampNanoTZ_vector", allocator, "UTC")); - vectors.forEach(vec -> GenerateSampleData.generateTestData(vec, rowCount)); - return vectors; - } - - static List intervalVectors(BufferAllocator allocator, int rowCount) { - List vectors = new ArrayList<>(); - vectors.add(new IntervalDayVector("intervalDay_vector", allocator)); - vectors.add(new IntervalYearVector("intervalYear_vector", allocator)); - vectors.add(new IntervalMonthDayNanoVector("intervalMonthDayNano_vector", allocator)); - vectors.add( - new DurationVector( - "duration_vector", - new FieldType(true, new ArrowType.Duration(TimeUnit.SECOND), null), - allocator)); - vectors.forEach(vec -> GenerateSampleData.generateTestData(vec, rowCount)); - return vectors; - } - - /** Returns a list vector of ints. 
*/ - static ListVector simpleListVector(BufferAllocator allocator) { - ListVector listVector = ListVector.empty(INT_LIST_VECTOR_NAME, allocator); - final int innerCount = 80; // total number of values - final int outerCount = 8; // total number of values in the list vector itself - final int listLength = innerCount / outerCount; // length of an individual list - - Types.MinorType type = Types.MinorType.INT; - listVector.addOrGetVector(FieldType.nullable(type.getType())); - - listVector.allocateNew(); - IntVector dataVector = (IntVector) listVector.getDataVector(); - - for (int i = 0; i < innerCount; i++) { - dataVector.set(i, i); - } - dataVector.setValueCount(innerCount); - - for (int i = 0; i < outerCount; i++) { - BitVectorHelper.setBit(listVector.getValidityBuffer(), i); - listVector.getOffsetBuffer().setInt(i * OFFSET_WIDTH, i * listLength); - listVector.getOffsetBuffer().setInt((i + 1) * OFFSET_WIDTH, (i + 1) * listLength); - } - listVector.setLastSet(outerCount - 1); - listVector.setValueCount(outerCount); - return listVector; - } - - static StructVector simpleStructVector(BufferAllocator allocator) { - final String INT_COL = "struct_int_child"; - final String FLT_COL = "struct_flt_child"; - StructVector structVector = StructVector.empty(STRUCT_VECTOR_NAME, allocator); - final int size = 6; // number of structs - - NullableStructWriter structWriter = structVector.getWriter(); - structVector.addOrGet( - INT_COL, FieldType.nullable(Types.MinorType.INT.getType()), IntVector.class); - structVector.addOrGet( - FLT_COL, FieldType.nullable(Types.MinorType.INT.getType()), IntVector.class); - structVector.allocateNew(); - IntWriter intWriter = structWriter.integer(INT_COL); - Float8Writer float8Writer = structWriter.float8(FLT_COL); - - for (int i = 0; i < size; i++) { - structWriter.setPosition(i); - structWriter.start(); - intWriter.writeInt(i); - float8Writer.writeFloat8(i * .1); - structWriter.end(); - } - - structWriter.setValueCount(size); - - return 
structVector; - } - - /** Returns a MapVector of longs to doubles. */ - static MapVector simpleMapVector(BufferAllocator allocator) { - MapVector mapVector = MapVector.empty(BIGINT_INT_MAP_VECTOR_NAME, allocator, false); - mapVector.allocateNew(); - int count = 5; - UnionMapWriter mapWriter = mapVector.getWriter(); - for (int i = 0; i < count; i++) { - mapWriter.startMap(); - for (int j = 0; j < i + 1; j++) { - mapWriter.startEntry(); - mapWriter.key().bigInt().writeBigInt(j); - mapWriter.value().integer().writeInt(j); - mapWriter.endEntry(); - } - mapWriter.endMap(); - } - mapWriter.setValueCount(count); - return mapVector; - } - - static List decimalVector(BufferAllocator allocator, int rowCount) { - List vectors = new ArrayList<>(); - vectors.add( - new DecimalVector( - "decimal_vector", - new FieldType(true, new ArrowType.Decimal(38, 10, 128), null), - allocator)); - vectors.forEach(vec -> generateDecimalData((DecimalVector) vec, rowCount)); - return vectors; - } - - static List bitVector(BufferAllocator allocator, int rowCount) { - List vectors = new ArrayList<>(); - vectors.add(new BitVector("bit_vector", allocator)); - vectors.forEach(vec -> GenerateSampleData.generateTestData(vec, rowCount)); - return vectors; - } - - /** Returns a UnionVector. */ - static UnionVector simpleUnionVector(BufferAllocator allocator) { - final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); - uInt4Holder.value = 100; - uInt4Holder.isSet = 1; - - UnionVector unionVector = new UnionVector(UNION_VECTOR_NAME, allocator, null, null); - unionVector.allocateNew(); - - // write some data - unionVector.setType(0, Types.MinorType.UINT4); - unionVector.setSafe(0, uInt4Holder); - unionVector.setType(2, Types.MinorType.UINT4); - unionVector.setSafe(2, uInt4Holder); - unionVector.setValueCount(4); - return unionVector; - } - - /** Returns a DenseUnionVector. 
*/ - static DenseUnionVector simpleDenseUnionVector(BufferAllocator allocator) { - final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); - uInt4Holder.value = 100; - uInt4Holder.isSet = 1; - - DenseUnionVector unionVector = new DenseUnionVector(UNION_VECTOR_NAME, allocator, null, null); - unionVector.allocateNew(); - - // write some data - byte uint4TypeId = - unionVector.registerNewTypeId(Field.nullable("", Types.MinorType.UINT4.getType())); - unionVector.setTypeId(0, uint4TypeId); - unionVector.setSafe(0, uInt4Holder); - unionVector.setTypeId(2, uint4TypeId); - unionVector.setSafe(2, uInt4Holder); - unionVector.setValueCount(4); - return unionVector; - } - - private static void generateDecimalData(DecimalVector vector, int valueCount) { - final BigDecimal even = new BigDecimal("0.0543278923"); - final BigDecimal odd = new BigDecimal("2.0543278923"); - for (int i = 0; i < valueCount; i++) { - if (i % 2 == 0) { - vector.setSafe(i, even); - } else { - vector.setSafe(i, odd); - } - } - vector.setValueCount(valueCount); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/test/util/ArrowTestDataUtil.java b/java/vector/src/test/java/org/apache/arrow/vector/test/util/ArrowTestDataUtil.java deleted file mode 100644 index 2b0f7af608a95..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/test/util/ArrowTestDataUtil.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.test.util; - -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Objects; - -/** Utility methods and constants for working with the arrow-testing repo. */ -public final class ArrowTestDataUtil { - public static final String TEST_DATA_ENV_VAR = "ARROW_TEST_DATA"; - public static final String TEST_DATA_PROPERTY = "arrow.test.dataRoot"; - - public static Path getTestDataRoot() { - String path = System.getenv(TEST_DATA_ENV_VAR); - if (path == null) { - path = System.getProperty(TEST_DATA_PROPERTY); - } - return Paths.get( - Objects.requireNonNull( - path, - String.format( - "Could not find test data path. Set the environment variable %s or the JVM property %s.", - TEST_DATA_ENV_VAR, TEST_DATA_PROPERTY))); - } - - private ArrowTestDataUtil() {} -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/RandomDataGenerator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/RandomDataGenerator.java deleted file mode 100644 index d0ef176cb3c47..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/testing/RandomDataGenerator.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.testing; - -import java.util.Random; -import java.util.function.Supplier; - -/** Utility for generating random data. */ -public class RandomDataGenerator { - - static final Random random = new Random(0); - - public static final Supplier TINY_INT_GENERATOR = () -> (byte) random.nextInt(); - - public static final Supplier SMALL_INT_GENERATOR = () -> (short) random.nextInt(); - - public static final Supplier INT_GENERATOR = () -> random.nextInt(); - - public static final Supplier LONG_GENERATOR = () -> random.nextLong(); - - public static final Supplier FLOAT_GENERATOR = () -> random.nextFloat(); - - public static final Supplier DOUBLE_GENERATOR = () -> random.nextDouble(); - - private RandomDataGenerator() {} -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java deleted file mode 100644 index c0e2ae252e02c..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java +++ /dev/null @@ -1,627 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.testing; - -import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import 
org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.compare.VectorEqualsVisitor; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestValueVectorPopulator { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testPopulateBigIntVector() { - try (final BigIntVector vector1 = new BigIntVector("vector", allocator); - final BigIntVector vector2 = new BigIntVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i); - } - } - vector1.setValueCount(10); - - setVector(vector2, null, 1L, null, 3L, null, 5L, null, 7L, null, 9L); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateBitVector() { - try (final BitVector vector1 = new BitVector("vector", allocator); - final BitVector vector2 = new BitVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i > 5 ? 
0 : 1); - } - } - vector1.setValueCount(10); - - setVector(vector2, null, 1, null, 1, null, 1, null, 0, null, 0); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateDateDayVector() { - try (final DateDayVector vector1 = new DateDayVector("vector", allocator); - final DateDayVector vector2 = new DateDayVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i * 10); - } - } - vector1.setValueCount(10); - - setVector(vector2, null, 10, null, 30, null, 50, null, 70, null, 90); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateDateMilliVector() { - try (final DateMilliVector vector1 = new DateMilliVector("vector", allocator); - final DateMilliVector vector2 = new DateMilliVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i * 1000); - } - } - vector1.setValueCount(10); - - setVector(vector2, null, 1000L, null, 3000L, null, 5000L, null, 7000L, null, 9000L); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateDecimalVector() { - try (final DecimalVector vector1 = new DecimalVector("vector", allocator, 10, 3); - final DecimalVector vector2 = new DecimalVector("vector", allocator, 10, 3)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i); - } - } - vector1.setValueCount(10); - - setVector(vector2, null, 1L, null, 3L, null, 5L, null, 7L, null, 9L); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateDurationVector() { - final FieldType fieldType = FieldType.nullable(new ArrowType.Duration(TimeUnit.SECOND)); - try (final DurationVector vector1 
= new DurationVector("vector", fieldType, allocator); - final DurationVector vector2 = new DurationVector("vector", fieldType, allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i); - } - } - vector1.setValueCount(10); - - setVector(vector2, null, 1L, null, 3L, null, 5L, null, 7L, null, 9L); - - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateFixedSizeBinaryVector() { - try (final FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("vector", allocator, 5); - final FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("vector", allocator, 5)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, ("test" + i).getBytes(StandardCharsets.UTF_8)); - } - } - vector1.setValueCount(10); - - setVector( - vector2, - null, - "test1".getBytes(StandardCharsets.UTF_8), - null, - "test3".getBytes(StandardCharsets.UTF_8), - null, - "test5".getBytes(StandardCharsets.UTF_8), - null, - "test7".getBytes(StandardCharsets.UTF_8), - null, - "test9".getBytes(StandardCharsets.UTF_8)); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateFloat4Vector() { - try (final Float4Vector vector1 = new Float4Vector("vector", allocator); - final Float4Vector vector2 = new Float4Vector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i); - } - } - vector1.setValueCount(10); - setVector(vector2, null, 1f, null, 3f, null, 5f, null, 7f, null, 9f); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateFloat8Vector() { - try (final Float8Vector vector1 = new Float8Vector("vector", allocator); - final Float8Vector vector2 = new Float8Vector("vector", 
allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i); - } - } - vector1.setValueCount(10); - setVector(vector2, null, 1d, null, 3d, null, 5d, null, 7d, null, 9d); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateIntVector() { - try (final IntVector vector1 = new IntVector("vector", allocator); - final IntVector vector2 = new IntVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i); - } - } - vector1.setValueCount(10); - - ValueVectorDataPopulator.setVector(vector2, null, 1, null, 3, null, 5, null, 7, null, 9); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateSmallIntVector() { - try (final SmallIntVector vector1 = new SmallIntVector("vector", allocator); - final SmallIntVector vector2 = new SmallIntVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i); - } - } - vector1.setValueCount(10); - - ValueVectorDataPopulator.setVector( - vector2, null, (short) 1, null, (short) 3, null, (short) 5, null, (short) 7, null, - (short) 9); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateIntervalDayVector() { - try (final IntervalYearVector vector1 = new IntervalYearVector("vector", allocator); - final IntervalYearVector vector2 = new IntervalYearVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i); - } - } - vector1.setValueCount(10); - - ValueVectorDataPopulator.setVector(vector2, null, 1, null, 3, null, 5, null, 7, null, 9); - 
assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateTimeMicroVector() { - try (final TimeMicroVector vector1 = new TimeMicroVector("vector", allocator); - final TimeMicroVector vector2 = new TimeMicroVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i * 10000); - } - } - vector1.setValueCount(10); - setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateTimeMilliVector() { - try (final TimeMilliVector vector1 = new TimeMilliVector("vector", allocator); - final TimeMilliVector vector2 = new TimeMilliVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i * 100); - } - } - vector1.setValueCount(10); - setVector(vector2, null, 100, null, 300, null, 500, null, 700, null, 900); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateTimeNanoVector() { - try (final TimeNanoVector vector1 = new TimeNanoVector("vector", allocator); - final TimeNanoVector vector2 = new TimeNanoVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i * 10000); - } - } - vector1.setValueCount(10); - setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateTimeSecVector() { - try (final TimeSecVector vector1 = new TimeSecVector("vector", allocator); - final TimeSecVector vector2 = new TimeSecVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; 
i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i * 100); - } - } - vector1.setValueCount(10); - setVector(vector2, null, 100, null, 300, null, 500, null, 700, null, 900); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateTimeStampMicroVector() { - try (final TimeStampMicroVector vector1 = new TimeStampMicroVector("vector", allocator); - final TimeStampMicroVector vector2 = new TimeStampMicroVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i * 10000); - } - } - vector1.setValueCount(10); - setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateTimeStampMilliVector() { - try (final TimeStampMilliVector vector1 = new TimeStampMilliVector("vector", allocator); - final TimeStampMilliVector vector2 = new TimeStampMilliVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i * 10000); - } - } - vector1.setValueCount(10); - setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateTimeStampNanoVector() { - try (final TimeStampNanoVector vector1 = new TimeStampNanoVector("vector", allocator); - final TimeStampNanoVector vector2 = new TimeStampNanoVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i * 10000); - } - } - vector1.setValueCount(10); - setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L); - 
assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateTimeStampSecVector() { - try (final TimeStampSecVector vector1 = new TimeStampSecVector("vector", allocator); - final TimeStampSecVector vector2 = new TimeStampSecVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i * 100); - } - } - vector1.setValueCount(10); - setVector(vector2, null, 100L, null, 300L, null, 500L, null, 700L, null, 900L); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateTinyIntVector() { - try (final TinyIntVector vector1 = new TinyIntVector("vector", allocator); - final TinyIntVector vector2 = new TinyIntVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i); - } - } - vector1.setValueCount(10); - setVector( - vector2, null, (byte) 1, null, (byte) 3, null, (byte) 5, null, (byte) 7, null, (byte) 9); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateUInt1Vector() { - try (final UInt1Vector vector1 = new UInt1Vector("vector", allocator); - final UInt1Vector vector2 = new UInt1Vector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i); - } - } - vector1.setValueCount(10); - setVector( - vector2, null, (byte) 1, null, (byte) 3, null, (byte) 5, null, (byte) 7, null, (byte) 9); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateUInt2Vector() { - try (final UInt2Vector vector1 = new UInt2Vector("vector", allocator); - final UInt2Vector vector2 = new UInt2Vector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - 
if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i); - } - } - vector1.setValueCount(10); - setVector( - vector2, null, (char) 1, null, (char) 3, null, (char) 5, null, (char) 7, null, (char) 9); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateUInt4Vector() { - try (final UInt4Vector vector1 = new UInt4Vector("vector", allocator); - final UInt4Vector vector2 = new UInt4Vector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i); - } - } - vector1.setValueCount(10); - setVector(vector2, null, 1, null, 3, null, 5, null, 7, null, 9); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateUInt8Vector() { - try (final UInt8Vector vector1 = new UInt8Vector("vector", allocator); - final UInt8Vector vector2 = new UInt8Vector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, i); - } - } - vector1.setValueCount(10); - setVector(vector2, null, 1L, null, 3L, null, 5L, null, 7L, null, 9L); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateVarBinaryVector() { - try (final VarBinaryVector vector1 = new VarBinaryVector("vector", allocator); - final VarBinaryVector vector2 = new VarBinaryVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, ("test" + i).getBytes(StandardCharsets.UTF_8)); - } - } - vector1.setValueCount(10); - - setVector( - vector2, - null, - "test1".getBytes(StandardCharsets.UTF_8), - null, - "test3".getBytes(StandardCharsets.UTF_8), - null, - "test5".getBytes(StandardCharsets.UTF_8), - null, - "test7".getBytes(StandardCharsets.UTF_8), - null, - 
"test9".getBytes(StandardCharsets.UTF_8)); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } - - @Test - public void testPopulateVarCharVector() { - try (final VarCharVector vector1 = new VarCharVector("vector", allocator); - final VarCharVector vector2 = new VarCharVector("vector", allocator)) { - - vector1.allocateNew(10); - for (int i = 0; i < 10; i++) { - if (i % 2 == 0) { - vector1.setNull(i); - } else { - vector1.set(i, ("test" + i).getBytes(StandardCharsets.UTF_8)); - } - } - vector1.setValueCount(10); - - setVector(vector2, null, "test1", null, "test3", null, "test5", null, "test7", null, "test9"); - assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2)); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java deleted file mode 100644 index f599dfa539421..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java +++ /dev/null @@ -1,836 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.testing; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.math.BigDecimal; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.BitVectorHelper; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float2Vector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.VarBinaryVector; -import 
org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VariableWidthFieldVector; -import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector; -import org.apache.arrow.vector.complex.BaseRepeatedValueVector; -import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.LargeListViewVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.ListViewVector; -import org.apache.arrow.vector.complex.RunEndEncodedVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.holders.IntervalDayHolder; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; - -/** Utility for populating {@link org.apache.arrow.vector.ValueVector}. */ -public class ValueVectorDataPopulator { - - private ValueVectorDataPopulator() {} - - /** Populate values for BigIntVector. */ - public static void setVector(BigIntVector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for BitVector. */ - public static void setVector(BitVector vector, Integer... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for DateDayVector. - * - * @param values numbers of days since UNIX epoch - */ - public static void setVector(DateDayVector vector, Integer... 
values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for DateMilliVector. - * - * @param values numbers of milliseconds since UNIX epoch - */ - public static void setVector(DateMilliVector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for DecimalVector. */ - public static void setVector(DecimalVector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for Decimal256Vector. */ - public static void setVector(Decimal256Vector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for Decimal256Vector. */ - public static void setVector(Decimal256Vector vector, BigDecimal... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for DurationVector. - * - * @param values values of elapsed time in either seconds, milliseconds, microseconds or - * nanoseconds. - */ - public static void setVector(DurationVector vector, Long... 
values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for FixedSizeBinaryVector. */ - public static void setVector(FixedSizeBinaryVector vector, byte[]... values) { - final int length = values.length; - vector.allocateNewSafe(); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for Float2Vector. */ - public static void setVector(Float2Vector vector, Float... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.setWithPossibleTruncate(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for Float4Vector. */ - public static void setVector(Float4Vector vector, Float... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for Float8Vector. */ - public static void setVector(Float8Vector vector, Double... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for IntVector. */ - public static void setVector(IntVector vector, Integer... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for IntervalDayVector. 
- * - * @param values holders witch holds days and milliseconds values which represents interval in SQL - * style. - */ - public static void setVector(IntervalDayVector vector, IntervalDayHolder... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i].days, values[i].milliseconds); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for IntervalYearVector. - * - * @param values total month intervals in SQL style. - */ - public static void setVector(IntervalYearVector vector, Integer... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for SmallIntVector. */ - public static void setVector(SmallIntVector vector, Short... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for TimeMicroVector. - * - * @param values numbers of microseconds since UNIX epoch - */ - public static void setVector(TimeMicroVector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for TimeMicroVector. - * - * @param values numbers of milliseconds since UNIX epoch - */ - public static void setVector(TimeMilliVector vector, Integer... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for TimeNanoVector. 
- * - * @param values numbers of nanoseconds since UNIX epoch - */ - public static void setVector(TimeNanoVector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for TimeSecVector. - * - * @param values numbers of seconds since UNIX epoch - */ - public static void setVector(TimeSecVector vector, Integer... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for TimeStampMicroTZVector. - * - * @param values numbers of microseconds since UNIX epoch - */ - public static void setVector(TimeStampMicroTZVector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for TimeStampMicroVector. - * - * @param values numbers of microseconds since UNIX epoch - */ - public static void setVector(TimeStampMicroVector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for TimeStampMilliTZVector. - * - * @param values numbers of milliseconds since UNIX epoch - */ - public static void setVector(TimeStampMilliTZVector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for TimeStampMilliVector. 
- * - * @param values numbers of milliseconds since UNIX epoch - */ - public static void setVector(TimeStampMilliVector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for TimeStampNanoTZVector. - * - * @param values numbers of nanoseconds since UNIX epoch - */ - public static void setVector(TimeStampNanoTZVector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for TimeStampNanoVector. - * - * @param values numbers of nanoseconds since UNIX epoch - */ - public static void setVector(TimeStampNanoVector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for TimeStampSecTZVector. - * - * @param values numbers of seconds since UNIX epoch - */ - public static void setVector(TimeStampSecTZVector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** - * Populate values for TimeStampSecVector. - * - * @param values numbers of seconds since UNIX epoch - */ - public static void setVector(TimeStampSecVector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for TinyIntVector. 
*/ - public static void setVector(TinyIntVector vector, Byte... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for UInt1Vector. */ - public static void setVector(UInt1Vector vector, Byte... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for UInt2Vector. */ - public static void setVector(UInt2Vector vector, Character... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for UInt4Vector. */ - public static void setVector(UInt4Vector vector, Integer... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for UInt8Vector. */ - public static void setVector(UInt8Vector vector, Long... values) { - final int length = values.length; - vector.allocateNew(length); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for VarBinaryVector. */ - public static void setVector(VarBinaryVector vector, byte[]... values) { - final int length = values.length; - vector.allocateNewSafe(); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for VarCharVector. */ - public static void setVector(VarCharVector vector, byte[]... 
values) { - final int length = values.length; - vector.allocateNewSafe(); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - public static void setVector(VariableWidthFieldVector vector, byte[]... values) { - final int length = values.length; - vector.allocateNewSafe(); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for LargeVarCharVector. */ - public static void setVector(LargeVarCharVector vector, byte[]... values) { - final int length = values.length; - vector.allocateNewSafe(); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.set(i, values[i]); - } - } - vector.setValueCount(length); - } - - /** Populate values for VarCharVector. */ - public static void setVector(VarCharVector vector, String... values) { - final int length = values.length; - vector.allocateNewSafe(); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.setSafe(i, values[i].getBytes(StandardCharsets.UTF_8)); - } - } - vector.setValueCount(length); - } - - /** Populate values for LargeVarCharVector. */ - public static void setVector(LargeVarCharVector vector, String... values) { - final int length = values.length; - vector.allocateNewSafe(); - for (int i = 0; i < length; i++) { - if (values[i] != null) { - vector.setSafe(i, values[i].getBytes(StandardCharsets.UTF_8)); - } - } - vector.setValueCount(length); - } - - /** Populate values for {@link ListVector}. */ - public static void setVector(ListVector vector, List... 
values) { - vector.allocateNewSafe(); - Types.MinorType type = Types.MinorType.INT; - vector.addOrGetVector(FieldType.nullable(type.getType())); - - IntVector dataVector = (IntVector) vector.getDataVector(); - dataVector.allocateNew(); - - // set underlying vectors - int curPos = 0; - vector.getOffsetBuffer().setInt(0, curPos); - for (int i = 0; i < values.length; i++) { - if (values[i] == null) { - BitVectorHelper.unsetBit(vector.getValidityBuffer(), i); - } else { - BitVectorHelper.setBit(vector.getValidityBuffer(), i); - for (int value : values[i]) { - dataVector.setSafe(curPos, value); - curPos += 1; - } - } - vector.getOffsetBuffer().setInt((i + 1) * BaseRepeatedValueVector.OFFSET_WIDTH, curPos); - } - dataVector.setValueCount(curPos); - vector.setLastSet(values.length - 1); - vector.setValueCount(values.length); - } - - /** Populate values for {@link LargeListVector}. */ - public static void setVector(LargeListVector vector, List... values) { - vector.allocateNewSafe(); - Types.MinorType type = Types.MinorType.INT; - vector.addOrGetVector(FieldType.nullable(type.getType())); - - IntVector dataVector = (IntVector) vector.getDataVector(); - dataVector.allocateNew(); - - // set underlying vectors - int curPos = 0; - vector.getOffsetBuffer().setLong(0, curPos); - for (int i = 0; i < values.length; i++) { - if (values[i] == null) { - BitVectorHelper.unsetBit(vector.getValidityBuffer(), i); - } else { - BitVectorHelper.setBit(vector.getValidityBuffer(), i); - for (int value : values[i]) { - dataVector.setSafe(curPos, value); - curPos += 1; - } - } - vector.getOffsetBuffer().setLong((long) (i + 1) * LargeListVector.OFFSET_WIDTH, curPos); - } - dataVector.setValueCount(curPos); - vector.setLastSet(values.length - 1); - vector.setValueCount(values.length); - } - - /** Populate values for {@link FixedSizeListVector}. */ - public static void setVector(FixedSizeListVector vector, List... 
values) { - vector.allocateNewSafe(); - for (int i = 0; i < values.length; i++) { - if (values[i] != null) { - assertEquals(vector.getListSize(), values[i].size()); - } - } - - Types.MinorType type = Types.MinorType.INT; - vector.addOrGetVector(FieldType.nullable(type.getType())); - - IntVector dataVector = (IntVector) vector.getDataVector(); - dataVector.allocateNew(); - - // set underlying vectors - int curPos = 0; - for (int i = 0; i < values.length; i++) { - if (values[i] == null) { - BitVectorHelper.unsetBit(vector.getValidityBuffer(), i); - } else { - BitVectorHelper.setBit(vector.getValidityBuffer(), i); - for (int value : values[i]) { - dataVector.setSafe(curPos, value); - curPos += 1; - } - } - } - dataVector.setValueCount(curPos); - vector.setValueCount(values.length); - } - - /** Populate values for {@link StructVector}. */ - public static void setVector(StructVector vector, Map> values) { - vector.allocateNewSafe(); - - int valueCount = 0; - for (final Entry> entry : values.entrySet()) { - // Add the child - final IntVector child = - vector.addOrGet( - entry.getKey(), FieldType.nullable(MinorType.INT.getType()), IntVector.class); - - // Write the values to the child - child.allocateNew(); - final List v = entry.getValue(); - for (int i = 0; i < v.size(); i++) { - if (v.get(i) != null) { - child.set(i, v.get(i)); - vector.setIndexDefined(i); - } else { - child.setNull(i); - } - } - valueCount = Math.max(valueCount, v.size()); - } - vector.setValueCount(valueCount); - } - - /** Populate values for {@link ListViewVector}. */ - public static void setVector(ListViewVector vector, List... 
values) { - vector.allocateNewSafe(); - Types.MinorType type = Types.MinorType.INT; - vector.addOrGetVector(FieldType.nullable(type.getType())); - - IntVector dataVector = (IntVector) vector.getDataVector(); - dataVector.allocateNew(); - - // set underlying vectors - int curPos = 0; - for (int i = 0; i < values.length; i++) { - vector.getOffsetBuffer().setInt((long) i * BaseRepeatedValueViewVector.OFFSET_WIDTH, curPos); - if (values[i] == null) { - BitVectorHelper.unsetBit(vector.getValidityBuffer(), i); - } else { - BitVectorHelper.setBit(vector.getValidityBuffer(), i); - for (int value : values[i]) { - dataVector.setSafe(curPos, value); - curPos += 1; - } - } - vector - .getSizeBuffer() - .setInt((long) i * BaseRepeatedValueViewVector.SIZE_WIDTH, values[i].size()); - } - dataVector.setValueCount(curPos); - vector.setValueCount(values.length); - } - - /** Populate values for {@link ListViewVector}. */ - public static void setVector(LargeListViewVector vector, List... values) { - vector.allocateNewSafe(); - Types.MinorType type = Types.MinorType.INT; - vector.addOrGetVector(FieldType.nullable(type.getType())); - - IntVector dataVector = (IntVector) vector.getDataVector(); - dataVector.allocateNew(); - - // set underlying vectors - int curPos = 0; - for (int i = 0; i < values.length; i++) { - vector - .getOffsetBuffer() - .setInt((long) i * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH, curPos); - if (values[i] == null) { - BitVectorHelper.unsetBit(vector.getValidityBuffer(), i); - } else { - BitVectorHelper.setBit(vector.getValidityBuffer(), i); - for (int value : values[i]) { - dataVector.setSafe(curPos, value); - curPos += 1; - } - } - vector - .getSizeBuffer() - .setInt((long) i * BaseRepeatedValueViewVector.SIZE_WIDTH, values[i].size()); - } - dataVector.setValueCount(curPos); - vector.setValueCount(values.length); - } - - public static void setVector( - RunEndEncodedVector vector, List runEnds, List values) { - int runCount = runEnds.size(); - assert runCount 
== values.size(); - final FieldType valueType = FieldType.notNullable(MinorType.INT.getType()); - final FieldType runEndType = FieldType.notNullable(Types.MinorType.INT.getType()); - final Field valueField = new Field("value", valueType, null); - final Field runEndField = new Field("ree", runEndType, null); - vector.initializeChildrenFromFields(List.of(runEndField, valueField)); - - IntVector runEndsVector = (IntVector) vector.getRunEndsVector(); - runEndsVector.setValueCount(runCount); - for (int i = 0; i < runCount; i++) { - if (runEnds.get(i) == null) { - runEndsVector.setNull(i); - } else { - runEndsVector.set(i, runEnds.get(i)); - } - } - - IntVector valuesVector = (IntVector) vector.getValuesVector(); - valuesVector.setValueCount(runCount); - for (int i = 0; i < runCount; i++) { - if (runEnds.get(i) == null) { - valuesVector.setNull(i); - } else { - valuesVector.set(i, values.get(i)); - } - } - - if (runCount > 0) { - vector.setValueCount(runEnds.get(runCount - 1)); - } else { - vector.setValueCount(0); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java deleted file mode 100644 index 8f54a6e5d741f..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.types.pojo; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.File; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.nio.channels.SeekableByteChannel; -import java.nio.channels.WritableByteChannel; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.nio.file.StandardOpenOption; -import java.util.Collections; -import java.util.UUID; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.memory.util.hash.ArrowBufHasher; -import org.apache.arrow.vector.ExtensionTypeVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.ValueIterableVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.compare.Range; -import org.apache.arrow.vector.compare.RangeEqualsVisitor; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.ipc.ArrowFileReader; -import org.apache.arrow.vector.ipc.ArrowFileWriter; -import 
org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; -import org.apache.arrow.vector.util.VectorBatchAppender; -import org.apache.arrow.vector.validate.ValidateVectorVisitor; -import org.junit.jupiter.api.Test; - -public class TestExtensionType { - /** Test that a custom UUID type can be round-tripped through a temporary file. */ - @Test - public void roundtripUuid() throws IOException { - ExtensionTypeRegistry.register(new UuidType()); - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", new UuidType()))); - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - UUID u1 = UUID.randomUUID(); - UUID u2 = UUID.randomUUID(); - UuidVector vector = (UuidVector) root.getVector("a"); - vector.setValueCount(2); - vector.set(0, u1); - vector.set(1, u2); - root.setRowCount(2); - - final File file = File.createTempFile("uuidtest", ".arrow"); - try (final WritableByteChannel channel = - FileChannel.open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE); - final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) { - writer.start(); - writer.writeBatch(); - writer.end(); - } - - try (final SeekableByteChannel channel = - Files.newByteChannel(Paths.get(file.getAbsolutePath())); - final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { - reader.loadNextBatch(); - final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); - assertEquals(root.getSchema(), readerRoot.getSchema()); - - final Field field = readerRoot.getSchema().getFields().get(0); - final UuidType expectedType = new UuidType(); - assertEquals( - field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), - expectedType.extensionName()); - assertEquals( - field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), - expectedType.serialize()); - - 
final ExtensionTypeVector deserialized = - (ExtensionTypeVector) readerRoot.getFieldVectors().get(0); - assertEquals(vector.getValueCount(), deserialized.getValueCount()); - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(vector.isNull(i), deserialized.isNull(i)); - if (!vector.isNull(i)) { - assertEquals(vector.getObject(i), deserialized.getObject(i)); - } - } - } - } - } - - /** Test that a custom UUID type can be read as its underlying type. */ - @Test - public void readUnderlyingType() throws IOException { - ExtensionTypeRegistry.register(new UuidType()); - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("a", new UuidType()))); - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - UUID u1 = UUID.randomUUID(); - UUID u2 = UUID.randomUUID(); - UuidVector vector = (UuidVector) root.getVector("a"); - vector.setValueCount(2); - vector.set(0, u1); - vector.set(1, u2); - root.setRowCount(2); - - final File file = File.createTempFile("uuidtest", ".arrow"); - try (final WritableByteChannel channel = - FileChannel.open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE); - final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) { - writer.start(); - writer.writeBatch(); - writer.end(); - } - - ExtensionTypeRegistry.unregister(new UuidType()); - - try (final SeekableByteChannel channel = - Files.newByteChannel(Paths.get(file.getAbsolutePath())); - final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { - reader.loadNextBatch(); - final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); - assertEquals(1, readerRoot.getSchema().getFields().size()); - assertEquals("a", readerRoot.getSchema().getFields().get(0).getName()); - assertTrue( - readerRoot.getSchema().getFields().get(0).getType() - instanceof ArrowType.FixedSizeBinary); - assertEquals( - 16, - 
((ArrowType.FixedSizeBinary) readerRoot.getSchema().getFields().get(0).getType()) - .getByteWidth()); - - final Field field = readerRoot.getSchema().getFields().get(0); - final UuidType expectedType = new UuidType(); - assertEquals( - field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), - expectedType.extensionName()); - assertEquals( - field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), - expectedType.serialize()); - - final FixedSizeBinaryVector deserialized = - (FixedSizeBinaryVector) readerRoot.getFieldVectors().get(0); - assertEquals(vector.getValueCount(), deserialized.getValueCount()); - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(vector.isNull(i), deserialized.isNull(i)); - if (!vector.isNull(i)) { - final UUID uuid = vector.getObject(i); - final ByteBuffer bb = ByteBuffer.allocate(16); - bb.putLong(uuid.getMostSignificantBits()); - bb.putLong(uuid.getLeastSignificantBits()); - assertArrayEquals(bb.array(), deserialized.get(i)); - } - } - } - } - } - - @Test - public void testNullCheck() { - NullPointerException e = - assertThrows( - NullPointerException.class, - () -> { - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final ExtensionTypeVector vector = new UuidVector("uuid", allocator, null)) { - vector.getField(); - vector.allocateNewSafe(); - } - }); - assertTrue(e.getMessage().contains("underlyingVector cannot be null.")); - } - - /** Test that a custom Location type can be round-tripped through a temporary file. 
*/ - @Test - public void roundtripLocation() throws IOException { - ExtensionTypeRegistry.register(new LocationType()); - final Schema schema = - new Schema(Collections.singletonList(Field.nullable("location", new LocationType()))); - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { - LocationVector vector = (LocationVector) root.getVector("location"); - vector.allocateNew(); - vector.set(0, 34.073814f, -118.240784f); - vector.set(2, 37.768056f, -122.3875f); - vector.set(3, 40.739716f, -73.840782f); - vector.setValueCount(4); - root.setRowCount(4); - - final File file = File.createTempFile("locationtest", ".arrow"); - try (final WritableByteChannel channel = - FileChannel.open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE); - final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) { - writer.start(); - writer.writeBatch(); - writer.end(); - } - - try (final SeekableByteChannel channel = - Files.newByteChannel(Paths.get(file.getAbsolutePath())); - final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { - reader.loadNextBatch(); - final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); - assertEquals(root.getSchema(), readerRoot.getSchema()); - - final Field field = readerRoot.getSchema().getFields().get(0); - final LocationType expectedType = new LocationType(); - assertEquals( - field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), - expectedType.extensionName()); - assertEquals( - field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), - expectedType.serialize()); - - final ExtensionTypeVector deserialized = - (ExtensionTypeVector) readerRoot.getFieldVectors().get(0); - assertTrue(deserialized instanceof LocationVector); - assertEquals("location", deserialized.getName()); - StructVector deserStruct = (StructVector) deserialized.getUnderlyingVector(); - 
assertNotNull(deserStruct.getChild("Latitude")); - assertNotNull(deserStruct.getChild("Longitude")); - assertEquals(vector.getValueCount(), deserialized.getValueCount()); - for (int i = 0; i < vector.getValueCount(); i++) { - assertEquals(vector.isNull(i), deserialized.isNull(i)); - if (!vector.isNull(i)) { - assertEquals(vector.getObject(i), deserialized.getObject(i)); - } - } - } - } - } - - @Test - public void testVectorCompare() { - UuidType uuidType = new UuidType(); - ExtensionTypeRegistry.register(uuidType); - try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - UuidVector a1 = - (UuidVector) uuidType.getNewVector("a", FieldType.nullable(uuidType), allocator); - UuidVector a2 = - (UuidVector) uuidType.getNewVector("a", FieldType.nullable(uuidType), allocator); - UuidVector bb = - (UuidVector) uuidType.getNewVector("a", FieldType.nullable(uuidType), allocator)) { - UUID u1 = UUID.randomUUID(); - UUID u2 = UUID.randomUUID(); - - // Test out type and vector validation visitors for an ExtensionTypeVector - ValidateVectorVisitor validateVisitor = new ValidateVectorVisitor(); - validateVisitor.visit(a1, null); - - a1.setValueCount(2); - a1.set(0, u1); - a1.set(1, u2); - - a2.setValueCount(2); - a2.set(0, u1); - a2.set(1, u2); - - bb.setValueCount(2); - bb.set(0, u2); - bb.set(1, u1); - - Range range = new Range(0, 0, a1.getValueCount()); - RangeEqualsVisitor visitor = new RangeEqualsVisitor(a1, a2); - assertTrue(visitor.rangeEquals(range)); - - visitor = new RangeEqualsVisitor(a1, bb); - assertFalse(visitor.rangeEquals(range)); - - // Test out vector appender - VectorBatchAppender.batchAppend(a1, a2, bb); - assertEquals(6, a1.getValueCount()); - validateVisitor.visit(a1, null); - } - } - - static class UuidType extends ExtensionType { - - @Override - public ArrowType storageType() { - return new ArrowType.FixedSizeBinary(16); - } - - @Override - public String extensionName() { - return "uuid"; - } - - @Override - public boolean 
extensionEquals(ExtensionType other) { - return other instanceof UuidType; - } - - @Override - public ArrowType deserialize(ArrowType storageType, String serializedData) { - if (!storageType.equals(storageType())) { - throw new UnsupportedOperationException( - "Cannot construct UuidType from underlying type " + storageType); - } - return new UuidType(); - } - - @Override - public String serialize() { - return ""; - } - - @Override - public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { - return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16)); - } - } - - static class UuidVector extends ExtensionTypeVector - implements ValueIterableVector { - - public UuidVector( - String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) { - super(name, allocator, underlyingVector); - } - - @Override - public UUID getObject(int index) { - final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index)); - return new UUID(bb.getLong(), bb.getLong()); - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - return getUnderlyingVector().hashCode(index, hasher); - } - - public void set(int index, UUID uuid) { - ByteBuffer bb = ByteBuffer.allocate(16); - bb.putLong(uuid.getMostSignificantBits()); - bb.putLong(uuid.getLeastSignificantBits()); - getUnderlyingVector().set(index, bb.array()); - } - } - - static class LocationType extends ExtensionType { - - @Override - public ArrowType storageType() { - return Struct.INSTANCE; - } - - @Override - public String extensionName() { - return "location"; - } - - @Override - public boolean extensionEquals(ExtensionType other) { - return other instanceof LocationType; - } - - @Override - public ArrowType deserialize(ArrowType storageType, String serializedData) { - if (!storageType.equals(storageType())) { - throw new 
UnsupportedOperationException( - "Cannot construct LocationType from underlying type " + storageType); - } - return new LocationType(); - } - - @Override - public String serialize() { - return ""; - } - - @Override - public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) { - return new LocationVector(name, allocator); - } - } - - public static class LocationVector extends ExtensionTypeVector - implements ValueIterableVector> { - - private static StructVector buildUnderlyingVector(String name, BufferAllocator allocator) { - final StructVector underlyingVector = - new StructVector(name, allocator, FieldType.nullable(ArrowType.Struct.INSTANCE), null); - underlyingVector.addOrGet( - "Latitude", - FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), - Float4Vector.class); - underlyingVector.addOrGet( - "Longitude", - FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), - Float4Vector.class); - return underlyingVector; - } - - public LocationVector(String name, BufferAllocator allocator) { - super(name, allocator, buildUnderlyingVector(name, allocator)); - } - - @Override - public int hashCode(int index) { - return hashCode(index, null); - } - - @Override - public int hashCode(int index, ArrowBufHasher hasher) { - return getUnderlyingVector().hashCode(index, hasher); - } - - @Override - public java.util.Map getObject(int index) { - return getUnderlyingVector().getObject(index); - } - - public void set(int index, float latitude, float longitude) { - getUnderlyingVector().getChild("Latitude", Float4Vector.class).set(index, latitude); - getUnderlyingVector().getChild("Longitude", Float4Vector.class).set(index, longitude); - getUnderlyingVector().setIndexDefined(index); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java deleted file mode 100644 index 
88dedf8837763..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.types.pojo; - -import static org.apache.arrow.vector.types.pojo.Schema.METADATA_KEY; -import static org.apache.arrow.vector.types.pojo.Schema.METADATA_VALUE; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import org.apache.arrow.vector.types.pojo.ArrowType.Int; -import org.junit.jupiter.api.Test; - -public class TestField { - - private static Field field( - String name, boolean nullable, ArrowType type, Map metadata) { - return new Field(name, new FieldType(nullable, type, null, metadata), Collections.emptyList()); - } - - @Test - public void testMetadata() throws IOException { - Map metadata = new HashMap<>(1); - metadata.put("testKey", "testValue"); - - Schema schema = - new Schema(Collections.singletonList(field("a", false, new Int(8, true), metadata))); - - String json = schema.toJson(); - Schema actual = 
Schema.fromJSON(json); - - jsonContains( - json, - "\"" + METADATA_KEY + "\" : \"testKey\"", - "\"" + METADATA_VALUE + "\" : \"testValue\""); - - Map actualMetadata = actual.getFields().get(0).getMetadata(); - assertEquals(1, actualMetadata.size()); - assertEquals("testValue", actualMetadata.get("testKey")); - } - - private void jsonContains(String json, String... strings) { - for (String string : strings) { - assertTrue(json.contains(string), json + " contains " + string); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java deleted file mode 100644 index 33685c048e1d5..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.types.pojo; - -import static java.util.Arrays.asList; -import static org.apache.arrow.vector.types.pojo.Schema.METADATA_KEY; -import static org.apache.arrow.vector.types.pojo.Schema.METADATA_VALUE; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.HashMap; -import java.util.Map; -import org.apache.arrow.vector.types.DateUnit; -import org.apache.arrow.vector.types.FloatingPointPrecision; -import org.apache.arrow.vector.types.IntervalUnit; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.UnionMode; -import org.apache.arrow.vector.types.pojo.ArrowType.Binary; -import org.apache.arrow.vector.types.pojo.ArrowType.Bool; -import org.apache.arrow.vector.types.pojo.ArrowType.Date; -import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; -import org.apache.arrow.vector.types.pojo.ArrowType.Duration; -import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary; -import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; -import org.apache.arrow.vector.types.pojo.ArrowType.Int; -import org.apache.arrow.vector.types.pojo.ArrowType.Interval; -import org.apache.arrow.vector.types.pojo.ArrowType.List; -import org.apache.arrow.vector.types.pojo.ArrowType.Null; -import org.apache.arrow.vector.types.pojo.ArrowType.Struct; -import org.apache.arrow.vector.types.pojo.ArrowType.Time; -import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; -import org.apache.arrow.vector.types.pojo.ArrowType.Union; -import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; -import org.junit.jupiter.api.Test; - -public class TestSchema { - - private static Field field(String name, boolean nullable, ArrowType type, Field... 
children) { - return new Field(name, new FieldType(nullable, type, null, null), asList(children)); - } - - private static Field field(String name, ArrowType type, Field... children) { - return field(name, true, type, children); - } - - @Test - public void testComplex() throws IOException { - Schema schema = - new Schema( - asList( - field("a", false, new Int(8, true)), - field("b", new Struct(), field("c", new Int(16, true)), field("d", new Utf8())), - field("e", new List(), field(null, new Date(DateUnit.MILLISECOND))), - field("f", new FloatingPoint(FloatingPointPrecision.SINGLE)), - field("g", new Timestamp(TimeUnit.MILLISECOND, "UTC")), - field("h", new Timestamp(TimeUnit.MICROSECOND, null)), - field("i", new Interval(IntervalUnit.DAY_TIME)), - field("j", new ArrowType.Duration(TimeUnit.SECOND)))); - roundTrip(schema); - assertEquals( - "Schema, e: List, " - + "f: FloatingPoint(SINGLE), g: Timestamp(MILLISECOND, UTC), h: Timestamp(MICROSECOND, null), " - + "i: Interval(DAY_TIME), j: Duration(SECOND)>", - schema.toString()); - } - - @Test - public void testAll() throws IOException { - Schema schema = - new Schema( - asList( - field("a", false, new Null()), - field("b", new Struct(), field("ba", new Null())), - field("c", new List(), field("ca", new Null())), - field( - "d", new Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da", new Null())), - field("e", new Int(8, true)), - field("f", new FloatingPoint(FloatingPointPrecision.SINGLE)), - field("g", new Utf8()), - field("h", new Binary()), - field("i", new Bool()), - field("j", new Decimal(5, 5, 128)), - field("k", new Date(DateUnit.DAY)), - field("l", new Date(DateUnit.MILLISECOND)), - field("m", new Time(TimeUnit.SECOND, 32)), - field("n", new Time(TimeUnit.MILLISECOND, 32)), - field("o", new Time(TimeUnit.MICROSECOND, 64)), - field("p", new Time(TimeUnit.NANOSECOND, 64)), - field("q", new Timestamp(TimeUnit.MILLISECOND, "UTC")), - field("r", new Timestamp(TimeUnit.MICROSECOND, null)), - field("s", new 
Interval(IntervalUnit.DAY_TIME)), - field("t", new FixedSizeBinary(100)), - field("u", new Duration(TimeUnit.SECOND)), - field("v", new Duration(TimeUnit.MICROSECOND)))); - roundTrip(schema); - } - - @Test - public void testUnion() throws IOException { - Schema schema = - new Schema( - asList( - field( - "d", - new Union(UnionMode.Sparse, new int[] {1, 2, 3}), - field("da", new Null())))); - roundTrip(schema); - contains(schema, "Sparse"); - } - - @Test - public void testDate() throws IOException { - Schema schema = - new Schema( - asList(field("a", new Date(DateUnit.DAY)), field("b", new Date(DateUnit.MILLISECOND)))); - roundTrip(schema); - assertEquals("Schema", schema.toString()); - } - - @Test - public void testTime() throws IOException { - Schema schema = - new Schema( - asList( - field("a", new Time(TimeUnit.SECOND, 32)), - field("b", new Time(TimeUnit.MILLISECOND, 32)), - field("c", new Time(TimeUnit.MICROSECOND, 64)), - field("d", new Time(TimeUnit.NANOSECOND, 64)))); - roundTrip(schema); - assertEquals( - "Schema", - schema.toString()); - } - - @Test - public void testTS() throws IOException { - Schema schema = - new Schema( - asList( - field("a", new Timestamp(TimeUnit.SECOND, "UTC")), - field("b", new Timestamp(TimeUnit.MILLISECOND, "UTC")), - field("c", new Timestamp(TimeUnit.MICROSECOND, "UTC")), - field("d", new Timestamp(TimeUnit.NANOSECOND, "UTC")), - field("e", new Timestamp(TimeUnit.SECOND, null)), - field("f", new Timestamp(TimeUnit.MILLISECOND, null)), - field("g", new Timestamp(TimeUnit.MICROSECOND, null)), - field("h", new Timestamp(TimeUnit.NANOSECOND, null)))); - roundTrip(schema); - assertEquals( - "Schema", - schema.toString()); - } - - @Test - public void testInterval() throws IOException { - Schema schema = - new Schema( - asList( - field("a", new Interval(IntervalUnit.YEAR_MONTH)), - field("b", new Interval(IntervalUnit.DAY_TIME)))); - roundTrip(schema); - contains(schema, "YEAR_MONTH", "DAY_TIME"); - } - - @Test - public void 
testRoundTripDurationInterval() throws IOException { - Schema schema = - new Schema( - asList( - field("a", new Duration(TimeUnit.SECOND)), - field("b", new Duration(TimeUnit.MILLISECOND)), - field("c", new Duration(TimeUnit.MICROSECOND)), - field("d", new Duration(TimeUnit.NANOSECOND)))); - roundTrip(schema); - contains(schema, "SECOND", "MILLI", "MICRO", "NANO"); - } - - @Test - public void testFP() throws IOException { - Schema schema = - new Schema( - asList( - field("a", new FloatingPoint(FloatingPointPrecision.HALF)), - field("b", new FloatingPoint(FloatingPointPrecision.SINGLE)), - field("c", new FloatingPoint(FloatingPointPrecision.DOUBLE)))); - roundTrip(schema); - contains(schema, "HALF", "SINGLE", "DOUBLE"); - } - - @Test - public void testMetadata() throws IOException { - Map metadata = new HashMap<>(1); - metadata.put("testKey", "testValue"); - - java.util.List fields = - asList( - field("a", false, new Int(8, true)), - field("b", new Struct(), field("c", new Int(16, true)), field("d", new Utf8())), - field("e", new List(), field(null, new Date(DateUnit.MILLISECOND)))); - Schema schema = new Schema(fields, metadata); - roundTrip(schema); - contains( - schema, - "\"" + METADATA_KEY + "\" : \"testKey\"", - "\"" + METADATA_VALUE + "\" : \"testValue\""); - } - - @Test - public void testMessageSerialization() { - Schema schema = - new Schema( - asList( - field("a", false, new Null()), - field("b", new Struct(), field("ba", new Null())), - field("c", new List(), field("ca", new Null())), - field( - "d", new Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da", new Null())), - field("e", new Int(8, true)), - field("f", new FloatingPoint(FloatingPointPrecision.SINGLE)), - field("g", new Utf8()), - field("h", new Binary()), - field("i", new Bool()), - field("j", new Decimal(5, 5, 128)), - field("k", new Date(DateUnit.DAY)), - field("l", new Date(DateUnit.MILLISECOND)), - field("m", new Time(TimeUnit.SECOND, 32)), - field("n", new Time(TimeUnit.MILLISECOND, 
32)), - field("o", new Time(TimeUnit.MICROSECOND, 64)), - field("p", new Time(TimeUnit.NANOSECOND, 64)), - field("q", new Timestamp(TimeUnit.MILLISECOND, "UTC")), - field("r", new Timestamp(TimeUnit.MICROSECOND, null)), - field("s", new Interval(IntervalUnit.DAY_TIME)), - field("t", new FixedSizeBinary(100)), - field("u", new Duration(TimeUnit.SECOND)), - field("v", new Duration(TimeUnit.MICROSECOND)))); - roundTripMessage(schema); - } - - private void roundTrip(Schema schema) throws IOException { - String json = schema.toJson(); - Schema actual = Schema.fromJSON(json); - assertEquals(schema.toJson(), actual.toJson()); - assertEquals(schema, actual); - validateFieldsHashcode(schema.getFields(), actual.getFields()); - assertEquals(schema.hashCode(), actual.hashCode()); - } - - private void roundTripMessage(Schema schema) { - byte[] bytes = schema.serializeAsMessage(); - Schema actual = Schema.deserializeMessage(ByteBuffer.wrap(bytes)); - assertEquals(schema.toJson(), actual.toJson()); - assertEquals(schema, actual); - validateFieldsHashcode(schema.getFields(), actual.getFields()); - assertEquals(schema.hashCode(), actual.hashCode()); - } - - private void validateFieldsHashcode( - java.util.List schemaFields, java.util.List actualFields) { - assertEquals(schemaFields.size(), actualFields.size()); - if (schemaFields.size() == 0) { - return; - } - for (int i = 0; i < schemaFields.size(); i++) { - Field schemaField = schemaFields.get(i); - Field actualField = actualFields.get(i); - validateFieldsHashcode(schemaField.getChildren(), actualField.getChildren()); - validateHashCode(schemaField.getType(), actualField.getType()); - validateHashCode(schemaField, actualField); - } - } - - private void validateHashCode(Object o1, Object o2) { - assertEquals(o1, o2); - assertEquals(o1.hashCode(), o2.hashCode(), o1 + " == " + o2); - } - - private void contains(Schema schema, String... 
s) { - String json = schema.toJson(); - for (String string : s) { - assertTrue(json.contains(string), json + " contains " + string); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java b/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java deleted file mode 100644 index 37c38e9a64261..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.util; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.math.BigDecimal; -import java.math.BigInteger; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.junit.jupiter.api.Test; - -public class DecimalUtilityTest { - private static final BigInteger[] MAX_BIG_INT = - new BigInteger[] { - BigInteger.valueOf(10).pow(38).subtract(java.math.BigInteger.ONE), - java.math.BigInteger.valueOf(10).pow(76) - }; - private static final BigInteger[] MIN_BIG_INT = - new BigInteger[] { - MAX_BIG_INT[0].multiply(BigInteger.valueOf(-1)), - MAX_BIG_INT[1].multiply(BigInteger.valueOf(-1)) - }; - - @Test - public void testSetLongInDecimalArrowBuf() { - int[] byteLengths = new int[] {16, 32}; - for (int x = 0; x < 2; x++) { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(byteLengths[x]); ) { - int[] intValues = new int[] {Integer.MAX_VALUE, Integer.MIN_VALUE, 0}; - for (int val : intValues) { - buf.clear(); - DecimalUtility.writeLongToArrowBuf((long) val, buf, 0, byteLengths[x]); - BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); - BigDecimal expected = BigDecimal.valueOf(val); - assertEquals(expected, actual); - } - } - } - } - - @Test - public void testSetByteArrayInDecimalArrowBuf() { - int[] byteLengths = new int[] {16, 32}; - for (int x = 0; x < 2; x++) { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(byteLengths[x]); ) { - int[] intValues = new int[] {Integer.MAX_VALUE, Integer.MIN_VALUE, 0}; - for (int val : intValues) { - buf.clear(); - DecimalUtility.writeByteArrayToArrowBuf( - BigInteger.valueOf(val).toByteArray(), buf, 0, byteLengths[x]); - BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); - BigDecimal expected = BigDecimal.valueOf(val); - 
assertEquals(expected, actual); - } - - long[] longValues = new long[] {Long.MIN_VALUE, 0, Long.MAX_VALUE}; - for (long val : longValues) { - buf.clear(); - DecimalUtility.writeByteArrayToArrowBuf( - BigInteger.valueOf(val).toByteArray(), buf, 0, byteLengths[x]); - BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); - BigDecimal expected = BigDecimal.valueOf(val); - assertEquals(expected, actual); - } - - BigInteger[] decimals = - new BigInteger[] {MAX_BIG_INT[x], new BigInteger("0"), MIN_BIG_INT[x]}; - for (BigInteger val : decimals) { - buf.clear(); - DecimalUtility.writeByteArrayToArrowBuf(val.toByteArray(), buf, 0, byteLengths[x]); - BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); - BigDecimal expected = new BigDecimal(val); - assertEquals(expected, actual); - } - } - } - } - - @Test - public void testSetBigDecimalInDecimalArrowBuf() { - int[] byteLengths = new int[] {16, 32}; - for (int x = 0; x < 2; x++) { - try (BufferAllocator allocator = new RootAllocator(128); - ArrowBuf buf = allocator.buffer(byteLengths[x]); ) { - int[] intValues = new int[] {Integer.MAX_VALUE, Integer.MIN_VALUE, 0}; - for (int val : intValues) { - buf.clear(); - DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0, byteLengths[x]); - BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); - BigDecimal expected = BigDecimal.valueOf(val); - assertEquals(expected, actual); - } - - long[] longValues = new long[] {Long.MIN_VALUE, 0, Long.MAX_VALUE}; - for (long val : longValues) { - buf.clear(); - DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0, byteLengths[x]); - BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); - BigDecimal expected = BigDecimal.valueOf(val); - assertEquals(expected, actual); - } - - BigInteger[] decimals = - new BigInteger[] {MAX_BIG_INT[x], new BigInteger("0"), MIN_BIG_INT[x]}; 
- for (BigInteger val : decimals) { - buf.clear(); - DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(val), buf, 0, byteLengths[x]); - BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); - BigDecimal expected = new BigDecimal(val); - assertEquals(expected, actual); - } - } - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java deleted file mode 100644 index 7726c146ae0db..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.junit.jupiter.api.Test; - -/** Test cases for {@link DataSizeRoundingUtil}. 
*/ -public class TestDataSizeRoundingUtil { - - @Test - public void testRoundUpTo8MultipleInt() { - assertEquals(0, DataSizeRoundingUtil.roundUpTo8Multiple(0)); - assertEquals(16, DataSizeRoundingUtil.roundUpTo8Multiple(9)); - assertEquals(24, DataSizeRoundingUtil.roundUpTo8Multiple(20)); - assertEquals(128, DataSizeRoundingUtil.roundUpTo8Multiple(128)); - } - - @Test - public void testRoundUpTo8MultipleLong() { - assertEquals(0L, DataSizeRoundingUtil.roundUpTo8Multiple(0L)); - assertEquals(40L, DataSizeRoundingUtil.roundUpTo8Multiple(37L)); - assertEquals(32L, DataSizeRoundingUtil.roundUpTo8Multiple(29L)); - assertEquals(512L, DataSizeRoundingUtil.roundUpTo8Multiple(512L)); - } - - @Test - public void testRoundDownTo8MultipleInt() { - assertEquals(0, DataSizeRoundingUtil.roundDownTo8Multiple(0)); - assertEquals(16, DataSizeRoundingUtil.roundDownTo8Multiple(23)); - assertEquals(24, DataSizeRoundingUtil.roundDownTo8Multiple(27)); - assertEquals(128, DataSizeRoundingUtil.roundDownTo8Multiple(128)); - } - - @Test - public void testRoundDownTo8MultipleLong() { - assertEquals(0L, DataSizeRoundingUtil.roundDownTo8Multiple(0L)); - assertEquals(40L, DataSizeRoundingUtil.roundDownTo8Multiple(45L)); - assertEquals(32L, DataSizeRoundingUtil.roundDownTo8Multiple(39L)); - assertEquals(512L, DataSizeRoundingUtil.roundDownTo8Multiple(512L)); - } - - @Test - public void testDivideBy8CeilInt() { - assertEquals(0, DataSizeRoundingUtil.divideBy8Ceil(0)); - assertEquals(3, DataSizeRoundingUtil.divideBy8Ceil(23)); - assertEquals(5, DataSizeRoundingUtil.divideBy8Ceil(35)); - assertEquals(24, DataSizeRoundingUtil.divideBy8Ceil(192)); - } - - @Test - public void testDivideBy8CeilLong() { - assertEquals(0L, DataSizeRoundingUtil.divideBy8Ceil(0L)); - assertEquals(5L, DataSizeRoundingUtil.divideBy8Ceil(37L)); - assertEquals(10L, DataSizeRoundingUtil.divideBy8Ceil(73L)); - assertEquals(25L, DataSizeRoundingUtil.divideBy8Ceil(200L)); - } -} diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java deleted file mode 100644 index 657254d10822d..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; - -import java.nio.charset.StandardCharsets; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.memory.util.ArrowBufPointer; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link ElementAddressableVectorIterator}. 
*/ -public class TestElementAddressableVectorIterator { - - private final int VECTOR_LENGTH = 100; - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testIterateIntVector() { - try (IntVector intVector = new IntVector("", allocator)) { - intVector.allocateNew(VECTOR_LENGTH); - intVector.setValueCount(VECTOR_LENGTH); - - // prepare data in sorted order - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i == 0) { - intVector.setNull(i); - } else { - intVector.set(i, i); - } - } - - // iterate - ElementAddressableVectorIterator it = - new ElementAddressableVectorIterator<>(intVector); - int index = 0; - while (it.hasNext()) { - ArrowBufPointer pt; - - if (index % 2 == 0) { - // use populated pointer. - pt = new ArrowBufPointer(); - it.next(pt); - } else { - // use iterator inner pointer - pt = it.next(); - } - if (index == 0) { - assertNull(pt.getBuf()); - } else { - assertEquals(index, pt.getBuf().getInt(pt.getOffset())); - } - index += 1; - } - } - } - - @Test - public void testIterateVarCharVector() { - try (VarCharVector strVector = new VarCharVector("", allocator)) { - strVector.allocateNew(VECTOR_LENGTH * 10, VECTOR_LENGTH); - strVector.setValueCount(VECTOR_LENGTH); - - // prepare data in sorted order - for (int i = 0; i < VECTOR_LENGTH; i++) { - if (i == 0) { - strVector.setNull(i); - } else { - strVector.set(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); - } - } - - // iterate - ElementAddressableVectorIterator it = - new ElementAddressableVectorIterator<>(strVector); - int index = 0; - while (it.hasNext()) { - ArrowBufPointer pt; - - if (index % 2 == 0) { - // use populated pointer. 
- pt = new ArrowBufPointer(); - it.next(pt); - } else { - // use iterator inner pointer - pt = it.next(); - } - - if (index == 0) { - assertNull(pt.getBuf()); - } else { - String expected = String.valueOf(index); - byte[] actual = new byte[expected.length()]; - assertEquals(expected.length(), pt.getLength()); - - pt.getBuf().getBytes(pt.getOffset(), actual); - assertEquals(expected, new String(actual, StandardCharsets.UTF_8)); - } - index += 1; - } - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java deleted file mode 100644 index f21dc70e6d7a8..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.util; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.Collection; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestMapWithOrdinal { - - private MapWithOrdinal map; - - @BeforeEach - public void setUp() { - map = new MapWithOrdinalImpl<>(); - } - - @Test - public void testGetByOrdinal() { - map.put("key0", "val0", true); - assertEquals("val0", map.getByOrdinal(0)); - - map.put("key1", "val1", true); - assertEquals("val1", map.getByOrdinal(1)); - assertEquals("val0", map.getByOrdinal(0)); - } - - @Test - public void testGetByKey() { - map.put("key0", "val0", true); - assertEquals("val0", map.get("key0")); - - map.put("key1", "val1", true); - assertEquals("val1", map.get("key1")); - assertEquals("val0", map.get("key0")); - } - - @Test - public void testInvalidOrdinal() { - map.put("key0", "val0", true); - assertNull(map.getByOrdinal(1)); - - map.removeAll("key0"); - assertNull(map.getByOrdinal(0)); - } - - @Test - public void testInvalidKey() { - MapWithOrdinalImpl map = new MapWithOrdinalImpl<>(); - map.put("key0", "val0", true); - assertNull(map.get("fake_key")); - - map.removeAll("key0"); - assertNull(map.get("key0")); - } - - @Test - public void testValues() { - map.put("key0", "val0", true); - map.put("key1", "val1", true); - - Collection values = map.values(); - assertTrue(values.contains("val0")); - assertTrue(values.contains("val1")); - - map.put("key1", "new_val1", true); - values = map.values(); - assertTrue(values.contains("val0")); - assertTrue(values.contains("new_val1")); - assertFalse(values.contains("val1")); - - map.removeAll("key0"); - assertTrue(values.contains("new_val1")); - assertFalse(values.contains("val0")); - } -} diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java deleted file mode 100644 index cdaa5f3fa84d8..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.util; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.junit.jupiter.api.Test; - -public class TestMultiMapWithOrdinal { - - @Test - public void test() { - MultiMapWithOrdinal map = new MultiMapWithOrdinal<>(); - - map.put("x", "1", false); - assertEquals(1, map.size()); - map.remove("x", "1"); - assertTrue(map.isEmpty()); - map.put("x", "1", false); - map.put("x", "2", false); - map.put("y", "0", false); - assertEquals(3, map.size()); - assertEquals(2, map.getAll("x").size()); - assertEquals("1", map.getAll("x").stream().findFirst().get()); - assertEquals("1", map.getByOrdinal(0)); - assertEquals("2", map.getByOrdinal(1)); - assertEquals("0", map.getByOrdinal(2)); - assertTrue(map.remove("x", "1")); - assertFalse(map.remove("x", "1")); - assertEquals("0", map.getByOrdinal(0)); - assertEquals(2, map.size()); - map.put("x", "3", true); - assertEquals(1, map.getAll("x").size()); - assertEquals("3", map.getAll("x").stream().findFirst().get()); - map.put("z", "4", false); - assertEquals(3, map.size()); - map.put("z", "5", false); - map.put("z", "6", false); - assertEquals(5, map.size()); - map.removeAll("z"); - assertEquals(2, map.size()); - assertFalse(map.containsKey("z")); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java deleted file mode 100644 index 50d9504fcc8f6..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotSame; -import static org.junit.jupiter.api.Assertions.assertSame; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Base64; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BaseValueVector; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestReusableByteArray { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - // Permit allocating 4 vectors of max size. 
- allocator = new RootAllocator(4 * BaseValueVector.MAX_ALLOCATION_SIZE); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testSetByteArrayRepeatedly() { - ReusableByteArray byteArray = new ReusableByteArray(); - try (ArrowBuf workingBuf = allocator.buffer(100)) { - final String str = "test"; - workingBuf.setBytes(0, str.getBytes(StandardCharsets.UTF_8)); - byteArray.set(workingBuf, 0, str.getBytes(StandardCharsets.UTF_8).length); - assertEquals(str.getBytes(StandardCharsets.UTF_8).length, byteArray.getLength()); - assertArrayEquals( - str.getBytes(StandardCharsets.UTF_8), - Arrays.copyOfRange(byteArray.getBuffer(), 0, (int) byteArray.getLength())); - assertEquals( - Base64.getEncoder().encodeToString(str.getBytes(StandardCharsets.UTF_8)), - byteArray.toString()); - assertEquals(new ReusableByteArray(str.getBytes(StandardCharsets.UTF_8)), byteArray); - assertEquals( - new ReusableByteArray(str.getBytes(StandardCharsets.UTF_8)).hashCode(), - byteArray.hashCode()); - - // Test a longer string. Should require reallocation. - final String str2 = "test_longer"; - byte[] oldBuffer = byteArray.getBuffer(); - workingBuf.clear(); - workingBuf.setBytes(0, str2.getBytes(StandardCharsets.UTF_8)); - byteArray.set(workingBuf, 0, str2.getBytes(StandardCharsets.UTF_8).length); - assertEquals(str2.getBytes(StandardCharsets.UTF_8).length, byteArray.getLength()); - assertArrayEquals( - str2.getBytes(StandardCharsets.UTF_8), - Arrays.copyOfRange(byteArray.getBuffer(), 0, (int) byteArray.getLength())); - assertEquals( - Base64.getEncoder().encodeToString(str2.getBytes(StandardCharsets.UTF_8)), - byteArray.toString()); - assertEquals(new ReusableByteArray(str2.getBytes(StandardCharsets.UTF_8)), byteArray); - assertEquals( - new ReusableByteArray(str2.getBytes(StandardCharsets.UTF_8)).hashCode(), - byteArray.hashCode()); - - // Verify reallocation needed. 
- assertNotSame(oldBuffer, byteArray.getBuffer()); - assertTrue(byteArray.getBuffer().length > oldBuffer.length); - - // Test writing a shorter string. Should not require reallocation. - final String str3 = "short"; - oldBuffer = byteArray.getBuffer(); - workingBuf.clear(); - workingBuf.setBytes(0, str3.getBytes(StandardCharsets.UTF_8)); - byteArray.set(workingBuf, 0, str3.getBytes(StandardCharsets.UTF_8).length); - assertEquals(str3.getBytes(StandardCharsets.UTF_8).length, byteArray.getLength()); - assertArrayEquals( - str3.getBytes(StandardCharsets.UTF_8), - Arrays.copyOfRange(byteArray.getBuffer(), 0, (int) byteArray.getLength())); - assertEquals( - Base64.getEncoder().encodeToString(str3.getBytes(StandardCharsets.UTF_8)), - byteArray.toString()); - assertEquals(new ReusableByteArray(str3.getBytes(StandardCharsets.UTF_8)), byteArray); - assertEquals( - new ReusableByteArray(str3.getBytes(StandardCharsets.UTF_8)).hashCode(), - byteArray.hashCode()); - - // Verify reallocation was not needed. - assertSame(oldBuffer, byteArray.getBuffer()); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java deleted file mode 100644 index 5d3de5d73a26f..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import static java.util.Arrays.asList; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.io.IOException; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.jupiter.api.Test; - -public class TestSchemaUtil { - - private static Field field(String name, boolean nullable, ArrowType type, Field... 
children) { - return new Field(name, new FieldType(nullable, type, null, null), asList(children)); - } - - @Test - public void testSerializationAndDeserialization() throws IOException { - Schema schema = - new Schema( - asList( - field("a", false, new ArrowType.Null()), - field("b", true, new ArrowType.Utf8()), - field("c", true, new ArrowType.Binary()))); - - byte[] serialized = SchemaUtility.serialize(schema); - Schema deserialized = SchemaUtility.deserialize(serialized, new RootAllocator(Long.MAX_VALUE)); - assertEquals(schema, deserialized); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java deleted file mode 100644 index a7a4035550c21..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.util; - -import static org.apache.arrow.vector.util.Validator.equalEnough; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.junit.jupiter.api.Test; - -public class TestValidator { - - @Test - public void testFloatComp() { - assertTrue(equalEnough(912.4140000000002F, 912.414F)); - assertTrue(equalEnough(912.4140000000002D, 912.414D)); - assertTrue(equalEnough(912.414F, 912.4140000000002F)); - assertTrue(equalEnough(912.414D, 912.4140000000002D)); - assertFalse(equalEnough(912.414D, 912.4140001D)); - assertFalse(equalEnough(null, 912.414D)); - assertTrue(equalEnough((Float) null, null)); - assertTrue(equalEnough((Double) null, null)); - assertFalse(equalEnough(912.414D, null)); - assertFalse(equalEnough(Double.MAX_VALUE, Double.MIN_VALUE)); - assertFalse(equalEnough(Double.MIN_VALUE, Double.MAX_VALUE)); - assertTrue(equalEnough(Double.MAX_VALUE, Double.MAX_VALUE)); - assertTrue(equalEnough(Double.MIN_VALUE, Double.MIN_VALUE)); - assertTrue(equalEnough(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY)); - assertFalse(equalEnough(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY)); - assertTrue(equalEnough(Double.NaN, Double.NaN)); - assertFalse(equalEnough(1.0, Double.NaN)); - assertFalse(equalEnough(Float.MAX_VALUE, Float.MIN_VALUE)); - assertFalse(equalEnough(Float.MIN_VALUE, Float.MAX_VALUE)); - assertTrue(equalEnough(Float.MAX_VALUE, Float.MAX_VALUE)); - assertTrue(equalEnough(Float.MIN_VALUE, Float.MIN_VALUE)); - assertTrue(equalEnough(Float.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY)); - assertFalse(equalEnough(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY)); - assertTrue(equalEnough(Float.NaN, Float.NaN)); - assertFalse(equalEnough(1.0F, Float.NaN)); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java 
deleted file mode 100644 index 19eafd1b20197..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java +++ /dev/null @@ -1,932 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.compare.Range; -import org.apache.arrow.vector.compare.RangeEqualsVisitor; -import org.apache.arrow.vector.compare.TypeEqualsVisitor; -import 
org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.holders.NullableBigIntHolder; -import org.apache.arrow.vector.holders.NullableFloat4Holder; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link VectorAppender}. */ -public class TestVectorAppender { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - // Permit allocating 4 vectors of max size. 
- allocator = new RootAllocator(4 * BaseValueVector.MAX_ALLOCATION_SIZE); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testAppendFixedWidthVector() { - final int length1 = 10; - final int length2 = 5; - try (IntVector target = new IntVector("", allocator); - IntVector delta = new IntVector("", allocator)) { - - target.allocateNew(length1); - delta.allocateNew(length2); - - ValueVectorDataPopulator.setVector(target, 0, 1, 2, 3, 4, 5, 6, null, 8, 9); - ValueVectorDataPopulator.setVector(delta, null, 11, 12, 13, 14); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - assertEquals(length1 + length2, target.getValueCount()); - - try (IntVector expected = new IntVector("expected", allocator)) { - expected.allocateNew(); - ValueVectorDataPopulator.setVector( - expected, 0, 1, 2, 3, 4, 5, 6, null, 8, 9, null, 11, 12, 13, 14); - assertVectorsEqual(expected, target); - } - } - } - - @Test - public void testAppendBitVector() { - final int length1 = 10; - final int length2 = 5; - try (BitVector target = new BitVector("", allocator); - BitVector delta = new BitVector("", allocator)) { - - target.allocateNew(length1); - delta.allocateNew(length2); - - ValueVectorDataPopulator.setVector(target, 0, 1, 0, 1, 0, 1, 0, null, 0, 1); - ValueVectorDataPopulator.setVector(delta, null, 1, 1, 0, 0); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - assertEquals(length1 + length2, target.getValueCount()); - - try (BitVector expected = new BitVector("expected", allocator)) { - expected.allocateNew(); - ValueVectorDataPopulator.setVector( - expected, 0, 1, 0, 1, 0, 1, 0, null, 0, 1, null, 1, 1, 0, 0); - assertVectorsEqual(expected, target); - } - } - } - - @Test - public void testAppendEmptyFixedWidthVector() { - try (IntVector target = new IntVector("", allocator); - IntVector delta = new IntVector("", allocator)) { - - 
ValueVectorDataPopulator.setVector(target, 0, 1, 2, 3, 4, 5, 6, null, 8, 9); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - assertEquals(10, target.getValueCount()); - - try (IntVector expected = new IntVector("expected", allocator)) { - ValueVectorDataPopulator.setVector(expected, 0, 1, 2, 3, 4, 5, 6, null, 8, 9); - assertVectorsEqual(expected, target); - } - } - } - - @Test - public void testAppendVariableWidthVector() { - final int length1 = 10; - final int length2 = 5; - try (VarCharVector target = new VarCharVector("", allocator); - VarCharVector delta = new VarCharVector("", allocator)) { - - target.allocateNew(5, length1); - delta.allocateNew(5, length2); - - ValueVectorDataPopulator.setVector( - target, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9"); - ValueVectorDataPopulator.setVector(delta, "a10", "a11", "a12", "a13", null); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - try (VarCharVector expected = new VarCharVector("expected", allocator)) { - expected.allocateNew(); - ValueVectorDataPopulator.setVector( - expected, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9", "a10", "a11", - "a12", "a13", null); - assertVectorsEqual(expected, target); - } - } - } - - @Test - public void testAppendEmptyVariableWidthVector() { - try (VarCharVector target = new VarCharVector("", allocator); - VarCharVector delta = new VarCharVector("", allocator)) { - - ValueVectorDataPopulator.setVector( - target, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9"); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - try (VarCharVector expected = new VarCharVector("expected", allocator)) { - ValueVectorDataPopulator.setVector( - expected, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9"); - assertVectorsEqual(expected, target); - } - } - } - - @Test - public void 
testAppendLargeAndSmallVariableVectorsWithinLimit() { - int sixteenthOfMaxAllocation = Math.toIntExact(BaseValueVector.MAX_ALLOCATION_SIZE / 16); - try (VarCharVector target = makeVarCharVec(1, sixteenthOfMaxAllocation); - VarCharVector delta = makeVarCharVec(sixteenthOfMaxAllocation, 1)) { - new VectorAppender(delta).visit(target, null); - new VectorAppender(target).visit(delta, null); - } - } - - private VarCharVector makeVarCharVec(int numElements, int bytesPerElement) { - VarCharVector v = new VarCharVector("text", allocator); - v.allocateNew((long) numElements * bytesPerElement, numElements); - for (int i = 0; i < numElements; i++) { - String s = String.join("", Collections.nCopies(bytesPerElement, "a")); - v.setSafe(i, s.getBytes(StandardCharsets.US_ASCII)); - } - v.setValueCount(numElements); - return v; - } - - @Test - public void testAppendLargeVariableWidthVector() { - final int length1 = 5; - final int length2 = 10; - try (LargeVarCharVector target = new LargeVarCharVector("", allocator); - LargeVarCharVector delta = new LargeVarCharVector("", allocator)) { - - target.allocateNew(5, length1); - delta.allocateNew(5, length2); - - ValueVectorDataPopulator.setVector(target, "a0", null, "a2", "a3", null); - ValueVectorDataPopulator.setVector( - delta, "a5", "a6", "a7", null, null, "a10", "a11", "a12", "a13", null); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - try (LargeVarCharVector expected = new LargeVarCharVector("expected", allocator)) { - expected.allocateNew(); - ValueVectorDataPopulator.setVector( - expected, "a0", null, "a2", "a3", null, "a5", "a6", "a7", null, null, "a10", "a11", - "a12", "a13", null); - assertVectorsEqual(expected, target); - } - } - } - - @Test - public void testAppendEmptyLargeVariableWidthVector() { - try (LargeVarCharVector target = new LargeVarCharVector("", allocator); - LargeVarCharVector delta = new LargeVarCharVector("", allocator)) { - - 
ValueVectorDataPopulator.setVector(target, "a0", null, "a2", "a3", null); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - try (LargeVarCharVector expected = new LargeVarCharVector("expected", allocator)) { - ValueVectorDataPopulator.setVector(expected, "a0", null, "a2", "a3", null); - assertVectorsEqual(expected, target); - } - } - } - - @Test - public void testAppendListVector() { - final int length1 = 5; - final int length2 = 2; - try (ListVector target = ListVector.empty("target", allocator); - ListVector delta = ListVector.empty("delta", allocator)) { - - target.allocateNew(); - ValueVectorDataPopulator.setVector( - target, - Arrays.asList(0, 1), - Arrays.asList(2, 3), - null, - Arrays.asList(6, 7), - Arrays.asList(8, 9)); - assertEquals(length1, target.getValueCount()); - - delta.allocateNew(); - ValueVectorDataPopulator.setVector( - delta, Arrays.asList(10, 11, 12, 13, 14), Arrays.asList(15, 16, 17, 18, 19)); - assertEquals(length2, delta.getValueCount()); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - assertEquals(7, target.getValueCount()); - - List expected = Arrays.asList(0, 1); - assertEquals(expected, target.getObject(0)); - - expected = Arrays.asList(2, 3); - assertEquals(expected, target.getObject(1)); - - assertTrue(target.isNull(2)); - - expected = Arrays.asList(6, 7); - assertEquals(expected, target.getObject(3)); - - expected = Arrays.asList(8, 9); - assertEquals(expected, target.getObject(4)); - - expected = Arrays.asList(10, 11, 12, 13, 14); - assertEquals(expected, target.getObject(5)); - - expected = Arrays.asList(15, 16, 17, 18, 19); - assertEquals(expected, target.getObject(6)); - } - } - - @Test - public void testAppendEmptyListVector() { - try (ListVector target = ListVector.empty("target", allocator); - ListVector delta = ListVector.empty("delta", allocator)) { - // populate target with data - ValueVectorDataPopulator.setVector( - target, 
Arrays.asList(0, 1), Arrays.asList(2, 3), null, Arrays.asList(6, 7)); - assertEquals(4, target.getValueCount()); - - // leave delta vector empty and unallocated - delta.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - // verify delta vector has original data - assertEquals(4, target.getValueCount()); - - List expected = Arrays.asList(0, 1); - assertEquals(expected, target.getObject(0)); - - expected = Arrays.asList(2, 3); - assertEquals(expected, target.getObject(1)); - - assertTrue(target.isNull(2)); - - expected = Arrays.asList(6, 7); - assertEquals(expected, target.getObject(3)); - } - } - - @Test - public void testAppendFixedSizeListVector() { - try (FixedSizeListVector target = FixedSizeListVector.empty("target", 5, allocator); - FixedSizeListVector delta = FixedSizeListVector.empty("delta", 5, allocator)) { - - target.allocateNew(); - ValueVectorDataPopulator.setVector(target, Arrays.asList(0, 1, 2, 3, 4), null); - assertEquals(2, target.getValueCount()); - - delta.allocateNew(); - ValueVectorDataPopulator.setVector( - delta, Arrays.asList(10, 11, 12, 13, 14), Arrays.asList(15, 16, 17, 18, 19)); - assertEquals(2, delta.getValueCount()); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - assertEquals(4, target.getValueCount()); - - assertEquals(Arrays.asList(0, 1, 2, 3, 4), target.getObject(0)); - assertTrue(target.isNull(1)); - assertEquals(Arrays.asList(10, 11, 12, 13, 14), target.getObject(2)); - assertEquals(Arrays.asList(15, 16, 17, 18, 19), target.getObject(3)); - } - } - - @Test - public void testAppendEmptyFixedSizeListVector() { - try (FixedSizeListVector target = FixedSizeListVector.empty("target", 5, allocator); - FixedSizeListVector delta = FixedSizeListVector.empty("delta", 5, allocator)) { - - ValueVectorDataPopulator.setVector(target, Arrays.asList(0, 1, 2, 3, 4), null); - assertEquals(2, 
target.getValueCount()); - - // leave delta vector empty and unallocated - delta.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - assertEquals(2, target.getValueCount()); - - assertEquals(Arrays.asList(0, 1, 2, 3, 4), target.getObject(0)); - assertTrue(target.isNull(1)); - } - } - - @Test - public void testAppendEmptyLargeListVector() { - try (LargeListVector target = LargeListVector.empty("target", allocator); - LargeListVector delta = LargeListVector.empty("delta", allocator)) { - - ValueVectorDataPopulator.setVector(target, Arrays.asList(0, 1, 2, 3, 4), null); - assertEquals(2, target.getValueCount()); - - // leave delta vector empty and unallocated - delta.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - assertEquals(2, target.getValueCount()); - - assertEquals(Arrays.asList(0, 1, 2, 3, 4), target.getObject(0)); - assertTrue(target.isNull(1)); - } - } - - @Test - public void testAppendStructVector() { - final int length1 = 10; - final int length2 = 5; - try (final StructVector target = StructVector.empty("target", allocator); - final StructVector delta = StructVector.empty("delta", allocator)) { - - IntVector targetChild1 = - target.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - VarCharVector targetChild2 = - target.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); - targetChild1.allocateNew(); - targetChild2.allocateNew(); - ValueVectorDataPopulator.setVector(targetChild1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9); - ValueVectorDataPopulator.setVector( - targetChild2, "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9"); - target.setValueCount(length1); - - IntVector deltaChild1 = - delta.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - 
VarCharVector deltaChild2 = - delta.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); - deltaChild1.allocateNew(); - deltaChild2.allocateNew(); - ValueVectorDataPopulator.setVector(deltaChild1, 10, 11, 12, null, 14); - ValueVectorDataPopulator.setVector(deltaChild2, "a10", "a11", "a12", "a13", "a14"); - delta.setValueCount(length2); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - assertEquals(length1 + length2, target.getValueCount()); - - try (IntVector expected1 = new IntVector("expected1", allocator); - VarCharVector expected2 = new VarCharVector("expected2", allocator)) { - expected1.allocateNew(); - expected2.allocateNew(); - - ValueVectorDataPopulator.setVector( - expected1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9, 10, 11, 12, null, 14); - ValueVectorDataPopulator.setVector( - expected2, "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9", "a10", "a11", - "a12", "a13", "a14"); - - assertVectorsEqual(expected1, target.getChild("f0")); - assertVectorsEqual(expected2, target.getChild("f1")); - } - } - } - - @Test - public void testAppendEmptyStructVector() { - try (final StructVector target = StructVector.empty("target", allocator); - final StructVector delta = StructVector.empty("delta", allocator)) { - - IntVector targetChild1 = - target.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - VarCharVector targetChild2 = - target.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); - ValueVectorDataPopulator.setVector(targetChild1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9); - ValueVectorDataPopulator.setVector( - targetChild2, "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9"); - target.setValueCount(10); - - // leave delta vector fields empty and unallocated - delta.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - delta.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); - 
- VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - assertEquals(10, target.getValueCount()); - - try (IntVector expected1 = new IntVector("expected1", allocator); - VarCharVector expected2 = new VarCharVector("expected2", allocator)) { - ValueVectorDataPopulator.setVector(expected1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9); - ValueVectorDataPopulator.setVector( - expected2, "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9"); - - assertVectorsEqual(expected1, target.getChild("f0")); - assertVectorsEqual(expected2, target.getChild("f1")); - } - } - } - - @Test - public void testAppendUnionVector() { - final int length1 = 10; - final int length2 = 5; - - try (final UnionVector target = UnionVector.empty("target", allocator); - final UnionVector delta = UnionVector.empty("delta", allocator)) { - - // alternating ints and big ints - target.setType(0, Types.MinorType.INT); - target.setType(1, Types.MinorType.BIGINT); - target.setType(2, Types.MinorType.INT); - target.setType(3, Types.MinorType.BIGINT); - target.setType(4, Types.MinorType.INT); - target.setType(5, Types.MinorType.BIGINT); - target.setType(6, Types.MinorType.INT); - target.setType(7, Types.MinorType.BIGINT); - target.setType(8, Types.MinorType.INT); - target.setType(9, Types.MinorType.BIGINT); - target.setType(10, Types.MinorType.INT); - target.setType(11, Types.MinorType.BIGINT); - target.setType(12, Types.MinorType.INT); - target.setType(13, Types.MinorType.BIGINT); - target.setType(14, Types.MinorType.INT); - target.setType(15, Types.MinorType.BIGINT); - target.setType(16, Types.MinorType.INT); - target.setType(17, Types.MinorType.BIGINT); - target.setType(18, Types.MinorType.INT); - target.setType(19, Types.MinorType.BIGINT); - - IntVector targetIntVec = target.getIntVector(); - targetIntVec.allocateNew(); - ValueVectorDataPopulator.setVector( - targetIntVec, - 0, - null, - 1, - null, - 2, - null, - 3, - null, - 4, - null, - 5, - null, - 6, - null, - 7, - null, 
- 8, - null, - 9, - null); - assertEquals(length1 * 2, targetIntVec.getValueCount()); - - BigIntVector targetBigIntVec = target.getBigIntVector(); - targetBigIntVec.allocateNew(); - ValueVectorDataPopulator.setVector( - targetBigIntVec, - null, - 0L, - null, - 1L, - null, - 2L, - null, - 3L, - null, - 4L, - null, - 5L, - null, - 6L, - null, - 7L, - null, - 8L, - null, - 9L); - assertEquals(length1 * 2, targetBigIntVec.getValueCount()); - - target.setValueCount(length1 * 2); - - // populate the delta vector - delta.setType(0, Types.MinorType.FLOAT4); - delta.setType(1, Types.MinorType.FLOAT4); - delta.setType(2, Types.MinorType.FLOAT4); - delta.setType(3, Types.MinorType.FLOAT4); - delta.setType(4, Types.MinorType.FLOAT4); - - Float4Vector deltaFloatVector = delta.getFloat4Vector(); - deltaFloatVector.allocateNew(); - ValueVectorDataPopulator.setVector(deltaFloatVector, 10f, 11f, 12f, 13f, 14f); - assertEquals(length2, deltaFloatVector.getValueCount()); - delta.setValueCount(length2); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - assertEquals(length1 * 2 + length2, target.getValueCount()); - - for (int i = 0; i < length1; i++) { - Object intObj = target.getObject(i * 2); - assertTrue(intObj instanceof Integer); - assertEquals(i, ((Integer) intObj).intValue()); - - Object longObj = target.getObject(i * 2 + 1); - assertTrue(longObj instanceof Long); - assertEquals(i, ((Long) longObj).longValue()); - } - - for (int i = 0; i < length2; i++) { - Object floatObj = target.getObject(length1 * 2 + i); - assertTrue(floatObj instanceof Float); - assertEquals(i + length1, ((Float) floatObj).intValue()); - } - } - } - - @Test - public void testAppendEmptyUnionVector() { - final int length1 = 10; - - try (final UnionVector target = UnionVector.empty("target", allocator); - final UnionVector delta = UnionVector.empty("delta", allocator)) { - - // alternating ints and big ints - target.setType(0, Types.MinorType.INT); - 
target.setType(1, Types.MinorType.BIGINT); - target.setType(2, Types.MinorType.INT); - target.setType(3, Types.MinorType.BIGINT); - target.setType(4, Types.MinorType.INT); - target.setType(5, Types.MinorType.BIGINT); - target.setType(6, Types.MinorType.INT); - target.setType(7, Types.MinorType.BIGINT); - target.setType(8, Types.MinorType.INT); - target.setType(9, Types.MinorType.BIGINT); - target.setType(10, Types.MinorType.INT); - target.setType(11, Types.MinorType.BIGINT); - target.setType(12, Types.MinorType.INT); - target.setType(13, Types.MinorType.BIGINT); - target.setType(14, Types.MinorType.INT); - target.setType(15, Types.MinorType.BIGINT); - target.setType(16, Types.MinorType.INT); - target.setType(17, Types.MinorType.BIGINT); - target.setType(18, Types.MinorType.INT); - target.setType(19, Types.MinorType.BIGINT); - - IntVector targetIntVec = target.getIntVector(); - ValueVectorDataPopulator.setVector( - targetIntVec, - 0, - null, - 1, - null, - 2, - null, - 3, - null, - 4, - null, - 5, - null, - 6, - null, - 7, - null, - 8, - null, - 9, - null); - assertEquals(length1 * 2, targetIntVec.getValueCount()); - - BigIntVector targetBigIntVec = target.getBigIntVector(); - ValueVectorDataPopulator.setVector( - targetBigIntVec, - null, - 0L, - null, - 1L, - null, - 2L, - null, - 3L, - null, - 4L, - null, - 5L, - null, - 6L, - null, - 7L, - null, - 8L, - null, - 9L); - assertEquals(length1 * 2, targetBigIntVec.getValueCount()); - - target.setValueCount(length1 * 2); - - // initialize the delta vector but leave it empty and unallocated - delta.setType(0, Types.MinorType.FLOAT4); - delta.setType(1, Types.MinorType.FLOAT4); - delta.setType(2, Types.MinorType.FLOAT4); - delta.setType(3, Types.MinorType.FLOAT4); - delta.setType(4, Types.MinorType.FLOAT4); - - VectorAppender appender = new VectorAppender(target); - delta.accept(appender, null); - - assertEquals(length1 * 2, target.getValueCount()); - - for (int i = 0; i < length1; i++) { - Object intObj = 
target.getObject(i * 2); - assertTrue(intObj instanceof Integer); - assertEquals(i, ((Integer) intObj).intValue()); - - Object longObj = target.getObject(i * 2 + 1); - assertTrue(longObj instanceof Long); - assertEquals(i, ((Long) longObj).longValue()); - } - } - } - - private DenseUnionVector getTargetVector() { - // create a vector, and populate it with values {1, 2, null, 10L} - - final NullableIntHolder intHolder = new NullableIntHolder(); - intHolder.isSet = 1; - final NullableBigIntHolder longHolder = new NullableBigIntHolder(); - longHolder.isSet = 1; - final NullableFloat4Holder floatHolder = new NullableFloat4Holder(); - floatHolder.isSet = 1; - DenseUnionVector targetVector = new DenseUnionVector("target vector", allocator, null, null); - - targetVector.allocateNew(); - - while (targetVector.getValueCapacity() < 4) { - targetVector.reAlloc(); - } - - byte intTypeId = - targetVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType())); - targetVector.setTypeId(0, intTypeId); - intHolder.value = 1; - targetVector.setSafe(0, intHolder); - targetVector.setTypeId(1, intTypeId); - intHolder.value = 2; - targetVector.setSafe(1, intHolder); - byte longTypeId = - targetVector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType())); - targetVector.setTypeId(3, longTypeId); - longHolder.value = 10L; - targetVector.setSafe(3, longHolder); - targetVector.setValueCount(4); - - assertVectorValuesEqual(targetVector, new Object[] {1, 2, null, 10L}); - return targetVector; - } - - private DenseUnionVector getDeltaVector() { - // create a vector, and populate it with values {7, null, 8L, 9.0f} - - final NullableIntHolder intHolder = new NullableIntHolder(); - intHolder.isSet = 1; - final NullableBigIntHolder longHolder = new NullableBigIntHolder(); - longHolder.isSet = 1; - final NullableFloat4Holder floatHolder = new NullableFloat4Holder(); - floatHolder.isSet = 1; - - DenseUnionVector deltaVector = new DenseUnionVector("target vector", 
allocator, null, null); - - while (deltaVector.getValueCapacity() < 4) { - deltaVector.reAlloc(); - } - byte intTypeId = - deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType())); - deltaVector.setTypeId(0, intTypeId); - intHolder.value = 7; - deltaVector.setSafe(0, intHolder); - byte longTypeId = - deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType())); - deltaVector.setTypeId(2, longTypeId); - longHolder.value = 8L; - deltaVector.setSafe(2, longHolder); - byte floatTypeId = - deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType())); - deltaVector.setTypeId(3, floatTypeId); - floatHolder.value = 9.0f; - deltaVector.setSafe(3, floatHolder); - - deltaVector.setValueCount(4); - - assertVectorValuesEqual(deltaVector, new Object[] {7, null, 8L, 9.0f}); - return deltaVector; - } - - @Test - public void testAppendDenseUnionVector() { - try (DenseUnionVector targetVector = getTargetVector(); - DenseUnionVector deltaVector = getDeltaVector()) { - - // append - VectorAppender appender = new VectorAppender(targetVector); - deltaVector.accept(appender, null); - assertVectorValuesEqual(targetVector, new Object[] {1, 2, null, 10L, 7, null, 8L, 9.0f}); - } - - // test reverse append - try (DenseUnionVector targetVector = getTargetVector(); - DenseUnionVector deltaVector = getDeltaVector()) { - - // append - VectorAppender appender = new VectorAppender(deltaVector); - targetVector.accept(appender, null); - assertVectorValuesEqual(deltaVector, new Object[] {7, null, 8L, 9.0f, 1, 2, null, 10L}); - } - } - - private DenseUnionVector getEmptyDeltaVector() { - // create a vector, but leave it empty and uninitialized - DenseUnionVector deltaVector = new DenseUnionVector("target vector", allocator, null, null); - - byte intTypeId = - deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType())); - deltaVector.setTypeId(0, intTypeId); - - byte longTypeId = - 
deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType())); - deltaVector.setTypeId(2, longTypeId); - - byte floatTypeId = - deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType())); - deltaVector.setTypeId(3, floatTypeId); - - return deltaVector; - } - - @Test - public void testAppendEmptyDenseUnionVector() { - try (DenseUnionVector targetVector = getTargetVector(); - DenseUnionVector deltaVector = getEmptyDeltaVector()) { - - // append - VectorAppender appender = new VectorAppender(targetVector); - deltaVector.accept(appender, null); - assertVectorValuesEqual(targetVector, new Object[] {1, 2, null, 10L}); - } - } - - /** Test appending dense union vectors where the child vectors do not match. */ - @Test - public void testAppendDenseUnionVectorMismatch() { - final NullableIntHolder intHolder = new NullableIntHolder(); - intHolder.isSet = 1; - - final NullableBigIntHolder longHolder = new NullableBigIntHolder(); - longHolder.isSet = 1; - - final NullableFloat4Holder floatHolder = new NullableFloat4Holder(); - floatHolder.isSet = 1; - - try (DenseUnionVector targetVector = - new DenseUnionVector("target vector", allocator, null, null); - DenseUnionVector deltaVector = - new DenseUnionVector("target vector", allocator, null, null)) { - targetVector.allocateNew(); - deltaVector.allocateNew(); - - // populate the target vector with values {1, 2L} - while (targetVector.getValueCapacity() < 2) { - targetVector.reAlloc(); - } - byte intTypeId = - targetVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType())); - targetVector.setTypeId(0, intTypeId); - intHolder.value = 1; - targetVector.setSafe(0, intHolder); - byte longTypeId = - targetVector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType())); - targetVector.setTypeId(1, longTypeId); - longHolder.value = 2L; - targetVector.setSafe(1, longHolder); - targetVector.setValueCount(2); - - assertVectorValuesEqual(targetVector, new 
Object[] {1, 2L}); - - // populate the delta vector with values {3, 5.0f} - while (deltaVector.getValueCapacity() < 2) { - deltaVector.reAlloc(); - } - intTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType())); - deltaVector.setTypeId(0, intTypeId); - intHolder.value = 3; - deltaVector.setSafe(0, intHolder); - byte floatTypeId = - deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType())); - deltaVector.setTypeId(1, floatTypeId); - floatHolder.value = 5.0f; - deltaVector.setSafe(1, floatHolder); - deltaVector.setValueCount(2); - - assertVectorValuesEqual(deltaVector, new Object[] {3, 5.0f}); - - // append - VectorAppender appender = new VectorAppender(targetVector); - assertThrows(IllegalArgumentException.class, () -> deltaVector.accept(appender, null)); - } - } - - @Test - public void testAppendVectorNegative() { - final int vectorLength = 10; - try (IntVector target = new IntVector("", allocator); - VarCharVector delta = new VarCharVector("", allocator)) { - - target.allocateNew(vectorLength); - delta.allocateNew(vectorLength); - - VectorAppender appender = new VectorAppender(target); - - assertThrows(IllegalArgumentException.class, () -> delta.accept(appender, null)); - } - } - - private void assertVectorValuesEqual(ValueVector vector, Object[] values) { - assertEquals(vector.getValueCount(), values.length); - for (int i = 0; i < values.length; i++) { - assertEquals(vector.getObject(i), values[i]); - } - } - - public static void assertVectorsEqual(ValueVector vector1, ValueVector vector2) { - assertEquals(vector1.getValueCount(), vector2.getValueCount()); - - TypeEqualsVisitor typeEqualsVisitor = new TypeEqualsVisitor(vector1, false, false); - RangeEqualsVisitor equalsVisitor = - new RangeEqualsVisitor(vector1, vector2, (v1, v2) -> typeEqualsVisitor.equals(vector2)); - assertTrue(equalsVisitor.rangeEquals(new Range(0, 0, vector1.getValueCount()))); - } -} diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java deleted file mode 100644 index d5355bd6cb0cc..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link VectorBatchAppender}. 
*/ -public class TestVectorBatchAppender { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testBatchAppendIntVector() { - final int length1 = 10; - final int length2 = 5; - final int length3 = 7; - try (IntVector target = new IntVector("", allocator); - IntVector delta1 = new IntVector("", allocator); - IntVector delta2 = new IntVector("", allocator)) { - - target.allocateNew(length1); - delta1.allocateNew(length2); - delta2.allocateNew(length3); - - ValueVectorDataPopulator.setVector(target, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); - ValueVectorDataPopulator.setVector(delta1, 10, 11, 12, 13, 14); - ValueVectorDataPopulator.setVector(delta2, 15, 16, 17, 18, 19, 20, 21); - - VectorBatchAppender.batchAppend(target, delta1, delta2); - - assertEquals(length1 + length2 + length3, target.getValueCount()); - for (int i = 0; i < target.getValueCount(); i++) { - assertEquals(i, target.get(i)); - } - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java deleted file mode 100644 index cfc70c2227a41..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.util; - -import static org.apache.arrow.vector.util.TestVectorAppender.assertVectorsEqual; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link VectorSchemaRootAppender}. 
*/ -public class TestVectorSchemaRootAppender { - - private BufferAllocator allocator; - - @BeforeEach - public void prepare() { - allocator = new RootAllocator(1024 * 1024); - } - - @AfterEach - public void shutdown() { - allocator.close(); - } - - @Test - public void testVectorSchemaRootAppend() { - final int length1 = 5; - final int length2 = 3; - final int length3 = 2; - - try (IntVector targetChild1 = new IntVector("t1", allocator); - VarCharVector targetChild2 = new VarCharVector("t2", allocator); - BigIntVector targetChild3 = new BigIntVector("t3", allocator); - IntVector deltaChildOne1 = new IntVector("do1", allocator); - VarCharVector deltaChildOne2 = new VarCharVector("do2", allocator); - BigIntVector deltaChildOne3 = new BigIntVector("do3", allocator); - IntVector deltaChildTwo1 = new IntVector("dt1", allocator); - VarCharVector deltaChildTwo2 = new VarCharVector("dt2", allocator); - BigIntVector deltaChildTwo3 = new BigIntVector("dt3", allocator)) { - - ValueVectorDataPopulator.setVector(targetChild1, 0, 1, null, 3, 4); - ValueVectorDataPopulator.setVector(targetChild2, "zero", "one", null, "three", "four"); - ValueVectorDataPopulator.setVector(targetChild3, 0L, 10L, null, 30L, 40L); - VectorSchemaRoot root1 = VectorSchemaRoot.of(targetChild1, targetChild2, targetChild3); - root1.setRowCount(length1); - - ValueVectorDataPopulator.setVector(deltaChildOne1, 5, 6, 7); - ValueVectorDataPopulator.setVector(deltaChildOne2, "five", "six", "seven"); - ValueVectorDataPopulator.setVector(deltaChildOne3, 50L, 60L, 70L); - VectorSchemaRoot root2 = VectorSchemaRoot.of(deltaChildOne1, deltaChildOne2, deltaChildOne3); - root2.setRowCount(length2); - - ValueVectorDataPopulator.setVector(deltaChildTwo1, null, 9); - ValueVectorDataPopulator.setVector(deltaChildTwo2, null, "nine"); - ValueVectorDataPopulator.setVector(deltaChildTwo3, null, 90L); - VectorSchemaRoot root3 = VectorSchemaRoot.of(deltaChildTwo1, deltaChildTwo2, deltaChildTwo3); - root3.setRowCount(length3); - 
- VectorSchemaRootAppender.append(root1, root2, root3); - assertEquals(length1 + length2 + length3, root1.getRowCount()); - assertEquals(3, root1.getFieldVectors().size()); - - try (IntVector expected1 = new IntVector("", allocator); - VarCharVector expected2 = new VarCharVector("", allocator); - BigIntVector expected3 = new BigIntVector("", allocator)) { - - ValueVectorDataPopulator.setVector(expected1, 0, 1, null, 3, 4, 5, 6, 7, null, 9); - ValueVectorDataPopulator.setVector( - expected2, "zero", "one", null, "three", "four", "five", "six", "seven", null, "nine"); - ValueVectorDataPopulator.setVector( - expected3, 0L, 10L, null, 30L, 40L, 50L, 60L, 70L, null, 90L); - - assertVectorsEqual(expected1, root1.getVector(0)); - assertVectorsEqual(expected2, root1.getVector(1)); - assertVectorsEqual(expected3, root1.getVector(2)); - } - } - } - - @Test - public void testRootWithDifferentChildCounts() { - try (IntVector targetChild1 = new IntVector("t1", allocator); - VarCharVector targetChild2 = new VarCharVector("t2", allocator); - BigIntVector targetChild3 = new BigIntVector("t3", allocator); - IntVector deltaChild1 = new IntVector("d1", allocator); - VarCharVector deltaChild2 = new VarCharVector("d2", allocator)) { - - ValueVectorDataPopulator.setVector(targetChild1, 0, 1, null, 3, 4); - ValueVectorDataPopulator.setVector(targetChild2, "zero", "one", null, "three", "four"); - ValueVectorDataPopulator.setVector(targetChild3, 0L, 10L, null, 30L, 40L); - VectorSchemaRoot root1 = VectorSchemaRoot.of(targetChild1, targetChild2, targetChild3); - root1.setRowCount(5); - - ValueVectorDataPopulator.setVector(deltaChild1, 5, 6, 7); - ValueVectorDataPopulator.setVector(deltaChild2, "five", "six", "seven"); - VectorSchemaRoot root2 = VectorSchemaRoot.of(deltaChild1, deltaChild2); - root2.setRowCount(3); - - IllegalArgumentException exp = - assertThrows( - IllegalArgumentException.class, () -> VectorSchemaRootAppender.append(root1, root2)); - - assertEquals( - "Vector schema roots 
have different numbers of child vectors.", exp.getMessage()); - } - } - - @Test - public void testRootWithDifferentChildTypes() { - try (IntVector targetChild1 = new IntVector("t1", allocator); - VarCharVector targetChild2 = new VarCharVector("t2", allocator); - IntVector deltaChild1 = new IntVector("d1", allocator); - VarCharVector deltaChild2 = new VarCharVector("d2", allocator)) { - - ValueVectorDataPopulator.setVector(targetChild1, 0, 1, null, 3, 4); - ValueVectorDataPopulator.setVector(targetChild2, "zero", "one", null, "three", "four"); - VectorSchemaRoot root1 = VectorSchemaRoot.of(targetChild1, targetChild2); - root1.setRowCount(5); - - ValueVectorDataPopulator.setVector(deltaChild1, 5, 6, 7); - ValueVectorDataPopulator.setVector(deltaChild2, "five", "six", "seven"); - - // note that the child vectors are in reverse order - VectorSchemaRoot root2 = VectorSchemaRoot.of(deltaChild2, deltaChild1); - root2.setRowCount(3); - - IllegalArgumentException exp = - assertThrows( - IllegalArgumentException.class, () -> VectorSchemaRootAppender.append(root1, root2)); - - assertEquals("Vector schema roots have different schemas.", exp.getMessage()); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java deleted file mode 100644 index 35c15bdf538f3..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java +++ /dev/null @@ -1,312 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.validate; - -import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.apache.arrow.vector.util.ValueVectorUtility.validate; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.Charset; -import java.util.Arrays; -import java.util.List; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.RunEndEncodedVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.impl.NullableStructWriter; -import org.apache.arrow.vector.holders.NullableFloat4Holder; -import org.apache.arrow.vector.holders.NullableFloat8Holder; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.ArrowType.RunEndEncoded; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import 
org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestValidateVector { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - private static final Charset utf8Charset = Charset.forName("UTF-8"); - private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset); - private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset); - private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset); - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testBaseFixedWidthVector() { - try (final IntVector vector = new IntVector("v", allocator)) { - validate(vector); - setVector(vector, 1, 2, 3); - validate(vector); - - vector.getDataBuffer().capacity(0); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validate(vector)); - assertTrue(e.getMessage().contains("Not enough capacity for fixed width data buffer")); - } - } - - @Test - public void testBaseVariableWidthVector() { - try (final VarCharVector vector = new VarCharVector("v", allocator)) { - validate(vector); - setVector(vector, STR1, STR2, STR3); - validate(vector); - - vector.getDataBuffer().capacity(0); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validate(vector)); - assertTrue(e.getMessage().contains("Not enough capacity for data buffer")); - } - } - - @Test - public void testBaseLargeVariableWidthVector() { - try (final LargeVarCharVector vector = new LargeVarCharVector("v", allocator)) { - validate(vector); - setVector(vector, STR1, STR2, null, STR3); - validate(vector); - - vector.getDataBuffer().capacity(0); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validate(vector)); - assertTrue(e.getMessage().contains("Not enough capacity for data buffer")); 
- } - } - - @Test - public void testListVector() { - try (final ListVector vector = ListVector.empty("v", allocator)) { - validate(vector); - setVector(vector, Arrays.asList(1, 2, 3), Arrays.asList(4, 5)); - validate(vector); - - vector.getDataVector().setValueCount(3); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validate(vector)); - assertTrue(e.getMessage().contains("Inner vector does not contain enough elements.")); - } - } - - @Test - public void testLargeListVector() { - try (final LargeListVector vector = LargeListVector.empty("v", allocator)) { - validate(vector); - setVector(vector, Arrays.asList(1, 2, 3, 4), Arrays.asList(5, 6)); - validate(vector); - - vector.getDataVector().setValueCount(4); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validate(vector)); - assertTrue(e.getMessage().contains("Inner vector does not contain enough elements.")); - } - } - - @Test - public void testFixedSizeListVector() { - try (final FixedSizeListVector vector = FixedSizeListVector.empty("v", 3, allocator)) { - validate(vector); - setVector(vector, Arrays.asList(1, 2, 3), Arrays.asList(4, 5, 6)); - validate(vector); - - vector.getDataVector().setValueCount(3); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validate(vector)); - assertTrue(e.getMessage().contains("Inner vector does not contain enough elements.")); - } - } - - @Test - public void testStructVectorRangeEquals() { - try (final StructVector vector = StructVector.empty("struct", allocator)) { - vector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - - validate(vector); - - NullableStructWriter writer = vector.getWriter(); - writer.allocate(); - - writeStructVector(writer, 1, 10L); - writeStructVector(writer, 2, 20L); - 
writeStructVector(writer, 3, 30L); - writeStructVector(writer, 4, 40L); - writeStructVector(writer, 5, 50L); - writer.setValueCount(5); - - vector.getChild("f0").setValueCount(2); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validate(vector)); - assertTrue(e.getMessage().contains("Struct vector length not equal to child vector length")); - - vector.getChild("f0").setValueCount(5); - validate(vector); - - vector.getChild("f0").getDataBuffer().capacity(0); - ValidateUtil.ValidateException e2 = - assertThrows(ValidateUtil.ValidateException.class, () -> validate(vector)); - assertTrue(e2.getMessage().contains("Not enough capacity for fixed width data buffer")); - } - } - - @Test - public void testUnionVector() { - try (final UnionVector vector = UnionVector.empty("union", allocator)) { - validate(vector); - - final NullableFloat4Holder float4Holder = new NullableFloat4Holder(); - float4Holder.value = 1.01f; - float4Holder.isSet = 1; - - final NullableFloat8Holder float8Holder = new NullableFloat8Holder(); - float8Holder.value = 2.02f; - float8Holder.isSet = 1; - - vector.setType(0, Types.MinorType.FLOAT4); - vector.setSafe(0, float4Holder); - vector.setType(1, Types.MinorType.FLOAT8); - vector.setSafe(1, float8Holder); - vector.setValueCount(2); - - validate(vector); - - vector.getChildrenFromFields().get(0).setValueCount(1); - ValidateUtil.ValidateException e1 = - assertThrows(ValidateUtil.ValidateException.class, () -> validate(vector)); - assertTrue(e1.getMessage().contains("Union vector length not equal to child vector length")); - - vector.getChildrenFromFields().get(0).setValueCount(2); - validate(vector); - - vector.getChildrenFromFields().get(0).getDataBuffer().capacity(0); - ValidateUtil.ValidateException e2 = - assertThrows(ValidateUtil.ValidateException.class, () -> validate(vector)); - assertTrue(e2.getMessage().contains("Not enough capacity for fixed width data buffer")); - } - } - - @Test - public void 
testDenseUnionVector() { - try (final DenseUnionVector vector = DenseUnionVector.empty("union", allocator)) { - validate(vector); - - final NullableFloat4Holder float4Holder = new NullableFloat4Holder(); - float4Holder.value = 1.01f; - float4Holder.isSet = 1; - - final NullableFloat8Holder float8Holder = new NullableFloat8Holder(); - float8Holder.value = 2.02f; - float8Holder.isSet = 1; - - byte float4TypeId = - vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType())); - byte float8TypeId = - vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT8.getType())); - - vector.setTypeId(0, float4TypeId); - vector.setSafe(0, float4Holder); - vector.setTypeId(1, float8TypeId); - vector.setSafe(1, float8Holder); - vector.setValueCount(2); - - validate(vector); - - vector.getChildrenFromFields().get(0).getDataBuffer().capacity(0); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validate(vector)); - assertTrue(e.getMessage().contains("Not enough capacity for fixed width data buffer")); - } - } - - @Test - public void testBaseFixedWidthVectorInstanceMethod() { - try (final IntVector vector = new IntVector("v", allocator)) { - vector.validate(); - setVector(vector, 1, 2, 3); - vector.validate(); - - vector.getDataBuffer().capacity(0); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> vector.validate()); - assertTrue(e.getMessage().contains("Not enough capacity for fixed width data buffer")); - } - } - - @Test - public void testRunEndEncodedVector() { - final FieldType valueType = FieldType.notNullable(Types.MinorType.BIGINT.getType()); - final FieldType runEndType = FieldType.notNullable(Types.MinorType.INT.getType()); - - final Field valueField = new Field("value", valueType, null); - final Field runEndField = new Field("ree", runEndType, null); - - try (RunEndEncodedVector vector = - new RunEndEncodedVector( - new Field( - "ree", - 
FieldType.notNullable(RunEndEncoded.INSTANCE), - List.of(runEndField, valueField)), - allocator, - null)) { - vector.validate(); - - int runCount = 1; - vector.allocateNew(); - ((BigIntVector) vector.getValuesVector()).set(0, 1); - ((IntVector) vector.getRunEndsVector()).set(0, 10); - vector.getValuesVector().setValueCount(runCount); - vector.getRunEndsVector().setValueCount(runCount); - vector.setValueCount(10); - - vector.validate(); - - vector.getRunEndsVector().setValueCount(0); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> vector.validate()); - assertTrue(e.getMessage().contains("Run end vector does not contain enough elements")); - } - } - - private void writeStructVector(NullableStructWriter writer, int value1, long value2) { - writer.start(); - writer.integer("f0").writeInt(value1); - writer.bigInt("f1").writeBigInt(value2); - writer.end(); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java deleted file mode 100644 index 6993fde8fa509..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.validate; - -import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.apache.arrow.vector.util.ValueVectorUtility.validateFull; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.holders.NullableFloat4Holder; -import org.apache.arrow.vector.holders.NullableFloat8Holder; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestValidateVectorFull { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - 
- @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testBaseVariableWidthVector() { - try (final VarCharVector vector = new VarCharVector("v", allocator)) { - validateFull(vector); - setVector(vector, "aaa", "bbb", "ccc"); - validateFull(vector); - - ArrowBuf offsetBuf = vector.getOffsetBuffer(); - offsetBuf.setInt(0, 100); - offsetBuf.setInt(4, 50); - - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validateFull(vector)); - assertTrue( - e.getMessage() - .contains("The values in positions 0 and 1 of the offset buffer are decreasing")); - } - } - - @Test - public void testBaseLargeVariableWidthVector() { - try (final LargeVarCharVector vector = new LargeVarCharVector("v", allocator)) { - validateFull(vector); - setVector(vector, "aaa", "bbb", null, "ccc"); - validateFull(vector); - - ArrowBuf offsetBuf = vector.getOffsetBuffer(); - offsetBuf.setLong(0, 100); - offsetBuf.setLong(8, 50); - - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validateFull(vector)); - assertTrue( - e.getMessage() - .contains( - "The values in positions 0 and 1 of the large offset buffer are decreasing")); - } - } - - @Test - public void testListVector() { - try (final ListVector vector = ListVector.empty("v", allocator)) { - validateFull(vector); - setVector(vector, Arrays.asList(1, 2, 3), Arrays.asList(4, 5), Arrays.asList(6, 7, 8, 9)); - validateFull(vector); - - ArrowBuf offsetBuf = vector.getOffsetBuffer(); - offsetBuf.setInt(0, 100); - offsetBuf.setInt(8, 50); - - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validateFull(vector)); - assertTrue( - e.getMessage() - .contains("The values in positions 0 and 1 of the offset buffer are decreasing")); - } - } - - @Test - public void testLargeListVector() { - try (final LargeListVector vector = LargeListVector.empty("v", allocator)) { - 
validateFull(vector); - setVector(vector, Arrays.asList(1, 2, 3), Arrays.asList(4, 5), Arrays.asList(6, 7, 8, 9)); - validateFull(vector); - - ArrowBuf offsetBuf = vector.getOffsetBuffer(); - offsetBuf.setLong(0, 100); - offsetBuf.setLong(16, 50); - - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validateFull(vector)); - assertTrue( - e.getMessage() - .contains( - "The values in positions 0 and 1 of the large offset buffer are decreasing")); - } - } - - @Test - public void testStructVectorRangeEquals() { - try (final StructVector vector = StructVector.empty("struct", allocator)) { - IntVector intVector = - vector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - VarCharVector strVector = - vector.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class); - - validateFull(vector); - validateFull(intVector); - validateFull(strVector); - - ValueVectorDataPopulator.setVector(intVector, 1, 2, 3, 4, 5); - ValueVectorDataPopulator.setVector(strVector, "a", "b", "c", "d", "e"); - vector.setValueCount(5); - - validateFull(vector); - validateFull(intVector); - validateFull(strVector); - - ArrowBuf offsetBuf = strVector.getOffsetBuffer(); - offsetBuf.setInt(0, 100); - offsetBuf.setInt(8, 50); - - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validateFull(strVector)); - assertTrue( - e.getMessage() - .contains("The values in positions 0 and 1 of the offset buffer are decreasing")); - - e = assertThrows(ValidateUtil.ValidateException.class, () -> validateFull(vector)); - assertTrue( - e.getMessage() - .contains("The values in positions 0 and 1 of the offset buffer are decreasing")); - } - } - - @Test - public void testUnionVector() { - try (final UnionVector vector = UnionVector.empty("union", allocator)) { - validateFull(vector); - - final NullableFloat4Holder float4Holder = new NullableFloat4Holder(); - float4Holder.value = 
1.01f; - float4Holder.isSet = 1; - - final NullableFloat8Holder float8Holder = new NullableFloat8Holder(); - float8Holder.value = 2.02f; - float8Holder.isSet = 1; - - vector.setType(0, Types.MinorType.FLOAT4); - vector.setSafe(0, float4Holder); - vector.setType(1, Types.MinorType.FLOAT8); - vector.setSafe(1, float8Holder); - vector.setValueCount(2); - - validateFull(vector); - - // negative type id - vector.getTypeBuffer().setByte(0, -1); - - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validateFull(vector)); - assertTrue(e.getMessage().contains("The type id at position 0 is negative")); - } - } - - @Test - public void testDenseUnionVector() { - try (final DenseUnionVector vector = DenseUnionVector.empty("union", allocator)) { - validateFull(vector); - - final NullableFloat4Holder float4Holder = new NullableFloat4Holder(); - float4Holder.value = 1.01f; - float4Holder.isSet = 1; - - final NullableFloat8Holder float8Holder = new NullableFloat8Holder(); - float8Holder.value = 2.02f; - float8Holder.isSet = 1; - - byte float4TypeId = - vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType())); - byte float8TypeId = - vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT8.getType())); - - vector.setTypeId(0, float4TypeId); - vector.setSafe(0, float4Holder); - vector.setTypeId(1, float8TypeId); - vector.setSafe(1, float8Holder); - vector.setValueCount(2); - - validateFull(vector); - - ValueVector subVector = vector.getVectorByType(float4TypeId); - assertTrue(subVector instanceof Float4Vector); - assertEquals(1, subVector.getValueCount()); - - // shrink sub-vector - subVector.setValueCount(0); - - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validateFull(vector)); - assertTrue(e.getMessage().contains("Dense union vector offset exceeds sub-vector boundary")); - } - } - - @Test - public void testBaseVariableWidthVectorInstanceMethod() { - try 
(final VarCharVector vector = new VarCharVector("v", allocator)) { - vector.validateFull(); - setVector(vector, "aaa", "bbb", "ccc"); - vector.validateFull(); - - ArrowBuf offsetBuf = vector.getOffsetBuffer(); - offsetBuf.setInt(0, 100); - offsetBuf.setInt(4, 50); - - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, vector::validateFull); - assertTrue( - e.getMessage() - .contains("The values in positions 0 and 1 of the offset buffer are decreasing")); - } - } - - @Test - public void testValidateVarCharUTF8() { - try (final VarCharVector vector = new VarCharVector("v", allocator)) { - vector.validateFull(); - setVector( - vector, - "aaa".getBytes(StandardCharsets.UTF_8), - "bbb".getBytes(StandardCharsets.UTF_8), - new byte[] {(byte) 0xFF, (byte) 0xFE}); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, vector::validateFull); - assertTrue(e.getMessage().contains("UTF")); - } - } - - @Test - public void testValidateLargeVarCharUTF8() { - try (final LargeVarCharVector vector = new LargeVarCharVector("v", allocator)) { - vector.validateFull(); - setVector( - vector, - "aaa".getBytes(StandardCharsets.UTF_8), - "bbb".getBytes(StandardCharsets.UTF_8), - new byte[] {(byte) 0xFF, (byte) 0xFE}); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, vector::validateFull); - assertTrue(e.getMessage().contains("UTF")); - } - } - - @Test - public void testValidateDecimal() { - try (final DecimalVector vector = - new DecimalVector( - Field.nullable("v", new ArrowType.Decimal(2, 0, DecimalVector.TYPE_WIDTH * 8)), - allocator)) { - vector.validateFull(); - setVector(vector, 1L); - vector.validateFull(); - vector.clear(); - setVector(vector, Long.MAX_VALUE); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, vector::validateFull); - assertTrue(e.getMessage().contains("Decimal")); - } - } - - @Test - public void 
testValidateDecimal256() { - try (final Decimal256Vector vector = - new Decimal256Vector( - Field.nullable("v", new ArrowType.Decimal(2, 0, DecimalVector.TYPE_WIDTH * 8)), - allocator)) { - vector.validateFull(); - setVector(vector, 1L); - vector.validateFull(); - vector.clear(); - setVector(vector, Long.MAX_VALUE); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, vector::validateFull); - assertTrue(e.getMessage().contains("Decimal")); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java deleted file mode 100644 index 384045bf02c6e..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.arrow.vector.validate; - -import static org.apache.arrow.vector.util.ValueVectorUtility.validate; -import static org.apache.arrow.vector.util.ValueVectorUtility.validateFull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.apache.arrow.memory.ArrowBuf; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.VectorSchemaRoot; -import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestValidateVectorSchemaRoot { - - private BufferAllocator allocator; - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testValidatePositive() { - try (IntVector intVector = new IntVector("int vector", allocator); - VarCharVector strVector = new VarCharVector("var char vector", allocator)) { - - VectorSchemaRoot root = VectorSchemaRoot.of(intVector, strVector); - - validate(root); - validateFull(root); - - ValueVectorDataPopulator.setVector(intVector, 1, 2, 3, 4, 5); - ValueVectorDataPopulator.setVector(strVector, "a", "b", "c", "d", "e"); - root.setRowCount(5); - - validate(root); - validateFull(root); - } - } - - @Test - public void testValidateNegative() { - try (IntVector intVector = new IntVector("int vector", allocator); - VarCharVector strVector = new VarCharVector("var char vector", allocator)) { - - VectorSchemaRoot root = VectorSchemaRoot.of(intVector, strVector); - - ValueVectorDataPopulator.setVector(intVector, 1, 2, 3, 4, 5); - ValueVectorDataPopulator.setVector(strVector, "a", "b", "c", "d", "e"); - - // validate 
mismatching value counts - root.setRowCount(4); - intVector.setValueCount(5); - strVector.setValueCount(5); - ValidateUtil.ValidateException e = - assertThrows(ValidateUtil.ValidateException.class, () -> validate(root)); - assertTrue( - e.getMessage() - .contains("Child vector and vector schema root have different value counts")); - e = assertThrows(ValidateUtil.ValidateException.class, () -> validateFull(root)); - assertTrue( - e.getMessage() - .contains("Child vector and vector schema root have different value counts")); - - // valid problems with the child vector - root.setRowCount(5); - ArrowBuf offsetBuf = strVector.getOffsetBuffer(); - offsetBuf.setInt(0, 100); - offsetBuf.setInt(8, 50); - validate(root); - e = assertThrows(ValidateUtil.ValidateException.class, () -> validateFull(root)); - assertTrue( - e.getMessage() - .contains("The values in positions 0 and 1 of the offset buffer are decreasing")); - } - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java deleted file mode 100644 index 5454008364797..0000000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java +++ /dev/null @@ -1,417 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.arrow.vector.validate; - -import static org.junit.jupiter.api.Assertions.assertThrows; - -import java.util.function.Supplier; -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.BigIntVector; -import org.apache.arrow.vector.BitVector; -import org.apache.arrow.vector.DateDayVector; -import org.apache.arrow.vector.DateMilliVector; -import org.apache.arrow.vector.Decimal256Vector; -import org.apache.arrow.vector.DecimalVector; -import org.apache.arrow.vector.DurationVector; -import org.apache.arrow.vector.FixedSizeBinaryVector; -import org.apache.arrow.vector.Float4Vector; -import org.apache.arrow.vector.Float8Vector; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.IntervalDayVector; -import org.apache.arrow.vector.IntervalYearVector; -import org.apache.arrow.vector.LargeVarBinaryVector; -import org.apache.arrow.vector.LargeVarCharVector; -import org.apache.arrow.vector.NullVector; -import org.apache.arrow.vector.SmallIntVector; -import org.apache.arrow.vector.TimeMicroVector; -import org.apache.arrow.vector.TimeMilliVector; -import org.apache.arrow.vector.TimeNanoVector; -import org.apache.arrow.vector.TimeSecVector; -import org.apache.arrow.vector.TimeStampMicroTZVector; -import org.apache.arrow.vector.TimeStampMicroVector; -import org.apache.arrow.vector.TimeStampMilliTZVector; -import org.apache.arrow.vector.TimeStampMilliVector; -import org.apache.arrow.vector.TimeStampNanoTZVector; -import 
org.apache.arrow.vector.TimeStampNanoVector; -import org.apache.arrow.vector.TimeStampSecTZVector; -import org.apache.arrow.vector.TimeStampSecVector; -import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.UInt1Vector; -import org.apache.arrow.vector.UInt2Vector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.UInt8Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.VarBinaryVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.complex.DenseUnionVector; -import org.apache.arrow.vector.complex.FixedSizeListVector; -import org.apache.arrow.vector.complex.LargeListVector; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.types.TimeUnit; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** Test cases for {@link ValidateVectorTypeVisitor}. 
*/ -public class TestValidateVectorTypeVisitor { - - private BufferAllocator allocator; - - private ValidateVectorTypeVisitor visitor = new ValidateVectorTypeVisitor(); - - @BeforeEach - public void init() { - allocator = new RootAllocator(Long.MAX_VALUE); - } - - @AfterEach - public void terminate() throws Exception { - allocator.close(); - } - - private void testPositiveCase(Supplier vectorGenerator) { - try (ValueVector vector = vectorGenerator.get(); ) { - vector.accept(visitor, null); - } - } - - private void testNegativeCase(Supplier vectorGenerator) { - try (ValueVector vector = vectorGenerator.get()) { - assertThrows( - ValidateUtil.ValidateException.class, - () -> { - vector.accept(visitor, null); - }); - } - } - - @Test - public void testFixedWidthVectorsPositive() { - // integer vectors - testPositiveCase(() -> new TinyIntVector("vector", allocator)); - testPositiveCase(() -> new SmallIntVector("vector", allocator)); - testPositiveCase(() -> new IntVector("vector", allocator)); - testPositiveCase(() -> new BigIntVector("vector", allocator)); - testPositiveCase(() -> new UInt1Vector("vector", allocator)); - testPositiveCase(() -> new UInt2Vector("vector", allocator)); - testPositiveCase(() -> new UInt4Vector("vector", allocator)); - testPositiveCase(() -> new UInt8Vector("vector", allocator)); - - testPositiveCase(() -> new BitVector("vector", allocator)); - testPositiveCase(() -> new DecimalVector("vector", allocator, 30, 16)); - - // date vectors - testPositiveCase(() -> new DateDayVector("vector", allocator)); - testPositiveCase(() -> new DateMilliVector("vector", allocator)); - - testPositiveCase( - () -> - new DurationVector( - "vector", FieldType.nullable(new ArrowType.Duration(TimeUnit.SECOND)), allocator)); - - // float vectors - testPositiveCase(() -> new Float4Vector("vector", allocator)); - testPositiveCase(() -> new Float8Vector("vector", allocator)); - - // interval vectors - testPositiveCase(() -> new IntervalDayVector("vector", allocator)); 
- testPositiveCase(() -> new IntervalYearVector("vector", allocator)); - - // time vectors - testPositiveCase(() -> new TimeMicroVector("vector", allocator)); - testPositiveCase(() -> new TimeMilliVector("vector", allocator)); - testPositiveCase(() -> new TimeMicroVector("vector", allocator)); - testPositiveCase(() -> new TimeSecVector("vector", allocator)); - - // time stamp vectors - testPositiveCase(() -> new TimeStampMicroTZVector("vector", allocator, "cn")); - testPositiveCase(() -> new TimeStampMicroVector("vector", allocator)); - testPositiveCase(() -> new TimeStampMilliTZVector("vector", allocator, "cn")); - testPositiveCase(() -> new TimeStampMilliVector("vector", allocator)); - testPositiveCase(() -> new TimeStampNanoTZVector("vector", allocator, "cn")); - testPositiveCase(() -> new TimeStampNanoVector("vector", allocator)); - testPositiveCase(() -> new TimeStampSecTZVector("vector", allocator, "cn")); - testPositiveCase(() -> new TimeStampSecVector("vector", allocator)); - - testPositiveCase(() -> new FixedSizeBinaryVector("vector", allocator, 5)); - } - - @Test - public void testFixedWidthVectorsNegative() { - // integer vectors - testNegativeCase( - () -> - new TinyIntVector( - "vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator)); - testNegativeCase( - () -> - new SmallIntVector( - "vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator)); - testNegativeCase( - () -> - new BigIntVector( - "vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator)); - testNegativeCase( - () -> - new BigIntVector( - "vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator)); - testNegativeCase( - () -> - new UInt1Vector( - "vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator)); - testNegativeCase( - () -> - new UInt2Vector( - "vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator)); - testNegativeCase( - () -> - new UInt4Vector( - "vector", 
FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator)); - testNegativeCase( - () -> - new UInt8Vector( - "vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator)); - - testNegativeCase( - () -> - new BitVector( - "vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator)); - testNegativeCase(() -> new DecimalVector("vector", allocator, 30, -16)); - - // date vectors - testNegativeCase( - () -> - new DateDayVector( - "vector", FieldType.nullable(Types.MinorType.FLOAT4.getType()), allocator)); - testNegativeCase( - () -> - new DateMilliVector( - "vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator)); - - // float pont vectors - testNegativeCase( - () -> - new Float4Vector( - "vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator)); - testNegativeCase( - () -> - new Float8Vector( - "vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator)); - - // interval vectors - testNegativeCase( - () -> - new IntervalDayVector( - "vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator)); - testNegativeCase( - () -> - new IntervalYearVector( - "vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator)); - - // time vectors - testNegativeCase( - () -> - new TimeMilliVector( - "vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator)); - testNegativeCase( - () -> - new TimeMicroVector( - "vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator)); - testNegativeCase( - () -> - new TimeNanoVector( - "vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator)); - testNegativeCase( - () -> - new TimeSecVector( - "vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator)); - - // time stamp vectors - testNegativeCase(() -> new TimeStampMicroTZVector("vector", allocator, null)); - testNegativeCase( - () -> - new TimeStampMicroVector( - "vector", 
FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator)); - testNegativeCase(() -> new TimeStampMilliTZVector("vector", allocator, null)); - testNegativeCase( - () -> - new TimeStampMilliVector( - "vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator)); - testNegativeCase(() -> new TimeStampNanoTZVector("vector", allocator, null)); - testNegativeCase( - () -> - new TimeStampNanoVector( - "vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator)); - testNegativeCase(() -> new TimeStampSecTZVector("vector", allocator, null)); - testNegativeCase( - () -> - new TimeStampSecVector( - "vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator)); - } - - @Test - public void testDecimalVector() { - testPositiveCase( - () -> - new DecimalVector( - "dec", - FieldType.nullable(ArrowType.Decimal.createDecimal(10, 10, 128)), - allocator)); - testPositiveCase( - () -> - new DecimalVector( - "dec", - FieldType.nullable(ArrowType.Decimal.createDecimal(38, 10, 128)), - allocator)); - testPositiveCase( - () -> - new Decimal256Vector( - "dec", - FieldType.nullable(ArrowType.Decimal.createDecimal(50, 10, 256)), - allocator)); - testPositiveCase( - () -> - new Decimal256Vector( - "dec", - FieldType.nullable(ArrowType.Decimal.createDecimal(76, 10, 256)), - allocator)); - testNegativeCase( - () -> - new DecimalVector( - "dec", - FieldType.nullable(ArrowType.Decimal.createDecimal(50, 10, 128)), - allocator)); - testNegativeCase( - () -> - new Decimal256Vector( - "dec", - FieldType.nullable(ArrowType.Decimal.createDecimal(100, 10, 256)), - allocator)); - testNegativeCase( - () -> - new DecimalVector( - "dec", FieldType.nullable(ArrowType.Decimal.createDecimal(0, 10, 128)), allocator)); - testNegativeCase( - () -> - new Decimal256Vector( - "dec", - FieldType.nullable(ArrowType.Decimal.createDecimal(-1, 10, 256)), - allocator)); - testNegativeCase( - () -> - new Decimal256Vector( - "dec", 
FieldType.nullable(ArrowType.Decimal.createDecimal(30, 10, 64)), allocator)); - testNegativeCase( - () -> - new Decimal256Vector( - "dec", - FieldType.nullable(ArrowType.Decimal.createDecimal(10, 20, 256)), - allocator)); - } - - @Test - public void testVariableWidthVectorsPositive() { - testPositiveCase(() -> new VarCharVector("vector", allocator)); - testPositiveCase(() -> new VarBinaryVector("vector", allocator)); - } - - @Test - public void testVariableWidthVectorsNegative() { - testNegativeCase( - () -> - new VarCharVector( - "vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator)); - testNegativeCase( - () -> - new VarBinaryVector( - "vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator)); - } - - @Test - public void testLargeVariableWidthVectorsPositive() { - testPositiveCase(() -> new LargeVarCharVector("vector", allocator)); - testPositiveCase(() -> new LargeVarBinaryVector("vector", allocator)); - } - - @Test - public void testLargeVariableWidthVectorsNegative() { - testNegativeCase( - () -> - new LargeVarCharVector( - "vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator)); - testNegativeCase( - () -> - new LargeVarBinaryVector( - "vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator)); - } - - @Test - public void testListVector() { - testPositiveCase(() -> ListVector.empty("vector", allocator)); - - testNegativeCase( - () -> - new ListVector( - "vector", allocator, FieldType.nullable(Types.MinorType.INT.getType()), null)); - } - - @Test - public void testLargeListVector() { - testPositiveCase(() -> LargeListVector.empty("vector", allocator)); - - testNegativeCase( - () -> - new LargeListVector( - "vector", allocator, FieldType.nullable(Types.MinorType.INT.getType()), null)); - } - - @Test - public void testFixedSizeListVector() { - testPositiveCase(() -> FixedSizeListVector.empty("vector", 10, allocator)); - } - - @Test - public void testStructVector() { - testPositiveCase(() -> 
StructVector.empty("vector", allocator)); - - testNegativeCase( - () -> - new StructVector( - "vector", allocator, FieldType.nullable(Types.MinorType.INT.getType()), null)); - } - - @Test - public void testUnionVector() { - testPositiveCase(() -> UnionVector.empty("vector", allocator)); - } - - @Test - public void testDenseUnionVector() { - testPositiveCase(() -> DenseUnionVector.empty("vector", allocator)); - } - - @Test - public void testNullVector() { - testPositiveCase(() -> new NullVector("null vec")); - } -} diff --git a/java/vector/src/test/resources/logback.xml b/java/vector/src/test/resources/logback.xml deleted file mode 100644 index f9e449fa67b2e..0000000000000 --- a/java/vector/src/test/resources/logback.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n - - - - - - - - - - diff --git a/js/package.json b/js/package.json index 26649363d2c27..01c85bad29f99 100644 --- a/js/package.json +++ b/js/package.json @@ -64,8 +64,8 @@ }, "devDependencies": { "@openpgp/web-stream-tools": "0.0.13", - "@rollup/plugin-alias": "5.1.0", - "@rollup/plugin-node-resolve": "15.2.3", + "@rollup/plugin-alias": "5.1.1", + "@rollup/plugin-node-resolve": "15.3.0", "@rollup/stream": "3.0.1", "@swc/core": "1.6.6", "@types/benchmark": "2.1.5", @@ -87,7 +87,7 @@ "gulp": "4.0.2", "glob": "10.4.1", "google-closure-compiler": "20240317.0.0", - "gulp-esbuild": "0.12.1", + "gulp-esbuild": "0.13.0", "gulp-json-transform": "0.5.0", "gulp-rename": "2.0.0", "gulp-replace": "1.1.4", diff --git a/js/yarn.lock b/js/yarn.lock index 25a9228c1402b..aae663174aa6a 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -989,22 +989,19 @@ resolved "https://registry.yarnpkg.com/@polka/url/-/url-1.0.0-next.25.tgz#f077fdc0b5d0078d30893396ff4827a13f99e817" integrity sha512-j7P6Rgr3mmtdkeDGTe0E/aYyWEWVtc5yFXtHCRHs28/jptDEWfaVOc5T7cblqy1XKPPfCxJc/8DwQ5YgLOZOVQ== -"@rollup/plugin-alias@5.1.0": - version "5.1.0" - resolved 
"https://registry.yarnpkg.com/@rollup/plugin-alias/-/plugin-alias-5.1.0.tgz#99a94accc4ff9a3483be5baeedd5d7da3b597e93" - integrity sha512-lpA3RZ9PdIG7qqhEfv79tBffNaoDuukFDrmhLqg9ifv99u/ehn+lOg30x2zmhf8AQqQUZaMk/B9fZraQ6/acDQ== - dependencies: - slash "^4.0.0" +"@rollup/plugin-alias@5.1.1": + version "5.1.1" + resolved "https://registry.yarnpkg.com/@rollup/plugin-alias/-/plugin-alias-5.1.1.tgz#53601d88cda8b1577aa130b4a6e452283605bf26" + integrity sha512-PR9zDb+rOzkRb2VD+EuKB7UC41vU5DIwZ5qqCpk0KJudcWAyi8rvYOhS7+L5aZCspw1stTViLgN5v6FF1p5cgQ== -"@rollup/plugin-node-resolve@15.2.3": - version "15.2.3" - resolved "https://registry.yarnpkg.com/@rollup/plugin-node-resolve/-/plugin-node-resolve-15.2.3.tgz#e5e0b059bd85ca57489492f295ce88c2d4b0daf9" - integrity sha512-j/lym8nf5E21LwBT4Df1VD6hRO2L2iwUeUmP7litikRsVp1H6NWx20NEp0Y7su+7XGc476GnXXc4kFeZNGmaSQ== +"@rollup/plugin-node-resolve@15.3.0": + version "15.3.0" + resolved "https://registry.yarnpkg.com/@rollup/plugin-node-resolve/-/plugin-node-resolve-15.3.0.tgz#efbb35515c9672e541c08d59caba2eff492a55d5" + integrity sha512-9eO5McEICxMzJpDW9OnMYSv4Sta3hmt7VtBFz5zR9273suNOydOyq/FrGeGy+KsTRFm8w0SLVhzig2ILFT63Ag== dependencies: "@rollup/pluginutils" "^5.0.1" "@types/resolve" "1.20.2" deepmerge "^4.2.2" - is-builtin-module "^3.2.1" is-module "^1.0.0" resolve "^1.22.1" @@ -3853,10 +3850,10 @@ gulp-cli@^2.2.0: v8flags "^3.2.0" yargs "^7.1.0" -gulp-esbuild@0.12.1: - version "0.12.1" - resolved "https://registry.yarnpkg.com/gulp-esbuild/-/gulp-esbuild-0.12.1.tgz#f91093f0f68e739f455530804aa533577ec6dfc6" - integrity sha512-dkcN2AHtXTVu+KNw0Zw8SWysziNwpYg6kw41E8frUkil5ZtwktIsot/OCLEpRT6clFpVQ7Hw3+YZQvoNdyTF1A== +gulp-esbuild@0.13.0: + version "0.13.0" + resolved "https://registry.yarnpkg.com/gulp-esbuild/-/gulp-esbuild-0.13.0.tgz#6327433d23497dfc760d3742d34565996f0121d6" + integrity sha512-dydvBeM9JIrSVa9HbYrTCnKLqKX7v3J6Nu6Fe/bYhutUEqtVNFLAQAyRk14nJoI7TsrGmsVTjVKfDCgFGW+01Q== dependencies: esbuild "^0.21.5" plugin-error "^2.0.1" diff 
--git a/java/Brewfile b/matlab/.editorconfig similarity index 84% rename from java/Brewfile rename to matlab/.editorconfig index af6bd65615d62..f549a4bf17a70 100644 --- a/java/Brewfile +++ b/matlab/.editorconfig @@ -15,5 +15,10 @@ # specific language governing permissions and limitations # under the License. -brew "openjdk@11" -brew "sccache" +# This is an EditorConfig file: https://editorconfig.org/ + +# See ../.editorconfig for inherited values + +[*.m] +indent_size = 4 +indent_style = space diff --git a/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_writer.cc b/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_writer.cc index ed1052e0a8076..69ba734bd0ef9 100644 --- a/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_writer.cc +++ b/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_writer.cc @@ -18,9 +18,7 @@ #include "arrow/matlab/io/ipc/proxy/record_batch_file_writer.h" #include "arrow/io/file.h" #include "arrow/matlab/error/error.h" -#include "arrow/matlab/tabular/proxy/record_batch.h" #include "arrow/matlab/tabular/proxy/schema.h" -#include "arrow/matlab/tabular/proxy/table.h" #include "arrow/util/utf8.h" #include "libmexclass/proxy/ProxyManager.h" @@ -29,11 +27,7 @@ namespace arrow::matlab::io::ipc::proxy { RecordBatchFileWriter::RecordBatchFileWriter( const std::shared_ptr writer) - : writer{std::move(writer)} { - REGISTER_METHOD(RecordBatchFileWriter, close); - REGISTER_METHOD(RecordBatchFileWriter, writeRecordBatch); - REGISTER_METHOD(RecordBatchFileWriter, writeTable); -} + : RecordBatchWriter(std::move(writer)) {} libmexclass::proxy::MakeResult RecordBatchFileWriter::make( const libmexclass::proxy::FunctionArguments& constructor_arguments) { @@ -65,43 +59,4 @@ libmexclass::proxy::MakeResult RecordBatchFileWriter::make( return std::make_shared(std::move(writer)); } -void RecordBatchFileWriter::writeRecordBatch( - libmexclass::proxy::method::Context& context) { - namespace mda = ::matlab::data; - using 
RecordBatchProxy = ::arrow::matlab::tabular::proxy::RecordBatch; - - mda::StructArray opts = context.inputs[0]; - const mda::TypedArray record_batch_proxy_id_mda = - opts[0]["RecordBatchProxyID"]; - const uint64_t record_batch_proxy_id = record_batch_proxy_id_mda[0]; - - auto proxy = libmexclass::proxy::ProxyManager::getProxy(record_batch_proxy_id); - auto record_batch_proxy = std::static_pointer_cast(proxy); - auto record_batch = record_batch_proxy->unwrap(); - - MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(writer->WriteRecordBatch(*record_batch), context, - error::IPC_RECORD_BATCH_WRITE_FAILED); -} - -void RecordBatchFileWriter::writeTable(libmexclass::proxy::method::Context& context) { - namespace mda = ::matlab::data; - using TableProxy = ::arrow::matlab::tabular::proxy::Table; - - mda::StructArray opts = context.inputs[0]; - const mda::TypedArray table_proxy_id_mda = opts[0]["TableProxyID"]; - const uint64_t table_proxy_id = table_proxy_id_mda[0]; - - auto proxy = libmexclass::proxy::ProxyManager::getProxy(table_proxy_id); - auto table_proxy = std::static_pointer_cast(proxy); - auto table = table_proxy->unwrap(); - - MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(writer->WriteTable(*table), context, - error::IPC_RECORD_BATCH_WRITE_FAILED); -} - -void RecordBatchFileWriter::close(libmexclass::proxy::method::Context& context) { - MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(writer->Close(), context, - error::IPC_RECORD_BATCH_WRITE_CLOSE_FAILED); -} - -} // namespace arrow::matlab::io::ipc::proxy \ No newline at end of file +} // namespace arrow::matlab::io::ipc::proxy diff --git a/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_writer.h b/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_writer.h index bfd83504f190a..ac76afaf23957 100644 --- a/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_writer.h +++ b/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_writer.h @@ -16,27 +16,20 @@ // under the License. 
#include "arrow/ipc/writer.h" +#include "arrow/matlab/io/ipc/proxy/record_batch_writer.h" + #include "libmexclass/proxy/Proxy.h" namespace arrow::matlab::io::ipc::proxy { -class RecordBatchFileWriter : public libmexclass::proxy::Proxy { +class RecordBatchFileWriter : public RecordBatchWriter { public: RecordBatchFileWriter(std::shared_ptr writer); - ~RecordBatchFileWriter() = default; + virtual ~RecordBatchFileWriter() = default; static libmexclass::proxy::MakeResult make( const libmexclass::proxy::FunctionArguments& constructor_arguments); - - protected: - std::shared_ptr writer; - - void writeRecordBatch(libmexclass::proxy::method::Context& context); - - void writeTable(libmexclass::proxy::method::Context& context); - - void close(libmexclass::proxy::method::Context& context); }; -} // namespace arrow::matlab::io::ipc::proxy \ No newline at end of file +} // namespace arrow::matlab::io::ipc::proxy diff --git a/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_stream_writer.cc b/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_stream_writer.cc new file mode 100644 index 0000000000000..4640a54819b83 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_stream_writer.cc @@ -0,0 +1,64 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/matlab/io/ipc/proxy/record_batch_stream_writer.h" +#include "arrow/io/file.h" +#include "arrow/ipc/writer.h" +#include "arrow/matlab/error/error.h" +#include "arrow/matlab/tabular/proxy/schema.h" +#include "arrow/util/utf8.h" + +#include "libmexclass/proxy/ProxyManager.h" + +namespace arrow::matlab::io::ipc::proxy { + +RecordBatchStreamWriter::RecordBatchStreamWriter( + const std::shared_ptr writer) + : RecordBatchWriter(std::move(writer)) {} + +libmexclass::proxy::MakeResult RecordBatchStreamWriter::make( + const libmexclass::proxy::FunctionArguments& constructor_arguments) { + namespace mda = ::matlab::data; + using RecordBatchStreamWriterProxy = + arrow::matlab::io::ipc::proxy::RecordBatchStreamWriter; + using SchemaProxy = arrow::matlab::tabular::proxy::Schema; + + const mda::StructArray opts = constructor_arguments[0]; + + const mda::StringArray filename_mda = opts[0]["Filename"]; + const auto filename_utf16 = std::u16string(filename_mda[0]); + MATLAB_ASSIGN_OR_ERROR(const auto filename_utf8, + arrow::util::UTF16StringToUTF8(filename_utf16), + error::UNICODE_CONVERSION_ERROR_ID); + + const mda::TypedArray arrow_schema_proxy_id_mda = opts[0]["SchemaProxyID"]; + auto proxy = libmexclass::proxy::ProxyManager::getProxy(arrow_schema_proxy_id_mda[0]); + auto arrow_schema_proxy = std::static_pointer_cast(proxy); + auto arrow_schema = arrow_schema_proxy->unwrap(); + + MATLAB_ASSIGN_OR_ERROR(auto output_stream, + arrow::io::FileOutputStream::Open(filename_utf8), + error::FAILED_TO_OPEN_FILE_FOR_WRITE); + + MATLAB_ASSIGN_OR_ERROR(auto writer, + arrow::ipc::MakeStreamWriter(output_stream, arrow_schema), + "arrow:matlab:MakeFailed"); + + return std::make_shared(std::move(writer)); +} + +} // namespace arrow::matlab::io::ipc::proxy diff --git a/java/gandiva/src/main/cpp/config_holder.cc 
b/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_stream_writer.h similarity index 59% rename from java/gandiva/src/main/cpp/config_holder.cc rename to matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_stream_writer.h index dfa6afce1992c..484d1aa252c57 100644 --- a/java/gandiva/src/main/cpp/config_holder.cc +++ b/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_stream_writer.h @@ -15,16 +15,21 @@ // specific language governing permissions and limitations // under the License. -#include +#include "arrow/ipc/writer.h" +#include "arrow/matlab/io/ipc/proxy/record_batch_writer.h" -#include "config_holder.h" +#include "libmexclass/proxy/Proxy.h" -namespace gandiva { -int64_t ConfigHolder::config_id_ = 1; +namespace arrow::matlab::io::ipc::proxy { -// map of configuration objects created so far -std::unordered_map> - ConfigHolder::configuration_map_; +class RecordBatchStreamWriter : public RecordBatchWriter { + public: + RecordBatchStreamWriter(std::shared_ptr writer); -std::mutex ConfigHolder::g_mtx_; -} // namespace gandiva + virtual ~RecordBatchStreamWriter() = default; + + static libmexclass::proxy::MakeResult make( + const libmexclass::proxy::FunctionArguments& constructor_arguments); +}; + +} // namespace arrow::matlab::io::ipc::proxy diff --git a/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_writer.cc b/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_writer.cc new file mode 100644 index 0000000000000..beffcca0245f0 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_writer.cc @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/matlab/io/ipc/proxy/record_batch_writer.h" +#include "arrow/io/file.h" +#include "arrow/matlab/error/error.h" +#include "arrow/matlab/tabular/proxy/record_batch.h" +#include "arrow/matlab/tabular/proxy/schema.h" +#include "arrow/matlab/tabular/proxy/table.h" + +#include "libmexclass/proxy/ProxyManager.h" + +namespace arrow::matlab::io::ipc::proxy { + +RecordBatchWriter::RecordBatchWriter( + const std::shared_ptr writer) + : writer{std::move(writer)} { + REGISTER_METHOD(RecordBatchWriter, close); + REGISTER_METHOD(RecordBatchWriter, writeRecordBatch); + REGISTER_METHOD(RecordBatchWriter, writeTable); +} + +void RecordBatchWriter::writeRecordBatch(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + using RecordBatchProxy = ::arrow::matlab::tabular::proxy::RecordBatch; + + mda::StructArray opts = context.inputs[0]; + const mda::TypedArray record_batch_proxy_id_mda = + opts[0]["RecordBatchProxyID"]; + const uint64_t record_batch_proxy_id = record_batch_proxy_id_mda[0]; + + auto proxy = libmexclass::proxy::ProxyManager::getProxy(record_batch_proxy_id); + auto record_batch_proxy = std::static_pointer_cast(proxy); + auto record_batch = record_batch_proxy->unwrap(); + + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(writer->WriteRecordBatch(*record_batch), context, + error::IPC_RECORD_BATCH_WRITE_FAILED); +} + +void RecordBatchWriter::writeTable(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + using TableProxy = ::arrow::matlab::tabular::proxy::Table; + + mda::StructArray opts = 
context.inputs[0]; + const mda::TypedArray table_proxy_id_mda = opts[0]["TableProxyID"]; + const uint64_t table_proxy_id = table_proxy_id_mda[0]; + + auto proxy = libmexclass::proxy::ProxyManager::getProxy(table_proxy_id); + auto table_proxy = std::static_pointer_cast(proxy); + auto table = table_proxy->unwrap(); + + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(writer->WriteTable(*table), context, + error::IPC_RECORD_BATCH_WRITE_FAILED); +} + +void RecordBatchWriter::close(libmexclass::proxy::method::Context& context) { + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(writer->Close(), context, + error::IPC_RECORD_BATCH_WRITE_CLOSE_FAILED); +} + +} // namespace arrow::matlab::io::ipc::proxy diff --git a/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_writer.h b/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_writer.h new file mode 100644 index 0000000000000..885a0cbf207fe --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_writer.h @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "arrow/ipc/writer.h" +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::io::ipc::proxy { + +class RecordBatchWriter : public libmexclass::proxy::Proxy { + public: + RecordBatchWriter(std::shared_ptr writer); + + virtual ~RecordBatchWriter() = default; + + protected: + std::shared_ptr writer; + + void writeRecordBatch(libmexclass::proxy::method::Context& context); + + void writeTable(libmexclass::proxy::method::Context& context); + + void close(libmexclass::proxy::method::Context& context); +}; + +} // namespace arrow::matlab::io::ipc::proxy diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 8326b4371917a..a08a7495c00c9 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -36,6 +36,7 @@ #include "arrow/matlab/io/feather/proxy/writer.h" #include "arrow/matlab/io/ipc/proxy/record_batch_file_reader.h" #include "arrow/matlab/io/ipc/proxy/record_batch_file_writer.h" +#include "arrow/matlab/io/ipc/proxy/record_batch_stream_writer.h" #include "arrow/matlab/tabular/proxy/record_batch.h" #include "arrow/matlab/tabular/proxy/schema.h" #include "arrow/matlab/tabular/proxy/table.h" @@ -111,6 +112,8 @@ libmexclass::proxy::MakeResult Factory::make_proxy( REGISTER_PROXY(arrow.c.proxy.RecordBatchImporter , arrow::matlab::c::proxy::RecordBatchImporter); REGISTER_PROXY(arrow.io.ipc.proxy.RecordBatchFileReader , arrow::matlab::io::ipc::proxy::RecordBatchFileReader); REGISTER_PROXY(arrow.io.ipc.proxy.RecordBatchFileWriter , arrow::matlab::io::ipc::proxy::RecordBatchFileWriter); + REGISTER_PROXY(arrow.io.ipc.proxy.RecordBatchStreamWriter , arrow::matlab::io::ipc::proxy::RecordBatchStreamWriter); + // clang-format on return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, diff --git a/matlab/src/matlab/+arrow/+io/+ipc/RecordBatchFileWriter.m b/matlab/src/matlab/+arrow/+io/+ipc/RecordBatchFileWriter.m index 
aee4acf5c16e6..ee1298c23706f 100644 --- a/matlab/src/matlab/+arrow/+io/+ipc/RecordBatchFileWriter.m +++ b/matlab/src/matlab/+arrow/+io/+ipc/RecordBatchFileWriter.m @@ -1,5 +1,5 @@ -%RECORDBATCHFILEWRITER Class for serializing record batches to a file using -% the IPC format. +%RECORDBATCHFILEWRITER Class for serializing record batches to the Arrow IPC File +% format. % Licensed to the Apache Software Foundation (ASF) under one or more % contributor license agreements. See the NOTICE file distributed with @@ -16,11 +16,7 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef RecordBatchFileWriter < matlab.mixin.Scalar - - properties(SetAccess=private, GetAccess=public, Hidden) - Proxy - end +classdef RecordBatchFileWriter < arrow.io.ipc.RecordBatchWriter methods function obj = RecordBatchFileWriter(filename, schema) @@ -30,48 +26,8 @@ end args = struct(Filename=filename, SchemaProxyID=schema.Proxy.ID); proxyName = "arrow.io.ipc.proxy.RecordBatchFileWriter"; - obj.Proxy = arrow.internal.proxy.create(proxyName, args); - end - - function writeRecordBatch(obj, recordBatch) - arguments - obj(1, 1) arrow.io.ipc.RecordBatchFileWriter - recordBatch(1, 1) arrow.tabular.RecordBatch - end - - args = struct(RecordBatchProxyID=recordBatch.Proxy.ID); - obj.Proxy.writeRecordBatch(args); - end - - function writeTable(obj, arrowTable) - arguments - obj(1, 1) arrow.io.ipc.RecordBatchFileWriter - arrowTable(1, 1) arrow.tabular.Table - end - - args = struct(TableProxyID=arrowTable.Proxy.ID); - obj.Proxy.writeTable(args); - end - - function write(obj, tabularObj) - arguments - obj(1, 1) arrow.io.ipc.RecordBatchFileWriter - tabularObj(1, 1) - end - if isa(tabularObj, "arrow.tabular.RecordBatch") - obj.writeRecordBatch(tabularObj); - elseif isa(tabularObj, "arrow.tabular.Table") - obj.writeTable(tabularObj); - else - id = "arrow:matlab:ipc:write:InvalidType"; - msg = "tabularObj input argument must be an instance of " + 
... - "either arrow.tabular.RecordBatch or arrow.tabular.Table."; - error(id, msg); - end - end - - function close(obj) - obj.Proxy.close(); + proxy = arrow.internal.proxy.create(proxyName, args); + obj@arrow.io.ipc.RecordBatchWriter(proxy); end end -end +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+io/+ipc/RecordBatchStreamWriter.m b/matlab/src/matlab/+arrow/+io/+ipc/RecordBatchStreamWriter.m new file mode 100644 index 0000000000000..17fe7184a8df8 --- /dev/null +++ b/matlab/src/matlab/+arrow/+io/+ipc/RecordBatchStreamWriter.m @@ -0,0 +1,34 @@ +%RECORDBATCHSTREAMWRITER Class for serializing record batches to the Arrow +% IPC Streaming format. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +classdef RecordBatchStreamWriter < arrow.io.ipc.RecordBatchWriter + + methods + function obj = RecordBatchStreamWriter(filename, schema) + arguments + filename(1, 1) string {mustBeNonzeroLengthText} + schema(1, 1) arrow.tabular.Schema + end + args = struct(Filename=filename, SchemaProxyID=schema.Proxy.ID); + proxyName = "arrow.io.ipc.proxy.RecordBatchStreamWriter"; + proxy = arrow.internal.proxy.create(proxyName, args); + obj@arrow.io.ipc.RecordBatchWriter(proxy); + end + end +end + diff --git a/matlab/src/matlab/+arrow/+io/+ipc/RecordBatchWriter.m b/matlab/src/matlab/+arrow/+io/+ipc/RecordBatchWriter.m new file mode 100644 index 0000000000000..a662392cc6f47 --- /dev/null +++ b/matlab/src/matlab/+arrow/+io/+ipc/RecordBatchWriter.m @@ -0,0 +1,74 @@ +%RECORDBATCHWRITER Class for serializing record batches to the Arrow +% IPC format. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+ +classdef (Abstract) RecordBatchWriter < matlab.mixin.Scalar + + properties(SetAccess=private, GetAccess=public, Hidden) + Proxy + end + + methods + function obj = RecordBatchWriter(proxy) + arguments + proxy(1, 1) libmexclass.proxy.Proxy + end + obj.Proxy = proxy; + end + + function writeRecordBatch(obj, recordBatch) + arguments + obj(1, 1) arrow.io.ipc.RecordBatchWriter + recordBatch(1, 1) arrow.tabular.RecordBatch + end + + args = struct(RecordBatchProxyID=recordBatch.Proxy.ID); + obj.Proxy.writeRecordBatch(args); + end + + function writeTable(obj, arrowTable) + arguments + obj(1, 1) arrow.io.ipc.RecordBatchWriter + arrowTable(1, 1) arrow.tabular.Table + end + + args = struct(TableProxyID=arrowTable.Proxy.ID); + obj.Proxy.writeTable(args); + end + + function write(obj, tabularObj) + arguments + obj(1, 1) arrow.io.ipc.RecordBatchWriter + tabularObj(1, 1) + end + if isa(tabularObj, "arrow.tabular.RecordBatch") + obj.writeRecordBatch(tabularObj); + elseif isa(tabularObj, "arrow.tabular.Table") + obj.writeTable(tabularObj); + else + id = "arrow:io:ipc:write:InvalidType"; + msg = "Input must be an instance of " + ... + "either arrow.tabular.RecordBatch or arrow.tabular.Table."; + error(id, msg); + end + end + + function close(obj) + obj.Proxy.close(); + end + end +end \ No newline at end of file diff --git a/matlab/test/arrow/io/ipc/tRecordBatchFileWriter.m b/matlab/test/arrow/io/ipc/tRecordBatchWriter.m similarity index 77% rename from matlab/test/arrow/io/ipc/tRecordBatchFileWriter.m rename to matlab/test/arrow/io/ipc/tRecordBatchWriter.m index 25bbf4474edd4..55802e31f885d 100644 --- a/matlab/test/arrow/io/ipc/tRecordBatchFileWriter.m +++ b/matlab/test/arrow/io/ipc/tRecordBatchWriter.m @@ -1,4 +1,5 @@ -%TRECORDBATCHFILEWRITER Unit tests for arrow.io.ipc.RecordBatchFileWriter. +%TRECORDBATCHWRITER Unit tests for arrow.io.ipc.RecordBatchFileWriter +% and arrow.io.ipc.RecordBatchStreamWriter. 
% Licensed to the Apache Software Foundation (ASF) under one or more % contributor license agreements. See the NOTICE file distributed with @@ -15,7 +16,16 @@ % implied. See the License for the specific language governing % permissions and limitations under the License. -classdef tRecordBatchFileWriter < matlab.unittest.TestCase +classdef tRecordBatchWriter < matlab.unittest.TestCase + + properties(TestParameter) + WriterConstructor = struct(... + RecordBatchFileWriter=@arrow.io.ipc.RecordBatchFileWriter,... + RecordBatchStreamWriter=@arrow.io.ipc.RecordBatchStreamWriter... + ); + end + + methods function folder = setupTemporaryFolder(testCase) @@ -26,45 +36,45 @@ end methods (Test) - function ZeroLengthFilenameError(testCase) - % Verify RecordBatchFileWriter throws an exception with the + function ZeroLengthFilenameError(testCase, WriterConstructor) + % Verify RecordBatchWriter throws an exception with the % identifier MATLAB:validators:mustBeNonzeroLengthText if the % filename input argument given is a zero length string. schema = arrow.schema(arrow.field("A", arrow.float64())); - fcn = @() arrow.io.ipc.RecordBatchFileWriter("", schema); + fcn = @() WriterConstructor("", schema); testCase.verifyError(fcn, "MATLAB:validators:mustBeNonzeroLengthText"); end - function MissingStringFilenameError(testCase) - % Verify RecordBatchFileWriter throws an exception with the + function MissingStringFilenameError(testCase, WriterConstructor) + % Verify RecordBatchWriter throws an exception with the % identifier MATLAB:validators:mustBeNonzeroLengthText if the % filename input argument given is a missing string. 
schema = arrow.schema(arrow.field("A", arrow.float64())); - fcn = @() arrow.io.ipc.RecordBatchFileWriter(string(missing), schema); + fcn = @() WriterConstructor(string(missing), schema); testCase.verifyError(fcn, "MATLAB:validators:mustBeNonzeroLengthText"); end - function FilenameInvalidTypeError(testCase) - % Verify RecordBatchFileWriter throws an exception with the + function FilenameInvalidTypeError(testCase, WriterConstructor) + % Verify RecordBatchWriter throws an exception with the % identifier MATLAB:validators:UnableToConvert if the filename % input argument is neither a scalar string nor a char vector. schema = arrow.schema(arrow.field("A", arrow.float64())); - fcn = @() arrow.io.ipc.RecordBatchFileWriter(table, schema); + fcn = @() WriterConstructor(table, schema); testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); end - function InvalidSchemaType(testCase) - % Verify RecordBatchFileWriter throws an exception with the + function InvalidSchemaType(testCase, WriterConstructor) + % Verify RecordBatchWriter throws an exception with the % identifier MATLAB:validators:UnableToConvert if the schema % input argument is not an arrow.tabular.Schema instance. 
folder = testCase.setupTemporaryFolder(); fname = fullfile(folder, "data.arrow"); schema = arrow.field("A", arrow.float64()); - fcn = @() arrow.io.ipc.RecordBatchFileWriter(fname, schema); + fcn = @() WriterConstructor(fname, schema); testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); end - function writeRecordBatchInvalidType(testCase) + function writeRecordBatchInvalidType(testCase, WriterConstructor) % Verify writeRecordBatch throws an exception with the % identifier MATLAB:validators:UnableToConvert if the % recordBatch input argument given is not an @@ -72,26 +82,26 @@ function writeRecordBatchInvalidType(testCase) folder = testCase.setupTemporaryFolder(); fname = fullfile(folder, "data.arrow"); schema = arrow.schema(arrow.field("A", arrow.float64())); - writer = arrow.io.ipc.RecordBatchFileWriter(fname, schema); + writer = WriterConstructor(fname, schema); arrowTable = arrow.table(table([1 2 3 4]', VariableNames="A")); fcn = @() writer.writeRecordBatch(arrowTable); testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); end - function writeTableInvalidType(testCase) + function writeTableInvalidType(testCase, WriterConstructor) % Verify writeTable throws an exception with the % identifier MATLAB:validators:UnableToConvert if the table % input argument given is not an arrow.tabular.Table instance. 
folder = testCase.setupTemporaryFolder(); fname = fullfile(folder, "data.arrow"); schema = arrow.schema(arrow.field("A", arrow.float64())); - writer = arrow.io.ipc.RecordBatchFileWriter(fname, schema); + writer = WriterConstructor(fname, schema); arrowRecordBatch = arrow.recordBatch(table([1 2 3 4]', VariableNames="A")); fcn = @() writer.writeTable(arrowRecordBatch); testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); end - function writeInvalidType(testCase) + function writeInvalidType(testCase, WriterConstructor) % Verify writeTable throws an exception with the % identifier arrow:matlab:ipc:write:InvalidType if the % tabularObj input argument given is neither an @@ -99,12 +109,12 @@ function writeInvalidType(testCase) folder = testCase.setupTemporaryFolder(); fname = fullfile(folder, "data.arrow"); schema = arrow.schema(arrow.field("A", arrow.float64())); - writer = arrow.io.ipc.RecordBatchFileWriter(fname, schema); + writer = WriterConstructor(fname, schema); fcn = @() writer.write(schema); - testCase.verifyError(fcn, "arrow:matlab:ipc:write:InvalidType"); + testCase.verifyError(fcn, "arrow:io:ipc:write:InvalidType"); end - function writeRecordBatchInvalidSchema(testCase) + function writeRecordBatchInvalidSchema(testCase, WriterConstructor) % Verify writeRecordBatch throws an exception with the % identifier arrow:io:ipc:FailedToWriteRecordBatch if the % schema of the given record batch does match the expected @@ -112,28 +122,28 @@ function writeRecordBatchInvalidSchema(testCase) folder = testCase.setupTemporaryFolder(); fname = fullfile(folder, "data.arrow"); schema = arrow.schema(arrow.field("A", arrow.float64())); - writer = arrow.io.ipc.RecordBatchFileWriter(fname, schema); + writer = WriterConstructor(fname, schema); arrowRecordBatch = arrow.recordBatch(table([1 2 3 4]', VariableNames="B")); fcn = @() writer.writeRecordBatch(arrowRecordBatch); testCase.verifyError(fcn, "arrow:io:ipc:FailedToWriteRecordBatch"); end - function 
writeTableInvalidSchema(testCase) + function writeTableInvalidSchema(testCase, WriterConstructor) % Verify writeTable throws an exception with the % identifier arrow:io:ipc:FailedToWriteRecordBatch if the % schema of the given table does match the expected schema. folder = testCase.setupTemporaryFolder(); fname = fullfile(folder, "data.arrow"); schema = arrow.schema(arrow.field("A", arrow.float64())); - writer = arrow.io.ipc.RecordBatchFileWriter(fname, schema); + writer = WriterConstructor(fname, schema); arrowTable = arrow.table(table([1 2 3 4]', VariableNames="B")); fcn = @() writer.writeTable(arrowTable); testCase.verifyError(fcn, "arrow:io:ipc:FailedToWriteRecordBatch"); end - function writeInvalidSchema(testCase) + function writeInvalidSchema(testCase, WriterConstructor) % Verify write throws an exception with the % identifier arrow:io:ipc:FailedToWriteRecordBatch if the % schema of the given record batch or table does match the @@ -141,7 +151,7 @@ function writeInvalidSchema(testCase) folder = testCase.setupTemporaryFolder(); fname = fullfile(folder, "data.arrow"); schema = arrow.schema(arrow.field("A", arrow.float64())); - writer = arrow.io.ipc.RecordBatchFileWriter(fname, schema); + writer = WriterConstructor(fname, schema); arrowTable = arrow.table(table([1 2 3 4]', VariableNames="B")); fcn = @() writer.write(arrowTable); @@ -152,39 +162,39 @@ function writeInvalidSchema(testCase) testCase.verifyError(fcn, "arrow:io:ipc:FailedToWriteRecordBatch"); end - function writeRecordBatchSmoke(testCase) + function writeRecordBatchSmoke(testCase, WriterConstructor) % Verify writeRecordBatch does not error or issue a warning % if it successfully writes the record batch to the file. 
folder = testCase.setupTemporaryFolder(); fname = fullfile(folder, "data.arrow"); schema = arrow.schema(arrow.field("A", arrow.float64())); - writer = arrow.io.ipc.RecordBatchFileWriter(fname, schema); + writer = WriterConstructor(fname, schema); arrowRecordBatch = arrow.recordBatch(table([1 2 3 4]', VariableNames="A")); fcn = @() writer.writeRecordBatch(arrowRecordBatch); testCase.verifyWarningFree(fcn); end - function writeTableBatchSmoke(testCase) + function writeTableBatchSmoke(testCase, WriterConstructor) % Verify writeTable does not error or issue a warning % if it successfully writes the table to the file. folder = testCase.setupTemporaryFolder(); fname = fullfile(folder, "data.arrow"); schema = arrow.schema(arrow.field("A", arrow.float64())); - writer = arrow.io.ipc.RecordBatchFileWriter(fname, schema); + writer = WriterConstructor(fname, schema); arrowTable = arrow.table(table([1 2 3 4]', VariableNames="A")); fcn = @() writer.writeTable(arrowTable); testCase.verifyWarningFree(fcn); end - function writeSmoke(testCase) + function writeSmoke(testCase, WriterConstructor) % Verify write does not error or issue a warning if it % successfully writes the record batch or table to the file. folder = testCase.setupTemporaryFolder(); fname = fullfile(folder, "data.arrow"); schema = arrow.schema(arrow.field("A", arrow.float64())); - writer = arrow.io.ipc.RecordBatchFileWriter(fname, schema); + writer = WriterConstructor(fname, schema); arrowRecordBatch = arrow.recordBatch(table([1 2 3 4]', VariableNames="A")); fcn = @() writer.write(arrowRecordBatch); @@ -195,13 +205,13 @@ function writeSmoke(testCase) testCase.verifyWarningFree(fcn); end - function closeSmoke(testCase) + function closeSmoke(testCase, WriterConstructor) % Verify close does not error or issue a warning if it was % successful. 
folder = testCase.setupTemporaryFolder(); fname = fullfile(folder, "data.arrow"); schema = arrow.schema(arrow.field("A", arrow.float64())); - writer = arrow.io.ipc.RecordBatchFileWriter(fname, schema); + writer = WriterConstructor(fname, schema); arrowTable = arrow.table(table([1 2 3 4]', VariableNames="A")); writer.write(arrowTable); fcn = @() writer.close(); diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index 8016cbf261b7c..29a737a6ecf25 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -81,7 +81,9 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/schema.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/record_batch_importer.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_reader.cc" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_writer.cc") + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_file_writer.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_writer.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/ipc/proxy/record_batch_stream_writer.cc") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy") diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index c39a1129ac17a..80d1cd31ac231 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -166,8 +166,17 @@ if($ENV{PYODIDE}) # modules (at least under Pyodide it does). 
set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) set(Python3_LIBRARY $ENV{CPYTHONLIB}) - set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) set(Python3_EXECUTABLE) + execute_process(COMMAND ${Python3_EXECUTABLE} -c + "import numpy; print(numpy.__version__)" + OUTPUT_VARIABLE PYODIDE_NUMPY_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE) + string(REGEX MATCH "^([0-9]+)" PYODIDE_NUMPY_MAJOR_VERSION ${PYODIDE_NUMPY_VERSION}) + if(PYODIDE_NUMPY_MAJOR_VERSION GREATER_EQUAL 2) + set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/_core/include) + else() + set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) + endif() set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) # we set the c and cxx compiler manually to bypass pywasmcross # which is pyodide's way of messing with C++ build parameters. diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 8c8c09265d0bf..d00a731324c92 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -166,7 +166,7 @@ def print_entry(label, value): float16, float32, float64, binary, string, utf8, binary_view, string_view, large_binary, large_string, large_utf8, - decimal128, decimal256, + decimal32, decimal64, decimal128, decimal256, list_, large_list, list_view, large_list_view, map_, struct, union, sparse_union, dense_union, @@ -180,7 +180,8 @@ def print_entry(label, value): ListViewType, LargeListViewType, MapType, UnionType, SparseUnionType, DenseUnionType, TimestampType, Time32Type, Time64Type, DurationType, - FixedSizeBinaryType, Decimal128Type, Decimal256Type, + FixedSizeBinaryType, + Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type, BaseExtensionType, ExtensionType, RunEndEncodedType, Bool8Type, FixedShapeTensorType, JsonType, OpaqueType, UuidType, @@ -216,7 +217,8 @@ def print_entry(label, value): Date32Array, Date64Array, TimestampArray, Time32Array, Time64Array, DurationArray, MonthDayNanoIntervalArray, - Decimal128Array, Decimal256Array, StructArray, ExtensionArray, + 
Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, + StructArray, ExtensionArray, RunEndEncodedArray, Bool8Array, FixedShapeTensorArray, JsonArray, OpaqueArray, UuidArray, scalar, NA, _NULL as NULL, Scalar, @@ -224,7 +226,7 @@ Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar, UInt8Scalar, UInt16Scalar, UInt32Scalar, UInt64Scalar, HalfFloatScalar, FloatScalar, DoubleScalar, - Decimal128Scalar, Decimal256Scalar, + Decimal32Scalar, Decimal64Scalar, Decimal128Scalar, Decimal256Scalar, ListScalar, LargeListScalar, FixedSizeListScalar, ListViewScalar, LargeListViewScalar, Date32Scalar, Date64Scalar, diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 8bddc34e1000b..f86caf1433d4e 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2327,6 +2327,15 @@ cdef class FixedSizeBinaryArray(Array): Concrete class for Arrow arrays of a fixed-size binary data type. """ +cdef class Decimal32Array(FixedSizeBinaryArray): + """ + Concrete class for Arrow arrays of decimal32 data type. + """ + +cdef class Decimal64Array(FixedSizeBinaryArray): + """ + Concrete class for Arrow arrays of decimal64 data type. + """ cdef class Decimal128Array(FixedSizeBinaryArray): """ @@ -4043,7 +4052,7 @@ cdef class StructArray(Array): memory_pool : MemoryPool (optional) For memory allocations, if required, otherwise uses default pool. type : pyarrow.StructType (optional) - Struct type for name and type of each child. + Struct type for name and type of each child. 
Returns ------- @@ -4705,6 +4714,8 @@ cdef dict _array_classes = { _Type_STRING_VIEW: StringViewArray, _Type_DICTIONARY: DictionaryArray, _Type_FIXED_SIZE_BINARY: FixedSizeBinaryArray, + _Type_DECIMAL32: Decimal32Array, + _Type_DECIMAL64: Decimal64Array, _Type_DECIMAL128: Decimal128Array, _Type_DECIMAL256: Decimal256Array, _Type_STRUCT: StructArray, diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 8bf61b73cc211..b2edeb0b4192f 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -45,6 +45,16 @@ cdef extern from "arrow/util/key_value_metadata.h" namespace "arrow" nogil: c_bool Contains(const c_string& key) const +cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil: + cdef cppclass CDecimal32" arrow::Decimal32": + c_string ToString(int32_t scale) const + + +cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil: + cdef cppclass CDecimal64" arrow::Decimal64": + c_string ToString(int32_t scale) const + + cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil: cdef cppclass CDecimal128" arrow::Decimal128": c_string ToString(int32_t scale) const @@ -110,6 +120,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: _Type_FLOAT" arrow::Type::FLOAT" _Type_DOUBLE" arrow::Type::DOUBLE" + _Type_DECIMAL32" arrow::Type::DECIMAL32" + _Type_DECIMAL64" arrow::Type::DECIMAL64" _Type_DECIMAL128" arrow::Type::DECIMAL128" _Type_DECIMAL256" arrow::Type::DECIMAL256" @@ -453,6 +465,18 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: int byte_width() int bit_width() + cdef cppclass CDecimal32Type \ + " arrow::Decimal32Type"(CFixedSizeBinaryType): + CDecimal32Type(int precision, int scale) + int precision() + int scale() + + cdef cppclass CDecimal64Type \ + " arrow::Decimal64Type"(CFixedSizeBinaryType): + CDecimal64Type(int precision, int scale) + int precision() + int scale() + cdef cppclass CDecimal128Type \ " arrow::Decimal128Type"(CFixedSizeBinaryType): 
CDecimal128Type(int precision, int scale) @@ -680,6 +704,16 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CFixedSizeBinaryArray" arrow::FixedSizeBinaryArray"(CArray): const uint8_t* GetValue(int i) + cdef cppclass CDecimal32Array" arrow::Decimal32Array"( + CFixedSizeBinaryArray + ): + c_string FormatValue(int i) + + cdef cppclass CDecimal64Array" arrow::Decimal64Array"( + CFixedSizeBinaryArray + ): + c_string FormatValue(int i) + cdef cppclass CDecimal128Array" arrow::Decimal128Array"( CFixedSizeBinaryArray ): @@ -1263,6 +1297,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CDoubleScalar" arrow::DoubleScalar"(CScalar): double value + cdef cppclass CDecimal32Scalar" arrow::Decimal32Scalar"(CScalar): + CDecimal32 value + + cdef cppclass CDecimal64Scalar" arrow::Decimal64Scalar"(CScalar): + CDecimal64 value + cdef cppclass CDecimal128Scalar" arrow::Decimal128Scalar"(CScalar): CDecimal128 value diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index f3d4e1eec0899..bc9811b92b007 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -185,6 +185,16 @@ cdef class FixedSizeBinaryType(DataType): const CFixedSizeBinaryType* fixed_size_binary_type +cdef class Decimal32Type(FixedSizeBinaryType): + cdef: + const CDecimal32Type* decimal32_type + + +cdef class Decimal64Type(FixedSizeBinaryType): + cdef: + const CDecimal64Type* decimal64_type + + cdef class Decimal128Type(FixedSizeBinaryType): cdef: const CDecimal128Type* decimal128_type @@ -430,6 +440,14 @@ cdef class FixedSizeBinaryArray(Array): pass +cdef class Decimal32Array(FixedSizeBinaryArray): + pass + + +cdef class Decimal64Array(FixedSizeBinaryArray): + pass + + cdef class Decimal128Array(FixedSizeBinaryArray): pass diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index 6b82eb6566896..2c92ecbfa7344 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -87,9 +87,9 @@ def set_cpu_count(int count): def 
is_threading_enabled() -> bool: """ - Returns True if threading is enabled in libarrow. + Returns True if threading is enabled in libarrow. - If it isn't enabled, then python shouldn't create any + If it isn't enabled, then python shouldn't create any threads either, because we're probably on a system where threading doesn't work (e.g. Emscripten). """ @@ -109,6 +109,8 @@ Type_INT64 = _Type_INT64 Type_HALF_FLOAT = _Type_HALF_FLOAT Type_FLOAT = _Type_FLOAT Type_DOUBLE = _Type_DOUBLE +Type_DECIMAL32 = _Type_DECIMAL32 +Type_DECIMAL64 = _Type_DECIMAL64 Type_DECIMAL128 = _Type_DECIMAL128 Type_DECIMAL256 = _Type_DECIMAL256 Type_DATE32 = _Type_DATE32 diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 5a930a41f0300..d0582f825b529 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -181,11 +181,10 @@ def get_column_metadata(column, name, arrow_type, field_name): ) ) - assert field_name is None or isinstance(field_name, str), \ - str(type(field_name)) + assert isinstance(field_name, str), str(type(field_name)) return { 'name': name, - 'field_name': 'None' if field_name is None else field_name, + 'field_name': field_name, 'pandas_type': logical_type, 'numpy_type': string_dtype, 'metadata': extra_metadata, @@ -193,7 +192,8 @@ def get_column_metadata(column, name, arrow_type, field_name): def construct_metadata(columns_to_convert, df, column_names, index_levels, - index_descriptors, preserve_index, types): + index_descriptors, preserve_index, types, + column_field_names=None): """Returns a dictionary containing enough metadata to reconstruct a pandas DataFrame as an Arrow Table, including index columns. 
@@ -201,6 +201,8 @@ def construct_metadata(columns_to_convert, df, column_names, index_levels, ---------- columns_to_convert : list[pd.Series] df : pandas.DataFrame + column_names : list[str | None] + column_field_names: list[str] index_levels : List[pd.Index] index_descriptors : List[Dict] preserve_index : bool @@ -210,6 +212,12 @@ def construct_metadata(columns_to_convert, df, column_names, index_levels, ------- dict """ + if column_field_names is None: + # backwards compatibility for external projects that are using + # `construct_metadata` such as cudf + # see https://github.com/apache/arrow/pull/44963#discussion_r1875771953 + column_field_names = [str(name) for name in column_names] + num_serialized_index_levels = len([descr for descr in index_descriptors if not isinstance(descr, dict)]) # Use ntypes instead of Python shorthand notation [:-len(x)] as [:-0] @@ -219,11 +227,11 @@ def construct_metadata(columns_to_convert, df, column_names, index_levels, index_types = types[ntypes - num_serialized_index_levels:] column_metadata = [] - for col, sanitized_name, arrow_type in zip(columns_to_convert, - column_names, df_types): - metadata = get_column_metadata(col, name=sanitized_name, + for col, name, field_name, arrow_type in zip(columns_to_convert, column_names, + column_field_names, df_types): + metadata = get_column_metadata(col, name=name, arrow_type=arrow_type, - field_name=sanitized_name) + field_name=field_name) column_metadata.append(metadata) index_column_metadata = [] @@ -368,6 +376,7 @@ def _get_columns_to_convert(df, schema, preserve_index, columns): return _get_columns_to_convert_given_schema(df, schema, preserve_index) column_names = [] + column_field_names = [] index_levels = ( _get_index_level_values(df.index) if preserve_index is not False @@ -388,6 +397,7 @@ def _get_columns_to_convert(df, schema, preserve_index, columns): columns_to_convert.append(col) convert_fields.append(None) column_names.append(name) + column_field_names.append(str(name)) 
index_descriptors = [] index_column_names = [] @@ -403,7 +413,7 @@ def _get_columns_to_convert(df, schema, preserve_index, columns): index_column_names.append(name) index_descriptors.append(descr) - all_names = column_names + index_column_names + all_names = column_field_names + index_column_names # all_names : all of the columns in the resulting table including the data # columns and serialized index columns @@ -416,8 +426,8 @@ def _get_columns_to_convert(df, schema, preserve_index, columns): # to be converted to Arrow format # columns_fields : specified column to use for coercion / casting # during serialization, if a Schema was provided - return (all_names, column_names, index_column_names, index_descriptors, - index_levels, columns_to_convert, convert_fields) + return (all_names, column_names, column_field_names, index_column_names, + index_descriptors, index_levels, columns_to_convert, convert_fields) def _get_columns_to_convert_given_schema(df, schema, preserve_index): @@ -462,8 +472,6 @@ def _get_columns_to_convert_given_schema(df, schema, preserve_index): "specified schema".format(name)) is_index = True - name = _column_name_to_strings(name) - if _pandas_api.is_sparse(col): raise TypeError( "Sparse pandas data (column {}) not supported.".format(name)) @@ -480,8 +488,8 @@ def _get_columns_to_convert_given_schema(df, schema, preserve_index): all_names = column_names + index_column_names - return (all_names, column_names, index_column_names, index_descriptors, - index_levels, columns_to_convert, convert_fields) + return (all_names, column_names, column_names, index_column_names, + index_descriptors, index_levels, columns_to_convert, convert_fields) def _get_index_level(df, name): @@ -539,6 +547,7 @@ def _resolve_columns_of_interest(df, schema, columns): def dataframe_to_types(df, preserve_index, columns=None): (all_names, column_names, + column_field_names, _, index_descriptors, index_columns, @@ -563,8 +572,8 @@ def dataframe_to_types(df, preserve_index, 
columns=None): types.append(type_) metadata = construct_metadata( - columns_to_convert, df, column_names, index_columns, - index_descriptors, preserve_index, types + columns_to_convert, df, column_names, index_columns, index_descriptors, + preserve_index, types, column_field_names=column_field_names ) return all_names, types, metadata @@ -574,6 +583,7 @@ def dataframe_to_arrays(df, schema, preserve_index, nthreads=1, columns=None, safe=True): (all_names, column_names, + column_field_names, index_column_names, index_descriptors, index_columns, @@ -642,13 +652,12 @@ def _can_definitely_zero_copy(arr): if schema is None: fields = [] for name, type_ in zip(all_names, types): - name = name if name is not None else 'None' fields.append(pa.field(name, type_)) schema = pa.schema(fields) pandas_metadata = construct_metadata( - columns_to_convert, df, column_names, index_columns, - index_descriptors, preserve_index, types + columns_to_convert, df, column_names, index_columns, index_descriptors, + preserve_index, types, column_field_names=column_field_names ) metadata = deepcopy(schema.metadata) if schema.metadata else dict() metadata.update(pandas_metadata) diff --git a/python/pyarrow/public-api.pxi b/python/pyarrow/public-api.pxi index 913e25e308254..d1fa1192debc3 100644 --- a/python/pyarrow/public-api.pxi +++ b/python/pyarrow/public-api.pxi @@ -111,6 +111,10 @@ cdef api object pyarrow_wrap_data_type( out = DurationType.__new__(DurationType) elif type.get().id() == _Type_FIXED_SIZE_BINARY: out = FixedSizeBinaryType.__new__(FixedSizeBinaryType) + elif type.get().id() == _Type_DECIMAL32: + out = Decimal32Type.__new__(Decimal32Type) + elif type.get().id() == _Type_DECIMAL64: + out = Decimal64Type.__new__(Decimal64Type) elif type.get().id() == _Type_DECIMAL128: out = Decimal128Type.__new__(Decimal128Type) elif type.get().id() == _Type_DECIMAL256: diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi index 2bfdcddf30736..2235cd0b981a6 100644 --- 
a/python/pyarrow/scalar.pxi +++ b/python/pyarrow/scalar.pxi @@ -336,6 +336,46 @@ cdef class DoubleScalar(Scalar): return sp.value if sp.is_valid else None +cdef class Decimal32Scalar(Scalar): + """ + Concrete class for decimal32 scalars. + """ + + def as_py(self): + """ + Return this value as a Python Decimal. + """ + cdef: + CDecimal32Scalar* sp = self.wrapped.get() + CDecimal32Type* dtype = sp.type.get() + if sp.is_valid: + return _pydecimal.Decimal( + frombytes(sp.value.ToString(dtype.scale())) + ) + else: + return None + + +cdef class Decimal64Scalar(Scalar): + """ + Concrete class for decimal64 scalars. + """ + + def as_py(self): + """ + Return this value as a Python Decimal. + """ + cdef: + CDecimal64Scalar* sp = self.wrapped.get() + CDecimal64Type* dtype = sp.type.get() + if sp.is_valid: + return _pydecimal.Decimal( + frombytes(sp.value.ToString(dtype.scale())) + ) + else: + return None + + cdef class Decimal128Scalar(Scalar): """ Concrete class for decimal128 scalars. @@ -1132,6 +1172,8 @@ cdef dict _scalar_classes = { _Type_HALF_FLOAT: HalfFloatScalar, _Type_FLOAT: FloatScalar, _Type_DOUBLE: DoubleScalar, + _Type_DECIMAL32: Decimal32Scalar, + _Type_DECIMAL64: Decimal64Scalar, _Type_DECIMAL128: Decimal128Scalar, _Type_DECIMAL256: Decimal256Scalar, _Type_DATE32: Date32Scalar, diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc index 110dab7d35538..10c4d0e16000b 100644 --- a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc +++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc @@ -1317,15 +1317,8 @@ struct ObjectWriterVisitor { out_values); } - Status Visit(const Decimal32Type& type) { - return Status::NotImplemented("Decimal32 type not yet implemented"); - } - - Status Visit(const Decimal64Type& type) { - return Status::NotImplemented("Decimal64 type not yet implemented"); - } - - Status Visit(const Decimal128Type& type) { + template + Status VisitDecimal(const DecimalT& type) { OwnedRef 
decimal; OwnedRef Decimal; RETURN_NOT_OK(internal::ImportModule("decimal", &decimal)); @@ -1333,7 +1326,7 @@ struct ObjectWriterVisitor { PyObject* decimal_constructor = Decimal.obj(); for (int c = 0; c < data.num_chunks(); c++) { - const auto& arr = checked_cast(*data.chunk(c)); + const auto& arr = checked_cast(*data.chunk(c)); for (int64_t i = 0; i < arr.length(); ++i) { if (arr.IsNull(i)) { @@ -1350,29 +1343,20 @@ struct ObjectWriterVisitor { return Status::OK(); } - Status Visit(const Decimal256Type& type) { - OwnedRef decimal; - OwnedRef Decimal; - RETURN_NOT_OK(internal::ImportModule("decimal", &decimal)); - RETURN_NOT_OK(internal::ImportFromModule(decimal.obj(), "Decimal", &Decimal)); - PyObject* decimal_constructor = Decimal.obj(); + Status Visit(const Decimal32Type& type) { + return VisitDecimal(type); + } - for (int c = 0; c < data.num_chunks(); c++) { - const auto& arr = checked_cast(*data.chunk(c)); + Status Visit(const Decimal64Type& type) { + return VisitDecimal(type); + } - for (int64_t i = 0; i < arr.length(); ++i) { - if (arr.IsNull(i)) { - Py_INCREF(Py_None); - *out_values++ = Py_None; - } else { - *out_values++ = - internal::DecimalFromString(decimal_constructor, arr.FormatValue(i)); - RETURN_IF_PYERROR(); - } - } - } + Status Visit(const Decimal128Type& type) { + return VisitDecimal(type); + } - return Status::OK(); + Status Visit(const Decimal256Type& type) { + return VisitDecimal(type); } template diff --git a/python/pyarrow/src/arrow/python/decimal.cc b/python/pyarrow/src/arrow/python/decimal.cc index 0c00fcfaa8e59..e6caff2201ddc 100644 --- a/python/pyarrow/src/arrow/python/decimal.cc +++ b/python/pyarrow/src/arrow/python/decimal.cc @@ -164,6 +164,24 @@ Status InternalDecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, } // namespace +Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, + Decimal32* out) { + return InternalDecimalFromPythonDecimal(python_decimal, arrow_type, out); +} + +Status 
DecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, Decimal32* out) { + return InternalDecimalFromPyObject(obj, arrow_type, out); +} + +Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, + Decimal64* out) { + return InternalDecimalFromPythonDecimal(python_decimal, arrow_type, out); +} + +Status DecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, Decimal64* out) { + return InternalDecimalFromPyObject(obj, arrow_type, out); +} + Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, Decimal128* out) { return InternalDecimalFromPythonDecimal(python_decimal, arrow_type, out); diff --git a/python/pyarrow/src/arrow/python/decimal.h b/python/pyarrow/src/arrow/python/decimal.h index 1187037aed29e..83ded0b82b922 100644 --- a/python/pyarrow/src/arrow/python/decimal.h +++ b/python/pyarrow/src/arrow/python/decimal.h @@ -56,6 +56,40 @@ ARROW_PYTHON_EXPORT PyObject* DecimalFromString(PyObject* decimal_constructor, const std::string& decimal_string); +// \brief Convert a Python decimal to an Arrow Decimal128 object +// \param[in] python_decimal A Python decimal.Decimal instance +// \param[in] arrow_type An instance of arrow::DecimalType +// \param[out] out A pointer to a Decimal128 +// \return The status of the operation +ARROW_PYTHON_EXPORT +Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, + Decimal32* out); + +// \brief Convert a Python object to an Arrow Decimal128 object +// \param[in] python_decimal A Python int or decimal.Decimal instance +// \param[in] arrow_type An instance of arrow::DecimalType +// \param[out] out A pointer to a Decimal128 +// \return The status of the operation +ARROW_PYTHON_EXPORT +Status DecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, Decimal32* out); + +// \brief Convert a Python decimal to an Arrow Decimal128 object +// \param[in] python_decimal A Python decimal.Decimal instance +// \param[in] 
arrow_type An instance of arrow::DecimalType +// \param[out] out A pointer to a Decimal128 +// \return The status of the operation +ARROW_PYTHON_EXPORT +Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, + Decimal64* out); + +// \brief Convert a Python object to an Arrow Decimal128 object +// \param[in] python_decimal A Python int or decimal.Decimal instance +// \param[in] arrow_type An instance of arrow::DecimalType +// \param[out] out A pointer to a Decimal128 +// \return The status of the operation +ARROW_PYTHON_EXPORT +Status DecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, Decimal64* out); + // \brief Convert a Python decimal to an Arrow Decimal128 object // \param[in] python_decimal A Python decimal.Decimal instance // \param[in] arrow_type An instance of arrow::DecimalType diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc index e7195e99072b0..709338b4e7756 100644 --- a/python/pyarrow/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc @@ -260,6 +260,18 @@ class PyValue { return value; } + static Result Convert(const Decimal32Type* type, const O&, I obj) { + Decimal32 value; + RETURN_NOT_OK(internal::DecimalFromPyObject(obj, *type, &value)); + return value; + } + + static Result Convert(const Decimal64Type* type, const O&, I obj) { + Decimal64 value; + RETURN_NOT_OK(internal::DecimalFromPyObject(obj, *type, &value)); + return value; + } + static Result Convert(const Decimal128Type* type, const O&, I obj) { Decimal128 value; RETURN_NOT_OK(internal::DecimalFromPyObject(obj, *type, &value)); diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index 7a1b31a4d9d77..450cce74f1d43 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -92,6 +92,16 @@ pa.float32(), pa.float64() ]) +decimal32_type = st.builds( + pa.decimal32, + 
precision=st.integers(min_value=1, max_value=9), + scale=st.integers(min_value=1, max_value=9) +) +decimal64_type = st.builds( + pa.decimal64, + precision=st.integers(min_value=1, max_value=18), + scale=st.integers(min_value=1, max_value=18) +) decimal128_type = st.builds( pa.decimal128, precision=st.integers(min_value=1, max_value=38), diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index c16d2f9aacf74..e6fcd6149ee04 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1900,7 +1900,9 @@ def test_fsl_to_fsl_cast(value_type): FloatToDecimalCase = namedtuple('FloatToDecimalCase', ('precision', 'scale', 'float_val')) -decimal_type_traits = [DecimalTypeTraits('decimal128', pa.decimal128, 38), +decimal_type_traits = [DecimalTypeTraits('decimal32', pa.decimal32, 9), + DecimalTypeTraits('decimal64', pa.decimal64, 18), + DecimalTypeTraits('decimal128', pa.decimal128, 38), DecimalTypeTraits('decimal256', pa.decimal256, 76)] @@ -1991,7 +1993,7 @@ def check_cast_float_to_decimal(float_ty, float_val, decimal_ty, decimal_ctx, # very high precisions as rounding errors can accumulate in # the iterative algorithm (GH-35576). 
diff_digits = abs(actual - expected) * 10**decimal_ty.scale - limit = 2 if decimal_ty.precision < max_precision - 1 else 4 + limit = 2 if decimal_ty.precision < max_precision - 2 else 4 assert diff_digits <= limit, ( f"float_val = {float_val!r}, precision={decimal_ty.precision}, " f"expected = {expected!r}, actual = {actual!r}, " @@ -2041,6 +2043,11 @@ def test_cast_float_to_decimal_random(float_ty, decimal_traits): mantissa_digits = math.floor(math.log10(2**mantissa_bits)) max_precision = decimal_traits.max_precision + # For example, decimal32 <-> float64 + if max_precision < mantissa_digits: + mantissa_bits = math.floor(math.log2(10**max_precision)) + mantissa_digits = math.floor(math.log10(2**mantissa_bits)) + with decimal.localcontext() as ctx: precision = mantissa_digits ctx.prec = precision @@ -3369,9 +3376,10 @@ def create_sample_expressions(): g = pc.scalar(pa.scalar(1)) h = pc.scalar(np.int64(2)) j = pc.scalar(False) + k = pc.scalar(0) # These expression consist entirely of literals - literal_exprs = [a, b, c, d, e, g, h, j] + literal_exprs = [a, b, c, d, e, g, h, j, k] # These expressions include at least one function call exprs_with_call = [a == b, a != b, a > b, c & j, c | j, ~c, d.is_valid(), @@ -3380,6 +3388,8 @@ def create_sample_expressions(): pc.multiply(a, b), pc.power(a, a), pc.sqrt(a), pc.exp(b), pc.cos(b), pc.sin(b), pc.tan(b), pc.acos(b), pc.atan(b), pc.asin(b), pc.atan2(b, b), + pc.sinh(a), pc.cosh(a), pc.tanh(a), + pc.asinh(a), pc.acosh(b), pc.atanh(k), pc.abs(b), pc.sign(a), pc.bit_wise_not(a), pc.bit_wise_and(a, a), pc.bit_wise_or(a, a), pc.bit_wise_xor(a, a), pc.is_nan(b), pc.is_finite(b), diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index c3589877e6423..07286125c4cf6 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -1592,7 +1592,7 @@ def test_sequence_mixed_types_with_specified_type_fails(): def 
test_sequence_decimal(): data = [decimal.Decimal('1234.183'), decimal.Decimal('8094.234')] - for type in [pa.decimal128, pa.decimal256]: + for type in [pa.decimal32, pa.decimal64, pa.decimal128, pa.decimal256]: arr = pa.array(data, type=type(precision=7, scale=3)) assert arr.to_pylist() == data @@ -1601,28 +1601,28 @@ def test_sequence_decimal_different_precisions(): data = [ decimal.Decimal('1234234983.183'), decimal.Decimal('80943244.234') ] - for type in [pa.decimal128, pa.decimal256]: + for type in [pa.decimal64, pa.decimal128, pa.decimal256]: arr = pa.array(data, type=type(precision=13, scale=3)) assert arr.to_pylist() == data def test_sequence_decimal_no_scale(): data = [decimal.Decimal('1234234983'), decimal.Decimal('8094324')] - for type in [pa.decimal128, pa.decimal256]: + for type in [pa.decimal64, pa.decimal128, pa.decimal256]: arr = pa.array(data, type=type(precision=10)) assert arr.to_pylist() == data def test_sequence_decimal_negative(): data = [decimal.Decimal('-1234.234983'), decimal.Decimal('-8.094324')] - for type in [pa.decimal128, pa.decimal256]: + for type in [pa.decimal64, pa.decimal128, pa.decimal256]: arr = pa.array(data, type=type(precision=10, scale=6)) assert arr.to_pylist() == data def test_sequence_decimal_no_whole_part(): data = [decimal.Decimal('-.4234983'), decimal.Decimal('.0103943')] - for type in [pa.decimal128, pa.decimal256]: + for type in [pa.decimal32, pa.decimal64, pa.decimal128, pa.decimal256]: arr = pa.array(data, type=type(precision=7, scale=7)) assert arr.to_pylist() == data diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py index 3bb4440e89750..978c92307a69e 100644 --- a/python/pyarrow/tests/test_json.py +++ b/python/pyarrow/tests/test_json.py @@ -256,7 +256,9 @@ def test_explicit_schema_decimal(self): expected = { 'a': [Decimal("1"), Decimal("1.45"), Decimal("-23.456"), None], } - for type_factory in (pa.decimal128, pa.decimal256): + + decimal_types = (pa.decimal32, pa.decimal64, 
pa.decimal128, pa.decimal256) + for type_factory in decimal_types: schema = pa.schema([('a', type_factory(9, 4))]) opts = ParseOptions(explicit_schema=schema) table = self.read_bytes(rows, parse_options=opts) diff --git a/python/pyarrow/tests/test_memory.py b/python/pyarrow/tests/test_memory.py index b1eef176665af..6ed999db42cee 100644 --- a/python/pyarrow/tests/test_memory.py +++ b/python/pyarrow/tests/test_memory.py @@ -17,7 +17,6 @@ import contextlib import os -import platform import signal import subprocess import sys @@ -30,15 +29,19 @@ pytestmark = pytest.mark.processes possible_backends = ["system", "jemalloc", "mimalloc"] +# Backends which are expected to be present in all builds of PyArrow, +# except if the user manually recompiled Arrow C++. +mandatory_backends = ["system", "mimalloc"] -should_have_jemalloc = (sys.platform == "linux" and platform.machine() == 'x86_64') -should_have_mimalloc = sys.platform == "win32" + +def backend_factory(backend_name): + return getattr(pa, f"{backend_name}_memory_pool") def supported_factories(): yield pa.default_memory_pool - for backend in pa.supported_memory_backends(): - yield getattr(pa, f"{backend}_memory_pool") + for backend_name in pa.supported_memory_backends(): + yield backend_factory(backend_name) @contextlib.contextmanager @@ -149,17 +152,12 @@ def check_env_var(name, expected, *, expect_warning=False): def test_env_var(): - check_env_var("system", ["system"]) - if should_have_jemalloc: - check_env_var("jemalloc", ["jemalloc"]) - if should_have_mimalloc: - check_env_var("mimalloc", ["mimalloc"]) + for backend_name in mandatory_backends: + check_env_var(backend_name, [backend_name]) check_env_var("nonexistent", possible_backends, expect_warning=True) -def test_specific_memory_pools(): - specific_pools = set() - +def test_memory_pool_factories(): def check(factory, name, *, can_fail=False): if can_fail: try: @@ -169,23 +167,16 @@ def check(factory, name, *, can_fail=False): else: pool = factory() assert 
pool.backend_name == name - specific_pools.add(pool) - check(pa.system_memory_pool, "system") - check(pa.jemalloc_memory_pool, "jemalloc", - can_fail=not should_have_jemalloc) - check(pa.mimalloc_memory_pool, "mimalloc", - can_fail=not should_have_mimalloc) + for backend_name in possible_backends: + check(backend_factory(backend_name), backend_name, + can_fail=backend_name not in mandatory_backends) def test_supported_memory_backends(): backends = pa.supported_memory_backends() - - assert "system" in backends - if should_have_jemalloc: - assert "jemalloc" in backends - if should_have_mimalloc: - assert "mimalloc" in backends + assert set(backends) >= set(mandatory_backends) + assert set(backends) <= set(possible_backends) def run_debug_memory_pool(pool_factory, env_value): @@ -246,6 +237,9 @@ def test_debug_memory_pool_warn(pool_factory): def check_debug_memory_pool_disabled(pool_factory, env_value, msg): + if sys.maxsize < 2**32: + # GH-45011: mimalloc may print warnings in this test on 32-bit Linux, ignore. + pytest.skip("Test may fail on 32-bit platforms") res = run_debug_memory_pool(pool_factory.__name__, env_value) # The subprocess either returned successfully or was killed by a signal # (due to writing out of bounds), depending on the underlying allocator. diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index 0b2055018f695..dbba7852190f4 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -165,6 +165,8 @@ def test_set_timezone_db_path_non_windows(): pa.Time32Type, pa.Time64Type, pa.TimestampType, + pa.Decimal32Type, + pa.Decimal64Type, pa.Decimal128Type, pa.Decimal256Type, pa.DictionaryType, diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py index 1b467d523304c..b0f9e813b103d 100644 --- a/python/pyarrow/tests/test_orc.py +++ b/python/pyarrow/tests/test_orc.py @@ -15,9 +15,14 @@ # specific language governing permissions and limitations # under the License. 
-import pytest import decimal import datetime +from pathlib import Path +import shutil +import subprocess +import sys + +import pytest import pyarrow as pa from pyarrow import fs @@ -140,6 +145,57 @@ def test_example_using_json(filename, datadir): check_example_file(path, table, need_fix=True) +def test_timezone_database_absent(datadir): + # Example file relies on the timezone "US/Pacific". It should gracefully + # fail, not crash, if the timezone database is not found. + path = datadir / 'TestOrcFile.testDate1900.orc' + code = f"""if 1: + import os + os.environ['TZDIR'] = '/tmp/non_existent' + + from pyarrow import orc + try: + orc_file = orc.ORCFile({str(path)!r}) + orc_file.read() + except Exception as e: + assert "time zone database" in str(e).lower(), e + else: + assert False, "Should have raised exception" + """ + subprocess.run([sys.executable, "-c", code], check=True) + + +def test_timezone_absent(datadir, tmpdir): + # Example file relies on the timezone "US/Pacific". It should gracefully + # fail, not crash, if the timezone database is present but the timezone + # is not found (GH-40633). 
+ source_tzdir = Path('/usr/share/zoneinfo') + if not source_tzdir.exists(): + pytest.skip(f"Test needs timezone database in {source_tzdir}") + tzdir = Path(tmpdir / 'zoneinfo') + try: + shutil.copytree(source_tzdir, tzdir, symlinks=True) + except OSError as e: + pytest.skip(f"Failed to copy timezone database: {e}") + (tzdir / 'US' / 'Pacific').unlink(missing_ok=True) + + path = datadir / 'TestOrcFile.testDate1900.orc' + code = f"""if 1: + import os + os.environ['TZDIR'] = {str(tzdir)!r} + + from pyarrow import orc + orc_file = orc.ORCFile({str(path)!r}) + try: + orc_file.read() + except Exception as e: + assert "zoneinfo/US/Pacific" in str(e), e + else: + assert False, "Should have raised exception" + """ + subprocess.run([sys.executable, "-c", code], check=True) + + def test_orcfile_empty(datadir): from pyarrow import orc diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index bdcb6c2b42d78..b6d36787fbd37 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -615,6 +615,8 @@ def test_type_schema_pickling(pickle_module): pa.date64(), pa.timestamp('ms'), pa.timestamp('ns'), + pa.decimal32(9, 3), + pa.decimal64(11, 4), pa.decimal128(12, 2), pa.decimal256(76, 38), pa.field('a', 'string', metadata={b'foo': b'bar'}), diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index de439b6bb8cd7..926de46318036 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -57,6 +57,8 @@ def get_many_types(): pa.float16(), pa.float32(), pa.float64(), + pa.decimal32(9, 4), + pa.decimal64(18, 4), pa.decimal128(19, 4), pa.decimal256(76, 38), pa.string(), @@ -139,18 +141,38 @@ def test_null_field_may_not_be_non_nullable(): def test_is_decimal(): + decimal32 = pa.decimal32(9, 4) + decimal64 = pa.decimal64(18, 4) decimal128 = pa.decimal128(19, 4) decimal256 = pa.decimal256(76, 38) int32 = pa.int32() + assert types.is_decimal(decimal32) + assert 
types.is_decimal(decimal64) assert types.is_decimal(decimal128) assert types.is_decimal(decimal256) assert not types.is_decimal(int32) + assert types.is_decimal32(decimal32) + assert not types.is_decimal32(decimal64) + assert not types.is_decimal32(decimal128) + assert not types.is_decimal32(decimal256) + assert not types.is_decimal32(int32) + + assert not types.is_decimal64(decimal32) + assert types.is_decimal64(decimal64) + assert not types.is_decimal64(decimal128) + assert not types.is_decimal64(decimal256) + assert not types.is_decimal64(int32) + + assert not types.is_decimal128(decimal32) + assert not types.is_decimal128(decimal64) assert types.is_decimal128(decimal128) assert not types.is_decimal128(decimal256) assert not types.is_decimal128(int32) + assert not types.is_decimal256(decimal32) + assert not types.is_decimal256(decimal64) assert not types.is_decimal256(decimal128) assert types.is_decimal256(decimal256) assert not types.is_decimal256(int32) @@ -970,6 +992,8 @@ def test_bit_and_byte_width(): (pa.float16(), 16, 2), (pa.timestamp('s'), 64, 8), (pa.date32(), 32, 4), + (pa.decimal32(9, 4), 32, 4), + (pa.decimal64(18, 4), 64, 8), (pa.decimal128(19, 4), 128, 16), (pa.decimal256(76, 38), 256, 32), (pa.binary(42), 42 * 8, 42), @@ -1002,6 +1026,14 @@ def test_fixed_size_binary_byte_width(): def test_decimal_properties(): + ty = pa.decimal32(9, 4) + assert ty.byte_width == 4 + assert ty.precision == 9 + assert ty.scale == 4 + ty = pa.decimal64(18, 4) + assert ty.byte_width == 8 + assert ty.precision == 18 + assert ty.scale == 4 ty = pa.decimal128(19, 4) assert ty.byte_width == 16 assert ty.precision == 19 @@ -1013,6 +1045,18 @@ def test_decimal_properties(): def test_decimal_overflow(): + pa.decimal32(1, 0) + pa.decimal32(9, 0) + for i in (0, -1, 10): + with pytest.raises(ValueError): + pa.decimal32(i, 0) + + pa.decimal64(1, 0) + pa.decimal64(18, 0) + for i in (0, -1, 19): + with pytest.raises(ValueError): + pa.decimal64(i, 0) + pa.decimal128(1, 0) 
pa.decimal128(38, 0) for i in (0, -1, 39): diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 0d6787cf2a049..3caf068a4c9b1 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -73,7 +73,10 @@ def _get_pandas_type_map(): _Type_STRING: np.object_, _Type_LIST: np.object_, _Type_MAP: np.object_, + _Type_DECIMAL32: np.object_, + _Type_DECIMAL64: np.object_, _Type_DECIMAL128: np.object_, + _Type_DECIMAL256: np.object_, }) return _pandas_type_map @@ -1417,6 +1420,104 @@ cdef class FixedSizeBinaryType(DataType): return binary, (self.byte_width,) +cdef class Decimal32Type(FixedSizeBinaryType): + """ + Concrete class for decimal32 data types. + + Examples + -------- + Create an instance of decimal32 type: + + >>> import pyarrow as pa + >>> pa.decimal32(5, 2) + Decimal32Type(decimal32(5, 2)) + """ + + cdef void init(self, const shared_ptr[CDataType]& type) except *: + FixedSizeBinaryType.init(self, type) + self.decimal32_type = type.get() + + def __reduce__(self): + return decimal32, (self.precision, self.scale) + + @property + def precision(self): + """ + The decimal precision, in number of decimal digits (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal32(5, 2) + >>> t.precision + 5 + """ + return self.decimal32_type.precision() + + @property + def scale(self): + """ + The decimal scale (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal32(5, 2) + >>> t.scale + 2 + """ + return self.decimal32_type.scale() + + +cdef class Decimal64Type(FixedSizeBinaryType): + """ + Concrete class for decimal64 data types. 
+ + Examples + -------- + Create an instance of decimal64 type: + + >>> import pyarrow as pa + >>> pa.decimal64(5, 2) + Decimal64Type(decimal64(5, 2)) + """ + + cdef void init(self, const shared_ptr[CDataType]& type) except *: + FixedSizeBinaryType.init(self, type) + self.decimal64_type = type.get() + + def __reduce__(self): + return decimal64, (self.precision, self.scale) + + @property + def precision(self): + """ + The decimal precision, in number of decimal digits (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal64(5, 2) + >>> t.precision + 5 + """ + return self.decimal64_type.precision() + + @property + def scale(self): + """ + The decimal scale (an integer). + + Examples + -------- + >>> import pyarrow as pa + >>> t = pa.decimal64(5, 2) + >>> t.scale + 2 + """ + return self.decimal64_type.scale() + + cdef class Decimal128Type(FixedSizeBinaryType): """ Concrete class for decimal128 data types. @@ -2549,7 +2650,11 @@ cdef class Field(_Weakrefable): @property def metadata(self): """ - The field metadata. + The field metadata (if any is set). + + Returns + ------- + metadata : dict or None Examples -------- @@ -2982,11 +3087,11 @@ cdef class Schema(_Weakrefable): @property def metadata(self): """ - The schema's metadata. + The schema's metadata (if any is set). Returns ------- - metadata: dict + metadata: dict or None Examples -------- @@ -4496,6 +4601,116 @@ def float64(): return primitive_type(_Type_DOUBLE) +cpdef DataType decimal32(int precision, int scale=0): + """ + Create decimal type with precision and scale and 32-bit width. + + Arrow decimals are fixed-point decimal numbers encoded as a scaled + integer. The precision is the number of significant digits that the + decimal type can represent; the scale is the number of digits after + the decimal point (note the scale can be negative). 
+ + As an example, ``decimal32(7, 3)`` can exactly represent the numbers + 1234.567 and -1234.567 (encoded internally as the 32-bit integers + 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. + + ``decimal32(5, -3)`` can exactly represent the number 12345000 + (encoded internally as the 32-bit integer 12345), but neither + 123450000 nor 1234500. + + If you need a precision higher than 9 significant digits, consider + using ``decimal64``, ``decimal128``, or ``decimal256``. + + Parameters + ---------- + precision : int + Must be between 1 and 9 + scale : int + + Returns + ------- + decimal_type : Decimal32Type + + Examples + -------- + Create an instance of decimal type: + + >>> import pyarrow as pa + >>> pa.decimal32(5, 2) + Decimal32Type(decimal32(5, 2)) + + Create an array with decimal type: + + >>> import decimal + >>> a = decimal.Decimal('123.45') + >>> pa.array([a], pa.decimal32(5, 2)) + + [ + 123.45 + ] + """ + cdef shared_ptr[CDataType] decimal_type + if precision < 1 or precision > 9: + raise ValueError("precision should be between 1 and 9") + decimal_type.reset(new CDecimal32Type(precision, scale)) + return pyarrow_wrap_data_type(decimal_type) + + +cpdef DataType decimal64(int precision, int scale=0): + """ + Create decimal type with precision and scale and 64-bit width. + + Arrow decimals are fixed-point decimal numbers encoded as a scaled + integer. The precision is the number of significant digits that the + decimal type can represent; the scale is the number of digits after + the decimal point (note the scale can be negative). + + As an example, ``decimal64(7, 3)`` can exactly represent the numbers + 1234.567 and -1234.567 (encoded internally as the 64-bit integers + 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567. + + ``decimal64(5, -3)`` can exactly represent the number 12345000 + (encoded internally as the 64-bit integer 12345), but neither + 123450000 nor 1234500. 
+ + If you need a precision higher than 18 significant digits, consider + using ``decimal128``, or ``decimal256``. + + Parameters + ---------- + precision : int + Must be between 1 and 18 + scale : int + + Returns + ------- + decimal_type : Decimal64Type + + Examples + -------- + Create an instance of decimal type: + + >>> import pyarrow as pa + >>> pa.decimal64(5, 2) + Decimal64Type(decimal64(5, 2)) + + Create an array with decimal type: + + >>> import decimal + >>> a = decimal.Decimal('123.45') + >>> pa.array([a], pa.decimal64(5, 2)) + + [ + 123.45 + ] + """ + cdef shared_ptr[CDataType] decimal_type + if precision < 1 or precision > 18: + raise ValueError("precision should be between 1 and 18") + decimal_type.reset(new CDecimal64Type(precision, scale)) + return pyarrow_wrap_data_type(decimal_type) + + cpdef DataType decimal128(int precision, int scale=0): """ Create decimal type with precision and scale and 128-bit width. diff --git a/python/pyarrow/types.py b/python/pyarrow/types.py index 66b1ec33953a9..2bb5cfcf8b739 100644 --- a/python/pyarrow/types.py +++ b/python/pyarrow/types.py @@ -32,7 +32,8 @@ lib.Type_UINT64} _INTEGER_TYPES = _SIGNED_INTEGER_TYPES | _UNSIGNED_INTEGER_TYPES _FLOATING_TYPES = {lib.Type_HALF_FLOAT, lib.Type_FLOAT, lib.Type_DOUBLE} -_DECIMAL_TYPES = {lib.Type_DECIMAL128, lib.Type_DECIMAL256} +_DECIMAL_TYPES = {lib.Type_DECIMAL32, lib.Type_DECIMAL64, lib.Type_DECIMAL128, + lib.Type_DECIMAL256} _DATE_TYPES = {lib.Type_DATE32, lib.Type_DATE64} _TIME_TYPES = {lib.Type_TIME32, lib.Type_TIME64} _INTERVAL_TYPES = {lib.Type_INTERVAL_MONTH_DAY_NANO} @@ -289,6 +290,16 @@ def is_decimal(t): return t.id in _DECIMAL_TYPES +@doc(is_null, datatype="decimal32") +def is_decimal32(t): + return t.id == lib.Type_DECIMAL32 + + +@doc(is_null, datatype="decimal64") +def is_decimal64(t): + return t.id == lib.Type_DECIMAL64 + + @doc(is_null, datatype="decimal128") def is_decimal128(t): return t.id == lib.Type_DECIMAL128 diff --git a/r/NEWS.md b/r/NEWS.md index 
7132c5b244914..83d09157b9038 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -21,8 +21,6 @@ # arrow 18.1.0 -# arrow 18.0.0 - ## Minor improvements and fixes * Fix bindings to allow filtering a factor column in a Dataset using `%in%` (#43446) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 4c3b78e085c6e..4b54697d4bd90 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -62,7 +62,10 @@ supported_dplyr_methods <- list( relocate = NULL, compute = NULL, collapse = NULL, - distinct = "`.keep_all = TRUE` not supported", + distinct = c( + "`.keep_all = TRUE` returns a non-missing value if present,", + "only returning missing values if all are missing." + ), left_join = "the `copy` argument is ignored", right_join = "the `copy` argument is ignored", inner_join = "the `copy` argument is ignored", diff --git a/r/R/dplyr-distinct.R b/r/R/dplyr-distinct.R index 49948caa011e2..95fb837bd5d00 100644 --- a/r/R/dplyr-distinct.R +++ b/r/R/dplyr-distinct.R @@ -18,12 +18,6 @@ # The following S3 methods are registered on load if dplyr is present distinct.arrow_dplyr_query <- function(.data, ..., .keep_all = FALSE) { - if (.keep_all == TRUE) { - # TODO(ARROW-14045): the function is called "hash_one" (from ARROW-13993) - # May need to call it: `summarize(x = one(x), ...)` for x in non-group cols - arrow_not_supported("`distinct()` with `.keep_all = TRUE`") - } - original_gv <- dplyr::group_vars(.data) if (length(quos(...))) { # group_by() calls mutate() if there are any expressions in ... @@ -33,11 +27,28 @@ distinct.arrow_dplyr_query <- function(.data, ..., .keep_all = FALSE) { .data <- dplyr::group_by(.data, !!!syms(names(.data))) } - out <- dplyr::summarize(.data, .groups = "drop") + if (isTRUE(.keep_all)) { + # Note: in regular dplyr, `.keep_all = TRUE` returns the first row's value. + # However, Acero's `hash_one` function prefers returning non-null values. + # So, you'll get the same shape of data, but the values may differ. 
+ keeps <- names(.data)[!(names(.data) %in% .data$group_by_vars)] + exprs <- lapply(keeps, function(x) call2("one", sym(x))) + names(exprs) <- keeps + } else { + exprs <- list() + } + + out <- dplyr::summarize(.data, !!!exprs, .groups = "drop") + # distinct() doesn't modify group by vars, so restore the original ones if (length(original_gv)) { out$group_by_vars <- original_gv } + if (isTRUE(.keep_all)) { + # Also ensure the column order matches the original + # summarize() will put the group_by_vars first + out <- dplyr::select(out, !!!syms(names(.data))) + } out } diff --git a/r/R/dplyr-funcs-agg.R b/r/R/dplyr-funcs-agg.R index 340ebe7adc90f..275fca36542bf 100644 --- a/r/R/dplyr-funcs-agg.R +++ b/r/R/dplyr-funcs-agg.R @@ -150,6 +150,13 @@ register_bindings_aggregate <- function() { options = list(skip_nulls = na.rm, min_count = 0L) ) }) + register_binding("arrow::one", function(...) { + set_agg( + fun = "one", + data = ensure_one_arg(list2(...), "one"), + options = list() + ) + }) } set_agg <- function(...) 
{ diff --git a/r/tests/testthat/test-dplyr-distinct.R b/r/tests/testthat/test-dplyr-distinct.R index 4c7f8894cd4e4..e4d789e8e9146 100644 --- a/r/tests/testthat/test-dplyr-distinct.R +++ b/r/tests/testthat/test-dplyr-distinct.R @@ -26,11 +26,8 @@ test_that("distinct()", { compare_dplyr_binding( .input %>% distinct(some_grouping, lgl) %>% - collect() %>% - # GH-14947: column output order changed in dplyr 1.1.0, so we need - # to make the column order explicit until dplyr 1.1.0 is on CRAN - select(some_grouping, lgl) %>% - arrange(some_grouping, lgl), + arrange(some_grouping, lgl) %>% + collect(), tbl ) }) @@ -60,11 +57,8 @@ test_that("distinct() can retain groups", { .input %>% group_by(some_grouping, int) %>% distinct(lgl) %>% - collect() %>% - # GH-14947: column output order changed in dplyr 1.1.0, so we need - # to make the column order explicit until dplyr 1.1.0 is on CRAN - select(some_grouping, int, lgl) %>% - arrange(lgl, int), + arrange(lgl, int) %>% + collect(), tbl ) @@ -73,11 +67,8 @@ test_that("distinct() can retain groups", { .input %>% group_by(y = some_grouping, int) %>% distinct(x = lgl) %>% - collect() %>% - # GH-14947: column output order changed in dplyr 1.1.0, so we need - # to make the column order explicit until dplyr 1.1.0 is on CRAN - select(y, int, x) %>% - arrange(int), + arrange(int) %>% + collect(), tbl ) }) @@ -95,11 +86,8 @@ test_that("distinct() can contain expressions", { .input %>% group_by(lgl, int) %>% distinct(x = some_grouping + 1) %>% - collect() %>% - # GH-14947: column output order changed in dplyr 1.1.0, so we need - # to make the column order explicit until dplyr 1.1.0 is on CRAN - select(lgl, int, x) %>% - arrange(int), + arrange(int) %>% + collect(), tbl ) }) @@ -115,12 +103,57 @@ test_that("across() works in distinct()", { }) test_that("distinct() can return all columns", { - skip("ARROW-14045") - compare_dplyr_binding( - .input %>% - distinct(lgl, .keep_all = TRUE) %>% - collect() %>% - arrange(int), - tbl - ) + # 
hash_one prefers to keep non-null values, which is different from .keep_all in dplyr + # so we can't compare the result directly + expected <- tbl %>% + # Drop factor because of #44661: + # NotImplemented: Function 'hash_one' has no kernel matching input types + # (dictionary, uint8) + select(-fct) %>% + distinct(lgl, .keep_all = TRUE) %>% + arrange(int) + + with_table <- tbl %>% + arrow_table() %>% + select(-fct) %>% + distinct(lgl, .keep_all = TRUE) %>% + arrange(int) %>% + collect() + + expect_identical(dim(with_table), dim(expected)) + expect_identical(names(with_table), names(expected)) + + # Test with some mutation in there + expected <- tbl %>% + select(-fct) %>% + distinct(lgl, bigger = int * 10L, .keep_all = TRUE) %>% + arrange(int) + + with_table <- tbl %>% + arrow_table() %>% + select(-fct) %>% + distinct(lgl, bigger = int * 10, .keep_all = TRUE) %>% + arrange(int) %>% + collect() + + expect_identical(dim(with_table), dim(expected)) + expect_identical(names(with_table), names(expected)) + expect_identical(with_table$bigger, expected$bigger) + + # Mutation that overwrites + expected <- tbl %>% + select(-fct) %>% + distinct(lgl, int = int * 10L, .keep_all = TRUE) %>% + arrange(int) + + with_table <- tbl %>% + arrow_table() %>% + select(-fct) %>% + distinct(lgl, int = int * 10, .keep_all = TRUE) %>% + arrange(int) %>% + collect() + + expect_identical(dim(with_table), dim(expected)) + expect_identical(names(with_table), names(expected)) + expect_identical(with_table$int, expected$int) }) diff --git a/r/vignettes/install.Rmd b/r/vignettes/install.Rmd index 07d858c6cd62a..69780bd64dfb2 100644 --- a/r/vignettes/install.Rmd +++ b/r/vignettes/install.Rmd @@ -433,7 +433,7 @@ compatible with the current version of the R package. Please check the "Known installation issues" below to see if any apply, and if none apply, set the environment variable `ARROW_R_DEV=TRUE` for more verbose output and try installing again. 
Then, -please [report an issue](https://issues.apache.org/jira/projects/ARROW/issues) +please [report an issue](https://github.com/apache/arrow/issues/new/choose) and include the full installation output. ### Using system libraries @@ -465,7 +465,7 @@ update the libarrow system packages. If the R package finds and downloads a prebuilt binary of libarrow, but then the arrow package can't be loaded, perhaps with "undefined symbols" errors, -please [report an issue](https://issues.apache.org/jira/projects/ARROW/issues). +please [report an issue](https://github.com/apache/arrow/issues/new/choose). This is likely a compiler mismatch and may be resolvable by setting some environment variables to instruct R to compile the packages to match libarrow. @@ -475,7 +475,7 @@ instead of downloading the prebuilt binary. That should guarantee that the compiler settings match. If a prebuilt libarrow binary wasn't found for your operating system but you think it should have been, -please [report an issue](https://issues.apache.org/jira/projects/ARROW/issues) and share the console output. +please [report an issue](https://github.com/apache/arrow/issues/new/choose) and share the console output. You may also set the environment variable `ARROW_R_DEV=TRUE` for additional debug messages. @@ -485,7 +485,7 @@ If building libarrow from source fails, check the error message. (If you don't see an error message, only the `----- NOTE -----`, set the environment variable `ARROW_R_DEV=TRUE` to increase verbosity and retry installation.) The install script should work everywhere, so if libarrow fails to compile, -please [report an issue](https://issues.apache.org/jira/projects/ARROW/issues) +please [report an issue](https://github.com/apache/arrow/issues/new/choose) so that we can improve the script. 
## Contributing diff --git a/ruby/red-arrow/lib/arrow/list-array-builder.rb b/ruby/red-arrow/lib/arrow/list-array-builder.rb index 0b1d17f0a5491..d6975327b5130 100644 --- a/ruby/red-arrow/lib/arrow/list-array-builder.rb +++ b/ruby/red-arrow/lib/arrow/list-array-builder.rb @@ -54,8 +54,8 @@ def append_value(*args) when nil append_null when ::Array - return if value.empty? append_value_raw + return if value.empty? @value_builder ||= value_builder @value_builder.append(*value) else diff --git a/ruby/red-arrow/test/test-list-array-builder.rb b/ruby/red-arrow/test/test-list-array-builder.rb index 91105e92bf8d0..9fbd5e0976f5e 100644 --- a/ruby/red-arrow/test/test-list-array-builder.rb +++ b/ruby/red-arrow/test/test-list-array-builder.rb @@ -40,7 +40,7 @@ def setup builder = Arrow::ListArrayBuilder.new(data_type) builder.append_value([]) array = builder.finish - assert_equal([], array[0].to_a) + assert_equal([[]], array.to_a) end end diff --git a/java/gandiva/src/main/cpp/symbols.map b/swift/.editorconfig similarity index 78% rename from java/gandiva/src/main/cpp/symbols.map rename to swift/.editorconfig index e0f5def41f00e..ffaf7e240243c 100644 --- a/java/gandiva/src/main/cpp/symbols.map +++ b/swift/.editorconfig @@ -14,7 +14,15 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -{ - global: extern "C++" { gandiva*; }; Java*; JNI*; - local: *; -}; + +# This is an EditorConfig file: https://editorconfig.org/ + +# See ../.editorconfig for inherited values + +[*.{c,cc,cpp,h,hh,hpp}] +indent_size = 4 +indent_style = space + +[*.swift] +indent_size = 4 +indent_style = space